1 /* 2 * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * ---------------------------------------------------------------------------- 35 * "THE BEER-WARE LICENSE" (Revision 42): 36 * <phk@FreeBSD.ORG> wrote this file. 
As long as you retain this notice you 37 * can do whatever you want with this stuff. If we meet some day, and you think 38 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 39 * ---------------------------------------------------------------------------- 40 * 41 * Copyright (c) 1982, 1986, 1988, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. All advertising materials mentioning features or use of this software 58 * must display the following acknowledgement: 59 * This product includes software developed by the University of 60 * California, Berkeley and its contributors. 61 * 4. Neither the name of the University nor the names of its contributors 62 * may be used to endorse or promote products derived from this software 63 * without specific prior written permission. 64 * 65 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 66 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 68 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 69 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 70 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 71 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 72 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 73 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 74 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 75 * SUCH DAMAGE. 76 * 77 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 78 * $FreeBSD: src/sys/kern/subr_disk.c,v 1.20.2.6 2001/10/05 07:14:57 peter Exp $ 79 * $FreeBSD: src/sys/ufs/ufs/ufs_disksubr.c,v 1.44.2.3 2001/03/05 05:42:19 obrien Exp $ 80 * $DragonFly: src/sys/kern/subr_disk.c,v 1.40 2008/06/05 18:06:32 swildner Exp $ 81 */ 82 83 #include <sys/param.h> 84 #include <sys/systm.h> 85 #include <sys/kernel.h> 86 #include <sys/proc.h> 87 #include <sys/sysctl.h> 88 #include <sys/buf.h> 89 #include <sys/conf.h> 90 #include <sys/disklabel.h> 91 #include <sys/disklabel32.h> 92 #include <sys/disklabel64.h> 93 #include <sys/diskslice.h> 94 #include <sys/diskmbr.h> 95 #include <sys/disk.h> 96 #include <sys/malloc.h> 97 #include <sys/sysctl.h> 98 #include <machine/md_var.h> 99 #include <sys/ctype.h> 100 #include <sys/syslog.h> 101 #include <sys/device.h> 102 #include <sys/msgport.h> 103 #include <sys/msgport2.h> 104 #include <sys/buf2.h> 105 #include <vfs/devfs/devfs.h> 106 107 #include <sys/thread2.h> 108 109 #include <sys/queue.h> 110 #include <sys/lock.h> 111 112 static MALLOC_DEFINE(M_DISK, "disk", "disk data"); 113 114 static void disk_msg_autofree_reply(lwkt_port_t, lwkt_msg_t); 115 static void disk_msg_core(void *); 116 static int disk_probe_slice(struct disk *dp, cdev_t dev, int slice, int reprobe); 117 static void disk_probe(struct disk *dp, int reprobe); 118 static void _setdiskinfo(struct disk *disk, struct disk_info *info); 119 120 static d_open_t 
diskopen; 121 static d_close_t diskclose; 122 static d_ioctl_t diskioctl; 123 static d_strategy_t diskstrategy; 124 static d_psize_t diskpsize; 125 static d_clone_t diskclone; 126 static d_dump_t diskdump; 127 128 static LIST_HEAD(, disk) disklist = LIST_HEAD_INITIALIZER(&disklist); 129 130 static struct dev_ops disk_ops = { 131 { "disk", 0, D_DISK }, 132 .d_open = diskopen, 133 .d_close = diskclose, 134 .d_read = physread, 135 .d_write = physwrite, 136 .d_ioctl = diskioctl, 137 .d_strategy = diskstrategy, 138 .d_dump = diskdump, 139 .d_psize = diskpsize, 140 .d_clone = diskclone 141 }; 142 143 static struct objcache *disk_msg_cache; 144 145 struct objcache_malloc_args disk_msg_malloc_args = { 146 sizeof(struct disk_msg), M_DISK }; 147 148 static struct lwkt_port disk_dispose_port; 149 static struct lwkt_port disk_msg_port; 150 151 152 static int 153 disk_probe_slice(struct disk *dp, cdev_t dev, int slice, int reprobe) 154 { 155 struct disk_info *info = &dp->d_info; 156 struct diskslice *sp = &dp->d_slice->dss_slices[slice]; 157 disklabel_ops_t ops; 158 struct partinfo part; 159 const char *msg; 160 cdev_t ndev; 161 unsigned long i; 162 163 //lp.opaque = NULL; 164 165 ops = &disklabel32_ops; 166 msg = ops->op_readdisklabel(dev, sp, &sp->ds_label, info); 167 if (msg && !strcmp(msg, "no disk label")) { 168 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe_slice: trying with disklabel64\n"); 169 ops = &disklabel64_ops; 170 msg = ops->op_readdisklabel(dev, sp, &sp->ds_label, info); 171 } 172 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe_slice: label: %s\n", (msg)?msg:"is NULL"); 173 if (msg == NULL) { 174 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe_slice: found %d partitions in the label\n", ops->op_getnumparts(sp->ds_label)); 175 if (slice != WHOLE_DISK_SLICE) 176 ops->op_adjust_label_reserved(dp->d_slice, slice, sp); 177 else 178 sp->ds_reserved = 0; 179 180 sp->ds_ops = ops; 181 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe_slice: lp.opaque: %x\n", sp->ds_label.opaque); 182 for 
(i = 0; i < ops->op_getnumparts(sp->ds_label); i++) { 183 ops->op_loadpartinfo(sp->ds_label, i, &part); 184 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe_slice: partinfo says fstype=%d for part %d\n", part.fstype, i); 185 if (part.fstype) { 186 if (reprobe && 187 (ndev = devfs_find_device_by_name("%s%c", 188 dev->si_name, 'a'+ (char)i))) { 189 /* Device already exists and is still valid */ 190 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe_slice: reprobing and device remained valid, mark it\n"); 191 ndev->si_flags |= SI_REPROBE_TEST; 192 } else { 193 ndev = make_dev(&disk_ops, 194 dkmakeminor(dkunit(dp->d_cdev), slice, i), 195 UID_ROOT, GID_OPERATOR, 0640, 196 "%s%c", dev->si_name, 'a'+ (char)i); 197 ndev->si_disk = dp; 198 if (dp->d_info.d_serialno) { 199 make_dev_alias(ndev, "serno/%s.s%d%c", dp->d_info.d_serialno, slice - 1, 'a' + (char)i); 200 } 201 ndev->si_flags |= SI_REPROBE_TEST; 202 } 203 204 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe_slice:end: lp.opaque: %x\n", ndev->si_disk->d_slice->dss_slices[slice].ds_label.opaque); 205 } 206 } 207 } else if (info->d_dsflags & DSO_COMPATLABEL) { 208 msg = NULL; 209 if (sp->ds_size >= 0x100000000ULL) 210 ops = &disklabel64_ops; 211 else 212 ops = &disklabel32_ops; 213 sp->ds_label = ops->op_clone_label(info, sp); 214 } else { 215 if (sp->ds_type == DOSPTYP_386BSD /* XXX */) 216 log(LOG_WARNING, "%s: cannot find label (%s)\n", 217 dev->si_name, msg); 218 } 219 220 if (msg == NULL) { 221 sp->ds_wlabel = FALSE; 222 } 223 224 return (msg ? 
EINVAL : 0); 225 } 226 227 228 static void 229 disk_probe(struct disk *dp, int reprobe) 230 { 231 struct disk_info *info = &dp->d_info; 232 cdev_t dev = dp->d_cdev; 233 cdev_t ndev; 234 int error, i; 235 236 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe called for %s\n", dp->d_cdev->si_name); 237 KKASSERT (info->d_media_blksize != 0); 238 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe: info set!\n"); 239 240 dp->d_slice = dsmakeslicestruct(BASE_SLICE, info); 241 242 error = mbrinit(dev, info, &(dp->d_slice)); 243 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe: &dp->d_slice is: %x, %x\n", &dp->d_slice, dp->d_slice); 244 if (error != 0) { 245 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe: mbrinit() failed with error: %d\n", error); 246 return; 247 } else { 248 devfs_debug(DEVFS_DEBUG_DEBUG, "mbrinit succeeded, found %d slices\n", dp->d_slice->dss_nslices); 249 if (dp->d_slice->dss_nslices == BASE_SLICE) { 250 dp->d_slice->dss_slices[COMPATIBILITY_SLICE].ds_size = info->d_media_blocks; 251 dp->d_slice->dss_slices[COMPATIBILITY_SLICE].ds_reserved = 0; 252 if (reprobe && 253 (ndev = devfs_find_device_by_name("%ss%d", 254 dev->si_name, COMPATIBILITY_SLICE))) { 255 /* Device already exists and is still valid */ 256 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe: reprobing and device remained valid, mark it\n"); 257 ndev->si_flags |= SI_REPROBE_TEST; 258 } else { 259 ndev = make_dev(&disk_ops, 260 dkmakewholeslice(dkunit(dev), COMPATIBILITY_SLICE), 261 UID_ROOT, GID_OPERATOR, 0640, 262 "%ss%d", dev->si_name, COMPATIBILITY_SLICE); 263 264 ndev->si_disk = dp; 265 if (dp->d_info.d_serialno) { 266 make_dev_alias(ndev, "serno/%s.s%d", 267 dp->d_info.d_serialno, 268 COMPATIBILITY_SLICE); 269 } 270 ndev->si_flags |= SI_REPROBE_TEST; 271 } 272 273 dp->d_slice->dss_slices[COMPATIBILITY_SLICE].ds_dev = ndev; 274 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe: type of slice is :%x\n", dp->d_slice->dss_slices[COMPATIBILITY_SLICE].ds_type ); 275 276 dp->d_slice->dss_first_bsd_slice = COMPATIBILITY_SLICE; 
277 disk_probe_slice(dp, ndev, COMPATIBILITY_SLICE, reprobe); 278 279 } 280 for (i = BASE_SLICE; i < dp->d_slice->dss_nslices; i++) { 281 if (reprobe && 282 (ndev = devfs_find_device_by_name("%ss%d", 283 dev->si_name, i-1))) { 284 /* Device already exists and is still valid */ 285 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe: reprobing and device remained valid, mark it\n"); 286 ndev->si_flags |= SI_REPROBE_TEST; 287 } else { 288 ndev = make_dev(&disk_ops, 289 dkmakewholeslice(dkunit(dev), i), 290 UID_ROOT, GID_OPERATOR, 0640, 291 "%ss%d", dev->si_name, i-1); 292 if (dp->d_info.d_serialno) { 293 make_dev_alias(ndev, "serno/%s.s%d", 294 dp->d_info.d_serialno, 295 i - 1); 296 } 297 ndev->si_disk = dp; 298 ndev->si_flags |= SI_REPROBE_TEST; 299 } 300 dp->d_slice->dss_slices[i].ds_reserved = 0; 301 dp->d_slice->dss_slices[i].ds_dev = ndev; 302 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_probe-> type of slice is :%x\n", dp->d_slice->dss_slices[i].ds_type ); 303 if (dp->d_slice->dss_slices[i].ds_type == DOSPTYP_386BSD) { 304 if (!dp->d_slice->dss_first_bsd_slice) 305 dp->d_slice->dss_first_bsd_slice = i; 306 disk_probe_slice(dp, ndev, i, reprobe); 307 } 308 } 309 } 310 } 311 312 313 static void 314 disk_msg_core(void *arg) 315 { 316 uint8_t run = 1; 317 struct disk *dp; 318 struct diskslice *sp; 319 disk_msg_t msg; 320 321 322 lwkt_initport_thread(&disk_msg_port, curthread); 323 wakeup(curthread); 324 325 while (run) { 326 msg = (disk_msg_t)lwkt_waitport(&disk_msg_port, 0); 327 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_msg_core, new msg: %x\n", (unsigned int)msg->hdr.u.ms_result); 328 329 switch (msg->hdr.u.ms_result) { 330 331 case DISK_DISK_PROBE: 332 dp = (struct disk *)msg->load; 333 disk_probe(dp, 0); 334 break; 335 336 case DISK_DISK_DESTROY: 337 dp = (struct disk *)msg->load; 338 devfs_destroy_subnames(dp->d_cdev->si_name); 339 devfs_destroy_dev(dp->d_cdev); 340 LIST_REMOVE(dp, d_list); 341 //devfs_destroy_dev(dp->d_rawdev); //XXX: needed? when? 
342 if (dp->d_info.d_serialno) { 343 kfree(dp->d_info.d_serialno, M_TEMP); 344 dp->d_info.d_serialno = NULL; 345 } 346 break; 347 348 case DISK_UNPROBE: 349 dp = (struct disk *)msg->load; 350 devfs_destroy_subnames(dp->d_cdev->si_name); 351 break; 352 353 case DISK_SLICE_REPROBE: 354 dp = (struct disk *)msg->load; 355 sp = (struct diskslice *)msg->load2; 356 devfs_clr_subnames_flag(sp->ds_dev->si_name, SI_REPROBE_TEST); 357 devfs_debug(DEVFS_DEBUG_DEBUG, 358 "DISK_SLICE_REPROBE: %s\n", 359 sp->ds_dev->si_name); 360 disk_probe_slice(dp, sp->ds_dev, dkslice(sp->ds_dev), 1); 361 devfs_destroy_subnames_without_flag(sp->ds_dev->si_name, 362 SI_REPROBE_TEST); 363 break; 364 365 case DISK_DISK_REPROBE: 366 dp = (struct disk *)msg->load; 367 devfs_clr_subnames_flag(dp->d_cdev->si_name, SI_REPROBE_TEST); 368 devfs_debug(DEVFS_DEBUG_DEBUG, 369 "DISK_DISK_REPROBE: %s\n", 370 dp->d_cdev->si_name); 371 disk_probe(dp, 1); 372 devfs_destroy_subnames_without_flag(dp->d_cdev->si_name, 373 SI_REPROBE_TEST); 374 break; 375 376 case DISK_SYNC: 377 break; 378 379 default: 380 devfs_debug(DEVFS_DEBUG_WARNING, "disk_msg_core: unknown message received at core\n"); 381 } 382 383 lwkt_replymsg((lwkt_msg_t)msg, 0); 384 } 385 lwkt_exit(); 386 } 387 388 389 /** 390 * Acts as a message drain. Any message that is replied to here gets destroyed and 391 * the memory freed. 
392 **/ 393 static void 394 disk_msg_autofree_reply(lwkt_port_t port, lwkt_msg_t msg) 395 { 396 objcache_put(disk_msg_cache, msg); 397 } 398 399 400 void 401 disk_msg_send(uint32_t cmd, void *load, void *load2) 402 { 403 disk_msg_t disk_msg; 404 lwkt_port_t port = &disk_msg_port; 405 406 disk_msg = objcache_get(disk_msg_cache, M_WAITOK); 407 408 lwkt_initmsg(&disk_msg->hdr, &disk_dispose_port, 0); 409 410 disk_msg->hdr.u.ms_result = cmd; 411 disk_msg->load = load; 412 disk_msg->load2 = load2; 413 KKASSERT(port); 414 lwkt_sendmsg(port, (lwkt_msg_t)disk_msg); 415 } 416 417 void 418 disk_msg_send_sync(uint32_t cmd, void *load, void *load2) 419 { 420 struct lwkt_port rep_port; 421 disk_msg_t disk_msg = objcache_get(disk_msg_cache, M_WAITOK); 422 disk_msg_t msg_incoming; 423 lwkt_port_t port = &disk_msg_port; 424 425 lwkt_initport_thread(&rep_port, curthread); 426 lwkt_initmsg(&disk_msg->hdr, &rep_port, 0); 427 428 disk_msg->hdr.u.ms_result = cmd; 429 disk_msg->load = load; 430 disk_msg->load2 = load2; 431 432 KKASSERT(port); 433 lwkt_sendmsg(port, (lwkt_msg_t)disk_msg); 434 msg_incoming = lwkt_waitport(&rep_port, 0); 435 } 436 437 /* 438 * Create a raw device for the dev_ops template (which is returned). Also 439 * create a slice and unit managed disk and overload the user visible 440 * device space with it. 441 * 442 * NOTE: The returned raw device is NOT a slice and unit managed device. 443 * It is an actual raw device representing the raw disk as specified by 444 * the passed dev_ops. The disk layer not only returns such a raw device, 445 * it also uses it internally when passing (modified) commands through. 
446 */ 447 cdev_t 448 disk_create(int unit, struct disk *dp, struct dev_ops *raw_ops) 449 { 450 cdev_t rawdev; 451 452 rawdev = make_only_dev(raw_ops, dkmakewholedisk(unit), 453 UID_ROOT, GID_OPERATOR, 0640, 454 "%s%d", raw_ops->head.name, unit); 455 456 457 bzero(dp, sizeof(*dp)); 458 459 dp->d_rawdev = rawdev; 460 dp->d_raw_ops = raw_ops; 461 dp->d_dev_ops = &disk_ops; 462 dp->d_cdev = make_dev(&disk_ops, 463 dkmakewholedisk(unit), 464 UID_ROOT, GID_OPERATOR, 0640, 465 "%s%d", raw_ops->head.name, unit); 466 467 dp->d_cdev->si_disk = dp; 468 469 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_create called for %s\n", dp->d_cdev->si_name); 470 LIST_INSERT_HEAD(&disklist, dp, d_list); 471 return (dp->d_rawdev); 472 } 473 474 475 static void 476 _setdiskinfo(struct disk *disk, struct disk_info *info) 477 { 478 char *oldserialno; 479 480 devfs_debug(DEVFS_DEBUG_DEBUG, 481 "_setdiskinfo called for disk -1-: %x\n", disk); 482 oldserialno = disk->d_info.d_serialno; 483 bcopy(info, &disk->d_info, sizeof(disk->d_info)); 484 info = &disk->d_info; 485 486 /* 487 * The serial number is duplicated so the caller can throw 488 * their copy away. 489 */ 490 if (info->d_serialno && info->d_serialno[0]) { 491 info->d_serialno = kstrdup(info->d_serialno, M_TEMP); 492 if (disk->d_cdev) { 493 make_dev_alias(disk->d_cdev, "serno/%s", 494 info->d_serialno); 495 } 496 } else { 497 info->d_serialno = NULL; 498 } 499 if (oldserialno) 500 kfree(oldserialno, M_TEMP); 501 502 /* 503 * The caller may set d_media_size or d_media_blocks and we 504 * calculate the other. 
505 */ 506 KKASSERT(info->d_media_size == 0 || info->d_media_blksize == 0); 507 if (info->d_media_size == 0 && info->d_media_blocks) { 508 info->d_media_size = (u_int64_t)info->d_media_blocks * 509 info->d_media_blksize; 510 } else if (info->d_media_size && info->d_media_blocks == 0 && 511 info->d_media_blksize) { 512 info->d_media_blocks = info->d_media_size / 513 info->d_media_blksize; 514 } 515 516 /* 517 * The si_* fields for rawdev are not set until after the 518 * disk_create() call, so someone using the cooked version 519 * of the raw device (i.e. da0s0) will not get the right 520 * si_iosize_max unless we fix it up here. 521 */ 522 if (disk->d_cdev && disk->d_rawdev && 523 disk->d_cdev->si_iosize_max == 0) { 524 disk->d_cdev->si_iosize_max = disk->d_rawdev->si_iosize_max; 525 disk->d_cdev->si_bsize_phys = disk->d_rawdev->si_bsize_phys; 526 disk->d_cdev->si_bsize_best = disk->d_rawdev->si_bsize_best; 527 } 528 } 529 530 /* 531 * Disk drivers must call this routine when media parameters are available 532 * or have changed. 533 */ 534 void 535 disk_setdiskinfo(struct disk *disk, struct disk_info *info) 536 { 537 _setdiskinfo(disk, info); 538 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_setdiskinfo called for disk -2-: %x\n", disk); 539 disk_msg_send(DISK_DISK_PROBE, disk, NULL); 540 } 541 542 void 543 disk_setdiskinfo_sync(struct disk *disk, struct disk_info *info) 544 { 545 _setdiskinfo(disk, info); 546 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_setdiskinfo_sync called for disk -2-: %x\n", disk); 547 disk_msg_send_sync(DISK_DISK_PROBE, disk, NULL); 548 } 549 550 /* 551 * This routine is called when an adapter detaches. The higher level 552 * managed disk device is destroyed while the lower level raw device is 553 * released. 
554 */ 555 void 556 disk_destroy(struct disk *disk) 557 { 558 disk_msg_send_sync(DISK_DISK_DESTROY, disk, NULL); 559 return; 560 } 561 562 int 563 disk_dumpcheck(cdev_t dev, u_int64_t *count, u_int64_t *blkno, u_int *secsize) 564 { 565 struct partinfo pinfo; 566 int error; 567 568 bzero(&pinfo, sizeof(pinfo)); 569 error = dev_dioctl(dev, DIOCGPART, (void *)&pinfo, 0, proc0.p_ucred); 570 if (error) 571 return (error); 572 if (pinfo.media_blksize == 0) 573 return (ENXIO); 574 *count = (u_int64_t)Maxmem * PAGE_SIZE / pinfo.media_blksize; 575 if (dumplo64 < pinfo.reserved_blocks || 576 dumplo64 + *count > pinfo.media_blocks) { 577 return (ENOSPC); 578 } 579 *blkno = dumplo64 + pinfo.media_offset / pinfo.media_blksize; 580 *secsize = pinfo.media_blksize; 581 return (0); 582 } 583 584 void 585 disk_unprobe(struct disk *disk) 586 { 587 if (disk == NULL) 588 return; 589 590 disk_msg_send_sync(DISK_UNPROBE, disk, NULL); 591 } 592 593 void 594 disk_invalidate (struct disk *disk) 595 { 596 devfs_debug(DEVFS_DEBUG_INFO, "disk_invalidate for %s\n", disk->d_cdev->si_name); 597 if (disk->d_slice) 598 dsgone(&disk->d_slice); 599 } 600 601 struct disk * 602 disk_enumerate(struct disk *disk) 603 { 604 if (!disk) 605 return (LIST_FIRST(&disklist)); 606 else 607 return (LIST_NEXT(disk, d_list)); 608 } 609 610 static 611 int 612 sysctl_disks(SYSCTL_HANDLER_ARGS) 613 { 614 struct disk *disk; 615 int error, first; 616 617 disk = NULL; 618 first = 1; 619 620 while ((disk = disk_enumerate(disk))) { 621 if (!first) { 622 error = SYSCTL_OUT(req, " ", 1); 623 if (error) 624 return error; 625 } else { 626 first = 0; 627 } 628 error = SYSCTL_OUT(req, disk->d_rawdev->si_name, 629 strlen(disk->d_rawdev->si_name)); 630 if (error) 631 return error; 632 } 633 error = SYSCTL_OUT(req, "", 1); 634 return error; 635 } 636 637 SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, 638 sysctl_disks, "A", "names of available disks"); 639 640 /* 641 * Open a disk device or partition. 
642 */ 643 static 644 int 645 diskopen(struct dev_open_args *ap) 646 { 647 cdev_t dev = ap->a_head.a_dev; 648 struct disk *dp; 649 int error; 650 651 devfs_debug(DEVFS_DEBUG_DEBUG, "diskopen: name is %s\n", dev->si_name); 652 653 /* 654 * dp can't be NULL here XXX. 655 */ 656 dp = dev->si_disk; 657 if (dp == NULL) 658 return (ENXIO); 659 error = 0; 660 661 /* 662 * Deal with open races 663 */ 664 while (dp->d_flags & DISKFLAG_LOCK) { 665 dp->d_flags |= DISKFLAG_WANTED; 666 error = tsleep(dp, PCATCH, "diskopen", hz); 667 if (error) 668 return (error); 669 } 670 dp->d_flags |= DISKFLAG_LOCK; 671 672 devfs_debug(DEVFS_DEBUG_DEBUG, "diskopen: -2- name is %s\n", dev->si_name); 673 674 /* 675 * Open the underlying raw device. 676 */ 677 if (!dsisopen(dp->d_slice)) { 678 #if 0 679 if (!pdev->si_iosize_max) 680 pdev->si_iosize_max = dev->si_iosize_max; 681 #endif 682 error = dev_dopen(dp->d_rawdev, ap->a_oflags, 683 ap->a_devtype, ap->a_cred); 684 } 685 #if 0 686 /* 687 * Inherit properties from the underlying device now that it is 688 * open. 
689 */ 690 dev_dclone(dev); 691 #endif 692 693 if (error) 694 goto out; 695 error = dsopen(dev, ap->a_devtype, dp->d_info.d_dsflags, 696 &dp->d_slice, &dp->d_info); 697 if (!dsisopen(dp->d_slice)) { 698 dev_dclose(dp->d_rawdev, ap->a_oflags, ap->a_devtype); 699 } 700 out: 701 dp->d_flags &= ~DISKFLAG_LOCK; 702 if (dp->d_flags & DISKFLAG_WANTED) { 703 dp->d_flags &= ~DISKFLAG_WANTED; 704 wakeup(dp); 705 } 706 707 return(error); 708 } 709 710 /* 711 * Close a disk device or partition 712 */ 713 static 714 int 715 diskclose(struct dev_close_args *ap) 716 { 717 cdev_t dev = ap->a_head.a_dev; 718 struct disk *dp; 719 int error; 720 721 error = 0; 722 dp = dev->si_disk; 723 724 devfs_debug(DEVFS_DEBUG_DEBUG, "diskclose: name %s\n", dev->si_name); 725 726 dsclose(dev, ap->a_devtype, dp->d_slice); 727 if (!dsisopen(dp->d_slice)) { 728 devfs_debug(DEVFS_DEBUG_DEBUG, "diskclose is closing underlying device\n"); 729 error = dev_dclose(dp->d_rawdev, ap->a_fflag, ap->a_devtype); 730 } 731 return (error); 732 } 733 734 /* 735 * First execute the ioctl on the disk device, and if it isn't supported 736 * try running it on the backing device. 
737 */ 738 static 739 int 740 diskioctl(struct dev_ioctl_args *ap) 741 { 742 cdev_t dev = ap->a_head.a_dev; 743 struct disk *dp; 744 int error; 745 746 dp = dev->si_disk; 747 if (dp == NULL) 748 return (ENXIO); 749 750 devfs_debug(DEVFS_DEBUG_DEBUG, "diskioctl: cmd is: %x (name: %s)\n", ap->a_cmd, dev->si_name); 751 devfs_debug(DEVFS_DEBUG_DEBUG, "diskioctl: &dp->d_slice is: %x, %x\n", &dp->d_slice, dp->d_slice); 752 753 devfs_debug(DEVFS_DEBUG_DEBUG, "diskioctl:1: says lp.opaque is: %x\n", dp->d_slice->dss_slices[0].ds_label.opaque); 754 755 error = dsioctl(dev, ap->a_cmd, ap->a_data, ap->a_fflag, 756 &dp->d_slice, &dp->d_info); 757 758 devfs_debug(DEVFS_DEBUG_DEBUG, "diskioctl:2: says lp.opaque is: %x\n", dp->d_slice->dss_slices[0].ds_label.opaque); 759 760 if (error == ENOIOCTL) { 761 devfs_debug(DEVFS_DEBUG_DEBUG, "diskioctl: going for dev_dioctl instead!\n"); 762 error = dev_dioctl(dp->d_rawdev, ap->a_cmd, ap->a_data, 763 ap->a_fflag, ap->a_cred); 764 } 765 return (error); 766 } 767 768 /* 769 * Execute strategy routine 770 */ 771 static 772 int 773 diskstrategy(struct dev_strategy_args *ap) 774 { 775 cdev_t dev = ap->a_head.a_dev; 776 struct bio *bio = ap->a_bio; 777 struct bio *nbio; 778 struct disk *dp; 779 780 dp = dev->si_disk; 781 782 if (dp == NULL) { 783 bio->bio_buf->b_error = ENXIO; 784 bio->bio_buf->b_flags |= B_ERROR; 785 biodone(bio); 786 return(0); 787 } 788 KKASSERT(dev->si_disk == dp); 789 790 /* 791 * The dscheck() function will also transform the slice relative 792 * block number i.e. bio->bio_offset into a block number that can be 793 * passed directly to the underlying raw device. If dscheck() 794 * returns NULL it will have handled the bio for us (e.g. EOF 795 * or error due to being beyond the device size). 796 */ 797 if ((nbio = dscheck(dev, bio, dp->d_slice)) != NULL) { 798 dev_dstrategy(dp->d_rawdev, nbio); 799 } else { 800 devfs_debug(DEVFS_DEBUG_DEBUG, "diskstrategy: dscheck NULL!!! 
biodone time!\n"); 801 biodone(bio); 802 } 803 return(0); 804 } 805 806 /* 807 * Return the partition size in ?blocks? 808 */ 809 static 810 int 811 diskpsize(struct dev_psize_args *ap) 812 { 813 cdev_t dev = ap->a_head.a_dev; 814 struct disk *dp; 815 816 dp = dev->si_disk; 817 if (dp == NULL) 818 return(ENODEV); 819 ap->a_result = dssize(dev, &dp->d_slice); 820 return(0); 821 } 822 823 /* 824 * When new device entries are instantiated, make sure they inherit our 825 * si_disk structure and block and iosize limits from the raw device. 826 * 827 * This routine is always called synchronously in the context of the 828 * client. 829 * 830 * XXX The various io and block size constraints are not always initialized 831 * properly by devices. 832 */ 833 static 834 int 835 diskclone(struct dev_clone_args *ap) 836 { 837 cdev_t dev = ap->a_head.a_dev; 838 struct disk *dp; 839 dp = dev->si_disk; 840 841 KKASSERT(dp != NULL); 842 dev->si_disk = dp; 843 dev->si_iosize_max = dp->d_rawdev->si_iosize_max; 844 dev->si_bsize_phys = dp->d_rawdev->si_bsize_phys; 845 dev->si_bsize_best = dp->d_rawdev->si_bsize_best; 846 return(0); 847 } 848 849 int 850 diskdump(struct dev_dump_args *ap) 851 { 852 cdev_t dev = ap->a_head.a_dev; 853 struct disk *dp = dev->si_disk; 854 int error; 855 856 error = disk_dumpcheck(dev, &ap->a_count, &ap->a_blkno, &ap->a_secsize); 857 if (error == 0) { 858 ap->a_head.a_dev = dp->d_rawdev; 859 error = dev_doperate(&ap->a_head); 860 } 861 862 return(error); 863 } 864 865 866 SYSCTL_INT(_debug_sizeof, OID_AUTO, diskslices, CTLFLAG_RD, 867 0, sizeof(struct diskslices), "sizeof(struct diskslices)"); 868 869 SYSCTL_INT(_debug_sizeof, OID_AUTO, disk, CTLFLAG_RD, 870 0, sizeof(struct disk), "sizeof(struct disk)"); 871 872 873 /* 874 * Seek sort for disks. 875 * 876 * The bio_queue keep two queues, sorted in ascending block order. 
The first
 * queue holds those requests which are positioned after the current block
 * (in the first request); the second, which starts at queue->switch_point,
 * holds requests which came in after their block number was passed.  Thus
 * we implement a one way scan, retracting after reaching the end of the drive
 * to the first request on the second queue, at which time it becomes the
 * first queue.
 *
 * A one-way scan is natural because of the way UNIX read-ahead blocks are
 * allocated.
 *
 * bioq - queue to insert into; its insert_point, switch_point and
 *        last_offset fields steer where the request may go
 * bio  - request to insert, ordered by its bio_offset
 *
 * Requests whose buffer has B_ORDERED set are never sorted; they are
 * appended to the tail of the queue.
 */
void
bioqdisksort(struct bio_queue_head *bioq, struct bio *bio)
{
	struct bio *bq;		/* element the request is inserted after */
	struct bio *bn;		/* successor of bq during the insertion sort */
	struct bio *be;		/* last element of the list being inserted into */

	be = TAILQ_LAST(&bioq->queue, bio_queue);
	/*
	 * If the queue is empty or we are an
	 * ordered transaction, then it's easy.
	 */
	if ((bq = bioq_first(bioq)) == NULL || 
	    (bio->bio_buf->b_flags & B_ORDERED) != 0) {
		bioq_insert_tail(bioq, bio);
		return;
	} else if (bioq->insert_point != NULL) {

		/*
		 * A certain portion of the list is
		 * "locked" to preserve ordering, so
		 * we can only insert after the insert
		 * point.
		 */
		bq = bioq->insert_point;
	} else {

		/*
		 * If we lie before the last removed (currently active)
		 * request, and are not inserting ourselves into the
		 * "locked" portion of the list, then we must add ourselves
		 * to the second request list.
		 */
		if (bio->bio_offset < bioq->last_offset) {
			bq = bioq->switch_point;
			/*
			 * If we are starting a new secondary list,
			 * then it's easy.
			 */
			if (bq == NULL) {
				bioq->switch_point = bio;
				bioq_insert_tail(bioq, bio);
				return;
			}
			/*
			 * If we lie ahead of the current switch point,
			 * insert us before the switch point and move
			 * the switch point.
			 */
			if (bio->bio_offset < bq->bio_offset) {
				bioq->switch_point = bio;
				TAILQ_INSERT_BEFORE(bq, bio, bio_act);
				return;
			}
		} else {
			/*
			 * We belong to the first list, which ends right
			 * before the switch point if there is one.
			 */
			if (bioq->switch_point != NULL)
				be = TAILQ_PREV(bioq->switch_point,
						bio_queue, bio_act);
			/*
			 * If we lie between last_offset and bq,
			 * insert before bq.
			 */
			if (bio->bio_offset < bq->bio_offset) {
				TAILQ_INSERT_BEFORE(bq, bio, bio_act);
				return;
			}
		}
	}

	/*
	 * Request is at/after our current position in the list.
	 * Optimize for sequential I/O by seeing if we go at the tail.
	 */
	if (bio->bio_offset > be->bio_offset) {
		TAILQ_INSERT_AFTER(&bioq->queue, be, bio, bio_act);
		return;
	}

	/* Otherwise, insertion sort */
	while ((bn = TAILQ_NEXT(bq, bio_act)) != NULL) {
		
		/*
		 * We want to go after the current request if it is the end
		 * of the first request list, or if the next request is a
		 * larger cylinder than our request.
		 */
		if (bn == bioq->switch_point
		 || bio->bio_offset < bn->bio_offset)
			break;
		bq = bn;
	}
	TAILQ_INSERT_AFTER(&bioq->queue, bq, bio, bio_act);
}

/*
 * Disk error is the preface to plaintive error messages
 * about failing disk transfers.  It prints messages of the form

hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)

 * if the offset of the error in the transfer and a disk label
 * are both available.  blkdone should be -1 if the position of the error
 * is unknown; the disklabel pointer may be null from drivers that have not
 * been converted to use them.  The message is printed with kprintf
 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
 * The message should be completed (with at least a newline) with kprintf
 * or log(-1, ...), respectively.
 *
 * NOTE(review): this description predates the current implementation,
 * which always uses kprintf, ignores pri and reports the byte offset and
 * byte count of the transfer rather than block numbers.
 *
 * There is no trailing space.
994 */ 995 void 996 diskerr(struct bio *bio, cdev_t dev, const char *what, int pri, int donecnt) 997 { 998 struct buf *bp = bio->bio_buf; 999 const char *term; 1000 1001 switch(bp->b_cmd) { 1002 case BUF_CMD_READ: 1003 term = "read"; 1004 break; 1005 case BUF_CMD_WRITE: 1006 term = "write"; 1007 break; 1008 default: 1009 term = "access"; 1010 break; 1011 } 1012 //sname = dsname(dev, unit, slice, part, partname); 1013 kprintf("%s: %s %sing ", dev->si_name, what, term); 1014 kprintf("offset %012llx for %d", 1015 (long long)bio->bio_offset, 1016 bp->b_bcount); 1017 1018 if (donecnt) 1019 kprintf(" (%d bytes completed)", donecnt); 1020 } 1021 1022 /* 1023 * Locate a disk device 1024 */ 1025 cdev_t 1026 disk_locate(const char *devname) 1027 { 1028 return devfs_find_device_by_name(devname); 1029 } 1030 1031 1032 void 1033 disk_config(void *arg) 1034 { 1035 disk_msg_send_sync(DISK_SYNC, NULL, NULL); 1036 } 1037 1038 1039 static void 1040 disk_init(void) 1041 { 1042 struct thread* td_core; 1043 devfs_debug(DEVFS_DEBUG_DEBUG, "disk_init() called\n"); 1044 1045 disk_msg_cache = objcache_create("disk-msg-cache", 0, 0, 1046 NULL, NULL, NULL, 1047 objcache_malloc_alloc, 1048 objcache_malloc_free, 1049 &disk_msg_malloc_args ); 1050 1051 /* Initialize the reply-only port which acts as a message drain */ 1052 lwkt_initport_replyonly(&disk_dispose_port, disk_msg_autofree_reply); 1053 1054 lwkt_create(disk_msg_core, /*args*/NULL, &td_core, NULL, 1055 0, 0, "disk_msg_core"); 1056 1057 tsleep(td_core, 0, "diskcore", 0); 1058 } 1059 1060 1061 static void 1062 disk_uninit(void) 1063 { 1064 devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_uninit() called\n"); 1065 1066 objcache_destroy(disk_msg_cache); 1067 1068 } 1069 1070 1071 SYSINIT(disk_register, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, disk_init, NULL); 1072 SYSUNINIT(disk_register, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, disk_uninit, NULL); 1073