/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/file.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_os.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_disk.h>
#include <geom/geom_int.h>

#ifndef g_topology_locked
#define	g_topology_locked()	sx_xlocked(&topology_lock)
#endif

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

struct consumer_vdev_elem {
	SLIST_ENTRY(consumer_vdev_elem)	elems;
	vdev_t				*vd;
};

SLIST_HEAD(consumer_priv_t, consumer_vdev_elem);
/* BEGIN CSTYLED */
_Static_assert(sizeof (((struct g_consumer *)NULL)->private)
    == sizeof (struct consumer_priv_t*),
    "consumer_priv_t* can't be stored in g_consumer.private");

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
/* END CSTYLED */

/* Declare local functions */
static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read);

/*
 * Thread local storage used to indicate when a thread is probing geoms
 * for their guids. If NULL, this thread is not tasting geoms. If non-NULL,
 * it is looking for a replacement for the vdev_t* that is its value.
 */
uint_t zfs_geom_probe_vdev_key;
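
/*
 * Update vd->vdev_physpath from the consumer's GEOM::physpath attribute.
 * If the physical path changed, or was previously unset and do_null_update
 * is set, request an async config update so the new path is persisted.
 */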
static void
vdev_geom_set_physpath(vdev_t *vd, struct g_consumer *cp,
    boolean_t do_null_update)
{
	boolean_t needs_update = B_FALSE;
	char *physpath;
	int error, physpath_len;

	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);

		if (old_physpath != NULL) {
			needs_update = (strcmp(old_physpath,
			    vd->vdev_physpath) != 0);
			spa_strfree(old_physpath);
		} else
			needs_update = do_null_update;
	}
	g_free(physpath);

	/*
	 * If the physical path changed, update the config.
	 * Only request an update for previously unset physpaths if
	 * requested by the caller.
	 */
	if (needs_update)
		spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE);

}

static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem;

	priv = (struct consumer_priv_t *)&cp->private;
	if (SLIST_EMPTY(priv))
		return;

	SLIST_FOREACH(elem, priv, elems) {
		vdev_t *vd = elem->vd;
		if (strcmp(attr, "GEOM::physpath") == 0) {
			vdev_geom_set_physpath(vd, cp,
			    /* do_null_update */ B_TRUE);
			return;
		}
	}
}

static void
vdev_geom_resize(struct g_consumer *cp)
{
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem;
	spa_t *spa;
	vdev_t *vd;

	priv = (struct consumer_priv_t *)&cp->private;
	if (SLIST_EMPTY(priv))
		return;

	SLIST_FOREACH(elem, priv, elems) {
		vd = elem->vd;
		if (vd->vdev_state != VDEV_STATE_HEALTHY)
			continue;
		spa = vd->vdev_spa;
		if (!spa->spa_autoexpand)
			continue;
		vdev_online(spa, vd->vdev_guid, ZFS_ONLINE_EXPAND, NULL);
	}
}

static void
vdev_geom_orphan(struct g_consumer *cp)
{
	struct consumer_priv_t *priv;
	// cppcheck-suppress uninitvar
	struct consumer_vdev_elem *elem;

	g_topology_assert();

	priv = (struct consumer_priv_t *)&cp->private;
	if (SLIST_EMPTY(priv))
		/* Vdev close in progress.  Ignore the event. */
		return;

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider.  These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal.  Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	// cppcheck-suppress All
	SLIST_FOREACH(elem, priv, elems) {
		// cppcheck-suppress uninitvar
		vdev_t *vd = elem->vd;

		vd->vdev_remove_wanted = B_TRUE;
		spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
	}
}
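
/*
 * Attach a consumer to the given provider, creating the "zfs::vdev" geom
 * on first use and reusing it afterwards.  The consumer is opened r1w0e1.
 * When sanity checking is requested, providers with an unsupported
 * sectorsize or a mediasize too small for a pool device are rejected.
 */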
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd, boolean_t sanity)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);

	if (sanity) {
		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) {
			ZFS_LOG(1, "Failing attach of %s. "
			    "Incompatible sectorsize %d\n",
			    pp->name, pp->sectorsize);
			return (NULL);
		} else if (pp->mediasize < SPA_MINDEVSIZE) {
			ZFS_LOG(1, "Failing attach of %s. "
			    "Incompatible mediasize %ju\n",
			    pp->name, pp->mediasize);
			return (NULL);
		}
	}

	/* Do we have geom already? No? Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		gp->resize = vdev_geom_resize;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
			    __LINE__, error);
			vdev_geom_detach(cp, B_FALSE);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				vdev_geom_detach(cp, B_FALSE);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	if (vd != NULL)
		vd->vdev_tsd = cp;

	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}
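
/*
 * Drop the consumer's access counts and destroy it once the last reader
 * is gone.  The containing geom is withered when it no longer has any
 * consumers.
 */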
static void
vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read)
{
	struct g_geom *gp;

	g_topology_assert();

	ZFS_LOG(1, "Detaching from %s.",
	    cp->provider && cp->provider->name ? cp->provider->name : "NULL");

	gp = cp->geom;
	if (open_for_read)
		g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroying consumer for %s.",
			    cp->provider->name ?
			    cp->provider->name : "NULL");
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_consumer *cp;
	struct consumer_priv_t *priv;
	struct consumer_vdev_elem *elem, *elem_temp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	vd->vdev_delayed_close = B_FALSE;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(cp->private != NULL, ("%s: cp->private is NULL", __func__));
	priv = (struct consumer_priv_t *)&cp->private;
	vd->vdev_tsd = NULL;
	SLIST_FOREACH_SAFE(elem, priv, elems, elem_temp) {
		if (elem->vd == vd) {
			SLIST_REMOVE(priv, elem, consumer_vdev_elem, elems);
			g_free(elem);
		}
	}

	vdev_geom_detach(cp, B_TRUE);
}

/*
 * Issue one or more bios to the vdev in parallel.
 * cmds, datas, offsets, errors, and sizes are arrays of length ncmds.
 * Each I/O operation is described by parallel entries from each array.
 * There may be more bios actually issued than entries in the arrays.
 */
static void
vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
    off_t *sizes, int *errors, int ncmds)
{
	struct bio **bios;
	uint8_t *p;
	off_t off, maxio, s, end;
	int i, n_bios, j;
	size_t bios_size;

	maxio = maxphys - (maxphys % cp->provider->sectorsize);
	n_bios = 0;

	/* How many bios are required for all commands ? */
	for (i = 0; i < ncmds; i++)
		n_bios += (sizes[i] + maxio - 1) / maxio;

	/* Allocate memory for the bios */
	bios_size = n_bios * sizeof (struct bio *);
	bios = kmem_zalloc(bios_size, KM_SLEEP);

	/* Prepare and issue all of the bios */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		p = datas[i];
		s = sizes[i];
		end = off + s;
		ASSERT0(off % cp->provider->sectorsize);
		ASSERT0(s % cp->provider->sectorsize);

		for (; off < end; off += maxio, p += maxio, s -= maxio, j++) {
			bios[j] = g_alloc_bio();
			bios[j]->bio_cmd = cmds[i];
			bios[j]->bio_done = NULL;
			bios[j]->bio_offset = off;
			bios[j]->bio_length = MIN(s, maxio);
			bios[j]->bio_data = (caddr_t)p;
			g_io_request(bios[j], cp);
		}
	}
	ASSERT3S(j, ==, n_bios);

	/* Wait for all of the bios to complete, and clean them up */
	for (i = j = 0; i < ncmds; i++) {
		off = offsets[i];
		s = sizes[i];
		end = off + s;

		for (; off < end; off += maxio, s -= maxio, j++) {
			errors[i] = biowait(bios[j], "vdev_geom_io") ||
			    errors[i];
			g_destroy_bio(bios[j]);
		}
	}
	kmem_free(bios, bios_size);
}

/*
 * Read the vdev config from a device.  Return the number of valid labels
 * that were found.  The vdev config will be returned in *configp if and
 * only if at least one valid label was found.
 */
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **configp)
{
	struct g_provider *pp;
	nvlist_t *config;
	vdev_phys_t *vdev_lists[VDEV_LABELS];
	char *buf;
	size_t buflen;
	uint64_t psize, state, txg;
	off_t offsets[VDEV_LABELS];
	off_t size;
	off_t sizes[VDEV_LABELS];
	int cmds[VDEV_LABELS];
	int errors[VDEV_LABELS];
	int l, nlabels;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));

	size = sizeof (*vdev_lists[0]) + pp->sectorsize -
	    ((sizeof (*vdev_lists[0]) - 1) % pp->sectorsize) - 1;

	buflen = sizeof (vdev_lists[0]->vp_nvlist);

	/* Create all of the IO requests */
	for (l = 0; l < VDEV_LABELS; l++) {
		cmds[l] = BIO_READ;
		vdev_lists[l] = kmem_alloc(size, KM_SLEEP);
		offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE;
		sizes[l] = size;
		errors[l] = 0;
		ASSERT0(offsets[l] % pp->sectorsize);
	}

	/* Issue the IO requests */
	vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors,
	    VDEV_LABELS);

	/* Parse the labels */
	config = *configp = NULL;
	nlabels = 0;
	for (l = 0; l < VDEV_LABELS; l++) {
		if (errors[l] != 0)
			continue;

		buf = vdev_lists[l]->vp_nvlist;

		if (nvlist_unpack(buf, buflen, &config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(config);
			continue;
		}

		if (state != POOL_STATE_SPARE &&
		    state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(config);
			continue;
		}

		if (*configp != NULL)
			nvlist_free(*configp);
		*configp = config;
		nlabels++;
	}

	/* Free the label storage */
	for (l = 0; l < VDEV_LABELS; l++)
		kmem_free(vdev_lists[l], size);

	return (nlabels);
}
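
/*
 * Grow the array of config nvlists so that index "id" is valid,
 * preserving any existing entries.
 */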
static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof (nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof (void *));
	*configs = new_configs;
	*count = id + 1;
}

static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t *known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid;
	uint64_t id, txg, known_txg;
	char *pname;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	txg = fnvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	if ((*configs)[id] != NULL) {
		known_txg = fnvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}
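
/*
 * Taste all GEOM providers in the system, collecting the newest label
 * config for each top-level vdev of the named pool, indexed by vdev id.
 * Returns 0 if at least one config was found, ENOENT otherwise.
 */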
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int nlabels;

	DROP_GIANT();
	g_topology_lock();

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				zcp = vdev_geom_attach(pp, NULL, B_TRUE);
				if (zcp == NULL)
					continue;
				g_topology_unlock();
				nlabels = vdev_geom_read_config(zcp,
				    &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach(zcp, B_TRUE);
				if (nlabels == 0)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

enum match {
	NO_MATCH = 0,		/* No matching labels found */
	TOPGUID_MATCH = 1,	/* Labels match top guid, not vdev guid */
	ZERO_MATCH = 1,		/* Should never be returned */
	ONE_MATCH = 2,		/* 1 label matching the vdev_guid */
	TWO_MATCH = 3,		/* 2 labels matching the vdev_guid */
	THREE_MATCH = 4,	/* 3 labels matching the vdev_guid */
	FULL_MATCH = 5		/* all labels match the vdev_guid */
};

static enum match
vdev_attach_ok(vdev_t *vd, struct g_provider *pp)
{
	nvlist_t *config;
	uint64_t pool_guid, top_guid, vdev_guid;
	struct g_consumer *cp;
	int nlabels;

	cp = vdev_geom_attach(pp, NULL, B_TRUE);
	if (cp == NULL) {
		ZFS_LOG(1, "Unable to attach tasting instance to %s.",
		    pp->name);
		return (NO_MATCH);
	}
	g_topology_unlock();
	nlabels = vdev_geom_read_config(cp, &config);
	g_topology_lock();
	vdev_geom_detach(cp, B_TRUE);
	if (nlabels == 0) {
		ZFS_LOG(1, "Unable to read config from %s.", pp->name);
		return (NO_MATCH);
	}

	pool_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid);
	top_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid);
	vdev_guid = 0;
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	nvlist_free(config);

	/*
	 * Check that the label's pool guid matches the desired guid.
	 * Inactive spares and L2ARCs do not have any pool guid in the label.
	 */
	if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) {
		ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.",
		    pp->name,
		    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid);
		return (NO_MATCH);
	}

	/*
	 * Check that the label's vdev guid matches the desired guid.
	 * The second condition handles a possible race on vdev detach, when
	 * the remaining vdev receives the GUID of the destroyed top-level
	 * mirror vdev.
	 */
	if (vdev_guid == vd->vdev_guid) {
		ZFS_LOG(1, "guids match for provider %s.", pp->name);
		return (ZERO_MATCH + nlabels);
	} else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) {
		ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name);
		return (TOPGUID_MATCH);
	}
	ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.",
	    pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid);
	return (NO_MATCH);
}
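
/*
 * Search all GEOM providers for one whose label matches this vdev's pool
 * and vdev GUIDs, preferring the provider with the most matching labels
 * and, on a tie, the one whose name matches vd->vdev_path.
 */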
static struct g_consumer *
vdev_geom_attach_by_guids(vdev_t *vd)
{
	struct g_class *mp;
	struct g_geom *gp;
	struct g_provider *pp, *best_pp;
	struct g_consumer *cp;
	const char *vdpath;
	enum match match, best_match;

	g_topology_assert();

	vdpath = vd->vdev_path + sizeof ("/dev/") - 1;
	cp = NULL;
	best_pp = NULL;
	best_match = NO_MATCH;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				match = vdev_attach_ok(vd, pp);
				if (match > best_match) {
					best_match = match;
					best_pp = pp;
				} else if (match == best_match) {
					if (strcmp(pp->name, vdpath) == 0) {
						best_pp = pp;
					}
				}
				if (match == FULL_MATCH)
					goto out;
			}
		}
	}

out:
	if (best_pp) {
		cp = vdev_geom_attach(best_pp, vd, B_TRUE);
		if (cp == NULL) {
			printf("ZFS WARNING: Unable to attach to %s.\n",
			    best_pp->name);
		}
	}
	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
	    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guids(vd);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid, cp->provider->name);
	} else {
		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = NULL;
	pp = g_provider_by_name(vd->vdev_path + sizeof ("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH)
			cp = vdev_geom_attach(pp, vd, B_FALSE);
	}

	return (cp);
}
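
/*
 * Open the GEOM provider backing this vdev.  When the vdev looks newly
 * added (or the pool is being split), the provider is found by name alone;
 * otherwise the label GUIDs must match, with a fallback search of all
 * providers in case the disk moved.  On success, report the device size,
 * ashift values, and TRIM/rotation capabilities back to the caller.
 */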
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	int error, has_trim;
	uint16_t rate;

	/*
	 * Set the TLS to indicate downstack that we
	 * should not access zvols
	 */
	VERIFY0(tsd_set(zfs_geom_probe_vdev_key, vd));

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || strncmp(vd->vdev_path, "/dev/", 5) != 0) {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	/*
	 * Reopen the device if it's not currently open. Otherwise,
	 * just update the physical size of the device.
	 */
	if ((cp = vd->vdev_tsd) != NULL) {
		ASSERT(vd->vdev_reopening);
		goto skip_open;
	}

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	if (vd->vdev_spa->spa_is_splitting ||
	    ((vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
	    (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
	    vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)))) {
		/*
		 * We are dealing with a vdev that hasn't been previously
		 * opened (since boot), and we are not loading an
		 * existing pool configuration.  This looks like a
		 * vdev add operation to a new or existing pool.
		 * Assume the user really wants to do this, and find
		 * GEOM provider by its name, ignoring GUID mismatches.
		 *
		 * XXPOLICY: It would be safer to only allow a device
		 *           that is unlabeled or labeled but missing
		 *           GUID information to be opened in this fashion,
		 *           unless we are doing a split, in which case we
		 *           should allow any guid.
		 */
		cp = vdev_geom_open_by_path(vd, 0);
	} else {
		/*
		 * Try using the recorded path for this device, but only
		 * accept it if its label data contains the expected GUIDs.
		 */
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected GUIDs. The disks might have merely
			 * moved around so try all other GEOM providers
			 * to find one with the right GUIDs.
			 */
			cp = vdev_geom_open_by_guids(vd);
		}
	}

	/* Clear the TLS now that tasting is done */
	VERIFY0(tsd_set(zfs_geom_probe_vdev_key, NULL));

	if (cp == NULL) {
		ZFS_LOG(1, "Vdev %s not found.", vd->vdev_path);
		error = ENOENT;
	} else {
		struct consumer_priv_t *priv;
		struct consumer_vdev_elem *elem;
		int spamode;

		priv = (struct consumer_priv_t *)&cp->private;
		if (cp->private == NULL)
			SLIST_INIT(priv);
		elem = g_malloc(sizeof (*elem), M_WAITOK|M_ZERO);
		elem->vd = vd;
		SLIST_INSERT_HEAD(priv, elem, elems);

		spamode = spa_mode(vd->vdev_spa);
		if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
		    !ISP2(cp->provider->sectorsize)) {
			ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
			    cp->provider->name);

			vdev_geom_close_locked(vd);
			error = EINVAL;
			cp = NULL;
		} else if (cp->acw == 0 && (spamode & FWRITE) != 0) {
			int i;

			for (i = 0; i < 5; i++) {
				error = g_access(cp, 0, 1, 0);
				if (error == 0)
					break;
				g_topology_unlock();
				tsleep(vd, 0, "vdev", hz / 2);
				g_topology_lock();
			}
			if (error != 0) {
				printf("ZFS WARNING: Unable to open %s for "
				    "writing (error=%d).\n",
				    cp->provider->name, error);
				vdev_geom_close_locked(vd);
				cp = NULL;
			}
		}
	}

	/* Fetch initial physical path information for this device. */
	if (cp != NULL) {
		vdev_geom_attrchanged(cp, "GEOM::physpath");

		/* Set other GEOM characteristics */
		vdev_geom_set_physpath(vd, cp, /* do_null_update */ B_FALSE);
	}

	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		vdev_dbgmsg(vd, "vdev_geom_open: failed to open [error=%d]",
		    error);
		return (error);
	}
skip_open:
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize && pp->stripesize > (1 << *logical_ashift) &&
	    ISP2(pp->stripesize) && pp->stripesize <= (1 << ASHIFT_MAX) &&
	    pp->stripeoffset == 0)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache settings, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	/* Inform the ZIO pipeline that we are non-rotational. */
	error = g_getattr("GEOM::rotation_rate", cp, &rate);
	if (error == 0 && rate == DISK_RR_NON_ROTATING)
		vd->vdev_nonrot = B_TRUE;
	else
		vd->vdev_nonrot = B_FALSE;

	/* Set when device reports it supports TRIM. */
	error = g_getattr("GEOM::candelete", cp, &has_trim);
	vd->vdev_has_trim = (error == 0 && has_trim);

	/* Set when device reports it supports secure TRIM. */
	/* unavailable on FreeBSD */
	vd->vdev_has_securetrim = B_FALSE;

	return (0);
}
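
/*
 * Close the vdev's consumer.  During a reopen the consumer is kept for
 * vdev_geom_open() to reuse, unless it has been orphaned or its provider
 * has an error.
 */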
static void
vdev_geom_close(vdev_t *vd)
{
	struct g_consumer *cp;
	boolean_t locked;

	cp = vd->vdev_tsd;

	DROP_GIANT();
	locked = g_topology_locked();
	if (!locked)
		g_topology_lock();

	if (!vd->vdev_reopening ||
	    (cp != NULL && ((cp->flags & G_CF_ORPHAN) != 0 ||
	    (cp->provider != NULL && cp->provider->error != 0))))
		vdev_geom_close_locked(vd);

	if (!locked)
		g_topology_unlock();
	PICKUP_GIANT();
}

static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = SET_ERROR(EIO);

	switch (zio->io_error) {
	case ENOTSUP:
		/*
		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
		 * that future attempts will never succeed. In this case
		 * we set a persistent flag so that we don't bother with
		 * requests in the future.
		 */
		switch (bp->bio_cmd) {
		case BIO_FLUSH:
			vd->vdev_nowritecache = B_TRUE;
			break;
		case BIO_DELETE:
			break;
		}
		break;
	case ENXIO:
		if (!vd->vdev_remove_wanted) {
			/*
			 * If the provider's error is set we assume it is
			 * being removed.
			 */
			if (bp->bio_to->error != 0) {
				vd->vdev_remove_wanted = B_TRUE;
				spa_async_request(zio->io_spa,
				    SPA_ASYNC_REMOVE);
			} else if (!vd->vdev_delayed_close) {
				vd->vdev_delayed_close = B_TRUE;
			}
		}
		break;
	}

	/*
	 * We have to split bio freeing into two parts, because the ABD code
	 * cannot be called in this context and vdev_op_io_done is not called
	 * for ZIO_TYPE_IOCTL zios.
	 */
	if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) {
		g_destroy_bio(bp);
		zio->io_bio = NULL;
	}
	zio_delay_interrupt(zio);
}
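
/*
 * Map a ZIO onto a GEOM bio: reads and writes become BIO_READ/BIO_WRITE,
 * TRIM becomes BIO_DELETE, and the flush-cache ioctl becomes BIO_FLUSH.
 * Completion is reported asynchronously via vdev_geom_io_intr().
 */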
static void
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;

	vd = zio->io_vd;

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		} else {
			switch (zio->io_cmd) {
			case DKIOCFLUSHWRITECACHE:
				if (zfs_nocacheflush ||
				    vdev_geom_bio_flush_disable)
					break;
				if (vd->vdev_nowritecache) {
					zio->io_error = SET_ERROR(ENOTSUP);
					break;
				}
				goto sendreq;
			default:
				zio->io_error = SET_ERROR(ENOTSUP);
			}
		}

		zio_execute(zio);
		return;
	case ZIO_TYPE_TRIM:
		if (!vdev_geom_bio_delete_disable) {
			goto sendreq;
		}
		zio_execute(zio);
		return;
	default:
		;
		/* PASSTHROUGH --- placate compiler */
	}
sendreq:
	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE ||
	    zio->io_type == ZIO_TYPE_TRIM ||
	    zio->io_type == ZIO_TYPE_IOCTL);

	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = SET_ERROR(ENXIO);
		zio_interrupt(zio);
		return;
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		zio->io_target_timestamp = zio_handle_io_delay(zio);
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		if (zio->io_type == ZIO_TYPE_READ) {
			bp->bio_cmd = BIO_READ;
			bp->bio_data =
			    abd_borrow_buf(zio->io_abd, zio->io_size);
		} else {
			bp->bio_cmd = BIO_WRITE;
			bp->bio_data =
			    abd_borrow_buf_copy(zio->io_abd, zio->io_size);
		}
		break;
	case ZIO_TYPE_TRIM:
		bp->bio_cmd = BIO_DELETE;
		bp->bio_data = NULL;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		bp->bio_cmd = BIO_FLUSH;
		bp->bio_data = NULL;
		bp->bio_offset = cp->provider->mediasize;
		bp->bio_length = 0;
		break;
	default:
		panic("invalid zio->io_type: %d\n", zio->io_type);
	}
	bp->bio_done = vdev_geom_io_intr;
	zio->io_bio = bp;

	g_io_request(bp, cp);
}

static void
vdev_geom_io_done(zio_t *zio)
{
	struct bio *bp = zio->io_bio;

	if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) {
		ASSERT3P(bp, ==, NULL);
		return;
	}

	if (bp == NULL) {
		ASSERT3S(zio->io_error, ==, ENXIO);
		return;
	}

	if (zio->io_type == ZIO_TYPE_READ)
		abd_return_buf_copy(zio->io_abd, bp->bio_data, zio->io_size);
	else
		abd_return_buf(zio->io_abd, bp->bio_data, zio->io_size);

	g_destroy_bio(bp);
	zio->io_bio = NULL;
}

static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}
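
/* Operations vector for GEOM-backed leaf (disk) vdevs. */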
vdev_ops_t vdev_disk_ops = {
	.vdev_op_init = NULL,
	.vdev_op_fini = NULL,
	.vdev_op_open = vdev_geom_open,
	.vdev_op_close = vdev_geom_close,
	.vdev_op_asize = vdev_default_asize,
	.vdev_op_min_asize = vdev_default_min_asize,
	.vdev_op_min_alloc = NULL,
	.vdev_op_io_start = vdev_geom_io_start,
	.vdev_op_io_done = vdev_geom_io_done,
	.vdev_op_state_change = NULL,
	.vdev_op_need_resilver = NULL,
	.vdev_op_hold = vdev_geom_hold,
	.vdev_op_rele = vdev_geom_rele,
	.vdev_op_remap = NULL,
	.vdev_op_xlate = vdev_default_xlate,
	.vdev_op_rebuild_asize = NULL,
	.vdev_op_metaslab_init = NULL,
	.vdev_op_config_generate = NULL,
	.vdev_op_nparity = NULL,
	.vdev_op_ndisks = NULL,
	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
	.vdev_op_leaf = B_TRUE			/* leaf vdev */
};