1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2016 Intel Corporation. All rights reserved. 3 * Copyright (c) 2017, IBM Corporation. All rights reserved. 4 * Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved. 5 */ 6 7 /* 8 * NVMe over PCIe transport 9 */ 10 11 #include "spdk/stdinc.h" 12 #include "spdk/env.h" 13 #include "spdk/likely.h" 14 #include "spdk/string.h" 15 #include "nvme_internal.h" 16 #include "nvme_pcie_internal.h" 17 18 struct nvme_pcie_enum_ctx { 19 struct spdk_nvme_probe_ctx *probe_ctx; 20 struct spdk_pci_addr pci_addr; 21 bool has_pci_addr; 22 }; 23 24 static uint16_t g_signal_lock; 25 static bool g_sigset = false; 26 static spdk_nvme_pcie_hotplug_filter_cb g_hotplug_filter_cb; 27 28 static void 29 nvme_sigbus_fault_sighandler(const void *failure_addr, void *ctx) 30 { 31 void *map_address; 32 uint16_t flag = 0; 33 34 if (!__atomic_compare_exchange_n(&g_signal_lock, &flag, 1, false, __ATOMIC_ACQUIRE, 35 __ATOMIC_RELAXED)) { 36 SPDK_DEBUGLOG(nvme, "request g_signal_lock failed\n"); 37 return; 38 } 39 40 if (g_thread_mmio_ctrlr == NULL) { 41 return; 42 } 43 44 if (!g_thread_mmio_ctrlr->is_remapped) { 45 map_address = mmap((void *)g_thread_mmio_ctrlr->regs, g_thread_mmio_ctrlr->regs_size, 46 PROT_READ | PROT_WRITE, 47 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); 48 if (map_address == MAP_FAILED) { 49 SPDK_ERRLOG("mmap failed\n"); 50 __atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE); 51 return; 52 } 53 memset(map_address, 0xFF, sizeof(struct spdk_nvme_registers)); 54 g_thread_mmio_ctrlr->regs = (volatile struct spdk_nvme_registers *)map_address; 55 g_thread_mmio_ctrlr->is_remapped = true; 56 } 57 __atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE); 58 } 59 60 static void 61 _nvme_pcie_event_process(struct spdk_pci_event *event, void *cb_ctx) 62 { 63 struct spdk_nvme_transport_id trid; 64 struct spdk_nvme_ctrlr *ctrlr; 65 66 if (event->action == SPDK_UEVENT_ADD) { 67 if (spdk_process_is_primary()) { 68 if (g_hotplug_filter_cb == NULL || g_hotplug_filter_cb(&event->traddr)) { 69 /* The enumerate interface implement the add operation */ 70 spdk_pci_device_allow(&event->traddr); 71 } 72 } 73 } else if (event->action == SPDK_UEVENT_REMOVE) { 74 memset(&trid, 0, sizeof(trid)); 75 spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE); 76 77 if (spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &event->traddr) < 0) { 78 SPDK_ERRLOG("Failed to format pci address\n"); 79 return; 80 } 81 82 ctrlr = nvme_get_ctrlr_by_trid_unsafe(&trid, NULL); 83 if (ctrlr == NULL) { 84 return; 85 } 86 SPDK_DEBUGLOG(nvme, "remove nvme address: %s\n", trid.traddr); 87 88 nvme_ctrlr_lock(ctrlr); 89 nvme_ctrlr_fail(ctrlr, true); 90 nvme_ctrlr_unlock(ctrlr); 91 92 /* get the user app to clean up and stop I/O */ 93 if (ctrlr->remove_cb) { 94 nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); 95 ctrlr->remove_cb(ctrlr->cb_ctx, ctrlr); 96 nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); 97 } 98 } 99 } 100 101 static int 102 _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx) 103 { 104 struct spdk_nvme_ctrlr *ctrlr, *tmp; 105 struct spdk_pci_event event; 106 int rc = 0; 107 108 if (g_spdk_nvme_driver->hotplug_fd >= 0) { 109 while (spdk_pci_get_event(g_spdk_nvme_driver->hotplug_fd, &event) > 0) { 110 _nvme_pcie_event_process(&event, probe_ctx->cb_ctx); 111 } 112 } 113 114 /* Initiate removal of physically hotremoved PCI controllers. Even after 115 * they're hotremoved from the system, SPDK might still report them via RPC. 116 */ 117 TAILQ_FOREACH_SAFE(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq, tmp) { 118 bool do_remove = false; 119 struct nvme_pcie_ctrlr *pctrlr; 120 121 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { 122 continue; 123 } 124 125 pctrlr = nvme_pcie_ctrlr(ctrlr); 126 if (spdk_pci_device_is_removed(pctrlr->devhandle)) { 127 do_remove = true; 128 rc = 1; 129 } 130 131 if (do_remove) { 132 nvme_ctrlr_lock(ctrlr); 133 nvme_ctrlr_fail(ctrlr, true); 134 nvme_ctrlr_unlock(ctrlr); 135 if (ctrlr->remove_cb) { 136 nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); 137 ctrlr->remove_cb(ctrlr->cb_ctx, ctrlr); 138 nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); 139 } 140 } 141 } 142 return rc; 143 } 144 145 static volatile void * 146 nvme_pcie_reg_addr(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset) 147 { 148 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 149 150 return (volatile void *)((uintptr_t)pctrlr->regs + offset); 151 } 152 153 static volatile struct spdk_nvme_registers * 154 nvme_pcie_ctrlr_get_registers(struct spdk_nvme_ctrlr *ctrlr) 155 { 156 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 157 158 return pctrlr->regs; 159 } 160 161 static int 162 nvme_pcie_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value) 163 { 164 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 165 166 assert(offset <= sizeof(struct spdk_nvme_registers) - 4); 167 g_thread_mmio_ctrlr = pctrlr; 168 spdk_mmio_write_4(nvme_pcie_reg_addr(ctrlr, offset), value); 169 g_thread_mmio_ctrlr = NULL; 170 return 0; 171 } 172 173 static int 174 nvme_pcie_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value) 175 { 176 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 177 178 assert(offset <= sizeof(struct spdk_nvme_registers) - 8); 179 g_thread_mmio_ctrlr = pctrlr; 180 spdk_mmio_write_8(nvme_pcie_reg_addr(ctrlr, offset), value); 181 g_thread_mmio_ctrlr = NULL; 182 return 0; 183 } 184 185 static int 186 nvme_pcie_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value) 187 { 188 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 189 190 assert(offset <= sizeof(struct spdk_nvme_registers) - 4); 191 assert(value != NULL); 192 g_thread_mmio_ctrlr = pctrlr; 193 *value = spdk_mmio_read_4(nvme_pcie_reg_addr(ctrlr, offset)); 194 g_thread_mmio_ctrlr = NULL; 195 if (~(*value) == 0) { 196 return -1; 197 } 198 199 return 0; 200 } 201 202 static int 203 nvme_pcie_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value) 204 { 205 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 206 207 assert(offset <= sizeof(struct spdk_nvme_registers) - 8); 208 assert(value != NULL); 209 g_thread_mmio_ctrlr = pctrlr; 210 *value = spdk_mmio_read_8(nvme_pcie_reg_addr(ctrlr, offset)); 211 g_thread_mmio_ctrlr = NULL; 212 if (~(*value) == 0) { 213 return -1; 214 } 215 216 return 0; 217 } 218 219 static int 220 nvme_pcie_ctrlr_set_asq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value) 221 { 222 return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, asq), 223 value); 224 } 225 226 static int 227 nvme_pcie_ctrlr_set_acq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value) 228 { 229 return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, acq), 230 value); 231 } 232 233 static int 234 nvme_pcie_ctrlr_set_aqa(struct nvme_pcie_ctrlr *pctrlr, const union spdk_nvme_aqa_register *aqa) 235 { 236 return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, aqa.raw), 237 aqa->raw); 238 } 239 240 static int 241 nvme_pcie_ctrlr_get_cmbloc(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_cmbloc_register *cmbloc) 242 { 243 return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbloc.raw), 244 &cmbloc->raw); 245 } 246 247 static int 248 nvme_pcie_ctrlr_get_cmbsz(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_cmbsz_register *cmbsz) 249 { 250 return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw), 251 &cmbsz->raw); 252 } 253 254 static int 255 nvme_pcie_ctrlr_get_pmrcap(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrcap_register *pmrcap) 256 { 257 return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw), 258 &pmrcap->raw); 259 } 260 261 static int 262 nvme_pcie_ctrlr_set_pmrctl(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrctl_register *pmrctl) 263 { 264 return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrctl.raw), 265 pmrctl->raw); 266 } 267 268 static int 269 nvme_pcie_ctrlr_get_pmrctl(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrctl_register *pmrctl) 270 { 271 return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrctl.raw), 272 &pmrctl->raw); 273 } 274 275 static int 276 nvme_pcie_ctrlr_get_pmrsts(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrsts_register *pmrsts) 277 { 278 return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrsts.raw), 279 &pmrsts->raw); 280 } 281 282 static int 283 nvme_pcie_ctrlr_set_pmrmscl(struct nvme_pcie_ctrlr *pctrlr, uint32_t value) 284 { 285 return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrmscl.raw), 286 value); 287 } 288 289 static int 290 nvme_pcie_ctrlr_set_pmrmscu(struct nvme_pcie_ctrlr *pctrlr, uint32_t value) 291 { 292 return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrmscu), 293 value); 294 } 295 296 static uint32_t 297 nvme_pcie_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr) 298 { 299 /* 300 * For commands requiring more than 2 PRP entries, one PRP will be 301 * embedded in the command (prp1), and the rest of the PRP entries 302 * will be in a list pointed to by the command (prp2). The number 303 * of PRP entries in the list is defined by 304 * NVME_MAX_PRP_LIST_ENTRIES. 305 * 306 * Note that the max xfer size is not (MAX_ENTRIES + 1) * page_size 307 * because the first PRP entry may not be aligned on a 4KiB 308 * boundary. 309 */ 310 return NVME_MAX_PRP_LIST_ENTRIES * ctrlr->page_size; 311 } 312 313 static uint16_t 314 nvme_pcie_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr) 315 { 316 return NVME_MAX_SGL_DESCRIPTORS; 317 } 318 319 static void 320 nvme_pcie_ctrlr_map_cmb(struct nvme_pcie_ctrlr *pctrlr) 321 { 322 int rc; 323 void *addr = NULL; 324 uint32_t bir; 325 union spdk_nvme_cmbsz_register cmbsz; 326 union spdk_nvme_cmbloc_register cmbloc; 327 uint64_t size, unit_size, offset, bar_size = 0, bar_phys_addr = 0; 328 329 if (nvme_pcie_ctrlr_get_cmbsz(pctrlr, &cmbsz) || 330 nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) { 331 SPDK_ERRLOG("get registers failed\n"); 332 goto exit; 333 } 334 335 if (!cmbsz.bits.sz) { 336 goto exit; 337 } 338 339 bir = cmbloc.bits.bir; 340 /* Values 0 2 3 4 5 are valid for BAR */ 341 if (bir > 5 || bir == 1) { 342 goto exit; 343 } 344 345 /* unit size for 4KB/64KB/1MB/16MB/256MB/4GB/64GB */ 346 unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu); 347 /* controller memory buffer size in Bytes */ 348 size = unit_size * cmbsz.bits.sz; 349 /* controller memory buffer offset from BAR in Bytes */ 350 offset = unit_size * cmbloc.bits.ofst; 351 352 rc = spdk_pci_device_map_bar(pctrlr->devhandle, bir, &addr, 353 &bar_phys_addr, &bar_size); 354 if ((rc != 0) || addr == NULL) { 355 goto exit; 356 } 357 358 if (offset > bar_size) { 359 goto exit; 360 } 361 362 if (size > bar_size - offset) { 363 goto exit; 364 } 365 366 pctrlr->cmb.bar_va = addr; 367 pctrlr->cmb.bar_pa = bar_phys_addr; 368 pctrlr->cmb.size = size; 369 pctrlr->cmb.current_offset = offset; 370 371 if (!cmbsz.bits.sqs) { 372 pctrlr->ctrlr.opts.use_cmb_sqs = false; 373 } 374 375 return; 376 exit: 377 pctrlr->ctrlr.opts.use_cmb_sqs = false; 378 return; 379 } 380 381 static int 382 nvme_pcie_ctrlr_unmap_cmb(struct nvme_pcie_ctrlr *pctrlr) 383 { 384 int rc = 0; 385 union spdk_nvme_cmbloc_register cmbloc; 386 void *addr = pctrlr->cmb.bar_va; 387 388 if (addr) { 389 if (pctrlr->cmb.mem_register_addr) { 390 spdk_mem_unregister(pctrlr->cmb.mem_register_addr, pctrlr->cmb.mem_register_size); 391 } 392 393 if (nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) { 394 SPDK_ERRLOG("get_cmbloc() failed\n"); 395 return -EIO; 396 } 397 rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, cmbloc.bits.bir, addr); 398 } 399 return rc; 400 } 401 402 static int 403 nvme_pcie_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr) 404 { 405 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 406 407 if (pctrlr->cmb.bar_va == NULL) { 408 SPDK_DEBUGLOG(nvme, "CMB not available\n"); 409 return -ENOTSUP; 410 } 411 412 if (ctrlr->opts.use_cmb_sqs) { 413 SPDK_ERRLOG("CMB is already in use for submission queues.\n"); 414 return -ENOTSUP; 415 } 416 417 return 0; 418 } 419 420 static void * 421 nvme_pcie_ctrlr_map_io_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 422 { 423 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 424 union spdk_nvme_cmbsz_register cmbsz; 425 union spdk_nvme_cmbloc_register cmbloc; 426 uint64_t mem_register_start, mem_register_end; 427 int rc; 428 429 if (pctrlr->cmb.mem_register_addr != NULL) { 430 *size = pctrlr->cmb.mem_register_size; 431 return pctrlr->cmb.mem_register_addr; 432 } 433 434 *size = 0; 435 436 if (pctrlr->cmb.bar_va == NULL) { 437 SPDK_DEBUGLOG(nvme, "CMB not available\n"); 438 return NULL; 439 } 440 441 if (ctrlr->opts.use_cmb_sqs) { 442 SPDK_ERRLOG("CMB is already in use for submission queues.\n"); 443 return NULL; 444 } 445 446 if (nvme_pcie_ctrlr_get_cmbsz(pctrlr, &cmbsz) || 447 nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) { 448 SPDK_ERRLOG("get registers failed\n"); 449 return NULL; 450 } 451 452 /* If only SQS is supported */ 453 if (!(cmbsz.bits.wds || cmbsz.bits.rds)) { 454 return NULL; 455 } 456 457 /* If CMB is less than 4MiB in size then abort CMB mapping */ 458 if (pctrlr->cmb.size < (1ULL << 22)) { 459 return NULL; 460 } 461 462 mem_register_start = _2MB_PAGE((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset + 463 VALUE_2MB - 1); 464 mem_register_end = _2MB_PAGE((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset + 465 pctrlr->cmb.size); 466 467 rc = spdk_mem_register((void *)mem_register_start, mem_register_end - mem_register_start); 468 if (rc) { 469 SPDK_ERRLOG("spdk_mem_register() failed\n"); 470 return NULL; 471 } 472 473 pctrlr->cmb.mem_register_addr = (void *)mem_register_start; 474 pctrlr->cmb.mem_register_size = mem_register_end - mem_register_start; 475 476 *size = pctrlr->cmb.mem_register_size; 477 return pctrlr->cmb.mem_register_addr; 478 } 479 480 static int 481 nvme_pcie_ctrlr_unmap_io_cmb(struct spdk_nvme_ctrlr *ctrlr) 482 { 483 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 484 int rc; 485 486 if (pctrlr->cmb.mem_register_addr == NULL) { 487 return 0; 488 } 489 490 rc = spdk_mem_unregister(pctrlr->cmb.mem_register_addr, pctrlr->cmb.mem_register_size); 491 492 if (rc == 0) { 493 pctrlr->cmb.mem_register_addr = NULL; 494 pctrlr->cmb.mem_register_size = 0; 495 } 496 497 return rc; 498 } 499 500 static void 501 nvme_pcie_ctrlr_map_pmr(struct nvme_pcie_ctrlr *pctrlr) 502 { 503 int rc; 504 void *addr = NULL; 505 uint32_t bir; 506 union spdk_nvme_pmrcap_register pmrcap; 507 uint64_t bar_size = 0, bar_phys_addr = 0; 508 509 if (!pctrlr->regs->cap.bits.pmrs) { 510 return; 511 } 512 513 if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) { 514 SPDK_ERRLOG("get registers failed\n"); 515 return; 516 } 517 518 bir = pmrcap.bits.bir; 519 /* Values 2 3 4 5 are valid for BAR */ 520 if (bir > 5 || bir < 2) { 521 SPDK_ERRLOG("invalid base indicator register value\n"); 522 return; 523 } 524 525 rc = spdk_pci_device_map_bar(pctrlr->devhandle, bir, &addr, &bar_phys_addr, &bar_size); 526 if ((rc != 0) || addr == NULL) { 527 SPDK_ERRLOG("could not map the bar %d\n", bir); 528 return; 529 } 530 531 if (pmrcap.bits.cmss) { 532 uint32_t pmrmscl, pmrmscu, cmse = 1; 533 union spdk_nvme_pmrsts_register pmrsts; 534 535 /* Enable Controller Memory Space */ 536 pmrmscl = (uint32_t)((bar_phys_addr & 0xFFFFF000ULL) | (cmse << 1)); 537 pmrmscu = (uint32_t)((bar_phys_addr >> 32ULL) & 0xFFFFFFFFULL); 538 539 if (nvme_pcie_ctrlr_set_pmrmscu(pctrlr, pmrmscu)) { 540 SPDK_ERRLOG("set_pmrmscu() failed\n"); 541 spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr); 542 return; 543 } 544 545 if (nvme_pcie_ctrlr_set_pmrmscl(pctrlr, pmrmscl)) { 546 SPDK_ERRLOG("set_pmrmscl() failed\n"); 547 spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr); 548 return; 549 } 550 551 if (nvme_pcie_ctrlr_get_pmrsts(pctrlr, &pmrsts)) { 552 SPDK_ERRLOG("get pmrsts failed\n"); 553 spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr); 554 return; 555 } 556 557 if (pmrsts.bits.cbai) { 558 SPDK_ERRLOG("Controller Memory Space Enable Failure\n"); 559 SPDK_ERRLOG("CBA Invalid - Host Addresses cannot reference PMR\n"); 560 } else { 561 SPDK_DEBUGLOG(nvme, "Controller Memory Space Enable Success\n"); 562 SPDK_DEBUGLOG(nvme, "Host Addresses can reference PMR\n"); 563 } 564 } 565 566 pctrlr->pmr.bar_va = addr; 567 pctrlr->pmr.bar_pa = bar_phys_addr; 568 pctrlr->pmr.size = pctrlr->ctrlr.pmr_size = bar_size; 569 } 570 571 static int 572 nvme_pcie_ctrlr_unmap_pmr(struct nvme_pcie_ctrlr *pctrlr) 573 { 574 int rc = 0; 575 union spdk_nvme_pmrcap_register pmrcap; 576 void *addr = pctrlr->pmr.bar_va; 577 578 if (addr == NULL) { 579 return rc; 580 } 581 582 if (pctrlr->pmr.mem_register_addr) { 583 spdk_mem_unregister(pctrlr->pmr.mem_register_addr, pctrlr->pmr.mem_register_size); 584 } 585 586 if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) { 587 SPDK_ERRLOG("get_pmrcap() failed\n"); 588 return -EIO; 589 } 590 591 if (pmrcap.bits.cmss) { 592 if (nvme_pcie_ctrlr_set_pmrmscu(pctrlr, 0)) { 593 SPDK_ERRLOG("set_pmrmscu() failed\n"); 594 } 595 596 if (nvme_pcie_ctrlr_set_pmrmscl(pctrlr, 0)) { 597 SPDK_ERRLOG("set_pmrmscl() failed\n"); 598 } 599 } 600 601 rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, pmrcap.bits.bir, addr); 602 603 return rc; 604 } 605 606 static int 607 nvme_pcie_ctrlr_config_pmr(struct spdk_nvme_ctrlr *ctrlr, bool enable) 608 { 609 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 610 union spdk_nvme_pmrcap_register pmrcap; 611 union spdk_nvme_pmrctl_register pmrctl; 612 union spdk_nvme_pmrsts_register pmrsts; 613 uint8_t pmrto, pmrtu; 614 uint64_t timeout_in_ms, ticks_per_ms, timeout_in_ticks, now_ticks; 615 616 if (!pctrlr->regs->cap.bits.pmrs) { 617 SPDK_ERRLOG("PMR is not supported by the controller\n"); 618 return -ENOTSUP; 619 } 620 621 if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) { 622 SPDK_ERRLOG("get registers failed\n"); 623 return -EIO; 624 } 625 626 pmrto = pmrcap.bits.pmrto; 627 pmrtu = pmrcap.bits.pmrtu; 628 629 if (pmrtu > 1) { 630 SPDK_ERRLOG("PMR Time Units Invalid\n"); 631 return -EINVAL; 632 } 633 634 ticks_per_ms = spdk_get_ticks_hz() / 1000; 635 timeout_in_ms = pmrto * (pmrtu ? (60 * 1000) : 500); 636 timeout_in_ticks = timeout_in_ms * ticks_per_ms; 637 638 if (nvme_pcie_ctrlr_get_pmrctl(pctrlr, &pmrctl)) { 639 SPDK_ERRLOG("get pmrctl failed\n"); 640 return -EIO; 641 } 642 643 if (enable && pmrctl.bits.en != 0) { 644 SPDK_ERRLOG("PMR is already enabled\n"); 645 return -EINVAL; 646 } else if (!enable && pmrctl.bits.en != 1) { 647 SPDK_ERRLOG("PMR is already disabled\n"); 648 return -EINVAL; 649 } 650 651 pmrctl.bits.en = enable; 652 653 if (nvme_pcie_ctrlr_set_pmrctl(pctrlr, &pmrctl)) { 654 SPDK_ERRLOG("set pmrctl failed\n"); 655 return -EIO; 656 } 657 658 now_ticks = spdk_get_ticks(); 659 660 do { 661 if (nvme_pcie_ctrlr_get_pmrsts(pctrlr, &pmrsts)) { 662 SPDK_ERRLOG("get pmrsts failed\n"); 663 return -EIO; 664 } 665 666 if (pmrsts.bits.nrdy == enable && 667 spdk_get_ticks() > now_ticks + timeout_in_ticks) { 668 SPDK_ERRLOG("PMR Enable - Timed Out\n"); 669 return -ETIMEDOUT; 670 } 671 } while (pmrsts.bits.nrdy == enable); 672 673 SPDK_DEBUGLOG(nvme, "PMR %s\n", enable ? "Enabled" : "Disabled"); 674 675 return 0; 676 } 677 678 static int 679 nvme_pcie_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr) 680 { 681 return nvme_pcie_ctrlr_config_pmr(ctrlr, true); 682 } 683 684 static int 685 nvme_pcie_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr) 686 { 687 return nvme_pcie_ctrlr_config_pmr(ctrlr, false); 688 } 689 690 static void * 691 nvme_pcie_ctrlr_map_io_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size) 692 { 693 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 694 union spdk_nvme_pmrcap_register pmrcap; 695 uint64_t mem_register_start, mem_register_end; 696 int rc; 697 698 if (!pctrlr->regs->cap.bits.pmrs) { 699 SPDK_ERRLOG("PMR is not supported by the controller\n"); 700 return NULL; 701 } 702 703 if (pctrlr->pmr.mem_register_addr != NULL) { 704 *size = pctrlr->pmr.mem_register_size; 705 return pctrlr->pmr.mem_register_addr; 706 } 707 708 *size = 0; 709 710 if (pctrlr->pmr.bar_va == NULL) { 711 SPDK_DEBUGLOG(nvme, "PMR not available\n"); 712 return NULL; 713 } 714 715 if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) { 716 SPDK_ERRLOG("get registers failed\n"); 717 return NULL; 718 } 719 720 /* Check if WDS / RDS is supported */ 721 if (!(pmrcap.bits.wds || pmrcap.bits.rds)) { 722 return NULL; 723 } 724 725 /* If PMR is less than 4MiB in size then abort PMR mapping */ 726 if (pctrlr->pmr.size < (1ULL << 22)) { 727 return NULL; 728 } 729 730 mem_register_start = _2MB_PAGE((uintptr_t)pctrlr->pmr.bar_va + VALUE_2MB - 1); 731 mem_register_end = _2MB_PAGE((uintptr_t)pctrlr->pmr.bar_va + pctrlr->pmr.size); 732 733 rc = spdk_mem_register((void *)mem_register_start, mem_register_end - mem_register_start); 734 if (rc) { 735 SPDK_ERRLOG("spdk_mem_register() failed\n"); 736 return NULL; 737 } 738 739 pctrlr->pmr.mem_register_addr = (void *)mem_register_start; 740 pctrlr->pmr.mem_register_size = mem_register_end - mem_register_start; 741 742 *size = pctrlr->pmr.mem_register_size; 743 return pctrlr->pmr.mem_register_addr; 744 } 745 746 static int 747 nvme_pcie_ctrlr_unmap_io_pmr(struct spdk_nvme_ctrlr *ctrlr) 748 { 749 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 750 int rc; 751 752 if (pctrlr->pmr.mem_register_addr == NULL) { 753 return -ENXIO; 754 } 755 756 rc = spdk_mem_unregister(pctrlr->pmr.mem_register_addr, pctrlr->pmr.mem_register_size); 757 758 if (rc == 0) { 759 pctrlr->pmr.mem_register_addr = NULL; 760 pctrlr->pmr.mem_register_size = 0; 761 } 762 763 return rc; 764 } 765 766 static int 767 nvme_pcie_ctrlr_allocate_bars(struct nvme_pcie_ctrlr *pctrlr) 768 { 769 int rc; 770 void *addr = NULL; 771 uint64_t phys_addr = 0, size = 0; 772 773 rc = spdk_pci_device_map_bar(pctrlr->devhandle, 0, &addr, 774 &phys_addr, &size); 775 776 if ((addr == NULL) || (rc != 0)) { 777 SPDK_ERRLOG("nvme_pcicfg_map_bar failed with rc %d or bar %p\n", 778 rc, addr); 779 return -1; 780 } 781 782 pctrlr->regs = (volatile struct spdk_nvme_registers *)addr; 783 pctrlr->regs_size = size; 784 pctrlr->doorbell_base = (volatile uint32_t *)&pctrlr->regs->doorbell[0].sq_tdbl; 785 nvme_pcie_ctrlr_map_cmb(pctrlr); 786 nvme_pcie_ctrlr_map_pmr(pctrlr); 787 788 return 0; 789 } 790 791 static int 792 nvme_pcie_ctrlr_free_bars(struct nvme_pcie_ctrlr *pctrlr) 793 { 794 int rc = 0; 795 void *addr = (void *)pctrlr->regs; 796 797 if (pctrlr->ctrlr.is_removed) { 798 return rc; 799 } 800 801 rc = nvme_pcie_ctrlr_unmap_pmr(pctrlr); 802 if (rc != 0) { 803 SPDK_ERRLOG("nvme_ctrlr_unmap_pmr failed with error code %d\n", rc); 804 return -1; 805 } 806 807 rc = nvme_pcie_ctrlr_unmap_cmb(pctrlr); 808 if (rc != 0) { 809 SPDK_ERRLOG("nvme_ctrlr_unmap_cmb failed with error code %d\n", rc); 810 return -1; 811 } 812 813 if (addr && spdk_process_is_primary()) { 814 /* NOTE: addr may have been remapped here. We're relying on DPDK to call 815 * munmap internally. 816 */ 817 rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, 0, addr); 818 } 819 return rc; 820 } 821 822 /* This function must only be called while holding g_spdk_nvme_driver->lock */ 823 static int 824 pcie_nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev) 825 { 826 struct spdk_nvme_transport_id trid = {}; 827 struct nvme_pcie_enum_ctx *enum_ctx = ctx; 828 struct spdk_nvme_ctrlr *ctrlr; 829 struct spdk_pci_addr pci_addr; 830 831 pci_addr = spdk_pci_device_get_addr(pci_dev); 832 833 spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE); 834 spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr); 835 836 ctrlr = nvme_get_ctrlr_by_trid_unsafe(&trid, NULL); 837 if (!spdk_process_is_primary()) { 838 if (!ctrlr) { 839 SPDK_ERRLOG("Controller must be constructed in the primary process first.\n"); 840 return -1; 841 } 842 843 return nvme_ctrlr_add_process(ctrlr, pci_dev); 844 } 845 846 /* check whether user passes the pci_addr */ 847 if (enum_ctx->has_pci_addr && 848 (spdk_pci_addr_compare(&pci_addr, &enum_ctx->pci_addr) != 0)) { 849 return 1; 850 } 851 852 return nvme_ctrlr_probe(&trid, enum_ctx->probe_ctx, pci_dev); 853 } 854 855 static int 856 nvme_pcie_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx, 857 bool direct_connect) 858 { 859 struct nvme_pcie_enum_ctx enum_ctx = {}; 860 861 enum_ctx.probe_ctx = probe_ctx; 862 863 if (strlen(probe_ctx->trid.traddr) != 0) { 864 if (spdk_pci_addr_parse(&enum_ctx.pci_addr, probe_ctx->trid.traddr)) { 865 return -1; 866 } 867 enum_ctx.has_pci_addr = true; 868 } 869 870 /* Only the primary process can monitor hotplug. */ 871 if (spdk_process_is_primary()) { 872 if (_nvme_pcie_hotplug_monitor(probe_ctx) > 0) { 873 /* Some removal events were received. Return immediately, avoiding 874 * an spdk_pci_enumerate() which could trigger issue #3205. 875 */ 876 return 0; 877 } 878 } 879 880 if (enum_ctx.has_pci_addr == false) { 881 return spdk_pci_enumerate(spdk_pci_nvme_get_driver(), 882 pcie_nvme_enum_cb, &enum_ctx); 883 } else { 884 return spdk_pci_device_attach(spdk_pci_nvme_get_driver(), 885 pcie_nvme_enum_cb, &enum_ctx, &enum_ctx.pci_addr); 886 } 887 } 888 889 static struct spdk_nvme_ctrlr * 890 nvme_pcie_ctrlr_construct(const struct spdk_nvme_transport_id *trid, 891 const struct spdk_nvme_ctrlr_opts *opts, 892 void *devhandle) 893 { 894 struct spdk_pci_device *pci_dev = devhandle; 895 struct nvme_pcie_ctrlr *pctrlr; 896 union spdk_nvme_cap_register cap; 897 uint16_t cmd_reg; 898 int rc; 899 struct spdk_pci_id pci_id; 900 901 rc = spdk_pci_device_claim(pci_dev); 902 if (rc < 0) { 903 SPDK_ERRLOG("could not claim device %s (%s)\n", 904 trid->traddr, spdk_strerror(-rc)); 905 return NULL; 906 } 907 908 pctrlr = spdk_zmalloc(sizeof(struct nvme_pcie_ctrlr), 64, NULL, 909 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); 910 if (pctrlr == NULL) { 911 spdk_pci_device_unclaim(pci_dev); 912 SPDK_ERRLOG("could not allocate ctrlr\n"); 913 return NULL; 914 } 915 916 pctrlr->is_remapped = false; 917 pctrlr->ctrlr.is_removed = false; 918 pctrlr->devhandle = devhandle; 919 pctrlr->ctrlr.opts = *opts; 920 pctrlr->ctrlr.trid = *trid; 921 pctrlr->ctrlr.opts.admin_queue_size = spdk_max(pctrlr->ctrlr.opts.admin_queue_size, 922 NVME_PCIE_MIN_ADMIN_QUEUE_SIZE); 923 pci_id = spdk_pci_device_get_id(pci_dev); 924 pctrlr->ctrlr.quirks = nvme_get_quirks(&pci_id); 925 926 rc = nvme_ctrlr_construct(&pctrlr->ctrlr); 927 if (rc != 0) { 928 spdk_pci_device_unclaim(pci_dev); 929 spdk_free(pctrlr); 930 return NULL; 931 } 932 933 rc = nvme_pcie_ctrlr_allocate_bars(pctrlr); 934 if (rc != 0) { 935 spdk_pci_device_unclaim(pci_dev); 936 spdk_free(pctrlr); 937 return NULL; 938 } 939 940 /* Enable PCI busmaster and disable INTx */ 941 spdk_pci_device_cfg_read16(pci_dev, &cmd_reg, 4); 942 cmd_reg |= 0x404; 943 spdk_pci_device_cfg_write16(pci_dev, cmd_reg, 4); 944 945 if (nvme_ctrlr_get_cap(&pctrlr->ctrlr, &cap)) { 946 SPDK_ERRLOG("get_cap() failed\n"); 947 spdk_pci_device_unclaim(pci_dev); 948 spdk_free(pctrlr); 949 return NULL; 950 } 951 952 /* Doorbell stride is 2 ^ (dstrd + 2), 953 * but we want multiples of 4, so drop the + 2 */ 954 pctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd; 955 956 rc = nvme_pcie_ctrlr_construct_admin_qpair(&pctrlr->ctrlr, pctrlr->ctrlr.opts.admin_queue_size); 957 if (rc != 0) { 958 nvme_ctrlr_destruct(&pctrlr->ctrlr); 959 return NULL; 960 } 961 962 /* Construct the primary process properties */ 963 rc = nvme_ctrlr_add_process(&pctrlr->ctrlr, pci_dev); 964 if (rc != 0) { 965 nvme_ctrlr_destruct(&pctrlr->ctrlr); 966 return NULL; 967 } 968 969 if (g_sigset != true) { 970 spdk_pci_register_error_handler(nvme_sigbus_fault_sighandler, 971 NULL); 972 g_sigset = true; 973 } 974 975 return &pctrlr->ctrlr; 976 } 977 978 static int 979 nvme_pcie_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) 980 { 981 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 982 struct nvme_pcie_qpair *padminq = nvme_pcie_qpair(ctrlr->adminq); 983 union spdk_nvme_aqa_register aqa; 984 985 if (nvme_pcie_ctrlr_set_asq(pctrlr, padminq->cmd_bus_addr)) { 986 SPDK_ERRLOG("set_asq() failed\n"); 987 return -EIO; 988 } 989 990 if (nvme_pcie_ctrlr_set_acq(pctrlr, padminq->cpl_bus_addr)) { 991 SPDK_ERRLOG("set_acq() failed\n"); 992 return -EIO; 993 } 994 995 aqa.raw = 0; 996 /* acqs and asqs are 0-based. */ 997 aqa.bits.acqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1; 998 aqa.bits.asqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1; 999 1000 if (nvme_pcie_ctrlr_set_aqa(pctrlr, &aqa)) { 1001 SPDK_ERRLOG("set_aqa() failed\n"); 1002 return -EIO; 1003 } 1004 1005 return 0; 1006 } 1007 1008 static int 1009 nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) 1010 { 1011 struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 1012 struct spdk_pci_device *devhandle = nvme_ctrlr_proc_get_devhandle(ctrlr); 1013 1014 if (ctrlr->adminq) { 1015 nvme_pcie_qpair_destroy(ctrlr->adminq); 1016 } 1017 1018 nvme_ctrlr_destruct_finish(ctrlr); 1019 1020 nvme_pcie_ctrlr_free_bars(pctrlr); 1021 1022 if (devhandle) { 1023 spdk_pci_device_unclaim(devhandle); 1024 spdk_pci_device_detach(devhandle); 1025 } 1026 1027 spdk_free(pctrlr); 1028 1029 return 0; 1030 } 1031 1032 static int 1033 nvme_pcie_qpair_iterate_requests(struct spdk_nvme_qpair *qpair, 1034 int (*iter_fn)(struct nvme_request *req, void *arg), 1035 void *arg) 1036 { 1037 struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 1038 struct nvme_tracker *tr, *tmp; 1039 int rc; 1040 1041 assert(iter_fn != NULL); 1042 1043 TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) { 1044 assert(tr->req != NULL); 1045 1046 rc = iter_fn(tr->req, arg); 1047 if (rc != 0) { 1048 return rc; 1049 } 1050 } 1051 1052 return 0; 1053 } 1054 1055 void 1056 spdk_nvme_pcie_set_hotplug_filter(spdk_nvme_pcie_hotplug_filter_cb filter_cb) 1057 { 1058 g_hotplug_filter_cb = filter_cb; 1059 } 1060 1061 static struct spdk_pci_id nvme_pci_driver_id[] = { 1062 { 1063 .class_id = SPDK_PCI_CLASS_NVME, 1064 .vendor_id = SPDK_PCI_ANY_ID, 1065 .device_id = SPDK_PCI_ANY_ID, 1066 .subvendor_id = SPDK_PCI_ANY_ID, 1067 .subdevice_id = SPDK_PCI_ANY_ID, 1068 }, 1069 { .vendor_id = 0, /* sentinel */ }, 1070 }; 1071 1072 SPDK_PCI_DRIVER_REGISTER(nvme, nvme_pci_driver_id, 1073 SPDK_PCI_DRIVER_NEED_MAPPING | SPDK_PCI_DRIVER_WC_ACTIVATE); 1074 1075 const struct spdk_nvme_transport_ops pcie_ops = { 1076 .name = "PCIE", 1077 .type = SPDK_NVME_TRANSPORT_PCIE, 1078 .ctrlr_construct = nvme_pcie_ctrlr_construct, 1079 .ctrlr_scan = nvme_pcie_ctrlr_scan, 1080 .ctrlr_destruct = nvme_pcie_ctrlr_destruct, 1081 .ctrlr_enable = nvme_pcie_ctrlr_enable, 1082 1083 .ctrlr_get_registers = nvme_pcie_ctrlr_get_registers, 1084 .ctrlr_set_reg_4 = nvme_pcie_ctrlr_set_reg_4, 1085 .ctrlr_set_reg_8 = nvme_pcie_ctrlr_set_reg_8, 1086 .ctrlr_get_reg_4 = nvme_pcie_ctrlr_get_reg_4, 1087 .ctrlr_get_reg_8 = nvme_pcie_ctrlr_get_reg_8, 1088 1089 .ctrlr_get_max_xfer_size = nvme_pcie_ctrlr_get_max_xfer_size, 1090 .ctrlr_get_max_sges = nvme_pcie_ctrlr_get_max_sges, 1091 1092 .ctrlr_reserve_cmb = nvme_pcie_ctrlr_reserve_cmb, 1093 .ctrlr_map_cmb = nvme_pcie_ctrlr_map_io_cmb, 1094 .ctrlr_unmap_cmb = nvme_pcie_ctrlr_unmap_io_cmb, 1095 1096 .ctrlr_enable_pmr = nvme_pcie_ctrlr_enable_pmr, 1097 .ctrlr_disable_pmr = nvme_pcie_ctrlr_disable_pmr, 1098 .ctrlr_map_pmr = nvme_pcie_ctrlr_map_io_pmr, 1099 .ctrlr_unmap_pmr = nvme_pcie_ctrlr_unmap_io_pmr, 1100 1101 .ctrlr_create_io_qpair = nvme_pcie_ctrlr_create_io_qpair, 1102 .ctrlr_delete_io_qpair = nvme_pcie_ctrlr_delete_io_qpair, 1103 .ctrlr_connect_qpair = nvme_pcie_ctrlr_connect_qpair, 1104 .ctrlr_disconnect_qpair = nvme_pcie_ctrlr_disconnect_qpair, 1105 1106 .qpair_abort_reqs = nvme_pcie_qpair_abort_reqs, 1107 .qpair_reset = nvme_pcie_qpair_reset, 1108 .qpair_submit_request = nvme_pcie_qpair_submit_request, 1109 .qpair_process_completions = nvme_pcie_qpair_process_completions, 1110 .qpair_iterate_requests = nvme_pcie_qpair_iterate_requests, 1111 .admin_qpair_abort_aers = nvme_pcie_admin_qpair_abort_aers, 1112 1113 .poll_group_create = nvme_pcie_poll_group_create, 1114 .poll_group_connect_qpair = nvme_pcie_poll_group_connect_qpair, 1115 .poll_group_disconnect_qpair = nvme_pcie_poll_group_disconnect_qpair, 1116 .poll_group_add = nvme_pcie_poll_group_add, 1117 .poll_group_remove = nvme_pcie_poll_group_remove, 1118 .poll_group_process_completions = nvme_pcie_poll_group_process_completions, 1119 .poll_group_destroy = nvme_pcie_poll_group_destroy, 1120 .poll_group_get_stats = nvme_pcie_poll_group_get_stats, 1121 .poll_group_free_stats = nvme_pcie_poll_group_free_stats 1122 }; 1123 1124 SPDK_NVME_TRANSPORT_REGISTER(pcie, &pcie_ops); 1125