/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation. All rights reserved.
 * Copyright (c) 2017, IBM Corporation. All rights reserved.
 * Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
 */

/*
 * NVMe over PCIe transport
 */

#include "spdk/stdinc.h"
#include "spdk/env.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "nvme_internal.h"
#include "nvme_pcie_internal.h"

struct nvme_pcie_enum_ctx {
	struct spdk_nvme_probe_ctx *probe_ctx;
	struct spdk_pci_addr pci_addr;
	bool has_pci_addr;
};

static uint16_t g_signal_lock;
static bool g_sigset = false;
static spdk_nvme_pcie_hotplug_filter_cb g_hotplug_filter_cb;

/*
 * SIGBUS handler registered via spdk_pci_register_error_handler(): when an
 * MMIO access to a hot-removed controller faults, remap the register window
 * to an anonymous page filled with 0xFF so that subsequent register reads
 * fail gracefully instead of crashing the process.
 */
static void
nvme_sigbus_fault_sighandler(const void *failure_addr, void *ctx)
{
	void *map_address;
	uint16_t flag = 0;

	if (!__atomic_compare_exchange_n(&g_signal_lock, &flag, 1, false, __ATOMIC_ACQUIRE,
					 __ATOMIC_RELAXED)) {
		SPDK_DEBUGLOG(nvme, "request g_signal_lock failed\n");
		return;
	}

	if (g_thread_mmio_ctrlr == NULL) {
		/* No MMIO access was in flight on this thread; release the lock before bailing out. */
		__atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE);
		return;
	}

	if (!g_thread_mmio_ctrlr->is_remapped) {
		map_address = mmap((void *)g_thread_mmio_ctrlr->regs, g_thread_mmio_ctrlr->regs_size,
				   PROT_READ | PROT_WRITE,
				   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
		if (map_address == MAP_FAILED) {
			SPDK_ERRLOG("mmap failed\n");
			__atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE);
			return;
		}
		memset(map_address, 0xFF, sizeof(struct spdk_nvme_registers));
		g_thread_mmio_ctrlr->regs = (volatile struct spdk_nvme_registers *)map_address;
		g_thread_mmio_ctrlr->is_remapped = true;
	}
	__atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE);
}

static void
_nvme_pcie_event_process(struct spdk_pci_event *event, void *cb_ctx)
{
	struct spdk_nvme_transport_id trid;
	struct spdk_nvme_ctrlr *ctrlr;

	if (event->action == SPDK_UEVENT_ADD) {
		if (spdk_process_is_primary()) {
			if (g_hotplug_filter_cb == NULL || g_hotplug_filter_cb(&event->traddr)) {
				/* The enumerate interface implements the add operation */
				spdk_pci_device_allow(&event->traddr);
			}
		}
	} else if (event->action == SPDK_UEVENT_REMOVE) {
		memset(&trid, 0, sizeof(trid));
		spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);

		if (spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &event->traddr) < 0) {
			SPDK_ERRLOG("Failed to format pci address\n");
			return;
		}

		ctrlr = nvme_get_ctrlr_by_trid_unsafe(&trid);
		if (ctrlr == NULL) {
			return;
		}
		SPDK_DEBUGLOG(nvme, "remove nvme address: %s\n", trid.traddr);

		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
		nvme_ctrlr_fail(ctrlr, true);
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

		/* Get the user app to clean up and stop I/O */
		if (ctrlr->remove_cb) {
			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
			ctrlr->remove_cb(ctrlr->cb_ctx, ctrlr);
			nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		}
	}
}

static int
_nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx)
{
	struct spdk_nvme_ctrlr *ctrlr, *tmp;
	struct spdk_pci_event event;

	if (g_spdk_nvme_driver->hotplug_fd < 0) {
		return 0;
	}

	while (spdk_pci_get_event(g_spdk_nvme_driver->hotplug_fd, &event) > 0) {
		_nvme_pcie_event_process(&event, probe_ctx->cb_ctx);
	}

	/* Initiate removal of physically hot-removed PCI controllers. Even after
	 * they're hot-removed from the system, SPDK might still report them via RPC.
	 */
	TAILQ_FOREACH_SAFE(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq, tmp) {
		bool do_remove = false;
		struct nvme_pcie_ctrlr *pctrlr;

		if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
			continue;
		}

		pctrlr = nvme_pcie_ctrlr(ctrlr);
		if (spdk_pci_device_is_removed(pctrlr->devhandle)) {
			do_remove = true;
		}

		if (do_remove) {
			nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
			nvme_ctrlr_fail(ctrlr, true);
			nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
			if (ctrlr->remove_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				ctrlr->remove_cb(ctrlr->cb_ctx, ctrlr);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
		}
	}
	return 0;
}

static volatile void *
nvme_pcie_reg_addr(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	return (volatile void *)((uintptr_t)pctrlr->regs + offset);
}

static int
nvme_pcie_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	assert(offset <= sizeof(struct spdk_nvme_registers) - 4);
	g_thread_mmio_ctrlr = pctrlr;
	spdk_mmio_write_4(nvme_pcie_reg_addr(ctrlr, offset), value);
	g_thread_mmio_ctrlr = NULL;
	return 0;
}

static int
nvme_pcie_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	assert(offset <= sizeof(struct spdk_nvme_registers) - 8);
	g_thread_mmio_ctrlr = pctrlr;
	spdk_mmio_write_8(nvme_pcie_reg_addr(ctrlr, offset), value);
	g_thread_mmio_ctrlr = NULL;
	return 0;
}

static int
nvme_pcie_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	assert(offset <= sizeof(struct spdk_nvme_registers) - 4);
	assert(value != NULL);
	g_thread_mmio_ctrlr = pctrlr;
	*value = spdk_mmio_read_4(nvme_pcie_reg_addr(ctrlr, offset));
	g_thread_mmio_ctrlr = NULL;
	if (~(*value) == 0) {
		/* A read of all 1s means the device is no longer there (e.g. hot-removed). */
		return -1;
	}

	return 0;
}

static int
nvme_pcie_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	assert(offset <= sizeof(struct spdk_nvme_registers) - 8);
	assert(value != NULL);
	g_thread_mmio_ctrlr = pctrlr;
	*value = spdk_mmio_read_8(nvme_pcie_reg_addr(ctrlr, offset));
	g_thread_mmio_ctrlr = NULL;
	if (~(*value) == 0) {
		return -1;
	}

	return 0;
}

static int
nvme_pcie_ctrlr_set_asq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value)
{
	return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, asq),
					 value);
}

static int
nvme_pcie_ctrlr_set_acq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value)
{
	return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, acq),
					 value);
}

static int
nvme_pcie_ctrlr_set_aqa(struct nvme_pcie_ctrlr *pctrlr, const union spdk_nvme_aqa_register *aqa)
{
	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, aqa.raw),
					 aqa->raw);
}
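
/*
 * Illustrative sketch (not part of the upstream file): typical use of the
 * register helpers above.  A read that comes back as all 1s means the MMIO
 * window no longer backs a live device (see the SIGBUS handler), so callers
 * treat the -1 return as "controller gone".  csts.raw is a field of
 * struct spdk_nvme_registers; error handling is reduced to a bare check.
 *
 *	union spdk_nvme_csts_register csts;
 *
 *	if (nvme_pcie_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
 *				      &csts.raw) != 0) {
 *		// All 1s read back - assume the device was hot-removed.
 *	}
 */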

static int
nvme_pcie_ctrlr_get_cmbloc(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_cmbloc_register *cmbloc)
{
	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbloc.raw),
					 &cmbloc->raw);
}

static int
nvme_pcie_ctrlr_get_cmbsz(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_cmbsz_register *cmbsz)
{
	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw),
					 &cmbsz->raw);
}

static int
nvme_pcie_ctrlr_get_pmrcap(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrcap_register *pmrcap)
{
	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw),
					 &pmrcap->raw);
}

static int
nvme_pcie_ctrlr_set_pmrctl(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrctl_register *pmrctl)
{
	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrctl.raw),
					 pmrctl->raw);
}

static int
nvme_pcie_ctrlr_get_pmrctl(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrctl_register *pmrctl)
{
	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrctl.raw),
					 &pmrctl->raw);
}

static int
nvme_pcie_ctrlr_get_pmrsts(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrsts_register *pmrsts)
{
	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrsts.raw),
					 &pmrsts->raw);
}

static int
nvme_pcie_ctrlr_set_pmrmscl(struct nvme_pcie_ctrlr *pctrlr, uint32_t value)
{
	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrmscl.raw),
					 value);
}

static int
nvme_pcie_ctrlr_set_pmrmscu(struct nvme_pcie_ctrlr *pctrlr, uint32_t value)
{
	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrmscu),
					 value);
}

static uint32_t
nvme_pcie_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
	/*
	 * For commands requiring more than 2 PRP entries, one PRP will be
	 * embedded in the command (prp1), and the rest of the PRP entries
	 * will be in a list pointed to by the command (prp2).  The number
	 * of PRP entries in the list is defined by
	 * NVME_MAX_PRP_LIST_ENTRIES.
	 *
	 * Note that the max xfer size is not (MAX_ENTRIES + 1) * page_size
	 * because the first PRP entry may not be aligned on a 4KiB
	 * boundary.
	 */
	return NVME_MAX_PRP_LIST_ENTRIES * ctrlr->page_size;
}
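
/*
 * Worked example (illustration only; the actual value of
 * NVME_MAX_PRP_LIST_ENTRIES comes from the driver headers): if the list held
 * 503 entries and the page size were 4 KiB, the reported limit would be
 * 503 * 4096 = 2,060,288 bytes.  The partial page addressed by PRP1 is not
 * counted, which is why the "+ 1" is dropped in the comment above.
 */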

static uint16_t
nvme_pcie_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
{
	return NVME_MAX_SGL_DESCRIPTORS;
}

static void
nvme_pcie_ctrlr_map_cmb(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc;
	void *addr = NULL;
	uint32_t bir;
	union spdk_nvme_cmbsz_register cmbsz;
	union spdk_nvme_cmbloc_register cmbloc;
	uint64_t size, unit_size, offset, bar_size = 0, bar_phys_addr = 0;

	if (nvme_pcie_ctrlr_get_cmbsz(pctrlr, &cmbsz) ||
	    nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) {
		SPDK_ERRLOG("get registers failed\n");
		goto exit;
	}

	if (!cmbsz.bits.sz) {
		goto exit;
	}

	bir = cmbloc.bits.bir;
	/* Values 0 and 2 through 5 are valid BAR indicators; 1 is reserved. */
	if (bir > 5 || bir == 1) {
		goto exit;
	}

	/* Unit size is 1 << (12 + 4 * SZU): 4KiB/64KiB/1MiB/16MiB/256MiB/4GiB/64GiB.
	 * For example, SZU = 2 selects 1 MiB units, so SZ = 1024 describes a 1 GiB CMB. */
	unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu);
	/* controller memory buffer size in bytes */
	size = unit_size * cmbsz.bits.sz;
	/* controller memory buffer offset from BAR in bytes */
	offset = unit_size * cmbloc.bits.ofst;

	rc = spdk_pci_device_map_bar(pctrlr->devhandle, bir, &addr,
				     &bar_phys_addr, &bar_size);
	if ((rc != 0) || addr == NULL) {
		goto exit;
	}

	if (offset > bar_size) {
		goto exit;
	}

	if (size > bar_size - offset) {
		goto exit;
	}

	pctrlr->cmb.bar_va = addr;
	pctrlr->cmb.bar_pa = bar_phys_addr;
	pctrlr->cmb.size = size;
	pctrlr->cmb.current_offset = offset;

	if (!cmbsz.bits.sqs) {
		pctrlr->ctrlr.opts.use_cmb_sqs = false;
	}

	return;
exit:
	pctrlr->ctrlr.opts.use_cmb_sqs = false;
	return;
}

static int
nvme_pcie_ctrlr_unmap_cmb(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc = 0;
	union spdk_nvme_cmbloc_register cmbloc;
	void *addr = pctrlr->cmb.bar_va;

	if (addr) {
		if (pctrlr->cmb.mem_register_addr) {
			spdk_mem_unregister(pctrlr->cmb.mem_register_addr, pctrlr->cmb.mem_register_size);
		}

		if (nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) {
			SPDK_ERRLOG("get_cmbloc() failed\n");
			return -EIO;
		}
		rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, cmbloc.bits.bir, addr);
	}
	return rc;
}

static int
nvme_pcie_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	if (pctrlr->cmb.bar_va == NULL) {
		SPDK_DEBUGLOG(nvme, "CMB not available\n");
		return -ENOTSUP;
	}

	if (ctrlr->opts.use_cmb_sqs) {
		SPDK_ERRLOG("CMB is already in use for submission queues.\n");
		return -ENOTSUP;
	}

	return 0;
}

static void *
nvme_pcie_ctrlr_map_io_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	union spdk_nvme_cmbsz_register cmbsz;
	union spdk_nvme_cmbloc_register cmbloc;
	uint64_t mem_register_start, mem_register_end;
	int rc;

	if (pctrlr->cmb.mem_register_addr != NULL) {
		*size = pctrlr->cmb.mem_register_size;
		return pctrlr->cmb.mem_register_addr;
	}

	*size = 0;

	if (pctrlr->cmb.bar_va == NULL) {
		SPDK_DEBUGLOG(nvme, "CMB not available\n");
		return NULL;
	}

	if (ctrlr->opts.use_cmb_sqs) {
		SPDK_ERRLOG("CMB is already in use for submission queues.\n");
		return NULL;
	}

	if (nvme_pcie_ctrlr_get_cmbsz(pctrlr, &cmbsz) ||
	    nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) {
		SPDK_ERRLOG("get registers failed\n");
		return NULL;
	}

	/* Bail out if the CMB only supports submission queues (no WDS/RDS). */
	if (!(cmbsz.bits.wds || cmbsz.bits.rds)) {
		return NULL;
	}

	/* If the CMB is less than 4 MiB in size then abort CMB mapping */
	if (pctrlr->cmb.size < (1ULL << 22)) {
		return NULL;
	}

	/* Register only the 2 MiB-aligned region of the CMB that lies past current_offset. */
	mem_register_start = _2MB_PAGE((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset +
				       VALUE_2MB - 1);
	mem_register_end = _2MB_PAGE((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset +
				     pctrlr->cmb.size);

	rc = spdk_mem_register((void *)mem_register_start, mem_register_end - mem_register_start);
	if (rc) {
		SPDK_ERRLOG("spdk_mem_register() failed\n");
		return NULL;
	}

	pctrlr->cmb.mem_register_addr = (void *)mem_register_start;
	pctrlr->cmb.mem_register_size = mem_register_end - mem_register_start;

	*size = pctrlr->cmb.mem_register_size;
	return pctrlr->cmb.mem_register_addr;
}

static int
nvme_pcie_ctrlr_unmap_io_cmb(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	int rc;

	if (pctrlr->cmb.mem_register_addr == NULL) {
		return 0;
	}

	rc = spdk_mem_unregister(pctrlr->cmb.mem_register_addr, pctrlr->cmb.mem_register_size);

	if (rc == 0) {
		pctrlr->cmb.mem_register_addr = NULL;
		pctrlr->cmb.mem_register_size = 0;
	}

	return rc;
}

static void
nvme_pcie_ctrlr_map_pmr(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc;
	void *addr = NULL;
	uint32_t bir;
	union spdk_nvme_pmrcap_register pmrcap;
	uint64_t bar_size = 0, bar_phys_addr = 0;

	if (!pctrlr->regs->cap.bits.pmrs) {
		return;
	}

	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
		SPDK_ERRLOG("get registers failed\n");
		return;
	}

	bir = pmrcap.bits.bir;
	/* Values 2 through 5 are valid BAR indicators. */
	if (bir > 5 || bir < 2) {
		SPDK_ERRLOG("invalid base indicator register value\n");
		return;
	}

	rc = spdk_pci_device_map_bar(pctrlr->devhandle, bir, &addr, &bar_phys_addr, &bar_size);
	if ((rc != 0) || addr == NULL) {
		SPDK_ERRLOG("could not map the bar %d\n", bir);
		return;
	}

	if (pmrcap.bits.cmss) {
		uint32_t pmrmscl, pmrmscu, cmse = 1;
		union spdk_nvme_pmrsts_register pmrsts;

		/* Enable Controller Memory Space: program the BAR physical address into
		 * PMRMSCL/PMRMSCU with CMSE (bit 1) set in the low dword.  For example, a
		 * BAR at 0x3_8000_0000 yields PMRMSCU = 0x3 and PMRMSCL = 0x80000002. */
		pmrmscl = (uint32_t)((bar_phys_addr & 0xFFFFF000ULL) | (cmse << 1));
		pmrmscu = (uint32_t)((bar_phys_addr >> 32ULL) & 0xFFFFFFFFULL);

		if (nvme_pcie_ctrlr_set_pmrmscu(pctrlr, pmrmscu)) {
			SPDK_ERRLOG("set_pmrmscu() failed\n");
			spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr);
			return;
		}

		if (nvme_pcie_ctrlr_set_pmrmscl(pctrlr, pmrmscl)) {
			SPDK_ERRLOG("set_pmrmscl() failed\n");
			spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr);
			return;
		}

		if (nvme_pcie_ctrlr_get_pmrsts(pctrlr, &pmrsts)) {
			SPDK_ERRLOG("get pmrsts failed\n");
			spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr);
			return;
		}

		if (pmrsts.bits.cbai) {
			SPDK_ERRLOG("Controller Memory Space Enable Failure\n");
			SPDK_ERRLOG("CBA Invalid - Host Addresses cannot reference PMR\n");
		} else {
			SPDK_DEBUGLOG(nvme, "Controller Memory Space Enable Success\n");
			SPDK_DEBUGLOG(nvme, "Host Addresses can reference PMR\n");
		}
	}

	pctrlr->pmr.bar_va = addr;
	pctrlr->pmr.bar_pa = bar_phys_addr;
	pctrlr->pmr.size = pctrlr->ctrlr.pmr_size = bar_size;
}

static int
nvme_pcie_ctrlr_unmap_pmr(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc = 0;
	union spdk_nvme_pmrcap_register pmrcap;
	void *addr = pctrlr->pmr.bar_va;

	if (addr == NULL) {
		return rc;
	}

	if (pctrlr->pmr.mem_register_addr) {
		spdk_mem_unregister(pctrlr->pmr.mem_register_addr, pctrlr->pmr.mem_register_size);
	}

	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
		SPDK_ERRLOG("get_pmrcap() failed\n");
		return -EIO;
	}

	if (pmrcap.bits.cmss) {
		if (nvme_pcie_ctrlr_set_pmrmscu(pctrlr, 0)) {
			SPDK_ERRLOG("set_pmrmscu() failed\n");
		}

		if (nvme_pcie_ctrlr_set_pmrmscl(pctrlr, 0)) {
			SPDK_ERRLOG("set_pmrmscl() failed\n");
		}
	}

	rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, pmrcap.bits.bir, addr);

	return rc;
}

static int
nvme_pcie_ctrlr_config_pmr(struct spdk_nvme_ctrlr *ctrlr, bool enable)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	union spdk_nvme_pmrcap_register pmrcap;
	union spdk_nvme_pmrctl_register pmrctl;
	union spdk_nvme_pmrsts_register pmrsts;
	uint8_t pmrto, pmrtu;
	uint64_t timeout_in_ms, ticks_per_ms, timeout_in_ticks, now_ticks;

	if (!pctrlr->regs->cap.bits.pmrs) {
		SPDK_ERRLOG("PMR is not supported by the controller\n");
		return -ENOTSUP;
	}

	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
		SPDK_ERRLOG("get registers failed\n");
		return -EIO;
	}

	pmrto = pmrcap.bits.pmrto;
	pmrtu = pmrcap.bits.pmrtu;

	if (pmrtu > 1) {
		SPDK_ERRLOG("PMR Time Units Invalid\n");
		return -EINVAL;
	}

	/* PMRTU selects the time unit for PMRTO: 0 = 500 ms units, 1 = minutes. */
	ticks_per_ms = spdk_get_ticks_hz() / 1000;
	timeout_in_ms = pmrto * (pmrtu ? (60 * 1000) : 500);
	timeout_in_ticks = timeout_in_ms * ticks_per_ms;

	if (nvme_pcie_ctrlr_get_pmrctl(pctrlr, &pmrctl)) {
		SPDK_ERRLOG("get pmrctl failed\n");
		return -EIO;
	}

	if (enable && pmrctl.bits.en != 0) {
		SPDK_ERRLOG("PMR is already enabled\n");
		return -EINVAL;
	} else if (!enable && pmrctl.bits.en != 1) {
		SPDK_ERRLOG("PMR is already disabled\n");
		return -EINVAL;
	}

	pmrctl.bits.en = enable;

	if (nvme_pcie_ctrlr_set_pmrctl(pctrlr, &pmrctl)) {
		SPDK_ERRLOG("set pmrctl failed\n");
		return -EIO;
	}

	now_ticks = spdk_get_ticks();

	do {
		if (nvme_pcie_ctrlr_get_pmrsts(pctrlr, &pmrsts)) {
			SPDK_ERRLOG("get pmrsts failed\n");
			return -EIO;
		}

		if (pmrsts.bits.nrdy == enable &&
		    spdk_get_ticks() > now_ticks + timeout_in_ticks) {
			SPDK_ERRLOG("PMR %s - Timed Out\n", enable ? "Enable" : "Disable");
			return -ETIMEDOUT;
		}
	} while (pmrsts.bits.nrdy == enable);

	SPDK_DEBUGLOG(nvme, "PMR %s\n", enable ? "Enabled" : "Disabled");

	return 0;
}

static int
nvme_pcie_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr)
{
	return nvme_pcie_ctrlr_config_pmr(ctrlr, true);
}

static int
nvme_pcie_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr)
{
	return nvme_pcie_ctrlr_config_pmr(ctrlr, false);
}

static void *
nvme_pcie_ctrlr_map_io_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	union spdk_nvme_pmrcap_register pmrcap;
	uint64_t mem_register_start, mem_register_end;
	int rc;

	if (!pctrlr->regs->cap.bits.pmrs) {
		SPDK_ERRLOG("PMR is not supported by the controller\n");
		return NULL;
	}

	if (pctrlr->pmr.mem_register_addr != NULL) {
		*size = pctrlr->pmr.mem_register_size;
		return pctrlr->pmr.mem_register_addr;
	}

	*size = 0;

	if (pctrlr->pmr.bar_va == NULL) {
		SPDK_DEBUGLOG(nvme, "PMR not available\n");
		return NULL;
	}

	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
		SPDK_ERRLOG("get registers failed\n");
		return NULL;
	}

	/* Check if WDS / RDS is supported */
	if (!(pmrcap.bits.wds || pmrcap.bits.rds)) {
		return NULL;
	}

	/* If the PMR is less than 4 MiB in size then abort PMR mapping */
	if (pctrlr->pmr.size < (1ULL << 22)) {
		return NULL;
	}

	mem_register_start = _2MB_PAGE((uintptr_t)pctrlr->pmr.bar_va + VALUE_2MB - 1);
	mem_register_end = _2MB_PAGE((uintptr_t)pctrlr->pmr.bar_va + pctrlr->pmr.size);

	rc = spdk_mem_register((void *)mem_register_start, mem_register_end - mem_register_start);
	if (rc) {
		SPDK_ERRLOG("spdk_mem_register() failed\n");
		return NULL;
	}

	pctrlr->pmr.mem_register_addr = (void *)mem_register_start;
	pctrlr->pmr.mem_register_size = mem_register_end - mem_register_start;

	*size = pctrlr->pmr.mem_register_size;
	return pctrlr->pmr.mem_register_addr;
}

static int
nvme_pcie_ctrlr_unmap_io_pmr(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	int rc;

	if (pctrlr->pmr.mem_register_addr == NULL) {
		return -ENXIO;
	}

	rc = spdk_mem_unregister(pctrlr->pmr.mem_register_addr, pctrlr->pmr.mem_register_size);

	if (rc == 0) {
		pctrlr->pmr.mem_register_addr = NULL;
		pctrlr->pmr.mem_register_size = 0;
	}

	return rc;
}

static int
nvme_pcie_ctrlr_allocate_bars(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc;
	void *addr = NULL;
	uint64_t phys_addr = 0, size = 0;

	rc = spdk_pci_device_map_bar(pctrlr->devhandle, 0, &addr,
				     &phys_addr, &size);

	if ((addr == NULL) || (rc != 0)) {
		SPDK_ERRLOG("nvme_pcicfg_map_bar failed with rc %d or bar %p\n",
			    rc, addr);
		return -1;
	}

	pctrlr->regs = (volatile struct spdk_nvme_registers *)addr;
	pctrlr->regs_size = size;
	pctrlr->doorbell_base = (volatile uint32_t *)&pctrlr->regs->doorbell[0].sq_tdbl;
	nvme_pcie_ctrlr_map_cmb(pctrlr);
	nvme_pcie_ctrlr_map_pmr(pctrlr);

	return 0;
}

static int
nvme_pcie_ctrlr_free_bars(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc = 0;
	void *addr = (void *)pctrlr->regs;

	if (pctrlr->ctrlr.is_removed) {
		return rc;
	}

	rc = nvme_pcie_ctrlr_unmap_pmr(pctrlr);
	if (rc != 0) {
		SPDK_ERRLOG("nvme_ctrlr_unmap_pmr failed with error code %d\n", rc);
		return -1;
	}

	rc = nvme_pcie_ctrlr_unmap_cmb(pctrlr);
	if (rc != 0) {
		SPDK_ERRLOG("nvme_ctrlr_unmap_cmb failed with error code %d\n", rc);
		return -1;
	}

	if (addr && spdk_process_is_primary()) {
		/* NOTE: addr may have been remapped here. We're relying on DPDK to call
		 * munmap internally.
		 */
		rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, 0, addr);
	}
	return rc;
}

/* This function must only be called while holding g_spdk_nvme_driver->lock */
static int
pcie_nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
{
	struct spdk_nvme_transport_id trid = {};
	struct nvme_pcie_enum_ctx *enum_ctx = ctx;
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_pci_addr pci_addr;

	pci_addr = spdk_pci_device_get_addr(pci_dev);

	spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);
	spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr);

	ctrlr = nvme_get_ctrlr_by_trid_unsafe(&trid);
	if (!spdk_process_is_primary()) {
		if (!ctrlr) {
			SPDK_ERRLOG("Controller must be constructed in the primary process first.\n");
			return -1;
		}

		return nvme_ctrlr_add_process(ctrlr, pci_dev);
	}

	/* Skip devices that do not match the PCI address passed by the user, if one was given. */
	if (enum_ctx->has_pci_addr &&
	    (spdk_pci_addr_compare(&pci_addr, &enum_ctx->pci_addr) != 0)) {
		return 1;
	}

	return nvme_ctrlr_probe(&trid, enum_ctx->probe_ctx, pci_dev);
}

static int
nvme_pcie_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx,
		     bool direct_connect)
{
	struct nvme_pcie_enum_ctx enum_ctx = {};

	enum_ctx.probe_ctx = probe_ctx;

	if (strlen(probe_ctx->trid.traddr) != 0) {
		if (spdk_pci_addr_parse(&enum_ctx.pci_addr, probe_ctx->trid.traddr)) {
			return -1;
		}
		enum_ctx.has_pci_addr = true;
	}

	/* Only the primary process can monitor hotplug. */
	if (spdk_process_is_primary()) {
		_nvme_pcie_hotplug_monitor(probe_ctx);
	}

	if (enum_ctx.has_pci_addr == false) {
		return spdk_pci_enumerate(spdk_pci_nvme_get_driver(),
					  pcie_nvme_enum_cb, &enum_ctx);
	} else {
		return spdk_pci_device_attach(spdk_pci_nvme_get_driver(),
					      pcie_nvme_enum_cb, &enum_ctx, &enum_ctx.pci_addr);
	}
}

static struct spdk_nvme_ctrlr *
nvme_pcie_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
			  const struct spdk_nvme_ctrlr_opts *opts,
			  void *devhandle)
{
	struct spdk_pci_device *pci_dev = devhandle;
	struct nvme_pcie_ctrlr *pctrlr;
	union spdk_nvme_cap_register cap;
	uint16_t cmd_reg;
	int rc;
	struct spdk_pci_id pci_id;

	rc = spdk_pci_device_claim(pci_dev);
	if (rc < 0) {
		SPDK_ERRLOG("could not claim device %s (%s)\n",
			    trid->traddr, spdk_strerror(-rc));
		return NULL;
	}

	pctrlr = spdk_zmalloc(sizeof(struct nvme_pcie_ctrlr), 64, NULL,
			      SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
	if (pctrlr == NULL) {
		spdk_pci_device_unclaim(pci_dev);
		SPDK_ERRLOG("could not allocate ctrlr\n");
		return NULL;
	}

	pctrlr->is_remapped = false;
	pctrlr->ctrlr.is_removed = false;
	pctrlr->devhandle = devhandle;
	pctrlr->ctrlr.opts = *opts;
	pctrlr->ctrlr.trid = *trid;
	pctrlr->ctrlr.opts.admin_queue_size = spdk_max(pctrlr->ctrlr.opts.admin_queue_size,
					      NVME_PCIE_MIN_ADMIN_QUEUE_SIZE);

	rc = nvme_ctrlr_construct(&pctrlr->ctrlr);
	if (rc != 0) {
		spdk_pci_device_unclaim(pci_dev);
		spdk_free(pctrlr);
		return NULL;
	}

	rc = nvme_pcie_ctrlr_allocate_bars(pctrlr);
	if (rc != 0) {
		spdk_pci_device_unclaim(pci_dev);
		spdk_free(pctrlr);
		return NULL;
	}

	/* Enable PCI bus mastering (command register bit 2) and disable INTx
	 * interrupts (bit 10); 0x404 sets both bits. */
	spdk_pci_device_cfg_read16(pci_dev, &cmd_reg, 4);
	cmd_reg |= 0x404;
	spdk_pci_device_cfg_write16(pci_dev, cmd_reg, 4);

	if (nvme_ctrlr_get_cap(&pctrlr->ctrlr, &cap)) {
		SPDK_ERRLOG("get_cap() failed\n");
		spdk_pci_device_unclaim(pci_dev);
		spdk_free(pctrlr);
		return NULL;
	}

	/* Doorbell stride is 2 ^ (2 + DSTRD) bytes, but we track it in units of
	 * uint32_t, so drop the + 2. */
	pctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd;

	pci_id = spdk_pci_device_get_id(pci_dev);
	pctrlr->ctrlr.quirks = nvme_get_quirks(&pci_id);

	rc = nvme_pcie_ctrlr_construct_admin_qpair(&pctrlr->ctrlr, pctrlr->ctrlr.opts.admin_queue_size);
	if (rc != 0) {
		nvme_ctrlr_destruct(&pctrlr->ctrlr);
		return NULL;
	}

	/* Construct the primary process properties */
	rc = nvme_ctrlr_add_process(&pctrlr->ctrlr, pci_dev);
	if (rc != 0) {
		nvme_ctrlr_destruct(&pctrlr->ctrlr);
		return NULL;
	}

	if (g_sigset != true) {
		spdk_pci_register_error_handler(nvme_sigbus_fault_sighandler,
						NULL);
		g_sigset = true;
	}

	return &pctrlr->ctrlr;
}
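
/*
 * Illustrative usage sketch (not part of the driver), assuming the public
 * probe/connect API declared in spdk/nvme.h: an application reaches this
 * transport by connecting with a PCIe transport ID.  The address below is a
 * placeholder.
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr *app_ctrlr;
 *
 *	spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);
 *	snprintf(trid.traddr, sizeof(trid.traddr), "0000:01:00.0");
 *	app_ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 *	if (app_ctrlr == NULL) {
 *		// probe/attach failed
 *	}
 */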

static int
nvme_pcie_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	struct nvme_pcie_qpair *padminq = nvme_pcie_qpair(ctrlr->adminq);
	union spdk_nvme_aqa_register aqa;

	if (nvme_pcie_ctrlr_set_asq(pctrlr, padminq->cmd_bus_addr)) {
		SPDK_ERRLOG("set_asq() failed\n");
		return -EIO;
	}

	if (nvme_pcie_ctrlr_set_acq(pctrlr, padminq->cpl_bus_addr)) {
		SPDK_ERRLOG("set_acq() failed\n");
		return -EIO;
	}

	aqa.raw = 0;
	/* ACQS and ASQS are 0's-based values (a value of N means N + 1 entries). */
	aqa.bits.acqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1;
	aqa.bits.asqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1;

	if (nvme_pcie_ctrlr_set_aqa(pctrlr, &aqa)) {
		SPDK_ERRLOG("set_aqa() failed\n");
		return -EIO;
	}

	return 0;
}

static int
nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	struct spdk_pci_device *devhandle = nvme_ctrlr_proc_get_devhandle(ctrlr);

	if (ctrlr->adminq) {
		nvme_pcie_qpair_destroy(ctrlr->adminq);
	}

	nvme_ctrlr_destruct_finish(ctrlr);

	nvme_ctrlr_free_processes(ctrlr);

	nvme_pcie_ctrlr_free_bars(pctrlr);

	if (devhandle) {
		spdk_pci_device_unclaim(devhandle);
		spdk_pci_device_detach(devhandle);
	}

	spdk_free(pctrlr);

	return 0;
}

static int
nvme_pcie_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
				 int (*iter_fn)(struct nvme_request *req, void *arg),
				 void *arg)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_tracker *tr, *tmp;
	int rc;

	assert(iter_fn != NULL);

	TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) {
		assert(tr->req != NULL);

		rc = iter_fn(tr->req, arg);
		if (rc != 0) {
			return rc;
		}
	}

	return 0;
}

void
spdk_nvme_pcie_set_hotplug_filter(spdk_nvme_pcie_hotplug_filter_cb filter_cb)
{
	g_hotplug_filter_cb = filter_cb;
}
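
/*
 * Illustrative usage sketch (not part of the driver): an application can
 * restrict which hot-inserted devices the driver attaches to by registering a
 * filter before probing.  The callback type and the PCI address helpers are
 * assumed from their public headers; the 0000:81:00.0 address is only an
 * example.
 *
 *	static bool
 *	allow_only_slot_81(const struct spdk_pci_addr *addr)
 *	{
 *		struct spdk_pci_addr allowed;
 *
 *		spdk_pci_addr_parse(&allowed, "0000:81:00.0");
 *		return spdk_pci_addr_compare(addr, &allowed) == 0;
 *	}
 *
 *	spdk_nvme_pcie_set_hotplug_filter(allow_only_slot_81);
 */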

static struct spdk_pci_id nvme_pci_driver_id[] = {
	{
		.class_id = SPDK_PCI_CLASS_NVME,
		.vendor_id = SPDK_PCI_ANY_ID,
		.device_id = SPDK_PCI_ANY_ID,
		.subvendor_id = SPDK_PCI_ANY_ID,
		.subdevice_id = SPDK_PCI_ANY_ID,
	},
	{ .vendor_id = 0, /* sentinel */ },
};

SPDK_PCI_DRIVER_REGISTER(nvme, nvme_pci_driver_id,
			 SPDK_PCI_DRIVER_NEED_MAPPING | SPDK_PCI_DRIVER_WC_ACTIVATE);

const struct spdk_nvme_transport_ops pcie_ops = {
	.name = "PCIE",
	.type = SPDK_NVME_TRANSPORT_PCIE,
	.ctrlr_construct = nvme_pcie_ctrlr_construct,
	.ctrlr_scan = nvme_pcie_ctrlr_scan,
	.ctrlr_destruct = nvme_pcie_ctrlr_destruct,
	.ctrlr_enable = nvme_pcie_ctrlr_enable,

	.ctrlr_set_reg_4 = nvme_pcie_ctrlr_set_reg_4,
	.ctrlr_set_reg_8 = nvme_pcie_ctrlr_set_reg_8,
	.ctrlr_get_reg_4 = nvme_pcie_ctrlr_get_reg_4,
	.ctrlr_get_reg_8 = nvme_pcie_ctrlr_get_reg_8,

	.ctrlr_get_max_xfer_size = nvme_pcie_ctrlr_get_max_xfer_size,
	.ctrlr_get_max_sges = nvme_pcie_ctrlr_get_max_sges,

	.ctrlr_reserve_cmb = nvme_pcie_ctrlr_reserve_cmb,
	.ctrlr_map_cmb = nvme_pcie_ctrlr_map_io_cmb,
	.ctrlr_unmap_cmb = nvme_pcie_ctrlr_unmap_io_cmb,

	.ctrlr_enable_pmr = nvme_pcie_ctrlr_enable_pmr,
	.ctrlr_disable_pmr = nvme_pcie_ctrlr_disable_pmr,
	.ctrlr_map_pmr = nvme_pcie_ctrlr_map_io_pmr,
	.ctrlr_unmap_pmr = nvme_pcie_ctrlr_unmap_io_pmr,

	.ctrlr_create_io_qpair = nvme_pcie_ctrlr_create_io_qpair,
	.ctrlr_delete_io_qpair = nvme_pcie_ctrlr_delete_io_qpair,
	.ctrlr_connect_qpair = nvme_pcie_ctrlr_connect_qpair,
	.ctrlr_disconnect_qpair = nvme_pcie_ctrlr_disconnect_qpair,

	.qpair_abort_reqs = nvme_pcie_qpair_abort_reqs,
	.qpair_reset = nvme_pcie_qpair_reset,
	.qpair_submit_request = nvme_pcie_qpair_submit_request,
	.qpair_process_completions = nvme_pcie_qpair_process_completions,
	.qpair_iterate_requests = nvme_pcie_qpair_iterate_requests,
	.admin_qpair_abort_aers = nvme_pcie_admin_qpair_abort_aers,

	.poll_group_create = nvme_pcie_poll_group_create,
	.poll_group_connect_qpair = nvme_pcie_poll_group_connect_qpair,
	.poll_group_disconnect_qpair = nvme_pcie_poll_group_disconnect_qpair,
	.poll_group_add = nvme_pcie_poll_group_add,
	.poll_group_remove = nvme_pcie_poll_group_remove,
	.poll_group_process_completions = nvme_pcie_poll_group_process_completions,
	.poll_group_destroy = nvme_pcie_poll_group_destroy,
	.poll_group_get_stats = nvme_pcie_poll_group_get_stats,
	.poll_group_free_stats = nvme_pcie_poll_group_free_stats
};

SPDK_NVME_TRANSPORT_REGISTER(pcie, &pcie_ops);
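
/*
 * Illustrative application-level sketch (not part of this file), assuming the
 * spdk_nvme_ctrlr_{enable,map,unmap,disable}_pmr() wrappers declared in
 * spdk/nvme.h, which route to the ctrlr_*_pmr callbacks registered in
 * pcie_ops above:
 *
 *	size_t pmr_size = 0;
 *	void *pmr_buf;
 *
 *	if (spdk_nvme_ctrlr_enable_pmr(ctrlr) == 0) {
 *		pmr_buf = spdk_nvme_ctrlr_map_pmr(ctrlr, &pmr_size);
 *		// ... use pmr_buf/pmr_size for I/O buffers ...
 *		spdk_nvme_ctrlr_unmap_pmr(ctrlr);
 *		spdk_nvme_ctrlr_disable_pmr(ctrlr);
 *	}
 */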