/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 * Copyright (c) 2017, IBM Corporation. All rights reserved.
 * Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
 */

/*
 * NVMe over PCIe transport
 */

#include "spdk/stdinc.h"
#include "spdk/env.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "nvme_internal.h"
#include "nvme_pcie_internal.h"

struct nvme_pcie_enum_ctx {
	struct spdk_nvme_probe_ctx *probe_ctx;
	struct spdk_pci_addr pci_addr;
	bool has_pci_addr;
};

static uint16_t g_signal_lock;
static bool g_sigset = false;
static spdk_nvme_pcie_hotplug_filter_cb g_hotplug_filter_cb;

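/*
 * Hot-remove safety: each MMIO register accessor below publishes the
 * controller it is touching in g_thread_mmio_ctrlr. If the device is
 * surprise-removed and the access raises SIGBUS, this handler remaps the
 * controller's register BAR onto an anonymous mapping and fills the register
 * window with 0xFF, so the access completes with all-ones data instead of
 * crashing; the get_reg helpers treat an all-ones read as a failure.
 */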
static void
nvme_sigbus_fault_sighandler(const void *failure_addr, void *ctx)
{
	void *map_address;
	uint16_t flag = 0;

	if (!__atomic_compare_exchange_n(&g_signal_lock, &flag, 1, false, __ATOMIC_ACQUIRE,
					 __ATOMIC_RELAXED)) {
		SPDK_DEBUGLOG(nvme, "request g_signal_lock failed\n");
		return;
	}

	if (g_thread_mmio_ctrlr == NULL) {
		return;
	}

	if (!g_thread_mmio_ctrlr->is_remapped) {
		map_address = mmap((void *)g_thread_mmio_ctrlr->regs, g_thread_mmio_ctrlr->regs_size,
				   PROT_READ | PROT_WRITE,
				   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
		if (map_address == MAP_FAILED) {
			SPDK_ERRLOG("mmap failed\n");
			__atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE);
			return;
		}
		memset(map_address, 0xFF, sizeof(struct spdk_nvme_registers));
		g_thread_mmio_ctrlr->regs = (volatile struct spdk_nvme_registers *)map_address;
		g_thread_mmio_ctrlr->is_remapped = true;
	}
	__atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE);
}

static void
_nvme_pcie_event_process(struct spdk_pci_event *event, void *cb_ctx)
{
	struct spdk_nvme_transport_id trid;
	struct spdk_nvme_ctrlr *ctrlr;

	if (event->action == SPDK_UEVENT_ADD) {
		if (spdk_process_is_primary()) {
			if (g_hotplug_filter_cb == NULL || g_hotplug_filter_cb(&event->traddr)) {
				/* The enumerate interface implements the add operation */
				spdk_pci_device_allow(&event->traddr);
			}
		}
	} else if (event->action == SPDK_UEVENT_REMOVE) {
		memset(&trid, 0, sizeof(trid));
		spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);

		if (spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &event->traddr) < 0) {
			SPDK_ERRLOG("Failed to format pci address\n");
			return;
		}

		ctrlr = nvme_get_ctrlr_by_trid_unsafe(&trid);
		if (ctrlr == NULL) {
			return;
		}
		SPDK_DEBUGLOG(nvme, "remove nvme address: %s\n", trid.traddr);

		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
		nvme_ctrlr_fail(ctrlr, true);
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);

		/* get the user app to clean up and stop I/O */
		if (ctrlr->remove_cb) {
			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
			ctrlr->remove_cb(ctrlr->cb_ctx, ctrlr);
			nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		}
	}
}

static int
_nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx)
{
	struct spdk_nvme_ctrlr *ctrlr, *tmp;
	struct spdk_pci_event event;

	if (g_spdk_nvme_driver->hotplug_fd >= 0) {
		while (spdk_pci_get_event(g_spdk_nvme_driver->hotplug_fd, &event) > 0) {
			_nvme_pcie_event_process(&event, probe_ctx->cb_ctx);
		}
	}

	/* Initiate removal of physically hotremoved PCI controllers. Even after
	 * they're hotremoved from the system, SPDK might still report them via RPC.
	 */
	TAILQ_FOREACH_SAFE(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq, tmp) {
		bool do_remove = false;
		struct nvme_pcie_ctrlr *pctrlr;

		if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
			continue;
		}

		pctrlr = nvme_pcie_ctrlr(ctrlr);
		if (spdk_pci_device_is_removed(pctrlr->devhandle)) {
			do_remove = true;
		}

		if (do_remove) {
			nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
			nvme_ctrlr_fail(ctrlr, true);
			nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
			if (ctrlr->remove_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				ctrlr->remove_cb(ctrlr->cb_ctx, ctrlr);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
		}
	}
	return 0;
}

static volatile void *
nvme_pcie_reg_addr(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	return (volatile void *)((uintptr_t)pctrlr->regs + offset);
}

static int
nvme_pcie_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	assert(offset <= sizeof(struct spdk_nvme_registers) - 4);
	g_thread_mmio_ctrlr = pctrlr;
	spdk_mmio_write_4(nvme_pcie_reg_addr(ctrlr, offset), value);
	g_thread_mmio_ctrlr = NULL;
	return 0;
}

static int
nvme_pcie_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	assert(offset <= sizeof(struct spdk_nvme_registers) - 8);
	g_thread_mmio_ctrlr = pctrlr;
	spdk_mmio_write_8(nvme_pcie_reg_addr(ctrlr, offset), value);
	g_thread_mmio_ctrlr = NULL;
	return 0;
}

static int
nvme_pcie_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	assert(offset <= sizeof(struct spdk_nvme_registers) - 4);
	assert(value != NULL);
	g_thread_mmio_ctrlr = pctrlr;
	*value = spdk_mmio_read_4(nvme_pcie_reg_addr(ctrlr, offset));
	g_thread_mmio_ctrlr = NULL;
	if (~(*value) == 0) {
		return -1;
	}

	return 0;
}

static int
nvme_pcie_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	assert(offset <= sizeof(struct spdk_nvme_registers) - 8);
	assert(value != NULL);
	g_thread_mmio_ctrlr = pctrlr;
	*value = spdk_mmio_read_8(nvme_pcie_reg_addr(ctrlr, offset));
	g_thread_mmio_ctrlr = NULL;
	if (~(*value) == 0) {
		return -1;
	}

	return 0;
}

static int
nvme_pcie_ctrlr_set_asq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value)
{
	return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, asq),
					 value);
}

static int
nvme_pcie_ctrlr_set_acq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value)
{
	return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, acq),
					 value);
}

static int
nvme_pcie_ctrlr_set_aqa(struct nvme_pcie_ctrlr *pctrlr, const union spdk_nvme_aqa_register *aqa)
{
	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, aqa.raw),
					 aqa->raw);
}

static int
nvme_pcie_ctrlr_get_cmbloc(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_cmbloc_register *cmbloc)
{
	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbloc.raw),
					 &cmbloc->raw);
}

static int
nvme_pcie_ctrlr_get_cmbsz(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_cmbsz_register *cmbsz)
{
	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw),
					 &cmbsz->raw);
}

static int
nvme_pcie_ctrlr_get_pmrcap(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrcap_register *pmrcap)
{
	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw),
					 &pmrcap->raw);
}

static int
nvme_pcie_ctrlr_set_pmrctl(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrctl_register *pmrctl)
{
	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrctl.raw),
					 pmrctl->raw);
}

static int
nvme_pcie_ctrlr_get_pmrctl(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrctl_register *pmrctl)
{
	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrctl.raw),
					 &pmrctl->raw);
}

static int
nvme_pcie_ctrlr_get_pmrsts(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrsts_register *pmrsts)
{
	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrsts.raw),
					 &pmrsts->raw);
}

static int
nvme_pcie_ctrlr_set_pmrmscl(struct nvme_pcie_ctrlr *pctrlr, uint32_t value)
{
	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrmscl.raw),
					 value);
}

static int
nvme_pcie_ctrlr_set_pmrmscu(struct nvme_pcie_ctrlr *pctrlr, uint32_t value)
{
	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrmscu),
					 value);
}

static uint32_t
nvme_pcie_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
	/*
	 * For commands requiring more than 2 PRP entries, one PRP will be
	 * embedded in the command (prp1), and the rest of the PRP entries
	 * will be in a list pointed to by the command (prp2). The number
	 * of PRP entries in the list is defined by
	 * NVME_MAX_PRP_LIST_ENTRIES.
	 *
	 * Note that the max xfer size is not (MAX_ENTRIES + 1) * page_size
	 * because the first PRP entry may not be aligned on a 4KiB
	 * boundary.
	 */
	return NVME_MAX_PRP_LIST_ENTRIES * ctrlr->page_size;
}

static uint16_t
nvme_pcie_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
{
	return NVME_MAX_SGL_DESCRIPTORS;
}

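/*
 * Map the Controller Memory Buffer, if the controller exposes one. On any
 * failure, or if the CMB does not support submission queues, this quietly
 * falls back to host memory by clearing opts.use_cmb_sqs.
 */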
static void
nvme_pcie_ctrlr_map_cmb(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc;
	void *addr = NULL;
	uint32_t bir;
	union spdk_nvme_cmbsz_register cmbsz;
	union spdk_nvme_cmbloc_register cmbloc;
	uint64_t size, unit_size, offset, bar_size = 0, bar_phys_addr = 0;

	if (nvme_pcie_ctrlr_get_cmbsz(pctrlr, &cmbsz) ||
	    nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) {
		SPDK_ERRLOG("get registers failed\n");
		goto exit;
	}

	if (!cmbsz.bits.sz) {
		goto exit;
	}

	bir = cmbloc.bits.bir;
	/* Values 0, 2, 3, 4, 5 are valid for BAR */
	if (bir > 5 || bir == 1) {
		goto exit;
	}

	/* unit size for 4KB/64KB/1MB/16MB/256MB/4GB/64GB */
	unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu);
	/* controller memory buffer size in Bytes */
	size = unit_size * cmbsz.bits.sz;
	/* controller memory buffer offset from BAR in Bytes */
	offset = unit_size * cmbloc.bits.ofst;

	rc = spdk_pci_device_map_bar(pctrlr->devhandle, bir, &addr,
				     &bar_phys_addr, &bar_size);
	if ((rc != 0) || addr == NULL) {
		goto exit;
	}

	if (offset > bar_size) {
		goto exit;
	}

	if (size > bar_size - offset) {
		goto exit;
	}

	pctrlr->cmb.bar_va = addr;
	pctrlr->cmb.bar_pa = bar_phys_addr;
	pctrlr->cmb.size = size;
	pctrlr->cmb.current_offset = offset;

	if (!cmbsz.bits.sqs) {
		pctrlr->ctrlr.opts.use_cmb_sqs = false;
	}

	return;
exit:
	pctrlr->ctrlr.opts.use_cmb_sqs = false;
	return;
}

static int
nvme_pcie_ctrlr_unmap_cmb(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc = 0;
	union spdk_nvme_cmbloc_register cmbloc;
	void *addr = pctrlr->cmb.bar_va;

	if (addr) {
		if (pctrlr->cmb.mem_register_addr) {
			spdk_mem_unregister(pctrlr->cmb.mem_register_addr, pctrlr->cmb.mem_register_size);
		}

		if (nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) {
			SPDK_ERRLOG("get_cmbloc() failed\n");
			return -EIO;
		}
		rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, cmbloc.bits.bir, addr);
	}
	return rc;
}

static int
nvme_pcie_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);

	if (pctrlr->cmb.bar_va == NULL) {
		SPDK_DEBUGLOG(nvme, "CMB not available\n");
		return -ENOTSUP;
	}

	if (ctrlr->opts.use_cmb_sqs) {
		SPDK_ERRLOG("CMB is already in use for submission queues.\n");
		return -ENOTSUP;
	}

	return 0;
}

static void *
nvme_pcie_ctrlr_map_io_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	union spdk_nvme_cmbsz_register cmbsz;
	union spdk_nvme_cmbloc_register cmbloc;
	uint64_t mem_register_start, mem_register_end;
	int rc;

	if (pctrlr->cmb.mem_register_addr != NULL) {
		*size = pctrlr->cmb.mem_register_size;
		return pctrlr->cmb.mem_register_addr;
	}

	*size = 0;

	if (pctrlr->cmb.bar_va == NULL) {
		SPDK_DEBUGLOG(nvme, "CMB not available\n");
		return NULL;
	}

	if (ctrlr->opts.use_cmb_sqs) {
		SPDK_ERRLOG("CMB is already in use for submission queues.\n");
		return NULL;
	}

	if (nvme_pcie_ctrlr_get_cmbsz(pctrlr, &cmbsz) ||
	    nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) {
		SPDK_ERRLOG("get registers failed\n");
		return NULL;
	}

	/* If only SQS is supported */
	if (!(cmbsz.bits.wds || cmbsz.bits.rds)) {
		return NULL;
	}

	/* If CMB is less than 4MiB in size then abort CMB mapping */
	if (pctrlr->cmb.size < (1ULL << 22)) {
		return NULL;
	}

	mem_register_start = _2MB_PAGE((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset +
				       VALUE_2MB - 1);
	mem_register_end = _2MB_PAGE((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset +
				     pctrlr->cmb.size);

	rc = spdk_mem_register((void *)mem_register_start, mem_register_end - mem_register_start);
	if (rc) {
		SPDK_ERRLOG("spdk_mem_register() failed\n");
		return NULL;
	}

	pctrlr->cmb.mem_register_addr = (void *)mem_register_start;
	pctrlr->cmb.mem_register_size = mem_register_end - mem_register_start;

	*size = pctrlr->cmb.mem_register_size;
	return pctrlr->cmb.mem_register_addr;
}

static int
nvme_pcie_ctrlr_unmap_io_cmb(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	int rc;

	if (pctrlr->cmb.mem_register_addr == NULL) {
		return 0;
	}

	rc = spdk_mem_unregister(pctrlr->cmb.mem_register_addr, pctrlr->cmb.mem_register_size);

	if (rc == 0) {
		pctrlr->cmb.mem_register_addr = NULL;
		pctrlr->cmb.mem_register_size = 0;
	}

	return rc;
}

static void
nvme_pcie_ctrlr_map_pmr(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc;
	void *addr = NULL;
	uint32_t bir;
	union spdk_nvme_pmrcap_register pmrcap;
	uint64_t bar_size = 0, bar_phys_addr = 0;

	if (!pctrlr->regs->cap.bits.pmrs) {
		return;
	}

	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
		SPDK_ERRLOG("get registers failed\n");
		return;
	}

	bir = pmrcap.bits.bir;
	/* Values 2, 3, 4, 5 are valid for BAR */
	if (bir > 5 || bir < 2) {
		SPDK_ERRLOG("invalid base indicator register value\n");
		return;
	}

	rc = spdk_pci_device_map_bar(pctrlr->devhandle, bir, &addr, &bar_phys_addr, &bar_size);
	if ((rc != 0) || addr == NULL) {
		SPDK_ERRLOG("could not map the bar %d\n", bir);
		return;
	}

	if (pmrcap.bits.cmss) {
		uint32_t pmrmscl, pmrmscu, cmse = 1;
		union spdk_nvme_pmrsts_register pmrsts;

		/* Enable Controller Memory Space */
		pmrmscl = (uint32_t)((bar_phys_addr & 0xFFFFF000ULL) | (cmse << 1));
		pmrmscu = (uint32_t)((bar_phys_addr >> 32ULL) & 0xFFFFFFFFULL);

		if (nvme_pcie_ctrlr_set_pmrmscu(pctrlr, pmrmscu)) {
			SPDK_ERRLOG("set_pmrmscu() failed\n");
			spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr);
			return;
		}

		if (nvme_pcie_ctrlr_set_pmrmscl(pctrlr, pmrmscl)) {
			SPDK_ERRLOG("set_pmrmscl() failed\n");
			spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr);
			return;
		}

		if (nvme_pcie_ctrlr_get_pmrsts(pctrlr, &pmrsts)) {
			SPDK_ERRLOG("get pmrsts failed\n");
			spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr);
			return;
		}

		if (pmrsts.bits.cbai) {
			SPDK_ERRLOG("Controller Memory Space Enable Failure\n");
			SPDK_ERRLOG("CBA Invalid - Host Addresses cannot reference PMR\n");
		} else {
			SPDK_DEBUGLOG(nvme, "Controller Memory Space Enable Success\n");
			SPDK_DEBUGLOG(nvme, "Host Addresses can reference PMR\n");
		}
	}

	pctrlr->pmr.bar_va = addr;
	pctrlr->pmr.bar_pa = bar_phys_addr;
	pctrlr->pmr.size = pctrlr->ctrlr.pmr_size = bar_size;
}

static int
nvme_pcie_ctrlr_unmap_pmr(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc = 0;
	union spdk_nvme_pmrcap_register pmrcap;
	void *addr = pctrlr->pmr.bar_va;

	if (addr == NULL) {
		return rc;
	}

	if (pctrlr->pmr.mem_register_addr) {
		spdk_mem_unregister(pctrlr->pmr.mem_register_addr, pctrlr->pmr.mem_register_size);
	}

	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
		SPDK_ERRLOG("get_pmrcap() failed\n");
		return -EIO;
	}

	if (pmrcap.bits.cmss) {
		if (nvme_pcie_ctrlr_set_pmrmscu(pctrlr, 0)) {
			SPDK_ERRLOG("set_pmrmscu() failed\n");
		}

		if (nvme_pcie_ctrlr_set_pmrmscl(pctrlr, 0)) {
			SPDK_ERRLOG("set_pmrmscl() failed\n");
		}
	}

	rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, pmrcap.bits.bir, addr);

	return rc;
}

static int
nvme_pcie_ctrlr_config_pmr(struct spdk_nvme_ctrlr *ctrlr, bool enable)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	union spdk_nvme_pmrcap_register pmrcap;
	union spdk_nvme_pmrctl_register pmrctl;
	union spdk_nvme_pmrsts_register pmrsts;
	uint8_t pmrto, pmrtu;
	uint64_t timeout_in_ms, ticks_per_ms, timeout_in_ticks, now_ticks;

	if (!pctrlr->regs->cap.bits.pmrs) {
		SPDK_ERRLOG("PMR is not supported by the controller\n");
		return -ENOTSUP;
	}

	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
		SPDK_ERRLOG("get registers failed\n");
		return -EIO;
	}

	pmrto = pmrcap.bits.pmrto;
	pmrtu = pmrcap.bits.pmrtu;

	if (pmrtu > 1) {
		SPDK_ERRLOG("PMR Time Units Invalid\n");
		return -EINVAL;
	}

	ticks_per_ms = spdk_get_ticks_hz() / 1000;
	timeout_in_ms = pmrto * (pmrtu ? (60 * 1000) : 500);
	timeout_in_ticks = timeout_in_ms * ticks_per_ms;

	if (nvme_pcie_ctrlr_get_pmrctl(pctrlr, &pmrctl)) {
		SPDK_ERRLOG("get pmrctl failed\n");
		return -EIO;
	}

	if (enable && pmrctl.bits.en != 0) {
		SPDK_ERRLOG("PMR is already enabled\n");
		return -EINVAL;
	} else if (!enable && pmrctl.bits.en != 1) {
		SPDK_ERRLOG("PMR is already disabled\n");
		return -EINVAL;
	}

	pmrctl.bits.en = enable;

	if (nvme_pcie_ctrlr_set_pmrctl(pctrlr, &pmrctl)) {
		SPDK_ERRLOG("set pmrctl failed\n");
		return -EIO;
	}

	now_ticks = spdk_get_ticks();

	/* Poll PMRSTS.NRDY until it reflects the new state (it clears once the
	 * PMR becomes ready after an enable and sets after a disable), bounded
	 * by the PMRTO timeout computed above.
	 */
	do {
		if (nvme_pcie_ctrlr_get_pmrsts(pctrlr, &pmrsts)) {
			SPDK_ERRLOG("get pmrsts failed\n");
			return -EIO;
		}

		if (pmrsts.bits.nrdy == enable &&
		    spdk_get_ticks() > now_ticks + timeout_in_ticks) {
			SPDK_ERRLOG("PMR Enable - Timed Out\n");
			return -ETIMEDOUT;
		}
	} while (pmrsts.bits.nrdy == enable);

	SPDK_DEBUGLOG(nvme, "PMR %s\n", enable ? "Enabled" : "Disabled");

	return 0;
}

static int
nvme_pcie_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr)
{
	return nvme_pcie_ctrlr_config_pmr(ctrlr, true);
}

static int
nvme_pcie_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr)
{
	return nvme_pcie_ctrlr_config_pmr(ctrlr, false);
}

static void *
nvme_pcie_ctrlr_map_io_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	union spdk_nvme_pmrcap_register pmrcap;
	uint64_t mem_register_start, mem_register_end;
	int rc;

	if (!pctrlr->regs->cap.bits.pmrs) {
		SPDK_ERRLOG("PMR is not supported by the controller\n");
		return NULL;
	}

	if (pctrlr->pmr.mem_register_addr != NULL) {
		*size = pctrlr->pmr.mem_register_size;
		return pctrlr->pmr.mem_register_addr;
	}

	*size = 0;

	if (pctrlr->pmr.bar_va == NULL) {
		SPDK_DEBUGLOG(nvme, "PMR not available\n");
		return NULL;
	}

	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
		SPDK_ERRLOG("get registers failed\n");
		return NULL;
	}

	/* Check if WDS / RDS is supported */
	if (!(pmrcap.bits.wds || pmrcap.bits.rds)) {
		return NULL;
	}

	/* If PMR is less than 4MiB in size then abort PMR mapping */
	if (pctrlr->pmr.size < (1ULL << 22)) {
		return NULL;
	}

	mem_register_start = _2MB_PAGE((uintptr_t)pctrlr->pmr.bar_va + VALUE_2MB - 1);
	mem_register_end = _2MB_PAGE((uintptr_t)pctrlr->pmr.bar_va + pctrlr->pmr.size);

	rc = spdk_mem_register((void *)mem_register_start, mem_register_end - mem_register_start);
	if (rc) {
		SPDK_ERRLOG("spdk_mem_register() failed\n");
		return NULL;
	}

	pctrlr->pmr.mem_register_addr = (void *)mem_register_start;
	pctrlr->pmr.mem_register_size = mem_register_end - mem_register_start;

	*size = pctrlr->pmr.mem_register_size;
	return pctrlr->pmr.mem_register_addr;
}

static int
nvme_pcie_ctrlr_unmap_io_pmr(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	int rc;

	if (pctrlr->pmr.mem_register_addr == NULL) {
		return -ENXIO;
	}

	rc = spdk_mem_unregister(pctrlr->pmr.mem_register_addr, pctrlr->pmr.mem_register_size);

	if (rc == 0) {
		pctrlr->pmr.mem_register_addr = NULL;
		pctrlr->pmr.mem_register_size = 0;
	}

	return rc;
}

static int
nvme_pcie_ctrlr_allocate_bars(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc;
	void *addr = NULL;
	uint64_t phys_addr = 0, size = 0;

	rc = spdk_pci_device_map_bar(pctrlr->devhandle, 0, &addr,
				     &phys_addr, &size);

	if ((addr == NULL) || (rc != 0)) {
		SPDK_ERRLOG("nvme_pcicfg_map_bar failed with rc %d or bar %p\n",
			    rc, addr);
		return -1;
	}

	pctrlr->regs = (volatile struct spdk_nvme_registers *)addr;
	pctrlr->regs_size = size;
	pctrlr->doorbell_base = (volatile uint32_t *)&pctrlr->regs->doorbell[0].sq_tdbl;
	nvme_pcie_ctrlr_map_cmb(pctrlr);
	nvme_pcie_ctrlr_map_pmr(pctrlr);

	return 0;
}

static int
nvme_pcie_ctrlr_free_bars(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc = 0;
	void *addr = (void *)pctrlr->regs;

	if (pctrlr->ctrlr.is_removed) {
		return rc;
	}

	rc = nvme_pcie_ctrlr_unmap_pmr(pctrlr);
	if (rc != 0) {
		SPDK_ERRLOG("nvme_ctrlr_unmap_pmr failed with error code %d\n", rc);
		return -1;
	}

	rc = nvme_pcie_ctrlr_unmap_cmb(pctrlr);
	if (rc != 0) {
		SPDK_ERRLOG("nvme_ctrlr_unmap_cmb failed with error code %d\n", rc);
		return -1;
	}

	if (addr && spdk_process_is_primary()) {
		/* NOTE: addr may have been remapped here. We're relying on DPDK to call
		 * munmap internally.
		 */
		rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, 0, addr);
	}
	return rc;
}

/* This function must only be called while holding g_spdk_nvme_driver->lock */
static int
pcie_nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
{
	struct spdk_nvme_transport_id trid = {};
	struct nvme_pcie_enum_ctx *enum_ctx = ctx;
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_pci_addr pci_addr;

	pci_addr = spdk_pci_device_get_addr(pci_dev);

	spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);
	spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr);

	ctrlr = nvme_get_ctrlr_by_trid_unsafe(&trid);
	if (!spdk_process_is_primary()) {
		if (!ctrlr) {
			SPDK_ERRLOG("Controller must be constructed in the primary process first.\n");
			return -1;
		}

		return nvme_ctrlr_add_process(ctrlr, pci_dev);
	}

	/* check whether the user passed a specific pci_addr */
	if (enum_ctx->has_pci_addr &&
	    (spdk_pci_addr_compare(&pci_addr, &enum_ctx->pci_addr) != 0)) {
		return 1;
	}

	return nvme_ctrlr_probe(&trid, enum_ctx->probe_ctx, pci_dev);
}

static int
nvme_pcie_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx,
		     bool direct_connect)
{
	struct nvme_pcie_enum_ctx enum_ctx = {};

	enum_ctx.probe_ctx = probe_ctx;

	if (strlen(probe_ctx->trid.traddr) != 0) {
		if (spdk_pci_addr_parse(&enum_ctx.pci_addr, probe_ctx->trid.traddr)) {
			return -1;
		}
		enum_ctx.has_pci_addr = true;
	}

	/* Only the primary process can monitor hotplug. */
	if (spdk_process_is_primary()) {
		_nvme_pcie_hotplug_monitor(probe_ctx);
	}

	if (enum_ctx.has_pci_addr == false) {
		return spdk_pci_enumerate(spdk_pci_nvme_get_driver(),
					  pcie_nvme_enum_cb, &enum_ctx);
	} else {
		return spdk_pci_device_attach(spdk_pci_nvme_get_driver(),
					      pcie_nvme_enum_cb, &enum_ctx, &enum_ctx.pci_addr);
	}
}

static struct spdk_nvme_ctrlr *
nvme_pcie_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
			  const struct spdk_nvme_ctrlr_opts *opts,
			  void *devhandle)
{
	struct spdk_pci_device *pci_dev = devhandle;
	struct nvme_pcie_ctrlr *pctrlr;
	union spdk_nvme_cap_register cap;
	uint16_t cmd_reg;
	int rc;
	struct spdk_pci_id pci_id;

	rc = spdk_pci_device_claim(pci_dev);
	if (rc < 0) {
		SPDK_ERRLOG("could not claim device %s (%s)\n",
			    trid->traddr, spdk_strerror(-rc));
		return NULL;
	}

	pctrlr = spdk_zmalloc(sizeof(struct nvme_pcie_ctrlr), 64, NULL,
			      SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
	if (pctrlr == NULL) {
		spdk_pci_device_unclaim(pci_dev);
		SPDK_ERRLOG("could not allocate ctrlr\n");
		return NULL;
	}

	pctrlr->is_remapped = false;
	pctrlr->ctrlr.is_removed = false;
	pctrlr->devhandle = devhandle;
	pctrlr->ctrlr.opts = *opts;
	pctrlr->ctrlr.trid = *trid;
	pctrlr->ctrlr.opts.admin_queue_size = spdk_max(pctrlr->ctrlr.opts.admin_queue_size,
					      NVME_PCIE_MIN_ADMIN_QUEUE_SIZE);
	pci_id = spdk_pci_device_get_id(pci_dev);
	pctrlr->ctrlr.quirks = nvme_get_quirks(&pci_id);

	rc = nvme_ctrlr_construct(&pctrlr->ctrlr);
	if (rc != 0) {
		spdk_pci_device_unclaim(pci_dev);
		spdk_free(pctrlr);
		return NULL;
	}

	rc = nvme_pcie_ctrlr_allocate_bars(pctrlr);
	if (rc != 0) {
		spdk_pci_device_unclaim(pci_dev);
		spdk_free(pctrlr);
		return NULL;
	}

	/* Enable PCI bus master (bit 2) and disable INTx interrupts (bit 10)
	 * in the PCI command register (config offset 4).
	 */
	spdk_pci_device_cfg_read16(pci_dev, &cmd_reg, 4);
	cmd_reg |= 0x404;
	spdk_pci_device_cfg_write16(pci_dev, cmd_reg, 4);

	if (nvme_ctrlr_get_cap(&pctrlr->ctrlr, &cap)) {
		SPDK_ERRLOG("get_cap() failed\n");
		spdk_pci_device_unclaim(pci_dev);
		spdk_free(pctrlr);
		return NULL;
	}

	/* Doorbell stride is 2 ^ (dstrd + 2),
	 * but we want multiples of 4, so drop the + 2 */
	pctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd;

	rc = nvme_pcie_ctrlr_construct_admin_qpair(&pctrlr->ctrlr, pctrlr->ctrlr.opts.admin_queue_size);
	if (rc != 0) {
		nvme_ctrlr_destruct(&pctrlr->ctrlr);
		return NULL;
	}

	/* Construct the primary process properties */
	rc = nvme_ctrlr_add_process(&pctrlr->ctrlr, pci_dev);
	if (rc != 0) {
		nvme_ctrlr_destruct(&pctrlr->ctrlr);
		return NULL;
	}

	if (g_sigset != true) {
		spdk_pci_register_error_handler(nvme_sigbus_fault_sighandler,
						NULL);
		g_sigset = true;
	}

	return &pctrlr->ctrlr;
}

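/*
 * Program the admin queue registers: ASQ/ACQ receive the admin qpair's DMA
 * addresses and AQA receives its size (both AQA fields are 0-based).
 */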
static int
nvme_pcie_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	struct nvme_pcie_qpair *padminq = nvme_pcie_qpair(ctrlr->adminq);
	union spdk_nvme_aqa_register aqa;

	if (nvme_pcie_ctrlr_set_asq(pctrlr, padminq->cmd_bus_addr)) {
		SPDK_ERRLOG("set_asq() failed\n");
		return -EIO;
	}

	if (nvme_pcie_ctrlr_set_acq(pctrlr, padminq->cpl_bus_addr)) {
		SPDK_ERRLOG("set_acq() failed\n");
		return -EIO;
	}

	aqa.raw = 0;
	/* acqs and asqs are 0-based. */
	aqa.bits.acqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1;
	aqa.bits.asqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1;

	if (nvme_pcie_ctrlr_set_aqa(pctrlr, &aqa)) {
		SPDK_ERRLOG("set_aqa() failed\n");
		return -EIO;
	}

	return 0;
}

static int
nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	struct spdk_pci_device *devhandle = nvme_ctrlr_proc_get_devhandle(ctrlr);

	if (ctrlr->adminq) {
		nvme_pcie_qpair_destroy(ctrlr->adminq);
	}

	nvme_ctrlr_destruct_finish(ctrlr);

	nvme_ctrlr_free_processes(ctrlr);

	nvme_pcie_ctrlr_free_bars(pctrlr);

	if (devhandle) {
		spdk_pci_device_unclaim(devhandle);
		spdk_pci_device_detach(devhandle);
	}

	spdk_free(pctrlr);

	return 0;
}

static int
nvme_pcie_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
				 int (*iter_fn)(struct nvme_request *req, void *arg),
				 void *arg)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_tracker *tr, *tmp;
	int rc;

	assert(iter_fn != NULL);

	TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) {
		assert(tr->req != NULL);

		rc = iter_fn(tr->req, arg);
		if (rc != 0) {
			return rc;
		}
	}

	return 0;
}

void
spdk_nvme_pcie_set_hotplug_filter(spdk_nvme_pcie_hotplug_filter_cb filter_cb)
{
	g_hotplug_filter_cb = filter_cb;
}

static struct spdk_pci_id nvme_pci_driver_id[] = {
	{
		.class_id = SPDK_PCI_CLASS_NVME,
		.vendor_id = SPDK_PCI_ANY_ID,
		.device_id = SPDK_PCI_ANY_ID,
		.subvendor_id = SPDK_PCI_ANY_ID,
		.subdevice_id = SPDK_PCI_ANY_ID,
	},
	{ .vendor_id = 0, /* sentinel */ },
};

SPDK_PCI_DRIVER_REGISTER(nvme, nvme_pci_driver_id,
			 SPDK_PCI_DRIVER_NEED_MAPPING | SPDK_PCI_DRIVER_WC_ACTIVATE);

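/*
 * Transport operations registered with the generic NVMe driver core. The
 * qpair and poll-group callbacks referenced here are not defined in this
 * file; they are provided by the shared PCIe transport code declared in
 * nvme_pcie_internal.h.
 */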
const struct spdk_nvme_transport_ops pcie_ops = {
	.name = "PCIE",
	.type = SPDK_NVME_TRANSPORT_PCIE,
	.ctrlr_construct = nvme_pcie_ctrlr_construct,
	.ctrlr_scan = nvme_pcie_ctrlr_scan,
	.ctrlr_destruct = nvme_pcie_ctrlr_destruct,
	.ctrlr_enable = nvme_pcie_ctrlr_enable,

	.ctrlr_set_reg_4 = nvme_pcie_ctrlr_set_reg_4,
	.ctrlr_set_reg_8 = nvme_pcie_ctrlr_set_reg_8,
	.ctrlr_get_reg_4 = nvme_pcie_ctrlr_get_reg_4,
	.ctrlr_get_reg_8 = nvme_pcie_ctrlr_get_reg_8,

	.ctrlr_get_max_xfer_size = nvme_pcie_ctrlr_get_max_xfer_size,
	.ctrlr_get_max_sges = nvme_pcie_ctrlr_get_max_sges,

	.ctrlr_reserve_cmb = nvme_pcie_ctrlr_reserve_cmb,
	.ctrlr_map_cmb = nvme_pcie_ctrlr_map_io_cmb,
	.ctrlr_unmap_cmb = nvme_pcie_ctrlr_unmap_io_cmb,

	.ctrlr_enable_pmr = nvme_pcie_ctrlr_enable_pmr,
	.ctrlr_disable_pmr = nvme_pcie_ctrlr_disable_pmr,
	.ctrlr_map_pmr = nvme_pcie_ctrlr_map_io_pmr,
	.ctrlr_unmap_pmr = nvme_pcie_ctrlr_unmap_io_pmr,

	.ctrlr_create_io_qpair = nvme_pcie_ctrlr_create_io_qpair,
	.ctrlr_delete_io_qpair = nvme_pcie_ctrlr_delete_io_qpair,
	.ctrlr_connect_qpair = nvme_pcie_ctrlr_connect_qpair,
	.ctrlr_disconnect_qpair = nvme_pcie_ctrlr_disconnect_qpair,

	.qpair_abort_reqs = nvme_pcie_qpair_abort_reqs,
	.qpair_reset = nvme_pcie_qpair_reset,
	.qpair_submit_request = nvme_pcie_qpair_submit_request,
	.qpair_process_completions = nvme_pcie_qpair_process_completions,
	.qpair_iterate_requests = nvme_pcie_qpair_iterate_requests,
	.admin_qpair_abort_aers = nvme_pcie_admin_qpair_abort_aers,

	.poll_group_create = nvme_pcie_poll_group_create,
	.poll_group_connect_qpair = nvme_pcie_poll_group_connect_qpair,
	.poll_group_disconnect_qpair = nvme_pcie_poll_group_disconnect_qpair,
	.poll_group_add = nvme_pcie_poll_group_add,
	.poll_group_remove = nvme_pcie_poll_group_remove,
	.poll_group_process_completions = nvme_pcie_poll_group_process_completions,
	.poll_group_destroy = nvme_pcie_poll_group_destroy,
	.poll_group_get_stats = nvme_pcie_poll_group_get_stats,
	.poll_group_free_stats = nvme_pcie_poll_group_free_stats
};

SPDK_NVME_TRANSPORT_REGISTER(pcie, &pcie_ops);