/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "ioat_internal.h"

#include "spdk/env.h"
#include "spdk/util.h"

#include "spdk_internal/log.h"

#include <pthread.h>

struct ioat_driver {
	pthread_mutex_t			lock;
	TAILQ_HEAD(, spdk_ioat_chan)	attached_chans;
};

static struct ioat_driver g_ioat_driver = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.attached_chans = TAILQ_HEAD_INITIALIZER(g_ioat_driver.attached_chans),
};

static uint64_t
ioat_get_chansts(struct spdk_ioat_chan *ioat)
{
	return spdk_mmio_read_8(&ioat->regs->chansts);
}

static void
ioat_write_chancmp(struct spdk_ioat_chan *ioat, uint64_t addr)
{
	spdk_mmio_write_8(&ioat->regs->chancmp, addr);
}

static void
ioat_write_chainaddr(struct spdk_ioat_chan *ioat, uint64_t addr)
{
	spdk_mmio_write_8(&ioat->regs->chainaddr, addr);
}

static inline void
ioat_suspend(struct spdk_ioat_chan *ioat)
{
	ioat->regs->chancmd = SPDK_IOAT_CHANCMD_SUSPEND;
}

static inline void
ioat_reset(struct spdk_ioat_chan *ioat)
{
	ioat->regs->chancmd = SPDK_IOAT_CHANCMD_RESET;
}

static inline uint32_t
ioat_reset_pending(struct spdk_ioat_chan *ioat)
{
	uint8_t cmd;

	cmd = ioat->regs->chancmd;
	return (cmd & SPDK_IOAT_CHANCMD_RESET) == SPDK_IOAT_CHANCMD_RESET;
}

static int
ioat_map_pci_bar(struct spdk_ioat_chan *ioat)
{
	int regs_bar, rc;
	void *addr;
	uint64_t phys_addr, size;

	regs_bar = 0;
	rc = spdk_pci_device_map_bar(ioat->device, regs_bar, &addr, &phys_addr, &size);
	if (rc != 0 || addr == NULL) {
		SPDK_ERRLOG("spdk_pci_device_map_bar failed with error code %d\n",
			    rc);
		return -1;
	}

	ioat->regs = (volatile struct spdk_ioat_registers *)addr;

	return 0;
}

static int
ioat_unmap_pci_bar(struct spdk_ioat_chan *ioat)
{
	int rc = 0;
	void *addr = (void *)ioat->regs;

	if (addr) {
		rc = spdk_pci_device_unmap_bar(ioat->device, 0, addr);
	}
	return rc;
}

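/*
 * Descriptor ring accounting: head and tail are free-running counters that are
 * only masked down to a ring slot when a descriptor is actually accessed.
 * The ring size is a power of two (1 << ring_size_order), so masking with
 * (ring size - 1) yields the slot index.  One slot is always left unused so a
 * full ring can be distinguished from an empty one; for example, with
 * ring_size_order == 7 the ring holds 128 descriptors, of which 127 are usable.
 */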
static inline uint32_t
ioat_get_active(struct spdk_ioat_chan *ioat)
{
	return (ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1);
}

static inline uint32_t
ioat_get_ring_space(struct spdk_ioat_chan *ioat)
{
	return (1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1;
}

static uint32_t
ioat_get_ring_index(struct spdk_ioat_chan *ioat, uint32_t index)
{
	return index & ((1 << ioat->ring_size_order) - 1);
}

static void
ioat_get_ring_entry(struct spdk_ioat_chan *ioat, uint32_t index,
		    struct ioat_descriptor **desc,
		    union spdk_ioat_hw_desc **hw_desc)
{
	uint32_t i = ioat_get_ring_index(ioat, index);

	*desc = &ioat->ring[i];
	*hw_desc = &ioat->hw_ring[i];
}

static uint64_t
ioat_get_desc_phys_addr(struct spdk_ioat_chan *ioat, uint32_t index)
{
	return ioat->hw_ring_phys_addr +
	       ioat_get_ring_index(ioat, index) * sizeof(union spdk_ioat_hw_desc);
}

/*
 * A prep function fills in a descriptor and advances head; nothing is handed
 * to the hardware until ioat_flush() writes the new head into DMACOUNT.
 */
static void
ioat_submit_single(struct spdk_ioat_chan *ioat)
{
	ioat->head++;
}

static void
ioat_flush(struct spdk_ioat_chan *ioat)
{
	ioat->regs->dmacount = (uint16_t)ioat->head;
}

static struct ioat_descriptor *
ioat_prep_null(struct spdk_ioat_chan *ioat)
{
	struct ioat_descriptor *desc;
	union spdk_ioat_hw_desc *hw_desc;

	if (ioat_get_ring_space(ioat) < 1) {
		return NULL;
	}

	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);

	hw_desc->dma.u.control_raw = 0;
	hw_desc->dma.u.control.op = SPDK_IOAT_OP_COPY;
	hw_desc->dma.u.control.null = 1;
	hw_desc->dma.u.control.completion_update = 1;

	hw_desc->dma.size = 8;
	hw_desc->dma.src_addr = 0;
	hw_desc->dma.dest_addr = 0;

	desc->callback_fn = NULL;
	desc->callback_arg = NULL;

	ioat_submit_single(ioat);

	return desc;
}

static struct ioat_descriptor *
ioat_prep_copy(struct spdk_ioat_chan *ioat, uint64_t dst,
	       uint64_t src, uint32_t len)
{
	struct ioat_descriptor *desc;
	union spdk_ioat_hw_desc *hw_desc;

	assert(len <= ioat->max_xfer_size);

	if (ioat_get_ring_space(ioat) < 1) {
		return NULL;
	}

	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);

	hw_desc->dma.u.control_raw = 0;
	hw_desc->dma.u.control.op = SPDK_IOAT_OP_COPY;
	hw_desc->dma.u.control.completion_update = 1;

	hw_desc->dma.size = len;
	hw_desc->dma.src_addr = src;
	hw_desc->dma.dest_addr = dst;

	desc->callback_fn = NULL;
	desc->callback_arg = NULL;

	ioat_submit_single(ioat);

	return desc;
}

static struct ioat_descriptor *
ioat_prep_fill(struct spdk_ioat_chan *ioat, uint64_t dst,
	       uint64_t fill_pattern, uint32_t len)
{
	struct ioat_descriptor *desc;
	union spdk_ioat_hw_desc *hw_desc;

	assert(len <= ioat->max_xfer_size);

	if (ioat_get_ring_space(ioat) < 1) {
		return NULL;
	}

	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);

	hw_desc->fill.u.control_raw = 0;
	hw_desc->fill.u.control.op = SPDK_IOAT_OP_FILL;
	hw_desc->fill.u.control.completion_update = 1;

	hw_desc->fill.size = len;
	hw_desc->fill.src_data = fill_pattern;
	hw_desc->fill.dest_addr = dst;

	desc->callback_fn = NULL;
	desc->callback_arg = NULL;

	ioat_submit_single(ioat);

	return desc;
}

static int
ioat_reset_hw(struct spdk_ioat_chan *ioat)
{
	int timeout;
	uint64_t status;
	uint32_t chanerr;

	status = ioat_get_chansts(ioat);
	if (is_ioat_active(status) || is_ioat_idle(status)) {
		ioat_suspend(ioat);
	}

	timeout = 20; /* in milliseconds */
	while (is_ioat_active(status) || is_ioat_idle(status)) {
		spdk_delay_us(1000);
		timeout--;
		if (timeout == 0) {
			SPDK_ERRLOG("timed out waiting for suspend\n");
			return -1;
		}
		status = ioat_get_chansts(ioat);
	}

	/*
	 * Clear any outstanding errors.
	 * CHANERR is write-1-to-clear, so write the current CHANERR bits back to reset everything.
	 */
	chanerr = ioat->regs->chanerr;
	ioat->regs->chanerr = chanerr;

	ioat_reset(ioat);

	timeout = 20;
	while (ioat_reset_pending(ioat)) {
		spdk_delay_us(1000);
		timeout--;
		if (timeout == 0) {
			SPDK_ERRLOG("timed out waiting for reset\n");
			return -1;
		}
	}

	return 0;
}

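/*
 * Completion processing: every descriptor is submitted with completion_update
 * set, so the hardware writes its channel status, including the physical
 * address of the most recently completed descriptor, to the address programmed
 * in CHANCMP (ioat->comp_update).  ioat_process_channel_events() polls that
 * value and walks tail forward, invoking callbacks, until it reaches the
 * completed descriptor.
 */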
static int
ioat_process_channel_events(struct spdk_ioat_chan *ioat)
{
	struct ioat_descriptor *desc;
	uint64_t status, completed_descriptor, hw_desc_phys_addr;
	uint32_t tail;

	if (ioat->head == ioat->tail) {
		return 0;
	}

	status = *ioat->comp_update;
	completed_descriptor = status & SPDK_IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;

	if (is_ioat_halted(status)) {
		SPDK_ERRLOG("Channel halted (%x)\n", ioat->regs->chanerr);
		return -1;
	}

	if (completed_descriptor == ioat->last_seen) {
		return 0;
	}

	do {
		tail = ioat_get_ring_index(ioat, ioat->tail);
		desc = &ioat->ring[tail];

		if (desc->callback_fn) {
			desc->callback_fn(desc->callback_arg);
		}

		hw_desc_phys_addr = ioat_get_desc_phys_addr(ioat, ioat->tail);
		ioat->tail++;
	} while (hw_desc_phys_addr != completed_descriptor);

	ioat->last_seen = hw_desc_phys_addr;
	return 0;
}

static int
ioat_channel_destruct(struct spdk_ioat_chan *ioat)
{
	ioat_unmap_pci_bar(ioat);

	if (ioat->ring) {
		free(ioat->ring);
	}

	if (ioat->hw_ring) {
		spdk_free(ioat->hw_ring);
	}

	if (ioat->comp_update) {
		spdk_free((void *)ioat->comp_update);
		ioat->comp_update = NULL;
	}

	return 0;
}

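/*
 * Bring up a single I/OAT channel: map the MMIO registers, verify the device
 * is at least version 3.0, read the copy/fill capabilities and maximum
 * transfer size, allocate the completion-update address and the descriptor
 * rings, reset the hardware, program CHANCMP and CHAINADDR, and finally issue
 * a null descriptor to confirm the channel reaches the idle state.
 */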
static int
ioat_channel_start(struct spdk_ioat_chan *ioat)
{
	uint8_t xfercap, version;
	uint64_t status;
	int i, num_descriptors;
	uint64_t comp_update_bus_addr = 0;

	if (ioat_map_pci_bar(ioat) != 0) {
		SPDK_ERRLOG("ioat_map_pci_bar() failed\n");
		return -1;
	}

	version = ioat->regs->cbver;
	if (version < SPDK_IOAT_VER_3_0) {
		SPDK_ERRLOG("unsupported IOAT version %u.%u\n",
			    version >> 4, version & 0xF);
		return -1;
	}

	/* Always support DMA copy */
	ioat->dma_capabilities = SPDK_IOAT_ENGINE_COPY_SUPPORTED;
	if (ioat->regs->dmacapability & SPDK_IOAT_DMACAP_BFILL) {
		ioat->dma_capabilities |= SPDK_IOAT_ENGINE_FILL_SUPPORTED;
	}
	xfercap = ioat->regs->xfercap;

	/* Only bits [4:0] are valid. */
	xfercap &= 0x1f;
	if (xfercap == 0) {
		/* 0 means 4 GB max transfer size. */
		ioat->max_xfer_size = 1ULL << 32;
	} else if (xfercap < 12) {
		/* XFERCAP must be at least 12 (4 KB) according to the spec. */
		SPDK_ERRLOG("invalid XFERCAP value %u\n", xfercap);
		return -1;
	} else {
		ioat->max_xfer_size = 1U << xfercap;
	}

	ioat->comp_update = spdk_zmalloc(sizeof(*ioat->comp_update), SPDK_IOAT_CHANCMP_ALIGN,
					 &comp_update_bus_addr);
	if (ioat->comp_update == NULL) {
		return -1;
	}

	ioat->ring_size_order = IOAT_DEFAULT_ORDER;

	num_descriptors = 1 << ioat->ring_size_order;

	ioat->ring = calloc(num_descriptors, sizeof(struct ioat_descriptor));
	if (!ioat->ring) {
		return -1;
	}

	ioat->hw_ring = spdk_zmalloc(num_descriptors * sizeof(union spdk_ioat_hw_desc), 64,
				     &ioat->hw_ring_phys_addr);
	if (!ioat->hw_ring) {
		return -1;
	}

	/* Link each hardware descriptor to the next one; the ring index math
	 * wraps the last descriptor back to the first, forming a circular chain. */
	for (i = 0; i < num_descriptors; i++) {
		ioat->hw_ring[i].generic.next = ioat_get_desc_phys_addr(ioat, i + 1);
	}

	ioat->head = 0;
	ioat->tail = 0;
	ioat->last_seen = 0;

	ioat_reset_hw(ioat);

	ioat->regs->chanctrl = SPDK_IOAT_CHANCTRL_ANY_ERR_ABORT_EN;
	ioat_write_chancmp(ioat, comp_update_bus_addr);
	ioat_write_chainaddr(ioat, ioat->hw_ring_phys_addr);

	ioat_prep_null(ioat);
	ioat_flush(ioat);

	i = 100;
	while (i-- > 0) {
		spdk_delay_us(100);
		status = ioat_get_chansts(ioat);
		if (is_ioat_idle(status)) {
			break;
		}
	}

	if (is_ioat_idle(status)) {
		ioat_process_channel_events(ioat);
	} else {
		SPDK_ERRLOG("could not start channel: status = %p\n error = %#x\n",
			    (void *)status, ioat->regs->chanerr);
		return -1;
	}

	return 0;
}

/* Caller must hold g_ioat_driver.lock */
static struct spdk_ioat_chan *
ioat_attach(void *device)
{
	struct spdk_ioat_chan *ioat;
	uint32_t cmd_reg;

	ioat = calloc(1, sizeof(struct spdk_ioat_chan));
	if (ioat == NULL) {
		return NULL;
	}

	/* Enable PCI busmaster by setting bit 2 of the PCI command register
	 * (config space offset 4) so the device can initiate DMA. */
	spdk_pci_device_cfg_read32(device, &cmd_reg, 4);
	cmd_reg |= 0x4;
	spdk_pci_device_cfg_write32(device, cmd_reg, 4);

	ioat->device = device;

	if (ioat_channel_start(ioat) != 0) {
		ioat_channel_destruct(ioat);
		free(ioat);
		return NULL;
	}

	return ioat;
}

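/*
 * Device enumeration is callback driven: spdk_ioat_probe() passes an
 * ioat_enum_ctx through spdk_pci_ioat_enumerate(), which invokes
 * ioat_enum_cb() once per I/OAT PCI function.  The user's probe_cb decides
 * whether to claim a device, and attach_cb is called after the channel has
 * been initialized.
 */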
struct ioat_enum_ctx {
	spdk_ioat_probe_cb probe_cb;
	spdk_ioat_attach_cb attach_cb;
	void *cb_ctx;
};

/* This function must only be called while holding g_ioat_driver.lock */
static int
ioat_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
{
	struct ioat_enum_ctx *enum_ctx = ctx;
	struct spdk_ioat_chan *ioat;

	/* Verify that this device is not already attached */
	TAILQ_FOREACH(ioat, &g_ioat_driver.attached_chans, tailq) {
		/*
		 * NOTE: This assumes that the PCI abstraction layer will use the same device handle
		 *  across enumerations; we could compare by BDF instead if this is not true.
		 */
		if (pci_dev == ioat->device) {
			return 0;
		}
	}

	if (enum_ctx->probe_cb(enum_ctx->cb_ctx, pci_dev)) {
		/*
		 * Since I/OAT init is relatively quick, just perform the full init during probing.
		 * If this turns out to be a bottleneck later, this can be changed to work like
		 *  NVMe with a list of devices to initialize in parallel.
		 */
		ioat = ioat_attach(pci_dev);
		if (ioat == NULL) {
			SPDK_ERRLOG("ioat_attach() failed\n");
			return -1;
		}

		TAILQ_INSERT_TAIL(&g_ioat_driver.attached_chans, ioat, tailq);

		enum_ctx->attach_cb(enum_ctx->cb_ctx, pci_dev, ioat);
	}

	return 0;
}

int
spdk_ioat_probe(void *cb_ctx, spdk_ioat_probe_cb probe_cb, spdk_ioat_attach_cb attach_cb)
{
	int rc;
	struct ioat_enum_ctx enum_ctx;

	pthread_mutex_lock(&g_ioat_driver.lock);

	enum_ctx.probe_cb = probe_cb;
	enum_ctx.attach_cb = attach_cb;
	enum_ctx.cb_ctx = cb_ctx;

	rc = spdk_pci_ioat_enumerate(ioat_enum_cb, &enum_ctx);

	pthread_mutex_unlock(&g_ioat_driver.lock);

	return rc;
}

int
spdk_ioat_detach(struct spdk_ioat_chan *ioat)
{
	struct ioat_driver *driver = &g_ioat_driver;

	/* ioat should be in the free list (not registered to a thread)
	 * when calling spdk_ioat_detach().
	 */
	pthread_mutex_lock(&driver->lock);
	TAILQ_REMOVE(&driver->attached_chans, ioat, tailq);
	pthread_mutex_unlock(&driver->lock);

	ioat_channel_destruct(ioat);
	free(ioat);

	return 0;
}

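/*
 * The copy path translates caller virtual addresses with spdk_vtophys(), which
 * resolves addresses at 2 MB page granularity here.  A virtually contiguous
 * buffer is only guaranteed to be physically contiguous within a single 2 MB
 * page, so each copy is split at 2 MB boundaries (and at max_xfer_size) and
 * every chunk is translated separately.
 */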
#define _2MB_PAGE(ptr)		((ptr) & ~(0x200000 - 1))
#define _2MB_OFFSET(ptr)	((ptr) & (0x200000 - 1))

int64_t
spdk_ioat_submit_copy(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn,
		      void *dst, const void *src, uint64_t nbytes)
{
	struct ioat_descriptor *last_desc;
	uint64_t remaining, op_size;
	uint64_t vdst, vsrc;
	uint64_t vdst_page, vsrc_page;
	uint64_t pdst_page, psrc_page;
	uint32_t orig_head;

	if (!ioat) {
		return -1;
	}

	orig_head = ioat->head;

	vdst = (uint64_t)dst;
	vsrc = (uint64_t)src;
	vdst_page = vsrc_page = 0;
	pdst_page = psrc_page = SPDK_VTOPHYS_ERROR;

	remaining = nbytes;
	while (remaining) {
		if (_2MB_PAGE(vsrc) != vsrc_page) {
			vsrc_page = _2MB_PAGE(vsrc);
			psrc_page = spdk_vtophys((void *)vsrc_page);
		}

		if (_2MB_PAGE(vdst) != vdst_page) {
			vdst_page = _2MB_PAGE(vdst);
			pdst_page = spdk_vtophys((void *)vdst_page);
		}
		op_size = remaining;
		op_size = spdk_min(op_size, (0x200000 - _2MB_OFFSET(vsrc)));
		op_size = spdk_min(op_size, (0x200000 - _2MB_OFFSET(vdst)));
		op_size = spdk_min(op_size, ioat->max_xfer_size);
		remaining -= op_size;

		last_desc = ioat_prep_copy(ioat,
					   pdst_page + _2MB_OFFSET(vdst),
					   psrc_page + _2MB_OFFSET(vsrc),
					   op_size);

		if (remaining == 0 || last_desc == NULL) {
			break;
		}

		vsrc += op_size;
		vdst += op_size;
	}

	/* Issue null descriptor for null transfer */
	if (nbytes == 0) {
		last_desc = ioat_prep_null(ioat);
	}

	if (last_desc) {
		last_desc->callback_fn = cb_fn;
		last_desc->callback_arg = cb_arg;
	} else {
		/*
		 * Ran out of descriptors in the ring - reset head to leave things as they were
		 * in case we managed to fill out any descriptors.
		 */
		ioat->head = orig_head;
		return -1;
	}

	ioat_flush(ioat);
	return nbytes;
}

int64_t
spdk_ioat_submit_fill(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn,
		      void *dst, uint64_t fill_pattern, uint64_t nbytes)
{
	struct ioat_descriptor *last_desc = NULL;
	uint64_t remaining, op_size;
	uint64_t vdst;
	uint32_t orig_head;

	if (!ioat) {
		return -1;
	}

	if (!(ioat->dma_capabilities & SPDK_IOAT_ENGINE_FILL_SUPPORTED)) {
		SPDK_ERRLOG("Channel does not support memory fill\n");
		return -1;
	}

	orig_head = ioat->head;

	vdst = (uint64_t)dst;
	remaining = nbytes;

	while (remaining) {
		op_size = remaining;
		op_size = spdk_min(op_size, ioat->max_xfer_size);
		remaining -= op_size;

		last_desc = ioat_prep_fill(ioat,
					   spdk_vtophys((void *)vdst),
					   fill_pattern,
					   op_size);

		if (remaining == 0 || last_desc == NULL) {
			break;
		}

		vdst += op_size;
	}

	if (last_desc) {
		last_desc->callback_fn = cb_fn;
		last_desc->callback_arg = cb_arg;
	} else {
		/*
		 * Ran out of descriptors in the ring - reset head to leave things as they were
		 * in case we managed to fill out any descriptors.
		 */
		ioat->head = orig_head;
		return -1;
	}

	ioat_flush(ioat);
	return nbytes;
}

uint32_t
spdk_ioat_get_dma_capabilities(struct spdk_ioat_chan *ioat)
{
	if (!ioat) {
		return 0;
	}
	return ioat->dma_capabilities;
}

int
spdk_ioat_process_events(struct spdk_ioat_chan *ioat)
{
	return ioat_process_channel_events(ioat);
}

SPDK_LOG_REGISTER_TRACE_FLAG("ioat", SPDK_TRACE_IOAT)
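
/*
 * Minimal usage sketch (illustration only, not part of this driver).  It
 * assumes the callback typedefs declared in spdk/ioat.h match how they are
 * invoked above: probe_cb(cb_ctx, pci_dev) returns true to claim a device,
 * and attach_cb(cb_ctx, pci_dev, ioat) receives the initialized channel.
 *
 *	static struct spdk_ioat_chan *g_chan;
 *	static bool g_copy_done;
 *
 *	static bool probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev)
 *	{
 *		return g_chan == NULL;	// claim the first channel found
 *	}
 *
 *	static void attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev,
 *			      struct spdk_ioat_chan *ioat)
 *	{
 *		g_chan = ioat;
 *	}
 *
 *	static void copy_done(void *arg)
 *	{
 *		*(bool *)arg = true;
 *	}
 *
 *	// After spdk_ioat_probe(NULL, probe_cb, attach_cb) has attached a channel:
 *	spdk_ioat_submit_copy(g_chan, &g_copy_done, copy_done, dst, src, len);
 *	while (!g_copy_done) {
 *		spdk_ioat_process_events(g_chan);
 *	}
 */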