/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Advanced Micro Devices, Inc. All rights reserved.
 */

#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/types.h>
#include <sys/file.h>
#include <unistd.h>

#include <rte_hexdump.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>

#include "ccp_dev.h"
#include "ccp_pmd_private.h"

static TAILQ_HEAD(, ccp_device) ccp_list = TAILQ_HEAD_INITIALIZER(ccp_list);
static int ccp_dev_id;

int
ccp_dev_start(struct rte_cryptodev *dev)
{
	struct ccp_private *priv = dev->data->dev_private;

	priv->last_dev = TAILQ_FIRST(&ccp_list);
	return 0;
}

struct ccp_queue *
ccp_allot_queue(struct rte_cryptodev *cdev, int slot_req)
{
	int i, ret = 0;
	struct ccp_device *dev;
	struct ccp_private *priv = cdev->data->dev_private;

	dev = TAILQ_NEXT(priv->last_dev, next);
	if (unlikely(dev == NULL))
		dev = TAILQ_FIRST(&ccp_list);
	priv->last_dev = dev;
	if (dev->qidx >= dev->cmd_q_count)
		dev->qidx = 0;
	ret = rte_atomic64_read(&dev->cmd_q[dev->qidx].free_slots);
	if (ret >= slot_req)
		return &dev->cmd_q[dev->qidx];
	for (i = 0; i < dev->cmd_q_count; i++) {
		dev->qidx++;
		if (dev->qidx >= dev->cmd_q_count)
			dev->qidx = 0;
		ret = rte_atomic64_read(&dev->cmd_q[dev->qidx].free_slots);
		if (ret >= slot_req)
			return &dev->cmd_q[dev->qidx];
	}
	return NULL;
}

int
ccp_read_hwrng(uint32_t *value)
{
	struct ccp_device *dev;

	TAILQ_FOREACH(dev, &ccp_list, next) {
		void *vaddr = (void *)(dev->pci->mem_resource[2].addr);

		while (dev->hwrng_retries++ < CCP_MAX_TRNG_RETRIES) {
			*value = CCP_READ_REG(vaddr, TRNG_OUT_REG);
			if (*value) {
				dev->hwrng_retries = 0;
				return 0;
			}
		}
		dev->hwrng_retries = 0;
	}
	return -1;
}

static const struct rte_memzone *
ccp_queue_dma_zone_reserve(const char *queue_name,
			   uint32_t queue_size,
			   int socket_id)
{
	const struct rte_memzone *mz;

	mz = rte_memzone_lookup(queue_name);
	if (mz != 0) {
		if (((size_t)queue_size <= mz->len) &&
		    ((socket_id == SOCKET_ID_ANY) ||
		     (socket_id == mz->socket_id))) {
			CCP_LOG_INFO("re-use memzone already "
				     "allocated for %s", queue_name);
			return mz;
		}
		CCP_LOG_ERR("Incompatible memzone already "
			    "allocated %s, size %u, socket %d. "
			    "Requested size %u, socket %u",
			    queue_name, (uint32_t)mz->len,
			    mz->socket_id, queue_size, socket_id);
		return NULL;
	}

	CCP_LOG_INFO("Allocate memzone for %s, size %u on socket %u",
		     queue_name, queue_size, socket_id);

	return rte_memzone_reserve_aligned(queue_name, queue_size,
			socket_id, RTE_MEMZONE_IOVA_CONTIG, queue_size);
}

/* bitmap support apis */
static inline void
ccp_set_bit(unsigned long *bitmap, int n)
{
	__sync_fetch_and_or(&bitmap[WORD_OFFSET(n)], (1UL << BIT_OFFSET(n)));
}

static inline void
ccp_clear_bit(unsigned long *bitmap, int n)
{
	__sync_fetch_and_and(&bitmap[WORD_OFFSET(n)], ~(1UL << BIT_OFFSET(n)));
}

static inline uint32_t
ccp_get_bit(unsigned long *bitmap, int n)
{
	return ((bitmap[WORD_OFFSET(n)] & (1UL << BIT_OFFSET(n))) != 0);
}

static inline uint32_t
ccp_ffz(unsigned long word)
{
	unsigned long first_zero;

	first_zero = __builtin_ffsl(~word);
	return first_zero ? (first_zero - 1) : BITS_PER_WORD;
}

static inline uint32_t
ccp_find_first_zero_bit(unsigned long *addr, uint32_t limit)
{
	uint32_t i;
	uint32_t nwords = 0;

	nwords = (limit - 1) / BITS_PER_WORD + 1;
	for (i = 0; i < nwords; i++) {
		if (addr[i] == 0UL)
			return i * BITS_PER_WORD;
		if (addr[i] < ~(0UL))
			break;
	}
	return (i == nwords) ? limit : i * BITS_PER_WORD + ccp_ffz(addr[i]);
}

static void
ccp_bitmap_set(unsigned long *map, unsigned int start, int len)
{
	unsigned long *p = map + WORD_OFFSET(start);
	const unsigned int size = start + len;
	int bits_to_set = BITS_PER_WORD - (start % BITS_PER_WORD);
	unsigned long mask_to_set = CCP_BITMAP_FIRST_WORD_MASK(start);

	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_WORD;
		mask_to_set = ~0UL;
		p++;
	}
	if (len) {
		mask_to_set &= CCP_BITMAP_LAST_WORD_MASK(size);
		*p |= mask_to_set;
	}
}

static void
ccp_bitmap_clear(unsigned long *map, unsigned int start, int len)
{
	unsigned long *p = map + WORD_OFFSET(start);
	const unsigned int size = start + len;
	int bits_to_clear = BITS_PER_WORD - (start % BITS_PER_WORD);
	unsigned long mask_to_clear = CCP_BITMAP_FIRST_WORD_MASK(start);

	while (len - bits_to_clear >= 0) {
		*p &= ~mask_to_clear;
		len -= bits_to_clear;
		bits_to_clear = BITS_PER_WORD;
		mask_to_clear = ~0UL;
		p++;
	}
	if (len) {
		mask_to_clear &= CCP_BITMAP_LAST_WORD_MASK(size);
		*p &= ~mask_to_clear;
	}
}

static unsigned long
_ccp_find_next_bit(const unsigned long *addr,
		   unsigned long nbits,
		   unsigned long start,
		   unsigned long invert)
{
	unsigned long tmp;

	if (!nbits || start >= nbits)
		return nbits;

	tmp = addr[start / BITS_PER_WORD] ^ invert;

	/* Handle 1st word. */
	tmp &= CCP_BITMAP_FIRST_WORD_MASK(start);
	start = ccp_round_down(start, BITS_PER_WORD);

	while (!tmp) {
		start += BITS_PER_WORD;
		if (start >= nbits)
			return nbits;

		tmp = addr[start / BITS_PER_WORD] ^ invert;
	}

	return RTE_MIN(start + (__builtin_ffsl(tmp) - 1), nbits);
}

static unsigned long
ccp_find_next_bit(const unsigned long *addr,
		  unsigned long size,
		  unsigned long offset)
{
	return _ccp_find_next_bit(addr, size, offset, 0UL);
}

static unsigned long
ccp_find_next_zero_bit(const unsigned long *addr,
		       unsigned long size,
		       unsigned long offset)
{
	return _ccp_find_next_bit(addr, size, offset, ~0UL);
}

/**
 * bitmap_find_next_zero_area - find a contiguous aligned zero area
 * @map: The address to base the search on
 * @size: The bitmap size in bits
 * @start: The bitnumber to start searching at
 * @nr: The number of zeroed bits we're looking for
 */
static unsigned long
ccp_bitmap_find_next_zero_area(unsigned long *map,
			       unsigned long size,
			       unsigned long start,
			       unsigned int nr)
{
	unsigned long index, end, i;

again:
	index = ccp_find_next_zero_bit(map, size, start);

	end = index + nr;
	if (end > size)
		return end;
	i = ccp_find_next_bit(map, end, index);
	if (i < end) {
		start = i + 1;
		goto again;
	}
	return index;
}

static uint32_t
ccp_lsb_alloc(struct ccp_queue *cmd_q, unsigned int count)
{
	struct ccp_device *ccp;
	int start;

	/* First look at the map for the queue */
	if (cmd_q->lsb >= 0) {
		start = (uint32_t)ccp_bitmap_find_next_zero_area(cmd_q->lsbmap,
								 LSB_SIZE, 0,
								 count);
		if (start < LSB_SIZE) {
			ccp_bitmap_set(cmd_q->lsbmap, start, count);
			return start + cmd_q->lsb * LSB_SIZE;
		}
	}

	/* try to get an entry from the shared blocks */
	ccp = cmd_q->dev;

	rte_spinlock_lock(&ccp->lsb_lock);

	start = (uint32_t)ccp_bitmap_find_next_zero_area(ccp->lsbmap,
						MAX_LSB_CNT * LSB_SIZE,
						0, count);
	if (start <= MAX_LSB_CNT * LSB_SIZE) {
		ccp_bitmap_set(ccp->lsbmap, start, count);
		rte_spinlock_unlock(&ccp->lsb_lock);
		return start * LSB_ITEM_SIZE;
	}
	CCP_LOG_ERR("NO LSBs available");

	rte_spinlock_unlock(&ccp->lsb_lock);

	return 0;
}

static void __rte_unused
ccp_lsb_free(struct ccp_queue *cmd_q,
	     unsigned int start,
	     unsigned int count)
{
	int lsbno = start / LSB_SIZE;

	if (!start)
		return;

	if (cmd_q->lsb == lsbno) {
		/* An entry from the private LSB */
		ccp_bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count);
	} else {
		/* From the shared LSBs */
		struct ccp_device *ccp = cmd_q->dev;

		rte_spinlock_lock(&ccp->lsb_lock);
		ccp_bitmap_clear(ccp->lsbmap, start, count);
		rte_spinlock_unlock(&ccp->lsb_lock);
	}
}
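
/*
 * ccp_lsb_alloc() returns an LSB slot handle (0 means no slot was available)
 * and ccp_lsb_free() releases it again; a zero handle is simply ignored on
 * free.  Illustrative pairing (a sketch, not part of the driver flow),
 * mirroring the per-queue pre-allocation done in ccp_add_device() below:
 *
 *	cmd_q->sb_sha = ccp_lsb_alloc(cmd_q, 2);
 *	if (!cmd_q->sb_sha)
 *		return -ENOMEM;
 *	...
 *	ccp_lsb_free(cmd_q, cmd_q->sb_sha, 2);
 */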

static int
ccp_find_lsb_regions(struct ccp_queue *cmd_q, uint64_t status)
{
	int q_mask = 1 << cmd_q->id;
	int weight = 0;
	int j;

	/* Build a bit mask to know which LSBs this queue has access to.
	 * Don't bother with segment 0 as it has special privileges.
	 */
	cmd_q->lsbmask = 0;
	status >>= LSB_REGION_WIDTH;
	for (j = 1; j < MAX_LSB_CNT; j++) {
		if (status & q_mask)
			ccp_set_bit(&cmd_q->lsbmask, j);

		status >>= LSB_REGION_WIDTH;
	}

	for (j = 0; j < MAX_LSB_CNT; j++)
		if (ccp_get_bit(&cmd_q->lsbmask, j))
			weight++;

	CCP_LOG_DBG("Queue %d can access %d LSB regions of mask %lu\n",
		    (int)cmd_q->id, weight, cmd_q->lsbmask);

	return weight ? 0 : -EINVAL;
}

static int
ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
			     int lsb_cnt, int n_lsbs,
			     unsigned long *lsb_pub)
{
	unsigned long qlsb = 0;
	int bitno = 0;
	int qlsb_wgt = 0;
	int i, j;

	/* For each queue:
	 * If the count of potential LSBs available to a queue matches the
	 * ordinal given to us in lsb_cnt:
	 * Copy the mask of possible LSBs for this queue into "qlsb";
	 * For each bit in qlsb, see if the corresponding bit in the
	 * aggregation mask is set; if so, we have a match.
	 * If we have a match, clear the bit in the aggregation to
	 * mark it as no longer available.
	 * If there is no match, clear the bit in qlsb and keep looking.
	 */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct ccp_queue *cmd_q = &ccp->cmd_q[i];

		qlsb_wgt = 0;
		for (j = 0; j < MAX_LSB_CNT; j++)
			if (ccp_get_bit(&cmd_q->lsbmask, j))
				qlsb_wgt++;

		if (qlsb_wgt == lsb_cnt) {
			qlsb = cmd_q->lsbmask;

			bitno = ffs(qlsb) - 1;
			while (bitno < MAX_LSB_CNT) {
				if (ccp_get_bit(lsb_pub, bitno)) {
					/* We found an available LSB
					 * that this queue can access
					 */
					cmd_q->lsb = bitno;
					ccp_clear_bit(lsb_pub, bitno);
					break;
				}
				ccp_clear_bit(&qlsb, bitno);
				bitno = ffs(qlsb) - 1;
			}
			if (bitno >= MAX_LSB_CNT)
				return -EINVAL;
			n_lsbs--;
		}
	}
	return n_lsbs;
}
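
/*
 * Worked example of the matching above (illustrative): with two queues whose
 * access masks are 0x2 (weight 1) and 0x6 (weight 2), the lsb_cnt == 1 pass
 * assigns region 1 to the first queue and clears it from the public mask;
 * the lsb_cnt == 2 pass then assigns region 2 to the second queue, leaving
 * no region in the public mask to share.
 */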

/* For each queue, from the most- to least-constrained:
 * find an LSB that can be assigned to the queue. If there are N queues that
 * can only use M LSBs, where N > M, fail; otherwise, every queue will get a
 * dedicated LSB. Remaining LSB regions become a shared resource.
 * If we have fewer LSBs than queues, all LSB regions become shared
 * resources.
 */
static int
ccp_assign_lsbs(struct ccp_device *ccp)
{
	unsigned long lsb_pub = 0, qlsb = 0;
	int n_lsbs = 0;
	int bitno;
	int i, lsb_cnt;
	int rc = 0;

	rte_spinlock_init(&ccp->lsb_lock);

	/* Create an aggregate bitmap to get a total count of available LSBs */
	for (i = 0; i < ccp->cmd_q_count; i++)
		lsb_pub |= ccp->cmd_q[i].lsbmask;

	for (i = 0; i < MAX_LSB_CNT; i++)
		if (ccp_get_bit(&lsb_pub, i))
			n_lsbs++;

	if (n_lsbs >= ccp->cmd_q_count) {
		/* We have enough LSBs to give every queue a private LSB.
		 * Brute force search to start with the queues that are more
		 * constrained in LSB choice. When an LSB is privately
		 * assigned, it is removed from the public mask.
		 * This is an ugly N squared algorithm with some optimization.
		 */
		for (lsb_cnt = 1; n_lsbs && (lsb_cnt <= MAX_LSB_CNT);
		     lsb_cnt++) {
			rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
							  &lsb_pub);
			if (rc < 0)
				return -EINVAL;
			n_lsbs = rc;
		}
	}

	rc = 0;
	/* What's left of the LSBs, according to the public mask, now become
	 * shared. Any zero bits in the lsb_pub mask represent an LSB region
	 * that can't be used as a shared resource, so mark the LSB slots for
	 * them as "in use".
	 */
	qlsb = lsb_pub;
	bitno = ccp_find_first_zero_bit(&qlsb, MAX_LSB_CNT);
	while (bitno < MAX_LSB_CNT) {
		ccp_bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
		ccp_set_bit(&qlsb, bitno);
		bitno = ccp_find_first_zero_bit(&qlsb, MAX_LSB_CNT);
	}

	return rc;
}

static int
ccp_add_device(struct ccp_device *dev)
{
	int i;
	uint32_t qmr, status_lo, status_hi, dma_addr_lo, dma_addr_hi;
	uint64_t status;
	struct ccp_queue *cmd_q;
	const struct rte_memzone *q_mz;
	void *vaddr;

	if (dev == NULL)
		return -1;

	dev->id = ccp_dev_id++;
	dev->qidx = 0;
	vaddr = (void *)(dev->pci->mem_resource[2].addr);

	if (dev->pci->id.device_id == AMD_PCI_CCP_5B) {
		CCP_WRITE_REG(vaddr, CMD_TRNG_CTL_OFFSET, 0x00012D57);
		CCP_WRITE_REG(vaddr, CMD_CONFIG_0_OFFSET, 0x00000003);
		for (i = 0; i < 12; i++) {
			CCP_WRITE_REG(vaddr, CMD_AES_MASK_OFFSET,
				      CCP_READ_REG(vaddr, TRNG_OUT_REG));
		}
		CCP_WRITE_REG(vaddr, CMD_QUEUE_MASK_OFFSET, 0x0000001F);
		CCP_WRITE_REG(vaddr, CMD_QUEUE_PRIO_OFFSET, 0x00005B6D);
		CCP_WRITE_REG(vaddr, CMD_CMD_TIMEOUT_OFFSET, 0x00000000);

		CCP_WRITE_REG(vaddr, LSB_PRIVATE_MASK_LO_OFFSET, 0x3FFFFFFF);
		CCP_WRITE_REG(vaddr, LSB_PRIVATE_MASK_HI_OFFSET, 0x000003FF);

		CCP_WRITE_REG(vaddr, CMD_CLK_GATE_CTL_OFFSET, 0x00108823);
	}
	CCP_WRITE_REG(vaddr, CMD_REQID_CONFIG_OFFSET, 0x0);

	/* Copy the private LSB mask to the public registers */
	status_lo = CCP_READ_REG(vaddr, LSB_PRIVATE_MASK_LO_OFFSET);
	status_hi = CCP_READ_REG(vaddr, LSB_PRIVATE_MASK_HI_OFFSET);
	CCP_WRITE_REG(vaddr, LSB_PUBLIC_MASK_LO_OFFSET, status_lo);
	CCP_WRITE_REG(vaddr, LSB_PUBLIC_MASK_HI_OFFSET, status_hi);
	status = ((uint64_t)status_hi << 30) | ((uint64_t)status_lo);

	dev->cmd_q_count = 0;
	/* Find available queues */
	qmr = CCP_READ_REG(vaddr, Q_MASK_REG);
	for (i = 0; i < MAX_HW_QUEUES; i++) {
		if (!(qmr & (1 << i)))
			continue;
		cmd_q = &dev->cmd_q[dev->cmd_q_count++];
		cmd_q->dev = dev;
		cmd_q->id = i;
		cmd_q->qidx = 0;
		cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);

		cmd_q->reg_base = (uint8_t *)vaddr +
			CMD_Q_STATUS_INCR * (i + 1);

		/* CCP queue memory */
		snprintf(cmd_q->memz_name, sizeof(cmd_q->memz_name),
			 "%s_%d_%s_%d_%s",
			 "ccp_dev",
			 (int)dev->id, "queue",
			 (int)cmd_q->id, "mem");
		q_mz = ccp_queue_dma_zone_reserve(cmd_q->memz_name,
						  cmd_q->qsize, SOCKET_ID_ANY);
		cmd_q->qbase_addr = (void *)q_mz->addr;
		cmd_q->qbase_desc = (void *)q_mz->addr;
		cmd_q->qbase_phys_addr = q_mz->iova;

		cmd_q->qcontrol = 0;
		/* init control reg to zero */
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_CONTROL_BASE,
			      cmd_q->qcontrol);

		/* Disable the interrupts */
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_INT_ENABLE_BASE, 0x00);
		CCP_READ_REG(cmd_q->reg_base, CMD_Q_INT_STATUS_BASE);
		CCP_READ_REG(cmd_q->reg_base, CMD_Q_STATUS_BASE);

		/* Clear the interrupts */
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_INTERRUPT_STATUS_BASE,
			      ALL_INTERRUPTS);

		/* Configure size of each virtual queue accessible to host */
		cmd_q->qcontrol &= ~(CMD_Q_SIZE << CMD_Q_SHIFT);
		cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD_Q_SHIFT;

		dma_addr_lo = low32_value(cmd_q->qbase_phys_addr);
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_TAIL_LO_BASE,
			      (uint32_t)dma_addr_lo);
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_HEAD_LO_BASE,
			      (uint32_t)dma_addr_lo);
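
		/*
		 * The low 32 bits of the descriptor ring IOVA seed both the
		 * queue head and tail pointers above; the upper bits are
		 * folded into the queue control word written below.
		 */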
		dma_addr_hi = high32_value(cmd_q->qbase_phys_addr);
		cmd_q->qcontrol |= (dma_addr_hi << 16);
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_CONTROL_BASE,
			      cmd_q->qcontrol);

		/* create LSB Mask map */
		if (ccp_find_lsb_regions(cmd_q, status))
			CCP_LOG_ERR("queue doesn't have lsb regions");
		cmd_q->lsb = -1;

		rte_atomic64_init(&cmd_q->free_slots);
		rte_atomic64_set(&cmd_q->free_slots, (COMMANDS_PER_QUEUE - 1));
		/* unused slot barrier b/w H&T */
	}

	if (ccp_assign_lsbs(dev))
		CCP_LOG_ERR("Unable to assign lsb region");

	/* pre-allocate LSB slots */
	for (i = 0; i < dev->cmd_q_count; i++) {
		dev->cmd_q[i].sb_key =
			ccp_lsb_alloc(&dev->cmd_q[i], 1);
		dev->cmd_q[i].sb_iv =
			ccp_lsb_alloc(&dev->cmd_q[i], 1);
		dev->cmd_q[i].sb_sha =
			ccp_lsb_alloc(&dev->cmd_q[i], 2);
		dev->cmd_q[i].sb_hmac =
			ccp_lsb_alloc(&dev->cmd_q[i], 2);
	}

	TAILQ_INSERT_TAIL(&ccp_list, dev, next);
	return 0;
}

static void
ccp_remove_device(struct ccp_device *dev)
{
	if (dev == NULL)
		return;

	TAILQ_REMOVE(&ccp_list, dev, next);
}

int
ccp_probe_device(struct rte_pci_device *pci_dev)
{
	struct ccp_device *ccp_dev;

	ccp_dev = rte_zmalloc("ccp_device", sizeof(*ccp_dev),
			      RTE_CACHE_LINE_SIZE);
	if (ccp_dev == NULL)
		goto fail;

	ccp_dev->pci = pci_dev;

	/* device is valid, add in list */
	if (ccp_add_device(ccp_dev)) {
		ccp_remove_device(ccp_dev);
		goto fail;
	}

	return 0;
fail:
	CCP_LOG_ERR("CCP Device probe failed");
	rte_free(ccp_dev);
	return -1;
}
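
/*
 * Note on lifetime (summary of the code above, not new behaviour):
 * ccp_probe_device() allocates the ccp_device, ccp_add_device() appends it to
 * the global ccp_list consumed by ccp_dev_start(), ccp_allot_queue() and
 * ccp_read_hwrng(), and ccp_remove_device() only unlinks it; the rte_free()
 * on the failure path of ccp_probe_device() releases the memory.
 */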