/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <string.h>
#include <stdio.h>

#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_class.h>
#include <rte_malloc.h>
#include <rte_eal_paging.h>

#include "mlx5_common.h"
#include "mlx5_common_os.h"
#include "mlx5_common_mp.h"
#include "mlx5_common_log.h"
#include "mlx5_common_defs.h"
#include "mlx5_common_private.h"

uint8_t haswell_broadwell_cpu;

/* Driver type key for new device global syntax. */
#define MLX5_DRIVER_KEY "driver"

/* Enable extending memsegs when creating a MR. */
#define MLX5_MR_EXT_MEMSEG_EN "mr_ext_memseg_en"

/* Device parameter to configure implicit registration of mempool memory. */
#define MLX5_MR_MEMPOOL_REG_EN "mr_mempool_reg_en"

/* The default memory allocator used in PMD. */
#define MLX5_SYS_MEM_EN "sys_mem_en"

/*
 * Device parameter to force doorbell register mapping
 * to the non-cached region, eliminating the extra write memory barrier.
 */
#define MLX5_TX_DB_NC "tx_db_nc"

/* On x86_64 Intel processors, check whether relaxed ordering
 * should be used.
 */
#ifdef RTE_ARCH_X86_64
/**
 * This function returns processor identification and feature information
 * into the registers.
 *
 * @param eax, ebx, ecx, edx
 *   Pointers to the registers that will hold CPU information.
 * @param level
 *   The main category of information returned.
 */
static inline void mlx5_cpu_id(unsigned int level,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	__asm__("cpuid\n\t"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (level));
}
#endif

RTE_LOG_REGISTER_DEFAULT(mlx5_common_logtype, NOTICE)

/* Head of the list of drivers. */
static TAILQ_HEAD(mlx5_drivers, mlx5_class_driver) drivers_list =
				TAILQ_HEAD_INITIALIZER(drivers_list);

/* Head of the list of devices. */
static TAILQ_HEAD(mlx5_devices, mlx5_common_device) devices_list =
				TAILQ_HEAD_INITIALIZER(devices_list);
static pthread_mutex_t devices_list_lock;

static const struct {
	const char *name;
	unsigned int drv_class;
} mlx5_classes[] = {
	{ .name = "vdpa", .drv_class = MLX5_CLASS_VDPA },
	{ .name = "eth", .drv_class = MLX5_CLASS_ETH },
	/* Keep class "net" for backward compatibility. */
	{ .name = "net", .drv_class = MLX5_CLASS_ETH },
	{ .name = "regex", .drv_class = MLX5_CLASS_REGEX },
	{ .name = "compress", .drv_class = MLX5_CLASS_COMPRESS },
	{ .name = "crypto", .drv_class = MLX5_CLASS_CRYPTO },
};

static int
class_name_to_value(const char *class_name)
{
	unsigned int i;

	for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
		if (strcmp(class_name, mlx5_classes[i].name) == 0)
			return mlx5_classes[i].drv_class;
	}
	return -EINVAL;
}

static struct mlx5_class_driver *
driver_get(uint32_t class)
{
	struct mlx5_class_driver *driver;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((uint32_t)driver->drv_class == class)
			return driver;
	}
	return NULL;
}

int
mlx5_kvargs_process(struct mlx5_kvargs_ctrl *mkvlist, const char *const keys[],
		    arg_handler_t handler, void *opaque_arg)
{
	const struct rte_kvargs_pair *pair;
	uint32_t i, j;

	MLX5_ASSERT(mkvlist && mkvlist->kvlist);
	/* Process parameters. */
	for (i = 0; i < mkvlist->kvlist->count; i++) {
		pair = &mkvlist->kvlist->pairs[i];
		for (j = 0; keys[j] != NULL; ++j) {
			if (strcmp(pair->key, keys[j]) != 0)
				continue;
			if ((*handler)(pair->key, pair->value, opaque_arg) < 0)
				return -1;
			mkvlist->is_used[i] = true;
			break;
		}
	}
	return 0;
}

/**
 * Prepare a mlx5 kvargs control.
 *
 * @param[out] mkvlist
 *   Pointer to mlx5 kvargs control.
 * @param[in] devargs
 *   The input string containing the key/value associations.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_kvargs_prepare(struct mlx5_kvargs_ctrl *mkvlist,
		    const struct rte_devargs *devargs)
{
	struct rte_kvargs *kvlist;
	uint32_t i;

	if (devargs == NULL)
		return 0;
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	/*
	 * rte_kvargs_parse() allows a key without a value;
	 * the mlx5 PMDs do not accept this syntax.
	 */
	for (i = 0; i < kvlist->count; i++) {
		const struct rte_kvargs_pair *pair = &kvlist->pairs[i];
		if (pair->value == NULL || *(pair->value) == '\0') {
			DRV_LOG(ERR, "Key %s is missing value.", pair->key);
			rte_kvargs_free(kvlist);
			rte_errno = EINVAL;
			return -rte_errno;
		}
	}
	/* Make sure the whole "is_used" array starts as false. */
	memset(mkvlist, 0, sizeof(*mkvlist));
	mkvlist->kvlist = kvlist;
	DRV_LOG(DEBUG, "Parse successfully %u devargs.",
		mkvlist->kvlist->count);
	return 0;
}

/**
 * Release a mlx5 kvargs control.
 *
 * @param[out] mkvlist
 *   Pointer to mlx5 kvargs control.
 */
static void
mlx5_kvargs_release(struct mlx5_kvargs_ctrl *mkvlist)
{
	if (mkvlist == NULL)
		return;
	rte_kvargs_free(mkvlist->kvlist);
	memset(mkvlist, 0, sizeof(*mkvlist));
}

/**
 * Validate the device arguments list.
 * It reports the first unknown parameter.
 *
 * @param[in] mkvlist
 *   Pointer to mlx5 kvargs control.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_kvargs_validate(struct mlx5_kvargs_ctrl *mkvlist)
{
	uint32_t i;

	/* Secondary process should not handle devargs. */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;
	if (mkvlist == NULL)
		return 0;
	for (i = 0; i < mkvlist->kvlist->count; i++) {
		if (mkvlist->is_used[i] == 0) {
			DRV_LOG(ERR, "Key \"%s\" "
				"is unknown for the provided classes.",
				mkvlist->kvlist->pairs[i].key);
			rte_errno = EINVAL;
			return -rte_errno;
		}
	}
	return 0;
}

/**
 * Verify and store value for devargs.
 *
 * @param[in] key
 *   Key argument to verify.
 * @param[in] val
 *   Value associated with key.
 * @param opaque
 *   User data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_args_check_handler(const char *key, const char *val, void *opaque)
{
	struct mlx5_common_dev_config *config = opaque;
	signed long tmp;

	if (strcmp(MLX5_DRIVER_KEY, key) == 0 ||
	    strcmp(RTE_DEVARGS_KEY_CLASS, key) == 0)
		return 0;
	errno = 0;
	tmp = strtol(val, NULL, 0);
	if (errno) {
		rte_errno = errno;
		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
		return -rte_errno;
	}
	if (strcmp(key, MLX5_TX_DB_NC) == 0) {
		if (tmp != MLX5_TXDB_CACHED &&
		    tmp != MLX5_TXDB_NCACHED &&
		    tmp != MLX5_TXDB_HEURISTIC) {
			DRV_LOG(ERR, "Invalid Tx doorbell mapping parameter.");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->dbnc = tmp;
	} else if (strcmp(key, MLX5_MR_EXT_MEMSEG_EN) == 0) {
		config->mr_ext_memseg_en = !!tmp;
	} else if (strcmp(key, MLX5_MR_MEMPOOL_REG_EN) == 0) {
		config->mr_mempool_reg_en = !!tmp;
	} else if (strcmp(key, MLX5_SYS_MEM_EN) == 0) {
		config->sys_mem_en = !!tmp;
	}
	return 0;
}

/**
 * Parse common device parameters.
 *
 * @param mkvlist
 *   Pointer to mlx5 kvargs control.
 * @param config
 *   Pointer to device configuration structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_config_get(struct mlx5_kvargs_ctrl *mkvlist,
		       struct mlx5_common_dev_config *config)
{
	const char **params = (const char *[]){
		RTE_DEVARGS_KEY_CLASS,
		MLX5_DRIVER_KEY,
		MLX5_TX_DB_NC,
		MLX5_MR_EXT_MEMSEG_EN,
		MLX5_SYS_MEM_EN,
		MLX5_MR_MEMPOOL_REG_EN,
		NULL,
	};
	int ret = 0;

	if (mkvlist == NULL)
		return 0;
	/* Set defaults. */
	config->mr_ext_memseg_en = 1;
	config->mr_mempool_reg_en = 1;
	config->sys_mem_en = 0;
	config->dbnc = MLX5_ARG_UNSET;
	/* Process common parameters. */
	ret = mlx5_kvargs_process(mkvlist, params,
				  mlx5_common_args_check_handler, config);
	if (ret) {
		rte_errno = EINVAL;
		ret = -rte_errno;
	}
	DRV_LOG(DEBUG, "mr_ext_memseg_en is %u.", config->mr_ext_memseg_en);
	DRV_LOG(DEBUG, "mr_mempool_reg_en is %u.", config->mr_mempool_reg_en);
	DRV_LOG(DEBUG, "sys_mem_en is %u.", config->sys_mem_en);
	DRV_LOG(DEBUG, "Tx doorbell mapping parameter is %d.", config->dbnc);
	return ret;
}

static int
devargs_class_handler(__rte_unused const char *key,
		      const char *class_names, void *opaque)
{
	int *ret = opaque;
	int class_val;
	char *scratch;
	char *found;
	char *refstr = NULL;

	*ret = 0;
	scratch = strdup(class_names);
	if (scratch == NULL) {
		*ret = -ENOMEM;
		return *ret;
	}
	found = strtok_r(scratch, ":", &refstr);
	if (found == NULL)
		/* Empty string. */
		goto err;
	do {
		/* Extract each individual class name. Multiple
		 * classes can be supplied as class=net:regex:foo:bar.
		 */
		class_val = class_name_to_value(found);
		/* Check if it is a valid class. */
		if (class_val < 0) {
			*ret = -EINVAL;
			goto err;
		}
		*ret |= class_val;
		found = strtok_r(NULL, ":", &refstr);
	} while (found != NULL);
err:
	free(scratch);
	if (*ret < 0)
		DRV_LOG(ERR, "Invalid mlx5 class options: %s.\n", class_names);
	return *ret;
}

static int
parse_class_options(const struct rte_devargs *devargs,
		    struct mlx5_kvargs_ctrl *mkvlist)
{
	int ret = 0;

	if (devargs == NULL)
		return 0;
	if (devargs->cls != NULL && devargs->cls->name != NULL)
		/* Global syntax, only one class type. */
		return class_name_to_value(devargs->cls->name);
	/* Legacy devargs support multiple classes. */
	rte_kvargs_process(mkvlist->kvlist, RTE_DEVARGS_KEY_CLASS,
			   devargs_class_handler, &ret);
	return ret;
}

static const unsigned int mlx5_class_invalid_combinations[] = {
	MLX5_CLASS_ETH | MLX5_CLASS_VDPA,
	/* New class combination should be added here. */
};

static int
is_valid_class_combination(uint32_t user_classes)
{
	unsigned int i;

	/* Verify if the user specified an unsupported combination. */
	for (i = 0; i < RTE_DIM(mlx5_class_invalid_combinations); i++) {
		if ((mlx5_class_invalid_combinations[i] & user_classes) ==
		    mlx5_class_invalid_combinations[i])
			return -EINVAL;
	}
	/* No invalid class combination found. */
	return 0;
}

static bool
mlx5_bus_match(const struct mlx5_class_driver *drv,
	       const struct rte_device *dev)
{
	if (mlx5_dev_is_pci(dev))
		return mlx5_dev_pci_match(drv, dev);
	return true;
}

static struct mlx5_common_device *
to_mlx5_device(const struct rte_device *rte_dev)
{
	struct mlx5_common_device *cdev;

	TAILQ_FOREACH(cdev, &devices_list, next) {
		if (rte_dev == cdev->dev)
			return cdev;
	}
	return NULL;
}

int
mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size)
{
	struct rte_pci_addr pci_addr = { 0 };
	int ret;

	if (mlx5_dev_is_pci(dev)) {
		/* Input might be <BDF>, format PCI address to <DBDF>. */
		ret = rte_pci_addr_parse(dev->name, &pci_addr);
		if (ret != 0)
			return -ENODEV;
		rte_pci_device_name(&pci_addr, addr, size);
		return 0;
	}
#ifdef RTE_EXEC_ENV_LINUX
	return mlx5_auxiliary_get_pci_str(RTE_DEV_TO_AUXILIARY_CONST(dev),
					  addr, size);
#else
	rte_errno = ENODEV;
	return -rte_errno;
#endif
}

/**
 * Register the mempool for the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool being registered.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_dev_mempool_register(struct mlx5_common_device *cdev,
			  struct rte_mempool *mp, bool is_extmem)
{
	return mlx5_mr_mempool_register(cdev, mp, is_extmem);
}

/**
 * Unregister the mempool from the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool being unregistered.
 */
void
mlx5_dev_mempool_unregister(struct mlx5_common_device *cdev,
			    struct rte_mempool *mp)
{
	if (mlx5_mr_mempool_unregister(cdev, mp) < 0)
		DRV_LOG(WARNING, "Failed to unregister mempool %s for PD %p: %s",
			mp->name, cdev->pd, rte_strerror(rte_errno));
}

/**
 * rte_mempool_walk() callback to register mempools for the protection domain.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_mempool_register_cb(struct rte_mempool *mp, void *arg)
{
	struct mlx5_common_device *cdev = arg;
	int ret;

	ret = mlx5_dev_mempool_register(cdev, mp, false);
	if (ret < 0 && rte_errno != EEXIST)
		DRV_LOG(ERR,
			"Failed to register existing mempool %s for PD %p: %s",
			mp->name, cdev->pd, rte_strerror(rte_errno));
}

/**
 * rte_mempool_walk() callback to unregister mempools
 * from the protection domain.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_mempool_unregister_cb(struct rte_mempool *mp, void *arg)
{
	mlx5_dev_mempool_unregister((struct mlx5_common_device *)arg, mp);
}

/**
 * Mempool life cycle callback for mlx5 common devices.
 *
 * @param event
 *   Mempool life cycle event.
 * @param mp
 *   Associated mempool.
 * @param arg
 *   Pointer to a device shared context.
 */
static void
mlx5_dev_mempool_event_cb(enum rte_mempool_event event, struct rte_mempool *mp,
			  void *arg)
{
	struct mlx5_common_device *cdev = arg;

	switch (event) {
	case RTE_MEMPOOL_EVENT_READY:
		if (mlx5_dev_mempool_register(cdev, mp, false) < 0)
			DRV_LOG(ERR,
				"Failed to register new mempool %s for PD %p: %s",
				mp->name, cdev->pd, rte_strerror(rte_errno));
		break;
	case RTE_MEMPOOL_EVENT_DESTROY:
		mlx5_dev_mempool_unregister(cdev, mp);
		break;
	}
}

int
mlx5_dev_mempool_subscribe(struct mlx5_common_device *cdev)
{
	int ret = 0;

	if (!cdev->config.mr_mempool_reg_en)
		return 0;
	rte_rwlock_write_lock(&cdev->mr_scache.mprwlock);
	if (cdev->mr_scache.mp_cb_registered)
		goto exit;
	/* Callback for this device may already be registered. */
	ret = rte_mempool_event_callback_register(mlx5_dev_mempool_event_cb,
						  cdev);
	if (ret != 0 && rte_errno != EEXIST)
		goto exit;
	/* Register mempools only once for this device. */
	if (ret == 0)
		rte_mempool_walk(mlx5_dev_mempool_register_cb, cdev);
	ret = 0;
	cdev->mr_scache.mp_cb_registered = 1;
exit:
	rte_rwlock_write_unlock(&cdev->mr_scache.mprwlock);
	return ret;
}

static void
mlx5_dev_mempool_unsubscribe(struct mlx5_common_device *cdev)
{
	int ret;

	if (!cdev->mr_scache.mp_cb_registered ||
	    !cdev->config.mr_mempool_reg_en)
		return;
	/* Stop watching for mempool events and unregister all mempools. */
	ret = rte_mempool_event_callback_unregister(mlx5_dev_mempool_event_cb,
						    cdev);
	if (ret == 0)
		rte_mempool_walk(mlx5_dev_mempool_unregister_cb, cdev);
}

/**
 * Callback for memory event.
 *
 * @param event_type
 *   Memory event type.
 * @param addr
 *   Address of memory.
 * @param len
 *   Size of memory.
 */
static void
mlx5_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
		     size_t len, void *arg __rte_unused)
{
	struct mlx5_common_device *cdev;

	/* Must be called from the primary process. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	switch (event_type) {
	case RTE_MEM_EVENT_FREE:
		pthread_mutex_lock(&devices_list_lock);
		/* Iterate all the existing mlx5 devices. */
		TAILQ_FOREACH(cdev, &devices_list, next)
			mlx5_free_mr_by_addr(&cdev->mr_scache,
					     mlx5_os_get_ctx_device_name
					     (cdev->ctx),
					     addr, len);
		pthread_mutex_unlock(&devices_list_lock);
		break;
	case RTE_MEM_EVENT_ALLOC:
	default:
		break;
	}
}

/**
 * Uninitialize all HW globals of the device context.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
 */
static void
mlx5_dev_hw_global_release(struct mlx5_common_device *cdev)
{
	if (cdev->pd != NULL) {
		claim_zero(mlx5_os_dealloc_pd(cdev->pd));
		cdev->pd = NULL;
	}
	if (cdev->ctx != NULL) {
		claim_zero(mlx5_glue->close_device(cdev->ctx));
		cdev->ctx = NULL;
	}
}

/**
 * Initialize all HW globals of the device context.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
 * @param classes
 *   Chosen classes coming from the user device arguments.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_dev_hw_global_prepare(struct mlx5_common_device *cdev, uint32_t classes)
{
	int ret;

	/* Create the context device. */
	ret = mlx5_os_open_device(cdev, classes);
	if (ret < 0)
		return ret;
	/* Allocate the Protection Domain object and extract its pdn. */
	ret = mlx5_os_pd_create(cdev);
	if (ret)
		goto error;
	/* All actions taken below are relevant only when DevX is supported. */
	if (cdev->config.devx == 0)
		return 0;
	/* Query HCA attributes. */
	ret = mlx5_devx_cmd_query_hca_attr(cdev->ctx, &cdev->config.hca_attr);
	if (ret) {
		DRV_LOG(ERR, "Unable to read HCA capabilities.");
		rte_errno = ENOTSUP;
		goto error;
	}
	return 0;
error:
	mlx5_dev_hw_global_release(cdev);
	return ret;
}

static void
mlx5_common_dev_release(struct mlx5_common_device *cdev)
{
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_REMOVE(&devices_list, cdev, next);
	pthread_mutex_unlock(&devices_list_lock);
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		if (TAILQ_EMPTY(&devices_list))
			rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
							  NULL);
		mlx5_dev_mempool_unsubscribe(cdev);
		mlx5_mr_release_cache(&cdev->mr_scache);
		mlx5_dev_hw_global_release(cdev);
	}
	rte_free(cdev);
}

static struct mlx5_common_device *
mlx5_common_dev_create(struct rte_device *eal_dev, uint32_t classes,
		       struct mlx5_kvargs_ctrl *mkvlist)
{
	struct mlx5_common_device *cdev;
	int ret;

	cdev = rte_zmalloc("mlx5_common_device", sizeof(*cdev), 0);
	if (!cdev) {
		DRV_LOG(ERR, "Device allocation failure.");
		rte_errno = ENOMEM;
		return NULL;
	}
	cdev->dev = eal_dev;
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		goto exit;
	/* Parse device parameters. */
	ret = mlx5_common_config_get(mkvlist, &cdev->config);
	if (ret < 0) {
		DRV_LOG(ERR, "Failed to process device arguments: %s",
			strerror(rte_errno));
		rte_free(cdev);
		return NULL;
	}
	mlx5_malloc_mem_select(cdev->config.sys_mem_en);
	/* Initialize all HW globals of the device context. */
	ret = mlx5_dev_hw_global_prepare(cdev, classes);
	if (ret) {
		DRV_LOG(ERR, "Failed to initialize device context.");
		rte_free(cdev);
		return NULL;
	}
	/* Initialize global MR cache resources and update its functions. */
	ret = mlx5_mr_create_cache(&cdev->mr_scache, eal_dev->numa_node);
	if (ret) {
		DRV_LOG(ERR, "Failed to initialize global MR share cache.");
		mlx5_dev_hw_global_release(cdev);
		rte_free(cdev);
		return NULL;
	}
	/* Register callback function for global shared MR cache management. */
	if (TAILQ_EMPTY(&devices_list))
		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
						mlx5_mr_mem_event_cb, NULL);
exit:
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_INSERT_HEAD(&devices_list, cdev, next);
	pthread_mutex_unlock(&devices_list_lock);
	return cdev;
}

/**
 * Validate common devargs when probing again.
 *
 * When a common device is probed again, its configuration cannot change.
 * If the user asks for an incompatible configuration in devargs, it is an
 * error. This function checks the match between:
 *  - Common device configuration requested by the probe-again devargs.
 *  - Existing common device configuration.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
 * @param mkvlist
 *   Pointer to mlx5 kvargs control, can be NULL if there is no devargs.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_probe_again_args_validate(struct mlx5_common_device *cdev,
				      struct mlx5_kvargs_ctrl *mkvlist)
{
	struct mlx5_common_dev_config *config;
	int ret;

	/* Secondary process should not handle devargs. */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;
	/* A probe again does not have to carry devargs. */
	if (mkvlist == NULL)
		return 0;
	config = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
			     sizeof(struct mlx5_common_dev_config),
			     RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (config == NULL) {
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	/*
	 * Create a temporary common configuration structure according to the
	 * new devargs attached in the probe again.
	 */
	ret = mlx5_common_config_get(mkvlist, config);
	if (ret) {
		DRV_LOG(ERR, "Failed to process device configure: %s",
			strerror(rte_errno));
		mlx5_free(config);
		return ret;
	}
	/*
	 * Check the match between the temporary structure and the existing
	 * common device structure.
	 */
	if (cdev->config.mr_ext_memseg_en ^ config->mr_ext_memseg_en) {
		DRV_LOG(ERR, "\"mr_ext_memseg_en\" "
			"configuration mismatch for device %s.",
			cdev->dev->name);
		goto error;
	}
	if (cdev->config.mr_mempool_reg_en ^ config->mr_mempool_reg_en) {
		DRV_LOG(ERR, "\"mr_mempool_reg_en\" "
			"configuration mismatch for device %s.",
			cdev->dev->name);
		goto error;
	}
	if (cdev->config.sys_mem_en ^ config->sys_mem_en) {
		DRV_LOG(ERR,
			"\"sys_mem_en\" configuration mismatch for device %s.",
			cdev->dev->name);
		goto error;
	}
	if (cdev->config.dbnc ^ config->dbnc) {
		DRV_LOG(ERR, "\"dbnc\" configuration mismatch for device %s.",
			cdev->dev->name);
		goto error;
	}
	mlx5_free(config);
	return 0;
error:
	mlx5_free(config);
	rte_errno = EINVAL;
	return -rte_errno;
}

static int
drivers_remove(struct mlx5_common_device *cdev, uint32_t enabled_classes)
{
	struct mlx5_class_driver *driver;
	int local_ret = -ENODEV;
	unsigned int i = 0;
	int ret = 0;

	while (enabled_classes) {
		driver = driver_get(RTE_BIT64(i));
		if (driver != NULL) {
			local_ret = driver->remove(cdev);
			if (local_ret == 0)
				cdev->classes_loaded &= ~RTE_BIT64(i);
			else if (ret == 0)
				ret = local_ret;
		}
		enabled_classes &= ~RTE_BIT64(i);
		i++;
	}
	if (local_ret != 0 && ret == 0)
		ret = local_ret;
	return ret;
}

static int
drivers_probe(struct mlx5_common_device *cdev, uint32_t user_classes,
	      struct mlx5_kvargs_ctrl *mkvlist)
{
	struct mlx5_class_driver *driver;
	uint32_t enabled_classes = 0;
	bool already_loaded;
	int ret = -EINVAL;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((driver->drv_class & user_classes) == 0)
			continue;
		if (!mlx5_bus_match(driver, cdev->dev))
			continue;
		already_loaded = cdev->classes_loaded & driver->drv_class;
		if (already_loaded && driver->probe_again == 0) {
			DRV_LOG(ERR, "Device %s is already probed",
				cdev->dev->name);
			ret = -EEXIST;
			goto probe_err;
		}
		ret = driver->probe(cdev, mkvlist);
		if (ret < 0) {
			DRV_LOG(ERR, "Failed to load driver %s",
				driver->name);
			goto probe_err;
		}
		enabled_classes |= driver->drv_class;
	}
	if (!ret) {
		cdev->classes_loaded |= enabled_classes;
		return 0;
	}
probe_err:
	/*
	 * Need to remove only drivers which were not probed before this probe
	 * instance, but have already been probed before this failure.
	 */
	enabled_classes &= ~cdev->classes_loaded;
	drivers_remove(cdev, enabled_classes);
	return ret;
}

int
mlx5_common_dev_probe(struct rte_device *eal_dev)
{
	struct mlx5_common_device *cdev;
	struct mlx5_kvargs_ctrl mkvlist;
	struct mlx5_kvargs_ctrl *mkvlist_p = NULL;
	uint32_t classes = 0;
	bool new_device = false;
	int ret;

	DRV_LOG(INFO, "probe device \"%s\".", eal_dev->name);
	if (eal_dev->devargs != NULL)
		mkvlist_p = &mkvlist;
	ret = mlx5_kvargs_prepare(mkvlist_p, eal_dev->devargs);
	if (ret < 0) {
		DRV_LOG(ERR, "Unsupported device arguments: %s",
			eal_dev->devargs->args);
		return ret;
	}
	ret = parse_class_options(eal_dev->devargs, mkvlist_p);
	if (ret < 0) {
		DRV_LOG(ERR, "Unsupported mlx5 class type: %s",
			eal_dev->devargs->args);
		goto class_err;
	}
	classes = ret;
	if (classes == 0)
		/* Default to net class. */
		classes = MLX5_CLASS_ETH;
	/*
	 * The MLX5 common driver supports probing again in two scenarios:
	 * - Add a new driver under an existing common device (regardless of
	 *   the driver's own support for probing again).
	 * - Transfer the probing-again support of the drivers themselves.
	 *
	 * In both scenarios the existing device is used. Here we look for a
	 * device matching the rte device; if it exists, the requested classes
	 * are probed with this device.
	 */
	cdev = to_mlx5_device(eal_dev);
	if (!cdev) {
		/* Not probing again, create a new device. */
		cdev = mlx5_common_dev_create(eal_dev, classes, mkvlist_p);
		if (!cdev) {
			ret = -ENOMEM;
			goto class_err;
		}
		new_device = true;
	} else {
		/* Probing again, validate that the common devargs match. */
		ret = mlx5_common_probe_again_args_validate(cdev, mkvlist_p);
		if (ret) {
			DRV_LOG(ERR,
				"Probe again parameters aren't compatible : %s",
				strerror(rte_errno));
			goto class_err;
		}
	}
	/*
	 * Validate the class combination here.
	 * For a new device, the classes_loaded field is 0 and only the classes
	 * given as user device arguments are checked.
	 */
	ret = is_valid_class_combination(classes | cdev->classes_loaded);
	if (ret != 0) {
		DRV_LOG(ERR, "Unsupported mlx5 classes combination.");
		goto class_err;
	}
	ret = drivers_probe(cdev, classes, mkvlist_p);
	if (ret)
		goto class_err;
	/*
	 * Validate that all devargs have been used; an unused key means an
	 * unknown key. When probe-again validation fails, the added drivers
	 * are not removed here, but when the device is released.
	 */
	ret = mlx5_kvargs_validate(mkvlist_p);
	if (ret)
		goto class_err;
	mlx5_kvargs_release(mkvlist_p);
	return 0;
class_err:
	if (new_device) {
		/*
		 * For a new device, classes_loaded is always 0 before the
		 * drivers_probe() call.
		 */
		if (cdev->classes_loaded)
			drivers_remove(cdev, cdev->classes_loaded);
		mlx5_common_dev_release(cdev);
	}
	mlx5_kvargs_release(mkvlist_p);
	return ret;
}

int
mlx5_common_dev_remove(struct rte_device *eal_dev)
{
	struct mlx5_common_device *cdev;
	int ret;

	cdev = to_mlx5_device(eal_dev);
	if (!cdev)
		return -ENODEV;
	/* Matching device found, cleanup and unload drivers. */
	ret = drivers_remove(cdev, cdev->classes_loaded);
	if (ret == 0)
		mlx5_common_dev_release(cdev);
	return ret;
}

/**
 * Callback to DMA map external memory to a device.
 *
 * @param rte_dev
 *   Pointer to the generic device.
 * @param addr
 *   Starting virtual address of memory to be mapped.
 * @param iova
 *   Starting IOVA address of memory to be mapped.
 * @param len
 *   Length of memory segment being mapped.
 *
 * @return
 *   0 on success, negative value on error.
 */
int
mlx5_common_dev_dma_map(struct rte_device *rte_dev, void *addr,
			uint64_t iova __rte_unused, size_t len)
{
	struct mlx5_common_device *dev;
	struct mlx5_mr *mr;

	dev = to_mlx5_device(rte_dev);
	if (!dev) {
		DRV_LOG(WARNING,
			"Unable to find matching mlx5 device to device %s",
			rte_dev->name);
		rte_errno = ENODEV;
		return -1;
	}
	mr = mlx5_create_mr_ext(dev->pd, (uintptr_t)addr, len,
				SOCKET_ID_ANY, dev->mr_scache.reg_mr_cb);
	if (!mr) {
		DRV_LOG(WARNING, "Device %s unable to DMA map", rte_dev->name);
		rte_errno = EINVAL;
		return -1;
	}
	rte_rwlock_write_lock(&dev->mr_scache.rwlock);
	LIST_INSERT_HEAD(&dev->mr_scache.mr_list, mr, mr);
	/* Insert to the global cache table. */
	mlx5_mr_insert_cache(&dev->mr_scache, mr);
	rte_rwlock_write_unlock(&dev->mr_scache.rwlock);
	return 0;
}

/**
 * Callback to DMA unmap external memory from a device.
 *
 * @param rte_dev
 *   Pointer to the generic device.
 * @param addr
 *   Starting virtual address of memory to be unmapped.
 * @param iova
 *   Starting IOVA address of memory to be unmapped.
 * @param len
 *   Length of memory segment being unmapped.
 *
 * @return
 *   0 on success, negative value on error.
 */
int
mlx5_common_dev_dma_unmap(struct rte_device *rte_dev, void *addr,
			  uint64_t iova __rte_unused, size_t len __rte_unused)
{
	struct mlx5_common_device *dev;
	struct mr_cache_entry entry;
	struct mlx5_mr *mr;

	dev = to_mlx5_device(rte_dev);
	if (!dev) {
		DRV_LOG(WARNING,
			"Unable to find matching mlx5 device to device %s.",
			rte_dev->name);
		rte_errno = ENODEV;
		return -1;
	}
	rte_rwlock_read_lock(&dev->mr_scache.rwlock);
	mr = mlx5_mr_lookup_list(&dev->mr_scache, &entry, (uintptr_t)addr);
	if (!mr) {
		rte_rwlock_read_unlock(&dev->mr_scache.rwlock);
		DRV_LOG(WARNING,
			"Address 0x%" PRIxPTR " wasn't registered to device %s",
			(uintptr_t)addr, rte_dev->name);
		rte_errno = EINVAL;
		return -1;
	}
	LIST_REMOVE(mr, mr);
	DRV_LOG(DEBUG, "MR(%p) is removed from list.", (void *)mr);
	mlx5_mr_free(mr, dev->mr_scache.dereg_mr_cb);
	mlx5_mr_rebuild_cache(&dev->mr_scache);
	/*
	 * No explicit wmb is needed after updating dev_gen due to
	 * store-release ordering in unlock that provides the
	 * implicit barrier at the software visible level.
	 */
	++dev->mr_scache.dev_gen;
	DRV_LOG(DEBUG, "Broadcasting local cache flush, gen=%d.",
		dev->mr_scache.dev_gen);
	rte_rwlock_read_unlock(&dev->mr_scache.rwlock);
	return 0;
}

void
mlx5_class_driver_register(struct mlx5_class_driver *driver)
{
	mlx5_common_driver_on_register_pci(driver);
	TAILQ_INSERT_TAIL(&drivers_list, driver, next);
}

static void mlx5_common_driver_init(void)
{
	mlx5_common_pci_init();
#ifdef RTE_EXEC_ENV_LINUX
	mlx5_common_auxiliary_init();
#endif
}

static bool mlx5_common_initialized;

/**
 * One-time initialization routine for the run-time dependency on the glue
 * library for multiple PMDs. Each mlx5 PMD that depends on the mlx5_common
 * module must invoke it in its constructor.
 */
void
mlx5_common_init(void)
{
	if (mlx5_common_initialized)
		return;

	pthread_mutex_init(&devices_list_lock, NULL);
	mlx5_glue_constructor();
	mlx5_common_driver_init();
	mlx5_common_initialized = true;
}

/**
 * This function is responsible for initializing the variable
 * haswell_broadwell_cpu by checking if the CPU is Intel
 * and reading the data returned from mlx5_cpu_id().
 * Since Haswell and Broadwell CPUs do not show improved performance
 * when using relaxed ordering, we want to check the CPU type
 * before deciding whether to enable RO or not.
 * If the CPU is Haswell or Broadwell the variable is set to 1,
 * otherwise it is 0.
 */
RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
{
#ifdef RTE_ARCH_X86_64
	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
	unsigned int i, model, family, brand_id, vendor;
	unsigned int signature_intel_ebx = 0x756e6547;
	unsigned int extended_model;
	unsigned int eax = 0;
	unsigned int ebx = 0;
	unsigned int ecx = 0;
	unsigned int edx = 0;
	int max_level;

	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
	vendor = ebx;
	max_level = eax;
	if (max_level < 1) {
		haswell_broadwell_cpu = 0;
		return;
	}
	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
	model = (eax >> 4) & 0x0f;
	family = (eax >> 8) & 0x0f;
	brand_id = ebx & 0xff;
	extended_model = (eax >> 12) & 0xf0;
	/* Check if the processor is Haswell or Broadwell. */
	if (vendor == signature_intel_ebx) {
		if (family == 0x06)
			model += extended_model;
		if (brand_id == 0 && family == 0x6) {
			for (i = 0; i < RTE_DIM(broadwell_models); i++)
				if (model == broadwell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
			for (i = 0; i < RTE_DIM(haswell_models); i++)
				if (model == haswell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
		}
	}
#endif
	haswell_broadwell_cpu = 0;
}

/**
 * Allocate the User Access Region with DevX on the specified device.
 * This routine handles the following UAR allocation issues:
 *
 * - Try to allocate the UAR with the most appropriate memory mapping
 *   type from the ones supported by the host.
 *
 * - Try to allocate the UAR with a non-NULL base address. OFED 5.0.x and
 *   upstream rdma-core before v29 returned NULL as the UAR base address
 *   if the UAR was not the first object in the UAR page.
 *   It caused the PMD failure and we should try to get another UAR until
 *   we get the first one with a non-NULL base address returned.
 *
 * @param [in] cdev
 *   Pointer to mlx5 device structure to perform allocation on its context.
 *
 * @return
 *   UAR object pointer on success, NULL otherwise and rte_errno is set.
 */
static void *
mlx5_devx_alloc_uar(struct mlx5_common_device *cdev)
{
	void *uar;
	uint32_t retry, uar_mapping;
	void *base_addr;

	for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		/* Control the mapping type according to the settings. */
		uar_mapping = (cdev->config.dbnc == MLX5_TXDB_NCACHED) ?
			      MLX5DV_UAR_ALLOC_TYPE_NC :
			      MLX5DV_UAR_ALLOC_TYPE_BF;
#else
		/*
		 * It seems we have no way to control the memory mapping type
		 * for the UAR, the default "Write-Combining" type is assumed.
		 */
		uar_mapping = 0;
#endif
		uar = mlx5_glue->devx_alloc_uar(cdev->ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		if (!uar && uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
			/*
			 * In some environments, like a virtual machine, the
			 * Write-Combining mapping might not be supported and
			 * UAR allocation fails. Try the "Non-Cached" mapping
			 * for this case.
			 */
			DRV_LOG(DEBUG, "Failed to allocate DevX UAR (BF)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
			uar = mlx5_glue->devx_alloc_uar(cdev->ctx, uar_mapping);
		} else if (!uar && uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
			/*
			 * If Verbs/kernel does not support "Non-Cached",
			 * try the "Write-Combining".
			 */
			DRV_LOG(DEBUG, "Failed to allocate DevX UAR (NC)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
			uar = mlx5_glue->devx_alloc_uar(cdev->ctx, uar_mapping);
		}
#endif
		if (!uar) {
			DRV_LOG(ERR, "Failed to allocate DevX UAR (BF/NC)");
			rte_errno = ENOMEM;
			goto exit;
		}
		base_addr = mlx5_os_get_devx_uar_base_addr(uar);
		if (base_addr)
			break;
		/*
		 * The UARs are allocated by rdma_core within the
		 * IB device context, on context closure all UARs
		 * will be freed, so there should be no memory/object leakage.
		 */
		DRV_LOG(DEBUG, "Retrying to allocate DevX UAR");
		uar = NULL;
	}
	/* Check whether we finally succeeded with a valid UAR allocation. */
	if (!uar) {
		DRV_LOG(ERR, "Failed to allocate DevX UAR (NULL base)");
		rte_errno = ENOMEM;
	}
	/*
	 * Returning void * instead of struct mlx5dv_devx_uar *
	 * is for compatibility with older rdma-core library headers.
	 */
exit:
	return uar;
}

void
mlx5_devx_uar_release(struct mlx5_uar *uar)
{
	if (uar->obj != NULL)
		mlx5_glue->devx_free_uar(uar->obj);
	memset(uar, 0, sizeof(*uar));
}

int
mlx5_devx_uar_prepare(struct mlx5_common_device *cdev, struct mlx5_uar *uar)
{
	off_t uar_mmap_offset;
	const size_t page_size = rte_mem_page_size();
	void *base_addr;
	void *uar_obj;

	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return -1;
	}
	uar_obj = mlx5_devx_alloc_uar(cdev);
	if (uar_obj == NULL || mlx5_os_get_devx_uar_reg_addr(uar_obj) == NULL) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to allocate UAR.");
		return -1;
	}
	uar->obj = uar_obj;
	uar_mmap_offset = mlx5_os_get_devx_uar_mmap_offset(uar_obj);
	base_addr = mlx5_os_get_devx_uar_base_addr(uar_obj);
	uar->dbnc = mlx5_db_map_type_get(uar_mmap_offset, page_size);
	uar->bf_db.db = mlx5_os_get_devx_uar_reg_addr(uar_obj);
	uar->cq_db.db = RTE_PTR_ADD(base_addr, MLX5_CQ_DOORBELL);
#ifndef RTE_ARCH_64
	rte_spinlock_init(&uar->bf_sl);
	rte_spinlock_init(&uar->cq_sl);
	uar->bf_db.sl_p = &uar->bf_sl;
	uar->cq_db.sl_p = &uar->cq_sl;
#endif /* RTE_ARCH_64 */
	return 0;
}

RTE_PMD_EXPORT_NAME(mlx5_common_driver, __COUNTER__);
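
/*
 * Usage sketch (illustrative only, not part of the original sources): a class
 * driver hooks into this common layer by filling a struct mlx5_class_driver
 * and registering it from a constructor, roughly as below. The "mlx5_foo_*"
 * names and the example devargs string are hypothetical.
 *
 *	static struct mlx5_class_driver mlx5_foo_driver = {
 *		.drv_class = MLX5_CLASS_REGEX,
 *		.name = "mlx5_foo",
 *		.probe = mlx5_foo_probe,
 *		.remove = mlx5_foo_remove,
 *	};
 *
 *	RTE_INIT(mlx5_foo_init)
 *	{
 *		mlx5_common_init();
 *		mlx5_class_driver_register(&mlx5_foo_driver);
 *	}
 *
 * mlx5_common_dev_probe() then calls the .probe callback (with the common
 * device and the parsed kvargs list) of every registered driver whose class
 * was requested, e.g. via a device argument such as
 * "class=eth:regex,mr_ext_memseg_en=1".
 */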