/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <string.h>
#include <stdio.h>

#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_class.h>
#include <rte_malloc.h>
#include <rte_eal_paging.h>

#include "mlx5_common.h"
#include "mlx5_common_os.h"
#include "mlx5_common_mp.h"
#include "mlx5_common_log.h"
#include "mlx5_common_defs.h"
#include "mlx5_common_private.h"

uint8_t haswell_broadwell_cpu;

/* On x86_64 Intel processors, check whether relaxed ordering should be used. */
#ifdef RTE_ARCH_X86_64
/**
 * This function returns processor identification and feature information
 * into the registers.
 *
 * @param eax, ebx, ecx, edx
 *   Pointers to the registers that will hold CPU information.
 * @param level
 *   The main category of information returned.
 */
static inline void mlx5_cpu_id(unsigned int level,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	__asm__("cpuid\n\t"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (level));
}
#endif

RTE_LOG_REGISTER_DEFAULT(mlx5_common_logtype, NOTICE)

/* Head of list of drivers. */
static TAILQ_HEAD(mlx5_drivers, mlx5_class_driver) drivers_list =
				TAILQ_HEAD_INITIALIZER(drivers_list);

/* Head of devices. */
static TAILQ_HEAD(mlx5_devices, mlx5_common_device) devices_list =
				TAILQ_HEAD_INITIALIZER(devices_list);
static pthread_mutex_t devices_list_lock;

static const struct {
	const char *name;
	unsigned int drv_class;
} mlx5_classes[] = {
	{ .name = "vdpa", .drv_class = MLX5_CLASS_VDPA },
	{ .name = "eth", .drv_class = MLX5_CLASS_ETH },
	/* Keep class "net" for backward compatibility. */
	{ .name = "net", .drv_class = MLX5_CLASS_ETH },
	{ .name = "regex", .drv_class = MLX5_CLASS_REGEX },
	{ .name = "compress", .drv_class = MLX5_CLASS_COMPRESS },
	{ .name = "crypto", .drv_class = MLX5_CLASS_CRYPTO },
};

static int
class_name_to_value(const char *class_name)
{
	unsigned int i;

	for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
		if (strcmp(class_name, mlx5_classes[i].name) == 0)
			return mlx5_classes[i].drv_class;
	}
	return -EINVAL;
}

static struct mlx5_class_driver *
driver_get(uint32_t class)
{
	struct mlx5_class_driver *driver;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((uint32_t)driver->drv_class == class)
			return driver;
	}
	return NULL;
}

/**
 * Verify and store value for devargs.
 *
 * @param[in] key
 *   Key argument to verify.
 * @param[in] val
 *   Value associated with key.
 * @param opaque
 *   User data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_args_check_handler(const char *key, const char *val, void *opaque)
{
	struct mlx5_common_dev_config *config = opaque;
	signed long tmp;

	if (val == NULL || *val == '\0') {
		DRV_LOG(ERR, "Key %s is missing value.", key);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	errno = 0;
	tmp = strtol(val, NULL, 0);
	if (errno) {
		rte_errno = errno;
		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
		return -rte_errno;
	}
	if (strcmp(key, "tx_db_nc") == 0) {
		if (tmp != MLX5_TXDB_CACHED &&
		    tmp != MLX5_TXDB_NCACHED &&
		    tmp != MLX5_TXDB_HEURISTIC) {
			DRV_LOG(ERR, "Invalid Tx doorbell mapping parameter.");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->dbnc = tmp;
	} else if (strcmp(key, "mr_ext_memseg_en") == 0) {
		config->mr_ext_memseg_en = !!tmp;
	} else if (strcmp(key, "mr_mempool_reg_en") == 0) {
		config->mr_mempool_reg_en = !!tmp;
	} else if (strcmp(key, "sys_mem_en") == 0) {
		config->sys_mem_en = !!tmp;
	}
	return 0;
}

/**
 * Parse common device parameters.
 *
 * @param devargs
 *   Device arguments structure.
 * @param config
 *   Pointer to device configuration structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_config_get(struct rte_devargs *devargs,
		       struct mlx5_common_dev_config *config)
{
	struct rte_kvargs *kvlist;
	int ret = 0;

	/* Set defaults. */
	config->mr_ext_memseg_en = 1;
	config->mr_mempool_reg_en = 1;
	config->sys_mem_en = 0;
	config->dbnc = MLX5_ARG_UNSET;
	if (devargs == NULL)
		return 0;
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	ret = rte_kvargs_process(kvlist, NULL, mlx5_common_args_check_handler,
				 config);
	if (ret)
		ret = -rte_errno;
	rte_kvargs_free(kvlist);
	DRV_LOG(DEBUG, "mr_ext_memseg_en is %u.", config->mr_ext_memseg_en);
	DRV_LOG(DEBUG, "mr_mempool_reg_en is %u.", config->mr_mempool_reg_en);
	DRV_LOG(DEBUG, "sys_mem_en is %u.", config->sys_mem_en);
	DRV_LOG(DEBUG, "Tx doorbell mapping parameter is %d.", config->dbnc);
	return ret;
}
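
/*
 * Illustrative devargs usage (not part of the driver): the keys handled above
 * are passed per device on the EAL command line, together with the class
 * selection parsed below. The PCI address is hypothetical.
 *
 *   dpdk-testpmd -a 0000:08:00.0,class=eth:regex,mr_ext_memseg_en=1,sys_mem_en=1
 *
 * Keys not recognized by this handler are left for the class drivers to parse.
 */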

static int
devargs_class_handler(__rte_unused const char *key,
		      const char *class_names, void *opaque)
{
	int *ret = opaque;
	int class_val;
	char *scratch;
	char *found;
	char *refstr = NULL;

	*ret = 0;
	scratch = strdup(class_names);
	if (scratch == NULL) {
		*ret = -ENOMEM;
		return *ret;
	}
	found = strtok_r(scratch, ":", &refstr);
	if (found == NULL)
		/* Empty string. */
		goto err;
	do {
		/* Extract each individual class name. Multiple
		 * classes can be supplied as class=net:regex:foo:bar.
		 */
		class_val = class_name_to_value(found);
		/* Check if it is a valid class. */
		if (class_val < 0) {
			*ret = -EINVAL;
			goto err;
		}
		*ret |= class_val;
		found = strtok_r(NULL, ":", &refstr);
	} while (found != NULL);
err:
	free(scratch);
	if (*ret < 0)
		DRV_LOG(ERR, "Invalid mlx5 class options: %s.\n", class_names);
	return *ret;
}

static int
parse_class_options(const struct rte_devargs *devargs)
{
	struct rte_kvargs *kvlist;
	int ret = 0;

	if (devargs == NULL)
		return 0;
	if (devargs->cls != NULL && devargs->cls->name != NULL)
		/* Global syntax, only one class type. */
		return class_name_to_value(devargs->cls->name);
	/* Legacy devargs support multiple classes. */
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL)
		return 0;
	rte_kvargs_process(kvlist, RTE_DEVARGS_KEY_CLASS,
			   devargs_class_handler, &ret);
	rte_kvargs_free(kvlist);
	return ret;
}

static const unsigned int mlx5_class_invalid_combinations[] = {
	MLX5_CLASS_ETH | MLX5_CLASS_VDPA,
	/* New class combination should be added here. */
};

static int
is_valid_class_combination(uint32_t user_classes)
{
	unsigned int i;

	/* Verify if user specified unsupported combination. */
	for (i = 0; i < RTE_DIM(mlx5_class_invalid_combinations); i++) {
		if ((mlx5_class_invalid_combinations[i] & user_classes) ==
		    mlx5_class_invalid_combinations[i])
			return -EINVAL;
	}
	/* No invalid class combination found. */
	return 0;
}

static bool
mlx5_bus_match(const struct mlx5_class_driver *drv,
	       const struct rte_device *dev)
{
	if (mlx5_dev_is_pci(dev))
		return mlx5_dev_pci_match(drv, dev);
	return true;
}

static struct mlx5_common_device *
to_mlx5_device(const struct rte_device *rte_dev)
{
	struct mlx5_common_device *cdev;

	TAILQ_FOREACH(cdev, &devices_list, next) {
		if (rte_dev == cdev->dev)
			return cdev;
	}
	return NULL;
}

int
mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size)
{
	struct rte_pci_addr pci_addr = { 0 };
	int ret;

	if (mlx5_dev_is_pci(dev)) {
		/* Input might be <BDF>, format PCI address to <DBDF>. */
		ret = rte_pci_addr_parse(dev->name, &pci_addr);
		if (ret != 0)
			return -ENODEV;
		rte_pci_device_name(&pci_addr, addr, size);
		return 0;
	}
#ifdef RTE_EXEC_ENV_LINUX
	return mlx5_auxiliary_get_pci_str(RTE_DEV_TO_AUXILIARY_CONST(dev),
					  addr, size);
#else
	rte_errno = ENODEV;
	return -rte_errno;
#endif
}

/**
 * Register the mempool for the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool being registered.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_dev_mempool_register(struct mlx5_common_device *cdev,
			  struct rte_mempool *mp, bool is_extmem)
{
	return mlx5_mr_mempool_register(cdev, mp, is_extmem);
}

/**
 * Unregister the mempool from the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool being unregistered.
 */
void
mlx5_dev_mempool_unregister(struct mlx5_common_device *cdev,
			    struct rte_mempool *mp)
{
	if (mlx5_mr_mempool_unregister(cdev, mp) < 0)
		DRV_LOG(WARNING, "Failed to unregister mempool %s for PD %p: %s",
			mp->name, cdev->pd, rte_strerror(rte_errno));
}

/**
 * rte_mempool_walk() callback to register mempools for the protection domain.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_mempool_register_cb(struct rte_mempool *mp, void *arg)
{
	struct mlx5_common_device *cdev = arg;
	int ret;

	ret = mlx5_dev_mempool_register(cdev, mp, false);
	if (ret < 0 && rte_errno != EEXIST)
		DRV_LOG(ERR,
			"Failed to register existing mempool %s for PD %p: %s",
			mp->name, cdev->pd, rte_strerror(rte_errno));
}

/**
 * rte_mempool_walk() callback to unregister mempools
 * from the protection domain.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_mempool_unregister_cb(struct rte_mempool *mp, void *arg)
{
	mlx5_dev_mempool_unregister((struct mlx5_common_device *)arg, mp);
}

/**
 * Mempool life cycle callback for mlx5 common devices.
 *
 * @param event
 *   Mempool life cycle event.
 * @param mp
 *   Associated mempool.
 * @param arg
 *   Pointer to a device shared context.
 */
static void
mlx5_dev_mempool_event_cb(enum rte_mempool_event event, struct rte_mempool *mp,
			  void *arg)
{
	struct mlx5_common_device *cdev = arg;

	switch (event) {
	case RTE_MEMPOOL_EVENT_READY:
		if (mlx5_dev_mempool_register(cdev, mp, false) < 0)
			DRV_LOG(ERR,
				"Failed to register new mempool %s for PD %p: %s",
				mp->name, cdev->pd, rte_strerror(rte_errno));
		break;
	case RTE_MEMPOOL_EVENT_DESTROY:
		mlx5_dev_mempool_unregister(cdev, mp);
		break;
	}
}

int
mlx5_dev_mempool_subscribe(struct mlx5_common_device *cdev)
{
	int ret = 0;

	if (!cdev->config.mr_mempool_reg_en)
		return 0;
	rte_rwlock_write_lock(&cdev->mr_scache.mprwlock);
	if (cdev->mr_scache.mp_cb_registered)
		goto exit;
	/* Callback for this device may be already registered. */
	ret = rte_mempool_event_callback_register(mlx5_dev_mempool_event_cb,
						  cdev);
	if (ret != 0 && rte_errno != EEXIST)
		goto exit;
	/* Register mempools only once for this device. */
	if (ret == 0)
		rte_mempool_walk(mlx5_dev_mempool_register_cb, cdev);
	ret = 0;
	cdev->mr_scache.mp_cb_registered = 1;
exit:
	rte_rwlock_write_unlock(&cdev->mr_scache.mprwlock);
	return ret;
}

static void
mlx5_dev_mempool_unsubscribe(struct mlx5_common_device *cdev)
{
	int ret;

	if (!cdev->mr_scache.mp_cb_registered ||
	    !cdev->config.mr_mempool_reg_en)
		return;
	/* Stop watching for mempool events and unregister all mempools. */
	ret = rte_mempool_event_callback_unregister(mlx5_dev_mempool_event_cb,
						    cdev);
	if (ret == 0)
		rte_mempool_walk(mlx5_dev_mempool_unregister_cb, cdev);
}

/**
 * Callback for memory event.
 *
 * @param event_type
 *   Memory event type.
 * @param addr
 *   Address of memory.
 * @param len
 *   Size of memory.
 */
static void
mlx5_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
		     size_t len, void *arg __rte_unused)
{
	struct mlx5_common_device *cdev;

	/* Must be called from the primary process. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	switch (event_type) {
	case RTE_MEM_EVENT_FREE:
		pthread_mutex_lock(&devices_list_lock);
		/* Iterate all the existing mlx5 devices. */
		TAILQ_FOREACH(cdev, &devices_list, next)
			mlx5_free_mr_by_addr(&cdev->mr_scache,
					     mlx5_os_get_ctx_device_name
								(cdev->ctx),
					     addr, len);
		pthread_mutex_unlock(&devices_list_lock);
		break;
	case RTE_MEM_EVENT_ALLOC:
	default:
		break;
	}
}

/**
 * Uninitialize all HW global objects of the device context.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
 */
static void
mlx5_dev_hw_global_release(struct mlx5_common_device *cdev)
{
	if (cdev->pd != NULL) {
		claim_zero(mlx5_os_dealloc_pd(cdev->pd));
		cdev->pd = NULL;
	}
	if (cdev->ctx != NULL) {
		claim_zero(mlx5_glue->close_device(cdev->ctx));
		cdev->ctx = NULL;
	}
}

/**
 * Initialize all HW global objects of the device context.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
 * @param classes
 *   Chosen classes coming from user device arguments.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_dev_hw_global_prepare(struct mlx5_common_device *cdev, uint32_t classes)
{
	int ret;

	/* Create the device context. */
	ret = mlx5_os_open_device(cdev, classes);
	if (ret < 0)
		return ret;
	/* Allocate Protection Domain object and extract its pdn. */
	ret = mlx5_os_pd_create(cdev);
	if (ret)
		goto error;
	/* All actions taken below are relevant only when DevX is supported. */
	if (cdev->config.devx == 0)
		return 0;
	/* Query HCA attributes. */
	ret = mlx5_devx_cmd_query_hca_attr(cdev->ctx, &cdev->config.hca_attr);
	if (ret) {
		DRV_LOG(ERR, "Unable to read HCA capabilities.");
		rte_errno = ENOTSUP;
		goto error;
	}
	return 0;
error:
	mlx5_dev_hw_global_release(cdev);
	return ret;
}

static void
mlx5_common_dev_release(struct mlx5_common_device *cdev)
{
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_REMOVE(&devices_list, cdev, next);
	pthread_mutex_unlock(&devices_list_lock);
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		if (TAILQ_EMPTY(&devices_list))
			rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
							  NULL);
		mlx5_dev_mempool_unsubscribe(cdev);
		mlx5_mr_release_cache(&cdev->mr_scache);
		mlx5_dev_hw_global_release(cdev);
	}
	rte_free(cdev);
}

static struct mlx5_common_device *
mlx5_common_dev_create(struct rte_device *eal_dev, uint32_t classes)
{
	struct mlx5_common_device *cdev;
	int ret;

	cdev = rte_zmalloc("mlx5_common_device", sizeof(*cdev), 0);
	if (!cdev) {
		DRV_LOG(ERR, "Device allocation failure.");
		rte_errno = ENOMEM;
		return NULL;
	}
	cdev->dev = eal_dev;
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		goto exit;
	/* Parse device parameters. */
	ret = mlx5_common_config_get(eal_dev->devargs, &cdev->config);
	if (ret < 0) {
		DRV_LOG(ERR, "Failed to process device arguments: %s",
			strerror(rte_errno));
		rte_free(cdev);
		return NULL;
	}
	mlx5_malloc_mem_select(cdev->config.sys_mem_en);
	/* Initialize all HW global objects of the device context. */
	ret = mlx5_dev_hw_global_prepare(cdev, classes);
	if (ret) {
		DRV_LOG(ERR, "Failed to initialize device context.");
		rte_free(cdev);
		return NULL;
	}
	/* Initialize global MR cache resources and update its functions. */
	ret = mlx5_mr_create_cache(&cdev->mr_scache, eal_dev->numa_node);
	if (ret) {
		DRV_LOG(ERR, "Failed to initialize global MR share cache.");
		mlx5_dev_hw_global_release(cdev);
		rte_free(cdev);
		return NULL;
	}
	/* Register callback function for global shared MR cache management. */
	if (TAILQ_EMPTY(&devices_list))
		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
						mlx5_mr_mem_event_cb, NULL);
exit:
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_INSERT_HEAD(&devices_list, cdev, next);
	pthread_mutex_unlock(&devices_list_lock);
	return cdev;
}

/**
 * Validate common devargs when probing again.
 *
 * When a common device is probed again, it cannot change its configuration.
 * If the user requests an incompatible configuration in devargs, it is an
 * error.
 * This function checks the match between:
 * - Common device configuration requested by the probe-again devargs.
 * - Existing common device configuration.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_probe_again_args_validate(struct mlx5_common_device *cdev)
{
	struct mlx5_common_dev_config *config;
	int ret;

	/* Secondary process should not handle devargs. */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;
	/* Probing again does not have to supply devargs. */
	if (cdev->dev->devargs == NULL)
		return 0;
	config = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
			     sizeof(struct mlx5_common_dev_config),
			     RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (config == NULL) {
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	/*
	 * Create a temporary common configuration structure according to the
	 * new devargs attached in this probe-again call.
	 */
	ret = mlx5_common_config_get(cdev->dev->devargs, config);
	if (ret) {
		DRV_LOG(ERR, "Failed to process device configuration: %s",
			strerror(rte_errno));
		mlx5_free(config);
		return ret;
	}
	/*
	 * Check the match between the temporary structure and the existing
	 * common device structure.
664 */ 665 if (cdev->config.mr_ext_memseg_en ^ config->mr_ext_memseg_en) { 666 DRV_LOG(ERR, "\"mr_ext_memseg_en\" " 667 "configuration mismatch for device %s.", 668 cdev->dev->name); 669 goto error; 670 } 671 if (cdev->config.mr_mempool_reg_en ^ config->mr_mempool_reg_en) { 672 DRV_LOG(ERR, "\"mr_mempool_reg_en\" " 673 "configuration mismatch for device %s.", 674 cdev->dev->name); 675 goto error; 676 } 677 if (cdev->config.sys_mem_en ^ config->sys_mem_en) { 678 DRV_LOG(ERR, 679 "\"sys_mem_en\" configuration mismatch for device %s.", 680 cdev->dev->name); 681 goto error; 682 } 683 if (cdev->config.dbnc ^ config->dbnc) { 684 DRV_LOG(ERR, "\"dbnc\" configuration mismatch for device %s.", 685 cdev->dev->name); 686 goto error; 687 } 688 mlx5_free(config); 689 return 0; 690 error: 691 mlx5_free(config); 692 rte_errno = EINVAL; 693 return -rte_errno; 694 } 695 696 static int 697 drivers_remove(struct mlx5_common_device *cdev, uint32_t enabled_classes) 698 { 699 struct mlx5_class_driver *driver; 700 int local_ret = -ENODEV; 701 unsigned int i = 0; 702 int ret = 0; 703 704 while (enabled_classes) { 705 driver = driver_get(RTE_BIT64(i)); 706 if (driver != NULL) { 707 local_ret = driver->remove(cdev); 708 if (local_ret == 0) 709 cdev->classes_loaded &= ~RTE_BIT64(i); 710 else if (ret == 0) 711 ret = local_ret; 712 } 713 enabled_classes &= ~RTE_BIT64(i); 714 i++; 715 } 716 if (local_ret != 0 && ret == 0) 717 ret = local_ret; 718 return ret; 719 } 720 721 static int 722 drivers_probe(struct mlx5_common_device *cdev, uint32_t user_classes) 723 { 724 struct mlx5_class_driver *driver; 725 uint32_t enabled_classes = 0; 726 bool already_loaded; 727 int ret = -EINVAL; 728 729 TAILQ_FOREACH(driver, &drivers_list, next) { 730 if ((driver->drv_class & user_classes) == 0) 731 continue; 732 if (!mlx5_bus_match(driver, cdev->dev)) 733 continue; 734 already_loaded = cdev->classes_loaded & driver->drv_class; 735 if (already_loaded && driver->probe_again == 0) { 736 DRV_LOG(ERR, "Device %s is already probed", 737 cdev->dev->name); 738 ret = -EEXIST; 739 goto probe_err; 740 } 741 ret = driver->probe(cdev); 742 if (ret < 0) { 743 DRV_LOG(ERR, "Failed to load driver %s", 744 driver->name); 745 goto probe_err; 746 } 747 enabled_classes |= driver->drv_class; 748 } 749 if (!ret) { 750 cdev->classes_loaded |= enabled_classes; 751 return 0; 752 } 753 probe_err: 754 /* 755 * Need to remove only drivers which were not probed before this probe 756 * instance, but have already been probed before this failure. 757 */ 758 enabled_classes &= ~cdev->classes_loaded; 759 drivers_remove(cdev, enabled_classes); 760 return ret; 761 } 762 763 int 764 mlx5_common_dev_probe(struct rte_device *eal_dev) 765 { 766 struct mlx5_common_device *cdev; 767 uint32_t classes = 0; 768 bool new_device = false; 769 int ret; 770 771 DRV_LOG(INFO, "probe device \"%s\".", eal_dev->name); 772 ret = parse_class_options(eal_dev->devargs); 773 if (ret < 0) { 774 DRV_LOG(ERR, "Unsupported mlx5 class type: %s", 775 eal_dev->devargs->args); 776 return ret; 777 } 778 classes = ret; 779 if (classes == 0) 780 /* Default to net class. */ 781 classes = MLX5_CLASS_ETH; 782 /* 783 * MLX5 common driver supports probing again in two scenarios: 784 * - Add new driver under existing common device (regardless of the 785 * driver's own support in probing again). 786 * - Transfer the probing again support of the drivers themselves. 787 * 788 * In both scenarios it uses in the existing device. 
	 * common device matching the rte device; if it exists, the requested
	 * classes are probed with this device.
	 */
	cdev = to_mlx5_device(eal_dev);
	if (!cdev) {
		/* It isn't probing again, create a new device. */
		cdev = mlx5_common_dev_create(eal_dev, classes);
		if (!cdev)
			return -ENOMEM;
		new_device = true;
	} else {
		/* It is probing again, validate that common devargs match. */
		ret = mlx5_common_probe_again_args_validate(cdev);
		if (ret) {
			DRV_LOG(ERR,
				"Probe again parameters aren't compatible: %s",
				strerror(rte_errno));
			return ret;
		}
	}
	/*
	 * Validate the class combination here.
	 * For a new device, the classes_loaded field is 0 and only the
	 * classes given as device arguments are checked.
	 */
	ret = is_valid_class_combination(classes | cdev->classes_loaded);
	if (ret != 0) {
		DRV_LOG(ERR, "Unsupported mlx5 classes combination.");
		goto class_err;
	}
	ret = drivers_probe(cdev, classes);
	if (ret)
		goto class_err;
	return 0;
class_err:
	if (new_device)
		mlx5_common_dev_release(cdev);
	return ret;
}

int
mlx5_common_dev_remove(struct rte_device *eal_dev)
{
	struct mlx5_common_device *cdev;
	int ret;

	cdev = to_mlx5_device(eal_dev);
	if (!cdev)
		return -ENODEV;
	/* Matching device found, cleanup and unload drivers. */
	ret = drivers_remove(cdev, cdev->classes_loaded);
	if (ret == 0)
		mlx5_common_dev_release(cdev);
	return ret;
}

/**
 * Callback to DMA map external memory to a device.
 *
 * @param rte_dev
 *   Pointer to the generic device.
 * @param addr
 *   Starting virtual address of memory to be mapped.
 * @param iova
 *   Starting IOVA address of memory to be mapped.
 * @param len
 *   Length of memory segment being mapped.
 *
 * @return
 *   0 on success, negative value on error.
 */
int
mlx5_common_dev_dma_map(struct rte_device *rte_dev, void *addr,
			uint64_t iova __rte_unused, size_t len)
{
	struct mlx5_common_device *dev;
	struct mlx5_mr *mr;

	dev = to_mlx5_device(rte_dev);
	if (!dev) {
		DRV_LOG(WARNING,
			"Unable to find matching mlx5 device to device %s",
			rte_dev->name);
		rte_errno = ENODEV;
		return -1;
	}
	mr = mlx5_create_mr_ext(dev->pd, (uintptr_t)addr, len,
				SOCKET_ID_ANY, dev->mr_scache.reg_mr_cb);
	if (!mr) {
		DRV_LOG(WARNING, "Device %s unable to DMA map", rte_dev->name);
		rte_errno = EINVAL;
		return -1;
	}
	rte_rwlock_write_lock(&dev->mr_scache.rwlock);
	LIST_INSERT_HEAD(&dev->mr_scache.mr_list, mr, mr);
	/* Insert to the global cache table. */
	mlx5_mr_insert_cache(&dev->mr_scache, mr);
	rte_rwlock_write_unlock(&dev->mr_scache.rwlock);
	return 0;
}
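
/*
 * Illustrative call path (not part of the driver): applications normally reach
 * the DMA map/unmap callbacks in this file through the generic EAL device API.
 * The addresses below are hypothetical.
 *
 *   rte_dev_dma_map(rte_dev, addr, iova, len);
 *   ...
 *   rte_dev_dma_unmap(rte_dev, addr, iova, len);
 */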

/**
 * Callback to DMA unmap external memory from a device.
 *
 * @param rte_dev
 *   Pointer to the generic device.
 * @param addr
 *   Starting virtual address of memory to be unmapped.
 * @param iova
 *   Starting IOVA address of memory to be unmapped.
 * @param len
 *   Length of memory segment being unmapped.
 *
 * @return
 *   0 on success, negative value on error.
 */
int
mlx5_common_dev_dma_unmap(struct rte_device *rte_dev, void *addr,
			  uint64_t iova __rte_unused, size_t len __rte_unused)
{
	struct mlx5_common_device *dev;
	struct mr_cache_entry entry;
	struct mlx5_mr *mr;

	dev = to_mlx5_device(rte_dev);
	if (!dev) {
		DRV_LOG(WARNING,
			"Unable to find matching mlx5 device to device %s.",
			rte_dev->name);
		rte_errno = ENODEV;
		return -1;
	}
	rte_rwlock_read_lock(&dev->mr_scache.rwlock);
	mr = mlx5_mr_lookup_list(&dev->mr_scache, &entry, (uintptr_t)addr);
	if (!mr) {
		rte_rwlock_read_unlock(&dev->mr_scache.rwlock);
		DRV_LOG(WARNING,
			"Address 0x%" PRIxPTR " wasn't registered to device %s",
			(uintptr_t)addr, rte_dev->name);
		rte_errno = EINVAL;
		return -1;
	}
	LIST_REMOVE(mr, mr);
	DRV_LOG(DEBUG, "MR(%p) is removed from list.", (void *)mr);
	mlx5_mr_free(mr, dev->mr_scache.dereg_mr_cb);
	mlx5_mr_rebuild_cache(&dev->mr_scache);
	/*
	 * No explicit wmb is needed after updating dev_gen due to
	 * store-release ordering in unlock that provides the
	 * implicit barrier at the software visible level.
	 */
	++dev->mr_scache.dev_gen;
	DRV_LOG(DEBUG, "Broadcasting local cache flush, gen=%d.",
		dev->mr_scache.dev_gen);
	rte_rwlock_read_unlock(&dev->mr_scache.rwlock);
	return 0;
}

void
mlx5_class_driver_register(struct mlx5_class_driver *driver)
{
	mlx5_common_driver_on_register_pci(driver);
	TAILQ_INSERT_TAIL(&drivers_list, driver, next);
}

static void mlx5_common_driver_init(void)
{
	mlx5_common_pci_init();
#ifdef RTE_EXEC_ENV_LINUX
	mlx5_common_auxiliary_init();
#endif
}

static bool mlx5_common_initialized;

/**
 * One-time initialization routine for run-time dependency on the glue library
 * for multiple PMDs. Each mlx5 PMD that depends on the mlx5_common module
 * must invoke it in its constructor.
 */
void
mlx5_common_init(void)
{
	if (mlx5_common_initialized)
		return;

	pthread_mutex_init(&devices_list_lock, NULL);
	mlx5_glue_constructor();
	mlx5_common_driver_init();
	mlx5_common_initialized = true;
}
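
/*
 * Worked example (illustrative) for the model decoding in the constructor
 * below: CPUID leaf 1 packs the model in EAX bits 7:4 and the extended model
 * in bits 19:16. For a Broadwell CPU reported as model 0x3d, EAX holds model
 * nibble 0xd and extended model 0x3, so with family 0x06:
 *
 *   model          = (eax >> 4) & 0x0f;    // 0x0d
 *   extended_model = (eax >> 12) & 0xf0;   // 0x30 (already shifted left by 4)
 *   model += extended_model;               // 0x3d -> matches broadwell_models
 */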

/**
 * This function is responsible for initializing the variable
 * haswell_broadwell_cpu by checking if the CPU is Intel and reading the data
 * returned from mlx5_cpu_id().
 * Since Haswell and Broadwell CPUs do not have improved performance when
 * using relaxed ordering, we want to check the CPU type before deciding
 * whether to enable RO or not.
 * If the CPU is Haswell or Broadwell the variable is set to 1, otherwise 0.
 */
RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
{
#ifdef RTE_ARCH_X86_64
	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
	unsigned int i, model, family, brand_id, vendor;
	unsigned int signature_intel_ebx = 0x756e6547;
	unsigned int extended_model;
	unsigned int eax = 0;
	unsigned int ebx = 0;
	unsigned int ecx = 0;
	unsigned int edx = 0;
	int max_level;

	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
	vendor = ebx;
	max_level = eax;
	if (max_level < 1) {
		haswell_broadwell_cpu = 0;
		return;
	}
	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
	model = (eax >> 4) & 0x0f;
	family = (eax >> 8) & 0x0f;
	brand_id = ebx & 0xff;
	extended_model = (eax >> 12) & 0xf0;
	/* Check if the processor is Haswell or Broadwell. */
	if (vendor == signature_intel_ebx) {
		if (family == 0x06)
			model += extended_model;
		if (brand_id == 0 && family == 0x6) {
			for (i = 0; i < RTE_DIM(broadwell_models); i++)
				if (model == broadwell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
			for (i = 0; i < RTE_DIM(haswell_models); i++)
				if (model == haswell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
		}
	}
#endif
	haswell_broadwell_cpu = 0;
}

/**
 * Allocate the User Access Region with DevX on the specified device.
 * This routine handles the following UAR allocation issues:
 *
 * - Try to allocate the UAR with the most appropriate memory mapping
 *   type from the ones supported by the host.
 *
 * - Try to allocate the UAR with a non-NULL base address. OFED 5.0.x and
 *   upstream rdma-core before v29 returned NULL as the UAR base address
 *   if the UAR was not the first object in the UAR page.
 *   It caused PMD failures, so we should try to get another UAR until
 *   we get one with a non-NULL base address returned.
 *
 * @param [in] cdev
 *   Pointer to mlx5 device structure to perform allocation on its context.
 *
 * @return
 *   UAR object pointer on success, NULL otherwise and rte_errno is set.
 */
static void *
mlx5_devx_alloc_uar(struct mlx5_common_device *cdev)
{
	void *uar;
	uint32_t retry, uar_mapping;
	void *base_addr;

	for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		/* Control the mapping type according to the settings. */
		uar_mapping = (cdev->config.dbnc == MLX5_TXDB_NCACHED) ?
			MLX5DV_UAR_ALLOC_TYPE_NC : MLX5DV_UAR_ALLOC_TYPE_BF;
#else
		/*
		 * It seems we have no way to control the memory mapping type
		 * for the UAR; the default "Write-Combining" type is assumed.
		 */
		uar_mapping = 0;
#endif
		uar = mlx5_glue->devx_alloc_uar(cdev->ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		if (!uar && uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
			/*
			 * In some environments, such as virtual machines,
			 * the Write-Combining mapping might not be supported
			 * and UAR allocation fails. Try the "Non-Cached"
			 * mapping in that case.
1084 */ 1085 DRV_LOG(DEBUG, "Failed to allocate DevX UAR (BF)"); 1086 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; 1087 uar = mlx5_glue->devx_alloc_uar(cdev->ctx, uar_mapping); 1088 } else if (!uar && uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) { 1089 /* 1090 * If Verbs/kernel does not support "Non-Cached" 1091 * try the "Write-Combining". 1092 */ 1093 DRV_LOG(DEBUG, "Failed to allocate DevX UAR (NC)"); 1094 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF; 1095 uar = mlx5_glue->devx_alloc_uar(cdev->ctx, uar_mapping); 1096 } 1097 #endif 1098 if (!uar) { 1099 DRV_LOG(ERR, "Failed to allocate DevX UAR (BF/NC)"); 1100 rte_errno = ENOMEM; 1101 goto exit; 1102 } 1103 base_addr = mlx5_os_get_devx_uar_base_addr(uar); 1104 if (base_addr) 1105 break; 1106 /* 1107 * The UARs are allocated by rdma_core within the 1108 * IB device context, on context closure all UARs 1109 * will be freed, should be no memory/object leakage. 1110 */ 1111 DRV_LOG(DEBUG, "Retrying to allocate DevX UAR"); 1112 uar = NULL; 1113 } 1114 /* Check whether we finally succeeded with valid UAR allocation. */ 1115 if (!uar) { 1116 DRV_LOG(ERR, "Failed to allocate DevX UAR (NULL base)"); 1117 rte_errno = ENOMEM; 1118 } 1119 /* 1120 * Return void * instead of struct mlx5dv_devx_uar * 1121 * is for compatibility with older rdma-core library headers. 1122 */ 1123 exit: 1124 return uar; 1125 } 1126 1127 void 1128 mlx5_devx_uar_release(struct mlx5_uar *uar) 1129 { 1130 if (uar->obj != NULL) 1131 mlx5_glue->devx_free_uar(uar->obj); 1132 memset(uar, 0, sizeof(*uar)); 1133 } 1134 1135 int 1136 mlx5_devx_uar_prepare(struct mlx5_common_device *cdev, struct mlx5_uar *uar) 1137 { 1138 off_t uar_mmap_offset; 1139 const size_t page_size = rte_mem_page_size(); 1140 void *base_addr; 1141 void *uar_obj; 1142 1143 if (page_size == (size_t)-1) { 1144 DRV_LOG(ERR, "Failed to get mem page size"); 1145 rte_errno = ENOMEM; 1146 return -1; 1147 } 1148 uar_obj = mlx5_devx_alloc_uar(cdev); 1149 if (uar_obj == NULL || mlx5_os_get_devx_uar_reg_addr(uar_obj) == NULL) { 1150 rte_errno = errno; 1151 DRV_LOG(ERR, "Failed to allocate UAR."); 1152 return -1; 1153 } 1154 uar->obj = uar_obj; 1155 uar_mmap_offset = mlx5_os_get_devx_uar_mmap_offset(uar_obj); 1156 base_addr = mlx5_os_get_devx_uar_base_addr(uar_obj); 1157 uar->dbnc = mlx5_db_map_type_get(uar_mmap_offset, page_size); 1158 uar->bf_db.db = mlx5_os_get_devx_uar_reg_addr(uar_obj); 1159 uar->cq_db.db = RTE_PTR_ADD(base_addr, MLX5_CQ_DOORBELL); 1160 #ifndef RTE_ARCH_64 1161 rte_spinlock_init(&uar->bf_sl); 1162 rte_spinlock_init(&uar->cq_sl); 1163 uar->bf_db.sl_p = &uar->bf_sl; 1164 uar->cq_db.sl_p = &uar->cq_sl; 1165 #endif /* RTE_ARCH_64 */ 1166 return 0; 1167 } 1168 1169 RTE_PMD_EXPORT_NAME(mlx5_common_driver, __COUNTER__); 1170