/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <string.h>
#include <stdio.h>

#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_class.h>
#include <rte_malloc.h>

#include "mlx5_common.h"
#include "mlx5_common_os.h"
#include "mlx5_common_mp.h"
#include "mlx5_common_log.h"
#include "mlx5_common_defs.h"
#include "mlx5_common_private.h"

uint8_t haswell_broadwell_cpu;

/* In case this is an x86_64 Intel processor, check whether
 * relaxed ordering should be used.
 */
#ifdef RTE_ARCH_X86_64
/**
 * This function returns processor identification and feature information
 * into the registers.
 *
 * @param eax, ebx, ecx, edx
 *   Pointers to the registers that will hold CPU information.
 * @param level
 *   The main category of information returned.
 */
static inline void mlx5_cpu_id(unsigned int level,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	__asm__("cpuid\n\t"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (level));
}
#endif

RTE_LOG_REGISTER_DEFAULT(mlx5_common_logtype, NOTICE)

/* Head of list of drivers. */
static TAILQ_HEAD(mlx5_drivers, mlx5_class_driver) drivers_list =
				TAILQ_HEAD_INITIALIZER(drivers_list);

/* Head of devices. */
static TAILQ_HEAD(mlx5_devices, mlx5_common_device) devices_list =
				TAILQ_HEAD_INITIALIZER(devices_list);
static pthread_mutex_t devices_list_lock;

static const struct {
	const char *name;
	unsigned int drv_class;
} mlx5_classes[] = {
	{ .name = "vdpa", .drv_class = MLX5_CLASS_VDPA },
	{ .name = "eth", .drv_class = MLX5_CLASS_ETH },
	/* Keep class "net" for backward compatibility. */
	{ .name = "net", .drv_class = MLX5_CLASS_ETH },
	{ .name = "regex", .drv_class = MLX5_CLASS_REGEX },
	{ .name = "compress", .drv_class = MLX5_CLASS_COMPRESS },
	{ .name = "crypto", .drv_class = MLX5_CLASS_CRYPTO },
};

static int
class_name_to_value(const char *class_name)
{
	unsigned int i;

	for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
		if (strcmp(class_name, mlx5_classes[i].name) == 0)
			return mlx5_classes[i].drv_class;
	}
	return -EINVAL;
}

static struct mlx5_class_driver *
driver_get(uint32_t class)
{
	struct mlx5_class_driver *driver;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((uint32_t)driver->drv_class == class)
			return driver;
	}
	return NULL;
}

/**
 * Verify and store value for devargs.
 *
 * @param[in] key
 *   Key argument to verify.
 * @param[in] val
 *   Value associated with key.
 * @param opaque
 *   User data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_args_check_handler(const char *key, const char *val, void *opaque)
{
	struct mlx5_common_dev_config *config = opaque;
	signed long tmp;

	errno = 0;
	tmp = strtol(val, NULL, 0);
	if (errno) {
		rte_errno = errno;
		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
		return -rte_errno;
	}
	if (strcmp(key, "tx_db_nc") == 0) {
		if (tmp != MLX5_TXDB_CACHED &&
		    tmp != MLX5_TXDB_NCACHED &&
		    tmp != MLX5_TXDB_HEURISTIC) {
			DRV_LOG(ERR, "Invalid Tx doorbell mapping parameter.");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->dbnc = tmp;
	} else if (strcmp(key, "mr_ext_memseg_en") == 0) {
		config->mr_ext_memseg_en = !!tmp;
	} else if (strcmp(key, "mr_mempool_reg_en") == 0) {
		config->mr_mempool_reg_en = !!tmp;
	} else if (strcmp(key, "sys_mem_en") == 0) {
		config->sys_mem_en = !!tmp;
	}
	return 0;
}

/**
 * Parse common device parameters.
 *
 * @param devargs
 *   Device arguments structure.
 * @param config
 *   Pointer to device configuration structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_config_get(struct rte_devargs *devargs,
		       struct mlx5_common_dev_config *config)
{
	struct rte_kvargs *kvlist;
	int ret = 0;

	/* Set defaults. */
	config->mr_ext_memseg_en = 1;
	config->mr_mempool_reg_en = 1;
	config->sys_mem_en = 0;
	config->dbnc = MLX5_ARG_UNSET;
	if (devargs == NULL)
		return 0;
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	ret = rte_kvargs_process(kvlist, NULL, mlx5_common_args_check_handler,
				 config);
	if (ret)
		ret = -rte_errno;
	rte_kvargs_free(kvlist);
	DRV_LOG(DEBUG, "mr_ext_memseg_en is %u.", config->mr_ext_memseg_en);
	DRV_LOG(DEBUG, "mr_mempool_reg_en is %u.", config->mr_mempool_reg_en);
	DRV_LOG(DEBUG, "sys_mem_en is %u.", config->sys_mem_en);
	DRV_LOG(DEBUG, "Tx doorbell mapping parameter is %d.", config->dbnc);
	return ret;
}

static int
devargs_class_handler(__rte_unused const char *key,
		      const char *class_names, void *opaque)
{
	int *ret = opaque;
	int class_val;
	char *scratch;
	char *found;
	char *refstr = NULL;

	*ret = 0;
	scratch = strdup(class_names);
	if (scratch == NULL) {
		*ret = -ENOMEM;
		return *ret;
	}
	found = strtok_r(scratch, ":", &refstr);
	if (found == NULL)
		/* Empty string. */
		goto err;
	do {
		/* Extract each individual class name. Multiple
		 * classes can be supplied as class=net:regex:foo:bar.
		 */
		class_val = class_name_to_value(found);
		/* Check if it's a valid class. */
		if (class_val < 0) {
			*ret = -EINVAL;
			goto err;
		}
		*ret |= class_val;
		found = strtok_r(NULL, ":", &refstr);
	} while (found != NULL);
err:
	free(scratch);
	if (*ret < 0)
		DRV_LOG(ERR, "Invalid mlx5 class options: %s.\n", class_names);
	return *ret;
}
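
/*
 * Devargs usage sketch (the PCI address below is illustrative, not taken
 * from this file). The common keys handled by
 * mlx5_common_args_check_handler() can be combined with the "class" key
 * processed below, e.g. on the EAL command line:
 *
 *	-a 0000:08:00.0,class=net:regex,mr_ext_memseg_en=0,tx_db_nc=1
 *
 * "class" accepts one or more of the names listed in mlx5_classes[]
 * separated by ':', "tx_db_nc" accepts the MLX5_TXDB_* values checked
 * above, and the remaining keys are boolean switches.
 */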

static int
parse_class_options(const struct rte_devargs *devargs)
{
	struct rte_kvargs *kvlist;
	int ret = 0;

	if (devargs == NULL)
		return 0;
	if (devargs->cls != NULL && devargs->cls->name != NULL)
		/* Global syntax, only one class type. */
		return class_name_to_value(devargs->cls->name);
	/* Legacy devargs support multiple classes. */
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL)
		return 0;
	rte_kvargs_process(kvlist, RTE_DEVARGS_KEY_CLASS,
			   devargs_class_handler, &ret);
	rte_kvargs_free(kvlist);
	return ret;
}

static const unsigned int mlx5_class_invalid_combinations[] = {
	MLX5_CLASS_ETH | MLX5_CLASS_VDPA,
	/* New class combination should be added here. */
};

static int
is_valid_class_combination(uint32_t user_classes)
{
	unsigned int i;

	/* Verify if user specified unsupported combination. */
	for (i = 0; i < RTE_DIM(mlx5_class_invalid_combinations); i++) {
		if ((mlx5_class_invalid_combinations[i] & user_classes) ==
		    mlx5_class_invalid_combinations[i])
			return -EINVAL;
	}
	/* No invalid class combination found. */
	return 0;
}

static bool
mlx5_bus_match(const struct mlx5_class_driver *drv,
	       const struct rte_device *dev)
{
	if (mlx5_dev_is_pci(dev))
		return mlx5_dev_pci_match(drv, dev);
	return true;
}

static struct mlx5_common_device *
to_mlx5_device(const struct rte_device *rte_dev)
{
	struct mlx5_common_device *cdev;

	TAILQ_FOREACH(cdev, &devices_list, next) {
		if (rte_dev == cdev->dev)
			return cdev;
	}
	return NULL;
}

int
mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size)
{
	struct rte_pci_addr pci_addr = { 0 };
	int ret;

	if (mlx5_dev_is_pci(dev)) {
		/* Input might be <BDF>, format PCI address to <DBDF>. */
		ret = rte_pci_addr_parse(dev->name, &pci_addr);
		if (ret != 0)
			return -ENODEV;
		rte_pci_device_name(&pci_addr, addr, size);
		return 0;
	}
#ifdef RTE_EXEC_ENV_LINUX
	return mlx5_auxiliary_get_pci_str(RTE_DEV_TO_AUXILIARY_CONST(dev),
					  addr, size);
#else
	rte_errno = ENODEV;
	return -rte_errno;
#endif
}

/**
 * Register the mempool for the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool being registered.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_dev_mempool_register(struct mlx5_common_device *cdev,
			  struct rte_mempool *mp)
{
	struct mlx5_mp_id mp_id;

	mlx5_mp_id_init(&mp_id, 0);
	return mlx5_mr_mempool_register(&cdev->mr_scache, cdev->pd, mp, &mp_id);
}

/**
 * Unregister the mempool from the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool being unregistered.
 */
void
mlx5_dev_mempool_unregister(struct mlx5_common_device *cdev,
			    struct rte_mempool *mp)
{
	struct mlx5_mp_id mp_id;

	mlx5_mp_id_init(&mp_id, 0);
	if (mlx5_mr_mempool_unregister(&cdev->mr_scache, mp, &mp_id) < 0)
		DRV_LOG(WARNING, "Failed to unregister mempool %s for PD %p: %s",
			mp->name, cdev->pd, rte_strerror(rte_errno));
}

/**
 * rte_mempool_walk() callback to register mempools for the protection domain.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_mempool_register_cb(struct rte_mempool *mp, void *arg)
{
	struct mlx5_common_device *cdev = arg;
	int ret;

	ret = mlx5_dev_mempool_register(cdev, mp);
	if (ret < 0 && rte_errno != EEXIST)
		DRV_LOG(ERR,
			"Failed to register existing mempool %s for PD %p: %s",
			mp->name, cdev->pd, rte_strerror(rte_errno));
}

/**
 * rte_mempool_walk() callback to unregister mempools
 * from the protection domain.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_mempool_unregister_cb(struct rte_mempool *mp, void *arg)
{
	mlx5_dev_mempool_unregister((struct mlx5_common_device *)arg, mp);
}

/**
 * Mempool life cycle callback for mlx5 common devices.
 *
 * @param event
 *   Mempool life cycle event.
 * @param mp
 *   Associated mempool.
 * @param arg
 *   Pointer to a device shared context.
 */
static void
mlx5_dev_mempool_event_cb(enum rte_mempool_event event, struct rte_mempool *mp,
			  void *arg)
{
	struct mlx5_common_device *cdev = arg;

	switch (event) {
	case RTE_MEMPOOL_EVENT_READY:
		if (mlx5_dev_mempool_register(cdev, mp) < 0)
			DRV_LOG(ERR,
				"Failed to register new mempool %s for PD %p: %s",
				mp->name, cdev->pd, rte_strerror(rte_errno));
		break;
	case RTE_MEMPOOL_EVENT_DESTROY:
		mlx5_dev_mempool_unregister(cdev, mp);
		break;
	}
}

int
mlx5_dev_mempool_subscribe(struct mlx5_common_device *cdev)
{
	int ret = 0;

	if (!cdev->config.mr_mempool_reg_en)
		return 0;
	rte_rwlock_write_lock(&cdev->mr_scache.mprwlock);
	if (cdev->mr_scache.mp_cb_registered)
		goto exit;
	/* Callback for this device may be already registered. */
	ret = rte_mempool_event_callback_register(mlx5_dev_mempool_event_cb,
						  cdev);
	if (ret != 0 && rte_errno != EEXIST)
		goto exit;
	/* Register mempools only once for this device. */
	if (ret == 0)
		rte_mempool_walk(mlx5_dev_mempool_register_cb, cdev);
	ret = 0;
	cdev->mr_scache.mp_cb_registered = 1;
exit:
	rte_rwlock_write_unlock(&cdev->mr_scache.mprwlock);
	return ret;
}

static void
mlx5_dev_mempool_unsubscribe(struct mlx5_common_device *cdev)
{
	int ret;

	if (!cdev->mr_scache.mp_cb_registered ||
	    !cdev->config.mr_mempool_reg_en)
		return;
	/* Stop watching for mempool events and unregister all mempools. */
	ret = rte_mempool_event_callback_unregister(mlx5_dev_mempool_event_cb,
						    cdev);
	if (ret == 0)
		rte_mempool_walk(mlx5_dev_mempool_unregister_cb, cdev);
}
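
/*
 * Usage sketch (hypothetical class driver code, not part of this file):
 * a class driver relying on the mempool registration scheme above is
 * expected to call mlx5_dev_mempool_subscribe() once its data path needs
 * MR lookups for mempool memory.
 *
 *	static int
 *	hypothetical_driver_start(struct mlx5_common_device *cdev)
 *	{
 *		// Registers the mempool event callback once per device and
 *		// walks the already existing mempools; it is a no-op when
 *		// mr_mempool_reg_en=0 was given in the devargs.
 *		return mlx5_dev_mempool_subscribe(cdev);
 *	}
 */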

/**
 * Callback for memory event.
 *
 * @param event_type
 *   Memory event type.
 * @param addr
 *   Address of memory.
 * @param len
 *   Size of memory.
 */
static void
mlx5_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
		     size_t len, void *arg __rte_unused)
{
	struct mlx5_common_device *cdev;

	/* Must be called from the primary process. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	switch (event_type) {
	case RTE_MEM_EVENT_FREE:
		pthread_mutex_lock(&devices_list_lock);
		/* Iterate all the existing mlx5 devices. */
		TAILQ_FOREACH(cdev, &devices_list, next)
			mlx5_free_mr_by_addr(&cdev->mr_scache,
					     mlx5_os_get_ctx_device_name
								(cdev->ctx),
					     addr, len);
		pthread_mutex_unlock(&devices_list_lock);
		break;
	case RTE_MEM_EVENT_ALLOC:
	default:
		break;
	}
}

/**
 * Uninitialize all HW global resources of the device context.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
 */
static void
mlx5_dev_hw_global_release(struct mlx5_common_device *cdev)
{
	if (cdev->pd != NULL) {
		claim_zero(mlx5_os_dealloc_pd(cdev->pd));
		cdev->pd = NULL;
	}
	if (cdev->ctx != NULL) {
		claim_zero(mlx5_glue->close_device(cdev->ctx));
		cdev->ctx = NULL;
	}
}

/**
 * Initialize all HW global resources of the device context.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
 * @param classes
 *   Chosen classes coming from the user device arguments.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_dev_hw_global_prepare(struct mlx5_common_device *cdev, uint32_t classes)
{
	int ret;

	/* Create the context device. */
	ret = mlx5_os_open_device(cdev, classes);
	if (ret < 0)
		return ret;
	/* Allocate the Protection Domain object and extract its pdn. */
	ret = mlx5_os_pd_create(cdev);
	if (ret)
		goto error;
	/* All actions taken below are relevant only when DevX is supported. */
	if (cdev->config.devx == 0)
		return 0;
	/* Query HCA attributes. */
	ret = mlx5_devx_cmd_query_hca_attr(cdev->ctx, &cdev->config.hca_attr);
	if (ret) {
		DRV_LOG(ERR, "Unable to read HCA capabilities.");
		rte_errno = ENOTSUP;
		goto error;
	}
	return 0;
error:
	mlx5_dev_hw_global_release(cdev);
	return ret;
}

static void
mlx5_common_dev_release(struct mlx5_common_device *cdev)
{
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_REMOVE(&devices_list, cdev, next);
	pthread_mutex_unlock(&devices_list_lock);
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		if (TAILQ_EMPTY(&devices_list))
			rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
							  NULL);
		mlx5_dev_mempool_unsubscribe(cdev);
		mlx5_mr_release_cache(&cdev->mr_scache);
		mlx5_dev_hw_global_release(cdev);
	}
	rte_free(cdev);
}

static struct mlx5_common_device *
mlx5_common_dev_create(struct rte_device *eal_dev, uint32_t classes)
{
	struct mlx5_common_device *cdev;
	int ret;

	cdev = rte_zmalloc("mlx5_common_device", sizeof(*cdev), 0);
	if (!cdev) {
		DRV_LOG(ERR, "Device allocation failure.");
		rte_errno = ENOMEM;
		return NULL;
	}
	cdev->dev = eal_dev;
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		goto exit;
	/* Parse device parameters. */
	ret = mlx5_common_config_get(eal_dev->devargs, &cdev->config);
	if (ret < 0) {
		DRV_LOG(ERR, "Failed to process device arguments: %s",
			strerror(rte_errno));
		rte_free(cdev);
		return NULL;
	}
	mlx5_malloc_mem_select(cdev->config.sys_mem_en);
	/* Initialize all HW global resources of the device context. */
	ret = mlx5_dev_hw_global_prepare(cdev, classes);
	if (ret) {
		DRV_LOG(ERR, "Failed to initialize device context.");
		rte_free(cdev);
		return NULL;
	}
	/* Initialize global MR cache resources and update its functions. */
	ret = mlx5_mr_create_cache(&cdev->mr_scache, eal_dev->numa_node);
	if (ret) {
		DRV_LOG(ERR, "Failed to initialize global MR share cache.");
		mlx5_dev_hw_global_release(cdev);
		rte_free(cdev);
		return NULL;
	}
	/* Register callback function for global shared MR cache management. */
	if (TAILQ_EMPTY(&devices_list))
		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
						mlx5_mr_mem_event_cb, NULL);
exit:
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_INSERT_HEAD(&devices_list, cdev, next);
	pthread_mutex_unlock(&devices_list_lock);
	return cdev;
}

static int
drivers_remove(struct mlx5_common_device *cdev, uint32_t enabled_classes)
{
	struct mlx5_class_driver *driver;
	int local_ret = -ENODEV;
	unsigned int i = 0;
	int ret = 0;

	enabled_classes &= cdev->classes_loaded;
	while (enabled_classes) {
		driver = driver_get(RTE_BIT64(i));
		if (driver != NULL) {
			local_ret = driver->remove(cdev);
			if (local_ret == 0)
				cdev->classes_loaded &= ~RTE_BIT64(i);
			else if (ret == 0)
				ret = local_ret;
		}
		enabled_classes &= ~RTE_BIT64(i);
		i++;
	}
	if (local_ret != 0 && ret == 0)
		ret = local_ret;
	return ret;
}

static int
drivers_probe(struct mlx5_common_device *cdev, uint32_t user_classes)
{
	struct mlx5_class_driver *driver;
	uint32_t enabled_classes = 0;
	bool already_loaded;
	int ret;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((driver->drv_class & user_classes) == 0)
			continue;
		if (!mlx5_bus_match(driver, cdev->dev))
			continue;
		already_loaded = cdev->classes_loaded & driver->drv_class;
		if (already_loaded && driver->probe_again == 0) {
			DRV_LOG(ERR, "Device %s is already probed",
				cdev->dev->name);
			ret = -EEXIST;
			goto probe_err;
		}
		ret = driver->probe(cdev);
		if (ret < 0) {
			DRV_LOG(ERR, "Failed to load driver %s",
				driver->name);
			goto probe_err;
		}
		enabled_classes |= driver->drv_class;
	}
	cdev->classes_loaded |= enabled_classes;
	return 0;
probe_err:
	/* Only unload drivers which were enabled in this probe instance. */
	drivers_remove(cdev, enabled_classes);
	return ret;
}
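
/*
 * Re-probe sketch (device address and class choice are illustrative, not
 * taken from this file): a device already probed with one class may be
 * probed again with another class through EAL hotplug, provided the class
 * driver sets probe_again and the resulting combination is valid.
 *
 *	// The device was first probed with class=net (or the default).
 *	// Attach the regex class to the same device later on:
 *	rte_eal_hotplug_add("pci", "0000:08:00.0", "class=regex");
 *
 * drivers_probe() then accumulates the new class into classes_loaded,
 * while is_valid_class_combination() keeps rejecting combinations such as
 * eth + vdpa.
 */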

int
mlx5_common_dev_probe(struct rte_device *eal_dev)
{
	struct mlx5_common_device *cdev;
	uint32_t classes = 0;
	bool new_device = false;
	int ret;

	DRV_LOG(INFO, "probe device \"%s\".", eal_dev->name);
	ret = parse_class_options(eal_dev->devargs);
	if (ret < 0) {
		DRV_LOG(ERR, "Unsupported mlx5 class type: %s",
			eal_dev->devargs->args);
		return ret;
	}
	classes = ret;
	if (classes == 0)
		/* Default to net class. */
		classes = MLX5_CLASS_ETH;
	cdev = to_mlx5_device(eal_dev);
	if (!cdev) {
		cdev = mlx5_common_dev_create(eal_dev, classes);
		if (!cdev)
			return -ENOMEM;
		new_device = true;
	}
	/*
	 * Validate the combination here.
	 * For a new device, the classes_loaded field is 0 and only
	 * the classes given as user device arguments are checked.
	 */
	ret = is_valid_class_combination(classes | cdev->classes_loaded);
	if (ret != 0) {
		DRV_LOG(ERR, "Unsupported mlx5 classes combination.");
		goto class_err;
	}
	ret = drivers_probe(cdev, classes);
	if (ret)
		goto class_err;
	return 0;
class_err:
	if (new_device)
		mlx5_common_dev_release(cdev);
	return ret;
}

int
mlx5_common_dev_remove(struct rte_device *eal_dev)
{
	struct mlx5_common_device *cdev;
	int ret;

	cdev = to_mlx5_device(eal_dev);
	if (!cdev)
		return -ENODEV;
	/* Matching device found, cleanup and unload drivers. */
	ret = drivers_remove(cdev, cdev->classes_loaded);
	if (ret == 0)
		mlx5_common_dev_release(cdev);
	return ret;
}

/**
 * Callback to DMA map external memory to a device.
 *
 * @param rte_dev
 *   Pointer to the generic device.
 * @param addr
 *   Starting virtual address of memory to be mapped.
 * @param iova
 *   Starting IOVA address of memory to be mapped.
 * @param len
 *   Length of memory segment being mapped.
 *
 * @return
 *   0 on success, negative value on error.
 */
int
mlx5_common_dev_dma_map(struct rte_device *rte_dev, void *addr,
			uint64_t iova __rte_unused, size_t len)
{
	struct mlx5_common_device *dev;
	struct mlx5_mr *mr;

	dev = to_mlx5_device(rte_dev);
	if (!dev) {
		DRV_LOG(WARNING,
			"Unable to find matching mlx5 device to device %s",
			rte_dev->name);
		rte_errno = ENODEV;
		return -1;
	}
	mr = mlx5_create_mr_ext(dev->pd, (uintptr_t)addr, len,
				SOCKET_ID_ANY, dev->mr_scache.reg_mr_cb);
	if (!mr) {
		DRV_LOG(WARNING, "Device %s unable to DMA map", rte_dev->name);
		rte_errno = EINVAL;
		return -1;
	}
	rte_rwlock_write_lock(&dev->mr_scache.rwlock);
	LIST_INSERT_HEAD(&dev->mr_scache.mr_list, mr, mr);
	/* Insert to the global cache table. */
	mlx5_mr_insert_cache(&dev->mr_scache, mr);
	rte_rwlock_write_unlock(&dev->mr_scache.rwlock);
	return 0;
}
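
/*
 * Usage sketch (application side, not part of this file): this callback is
 * reached through the generic rte_dev_dma_map()/rte_dev_dma_unmap() EAL
 * API, typically after the external area has been registered with
 * rte_extmem_register(). The buffer, IOVA and device pointer below are
 * illustrative.
 *
 *	// Map an externally allocated area for DMA on the mlx5 device so
 *	// that its MR is inserted into the shared MR cache.
 *	if (rte_dev_dma_map(eal_dev, buf, iova, buf_len) == 0) {
 *		// ... use the memory on the data path ...
 *		rte_dev_dma_unmap(eal_dev, buf, iova, buf_len);
 *	}
 */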

/**
 * Callback to DMA unmap external memory from a device.
 *
 * @param rte_dev
 *   Pointer to the generic device.
 * @param addr
 *   Starting virtual address of memory to be unmapped.
 * @param iova
 *   Starting IOVA address of memory to be unmapped.
 * @param len
 *   Length of memory segment being unmapped.
 *
 * @return
 *   0 on success, negative value on error.
 */
int
mlx5_common_dev_dma_unmap(struct rte_device *rte_dev, void *addr,
			  uint64_t iova __rte_unused, size_t len __rte_unused)
{
	struct mlx5_common_device *dev;
	struct mr_cache_entry entry;
	struct mlx5_mr *mr;

	dev = to_mlx5_device(rte_dev);
	if (!dev) {
		DRV_LOG(WARNING,
			"Unable to find matching mlx5 device to device %s.",
			rte_dev->name);
		rte_errno = ENODEV;
		return -1;
	}
	rte_rwlock_read_lock(&dev->mr_scache.rwlock);
	mr = mlx5_mr_lookup_list(&dev->mr_scache, &entry, (uintptr_t)addr);
	if (!mr) {
		rte_rwlock_read_unlock(&dev->mr_scache.rwlock);
		DRV_LOG(WARNING,
			"Address 0x%" PRIxPTR " wasn't registered to device %s",
			(uintptr_t)addr, rte_dev->name);
		rte_errno = EINVAL;
		return -1;
	}
	LIST_REMOVE(mr, mr);
	DRV_LOG(DEBUG, "MR(%p) is removed from list.", (void *)mr);
	mlx5_mr_free(mr, dev->mr_scache.dereg_mr_cb);
	mlx5_mr_rebuild_cache(&dev->mr_scache);
	/*
	 * No explicit wmb is needed after updating dev_gen due to
	 * store-release ordering in unlock that provides the
	 * implicit barrier at the software visible level.
	 */
	++dev->mr_scache.dev_gen;
	DRV_LOG(DEBUG, "Broadcasting local cache flush, gen=%d.",
		dev->mr_scache.dev_gen);
	rte_rwlock_read_unlock(&dev->mr_scache.rwlock);
	return 0;
}

void
mlx5_class_driver_register(struct mlx5_class_driver *driver)
{
	mlx5_common_driver_on_register_pci(driver);
	TAILQ_INSERT_TAIL(&drivers_list, driver, next);
}

static void mlx5_common_driver_init(void)
{
	mlx5_common_pci_init();
#ifdef RTE_EXEC_ENV_LINUX
	mlx5_common_auxiliary_init();
#endif
}

static bool mlx5_common_initialized;

/**
 * One-time initialization routine for the run-time dependency on the glue
 * library for multiple PMDs. Each mlx5 PMD that depends on the mlx5_common
 * module must invoke it in its constructor.
 */
void
mlx5_common_init(void)
{
	if (mlx5_common_initialized)
		return;

	pthread_mutex_init(&devices_list_lock, NULL);
	mlx5_glue_constructor();
	mlx5_common_driver_init();
	mlx5_common_initialized = true;
}
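
/*
 * Registration sketch (hypothetical class driver code, not part of this
 * file): every mlx5 class PMD registers itself with the common layer from
 * a constructor, after making sure mlx5_common_init() has run. Only the
 * struct fields used in this file are shown; the names are illustrative.
 *
 *	static struct mlx5_class_driver hypothetical_driver = {
 *		.drv_class = MLX5_CLASS_REGEX,
 *		.name = "hypothetical_regex",
 *		.probe = hypothetical_probe,
 *		.remove = hypothetical_remove,
 *	};
 *
 *	RTE_INIT(hypothetical_driver_init)
 *	{
 *		mlx5_common_init();
 *		mlx5_class_driver_register(&hypothetical_driver);
 *	}
 */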

/**
 * This function is responsible for initializing the variable
 * haswell_broadwell_cpu by checking if the CPU is Intel
 * and reading the data returned from mlx5_cpu_id().
 * Since Haswell and Broadwell CPUs don't gain performance
 * from relaxed ordering, the CPU type is checked before
 * deciding whether to enable RO or not.
 * If the CPU is Haswell or Broadwell the variable is set to 1,
 * otherwise it is 0.
 */
RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
{
#ifdef RTE_ARCH_X86_64
	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
	unsigned int i, model, family, brand_id, vendor;
	unsigned int signature_intel_ebx = 0x756e6547;
	unsigned int extended_model;
	unsigned int eax = 0;
	unsigned int ebx = 0;
	unsigned int ecx = 0;
	unsigned int edx = 0;
	int max_level;

	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
	vendor = ebx;
	max_level = eax;
	if (max_level < 1) {
		haswell_broadwell_cpu = 0;
		return;
	}
	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
	model = (eax >> 4) & 0x0f;
	family = (eax >> 8) & 0x0f;
	brand_id = ebx & 0xff;
	extended_model = (eax >> 12) & 0xf0;
	/* Check if the processor is Haswell or Broadwell. */
	if (vendor == signature_intel_ebx) {
		if (family == 0x06)
			model += extended_model;
		if (brand_id == 0 && family == 0x6) {
			for (i = 0; i < RTE_DIM(broadwell_models); i++)
				if (model == broadwell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
			for (i = 0; i < RTE_DIM(haswell_models); i++)
				if (model == haswell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
		}
	}
#endif
	haswell_broadwell_cpu = 0;
}

/**
 * Allocate the User Access Region with DevX on specified device.
 *
 * @param [in] ctx
 *   Infiniband device context to perform allocation on.
 * @param [in] mapping
 *   MLX5DV_UAR_ALLOC_TYPE_BF - allocate as cached memory with write-combining
 *				 attributes (if supported by the host), the
 *				 writes to the UAR registers must be followed
 *				 by write memory barrier.
 *   MLX5DV_UAR_ALLOC_TYPE_NC - allocate as non-cached memory, all writes are
 *				 promoted to the registers immediately, no
 *				 memory barriers needed.
 *   mapping < 0 - the first attempt is performed with
 *		   MLX5DV_UAR_ALLOC_TYPE_BF, if this fails the next attempt
 *		   with MLX5DV_UAR_ALLOC_TYPE_NC is performed. The drivers
 *		   specifying negative values should always provide the write
 *		   memory barrier operation after UAR register writings.
 *   If there are no definitions for the MLX5DV_UAR_ALLOC_TYPE_xx (older rdma
 *   library headers), the caller can specify 0.
 *
 * @return
 *   UAR object pointer on success, NULL otherwise and rte_errno is set.
 */
void *
mlx5_devx_alloc_uar(void *ctx, int mapping)
{
	void *uar;
	uint32_t retry, uar_mapping;
	void *base_addr;

	for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		/* Control the mapping type according to the settings. */
		uar_mapping = (mapping < 0) ?
			      MLX5DV_UAR_ALLOC_TYPE_NC : mapping;
#else
		/*
		 * It seems we have no way to control the memory mapping type
		 * for the UAR, the default "Write-Combining" type is assumed.
		 */
		uar_mapping = 0;
		RTE_SET_USED(mapping);
#endif
		uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		if (!uar &&
		    mapping < 0 &&
		    uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
			/*
			 * In some environments like virtual machines the
			 * Write-Combining mapping might not be supported and
			 * UAR allocation fails. Try the "Non-Cached" mapping
			 * for this case.
			 */
			DRV_LOG(WARNING, "Failed to allocate DevX UAR (BF)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
			uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
		} else if (!uar &&
			   mapping < 0 &&
			   uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
			/*
			 * If Verbs/kernel does not support "Non-Cached"
			 * mapping, try the "Write-Combining" one.
			 */
			DRV_LOG(WARNING, "Failed to allocate DevX UAR (NC)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
			uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
		}
#endif
		if (!uar) {
			DRV_LOG(ERR, "Failed to allocate DevX UAR (BF/NC)");
			rte_errno = ENOMEM;
			goto exit;
		}
		base_addr = mlx5_os_get_devx_uar_base_addr(uar);
		if (base_addr)
			break;
		/*
		 * The UARs are allocated by rdma_core within the
		 * IB device context, on context closure all UARs
		 * will be freed, should be no memory/object leakage.
		 */
		DRV_LOG(WARNING, "Retrying to allocate DevX UAR");
		uar = NULL;
	}
	/* Check whether we finally succeeded with valid UAR allocation. */
	if (!uar) {
		DRV_LOG(ERR, "Failed to allocate DevX UAR (NULL base)");
		rte_errno = ENOMEM;
	}
	/*
	 * Returning void * instead of struct mlx5dv_devx_uar * is for
	 * compatibility with older rdma-core library headers.
	 */
exit:
	return uar;
}

RTE_PMD_EXPORT_NAME(mlx5_common_driver, __COUNTER__);