/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <string.h>
#include <stdio.h>

#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_class.h>
#include <rte_malloc.h>
#include <rte_eal_paging.h>

#include "mlx5_common.h"
#include "mlx5_common_os.h"
#include "mlx5_common_mp.h"
#include "mlx5_common_log.h"
#include "mlx5_common_defs.h"
#include "mlx5_common_private.h"

uint8_t haswell_broadwell_cpu;

/* On x86_64 Intel processors, check whether
 * relaxed ordering should be used.
 */
#ifdef RTE_ARCH_X86_64
/**
 * This function returns processor identification and feature information
 * into the registers.
 *
 * @param eax, ebx, ecx, edx
 *   Pointers to the registers that will hold CPU information.
 * @param level
 *   The main category of information returned.
 */
static inline void mlx5_cpu_id(unsigned int level,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	__asm__("cpuid\n\t"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (level));
}
#endif

RTE_LOG_REGISTER_DEFAULT(mlx5_common_logtype, NOTICE)

/* Head of list of drivers. */
static TAILQ_HEAD(mlx5_drivers, mlx5_class_driver) drivers_list =
				TAILQ_HEAD_INITIALIZER(drivers_list);

/* Head of devices. */
static TAILQ_HEAD(mlx5_devices, mlx5_common_device) devices_list =
				TAILQ_HEAD_INITIALIZER(devices_list);
static pthread_mutex_t devices_list_lock;

static const struct {
	const char *name;
	unsigned int drv_class;
} mlx5_classes[] = {
	{ .name = "vdpa", .drv_class = MLX5_CLASS_VDPA },
	{ .name = "eth", .drv_class = MLX5_CLASS_ETH },
	/* Keep class "net" for backward compatibility. */
	{ .name = "net", .drv_class = MLX5_CLASS_ETH },
	{ .name = "regex", .drv_class = MLX5_CLASS_REGEX },
	{ .name = "compress", .drv_class = MLX5_CLASS_COMPRESS },
	{ .name = "crypto", .drv_class = MLX5_CLASS_CRYPTO },
};

static int
class_name_to_value(const char *class_name)
{
	unsigned int i;

	for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
		if (strcmp(class_name, mlx5_classes[i].name) == 0)
			return mlx5_classes[i].drv_class;
	}
	return -EINVAL;
}

static struct mlx5_class_driver *
driver_get(uint32_t class)
{
	struct mlx5_class_driver *driver;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((uint32_t)driver->drv_class == class)
			return driver;
	}
	return NULL;
}

/**
 * Verify and store value for devargs.
 *
 * @param[in] key
 *   Key argument to verify.
 * @param[in] val
 *   Value associated with key.
 * @param opaque
 *   User data.
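 *
 * Recognized keys are "tx_db_nc", "mr_ext_memseg_en",
 * "mr_mempool_reg_en" and "sys_mem_en"; values of other keys
 * are only validated as integers and otherwise ignored here.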
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_args_check_handler(const char *key, const char *val, void *opaque)
{
	struct mlx5_common_dev_config *config = opaque;
	signed long tmp;

	errno = 0;
	tmp = strtol(val, NULL, 0);
	if (errno) {
		rte_errno = errno;
		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
		return -rte_errno;
	}
	if (strcmp(key, "tx_db_nc") == 0) {
		if (tmp != MLX5_TXDB_CACHED &&
		    tmp != MLX5_TXDB_NCACHED &&
		    tmp != MLX5_TXDB_HEURISTIC) {
			DRV_LOG(ERR, "Invalid Tx doorbell mapping parameter.");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->dbnc = tmp;
	} else if (strcmp(key, "mr_ext_memseg_en") == 0) {
		config->mr_ext_memseg_en = !!tmp;
	} else if (strcmp(key, "mr_mempool_reg_en") == 0) {
		config->mr_mempool_reg_en = !!tmp;
	} else if (strcmp(key, "sys_mem_en") == 0) {
		config->sys_mem_en = !!tmp;
	}
	return 0;
}

/**
 * Parse common device parameters.
 *
 * @param devargs
 *   Device arguments structure.
 * @param config
 *   Pointer to device configuration structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_config_get(struct rte_devargs *devargs,
		       struct mlx5_common_dev_config *config)
{
	struct rte_kvargs *kvlist;
	int ret = 0;

	/* Set defaults. */
	config->mr_ext_memseg_en = 1;
	config->mr_mempool_reg_en = 1;
	config->sys_mem_en = 0;
	config->dbnc = MLX5_ARG_UNSET;
	if (devargs == NULL)
		return 0;
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	ret = rte_kvargs_process(kvlist, NULL, mlx5_common_args_check_handler,
				 config);
	if (ret)
		ret = -rte_errno;
	rte_kvargs_free(kvlist);
	DRV_LOG(DEBUG, "mr_ext_memseg_en is %u.", config->mr_ext_memseg_en);
	DRV_LOG(DEBUG, "mr_mempool_reg_en is %u.", config->mr_mempool_reg_en);
	DRV_LOG(DEBUG, "sys_mem_en is %u.", config->sys_mem_en);
	DRV_LOG(DEBUG, "Tx doorbell mapping parameter is %d.", config->dbnc);
	return ret;
}

static int
devargs_class_handler(__rte_unused const char *key,
		      const char *class_names, void *opaque)
{
	int *ret = opaque;
	int class_val;
	char *scratch;
	char *found;
	char *refstr = NULL;

	*ret = 0;
	scratch = strdup(class_names);
	if (scratch == NULL) {
		*ret = -ENOMEM;
		return *ret;
	}
	found = strtok_r(scratch, ":", &refstr);
	if (found == NULL)
		/* Empty string. */
		goto err;
	do {
		/* Extract each individual class name. Multiple
		 * classes can be supplied as class=net:regex:foo:bar.
		 */
		class_val = class_name_to_value(found);
		/* Check if it's a valid class. */
		if (class_val < 0) {
			*ret = -EINVAL;
			goto err;
		}
		*ret |= class_val;
		found = strtok_r(NULL, ":", &refstr);
	} while (found != NULL);
err:
	free(scratch);
	if (*ret < 0)
		DRV_LOG(ERR, "Invalid mlx5 class options: %s.", class_names);
	return *ret;
}

static int
parse_class_options(const struct rte_devargs *devargs)
{
	struct rte_kvargs *kvlist;
	int ret = 0;

	if (devargs == NULL)
		return 0;
	if (devargs->cls != NULL && devargs->cls->name != NULL)
		/* Global syntax, only one class type. */
		return class_name_to_value(devargs->cls->name);
	/* Legacy devargs support multiple classes. */
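	/*
	 * For example, a devargs string such as "class=eth:regex"
	 * (a hypothetical combination) requests both classes at once,
	 * subject to the check in is_valid_class_combination().
	 */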
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL)
		return 0;
	rte_kvargs_process(kvlist, RTE_DEVARGS_KEY_CLASS,
			   devargs_class_handler, &ret);
	rte_kvargs_free(kvlist);
	return ret;
}

static const unsigned int mlx5_class_invalid_combinations[] = {
	MLX5_CLASS_ETH | MLX5_CLASS_VDPA,
	/* New class combination should be added here. */
};

static int
is_valid_class_combination(uint32_t user_classes)
{
	unsigned int i;

	/* Verify if user specified unsupported combination. */
	for (i = 0; i < RTE_DIM(mlx5_class_invalid_combinations); i++) {
		if ((mlx5_class_invalid_combinations[i] & user_classes) ==
		    mlx5_class_invalid_combinations[i])
			return -EINVAL;
	}
	/* No invalid class combination found. */
	return 0;
}

static bool
mlx5_bus_match(const struct mlx5_class_driver *drv,
	       const struct rte_device *dev)
{
	if (mlx5_dev_is_pci(dev))
		return mlx5_dev_pci_match(drv, dev);
	return true;
}

static struct mlx5_common_device *
to_mlx5_device(const struct rte_device *rte_dev)
{
	struct mlx5_common_device *cdev;

	TAILQ_FOREACH(cdev, &devices_list, next) {
		if (rte_dev == cdev->dev)
			return cdev;
	}
	return NULL;
}

int
mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size)
{
	struct rte_pci_addr pci_addr = { 0 };
	int ret;

	if (mlx5_dev_is_pci(dev)) {
		/* Input might be <BDF>, format PCI address to <DBDF>. */
		ret = rte_pci_addr_parse(dev->name, &pci_addr);
		if (ret != 0)
			return -ENODEV;
		rte_pci_device_name(&pci_addr, addr, size);
		return 0;
	}
#ifdef RTE_EXEC_ENV_LINUX
	return mlx5_auxiliary_get_pci_str(RTE_DEV_TO_AUXILIARY_CONST(dev),
					  addr, size);
#else
	rte_errno = ENODEV;
	return -rte_errno;
#endif
}

/**
 * Register the mempool for the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool being registered.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_dev_mempool_register(struct mlx5_common_device *cdev,
			  struct rte_mempool *mp)
{
	return mlx5_mr_mempool_register(cdev, mp);
}

/**
 * Unregister the mempool from the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool being unregistered.
 */
void
mlx5_dev_mempool_unregister(struct mlx5_common_device *cdev,
			    struct rte_mempool *mp)
{
	if (mlx5_mr_mempool_unregister(cdev, mp) < 0)
		DRV_LOG(WARNING, "Failed to unregister mempool %s for PD %p: %s",
			mp->name, cdev->pd, rte_strerror(rte_errno));
}

/**
 * rte_mempool_walk() callback to register mempools for the protection domain.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_mempool_register_cb(struct rte_mempool *mp, void *arg)
{
	struct mlx5_common_device *cdev = arg;
	int ret;

	ret = mlx5_dev_mempool_register(cdev, mp);
	if (ret < 0 && rte_errno != EEXIST)
		DRV_LOG(ERR,
			"Failed to register existing mempool %s for PD %p: %s",
			mp->name, cdev->pd, rte_strerror(rte_errno));
}

/**
 * rte_mempool_walk() callback to unregister mempools
 * from the protection domain.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_mempool_unregister_cb(struct rte_mempool *mp, void *arg)
{
	mlx5_dev_mempool_unregister((struct mlx5_common_device *)arg, mp);
}

/**
 * Mempool life cycle callback for mlx5 common devices.
 *
 * @param event
 *   Mempool life cycle event.
 * @param mp
 *   Associated mempool.
 * @param arg
 *   Pointer to a device shared context.
 */
static void
mlx5_dev_mempool_event_cb(enum rte_mempool_event event, struct rte_mempool *mp,
			  void *arg)
{
	struct mlx5_common_device *cdev = arg;
	bool extmem = mlx5_mempool_is_extmem(mp);

	switch (event) {
	case RTE_MEMPOOL_EVENT_READY:
		if (extmem)
			break;
		if (mlx5_dev_mempool_register(cdev, mp) < 0)
			DRV_LOG(ERR,
				"Failed to register new mempool %s for PD %p: %s",
				mp->name, cdev->pd, rte_strerror(rte_errno));
		break;
	case RTE_MEMPOOL_EVENT_DESTROY:
		mlx5_dev_mempool_unregister(cdev, mp);
		break;
	}
}

int
mlx5_dev_mempool_subscribe(struct mlx5_common_device *cdev)
{
	int ret = 0;

	if (!cdev->config.mr_mempool_reg_en)
		return 0;
	rte_rwlock_write_lock(&cdev->mr_scache.mprwlock);
	if (cdev->mr_scache.mp_cb_registered)
		goto exit;
	/* Callback for this device may be already registered. */
	ret = rte_mempool_event_callback_register(mlx5_dev_mempool_event_cb,
						  cdev);
	if (ret != 0 && rte_errno != EEXIST)
		goto exit;
	/* Register mempools only once for this device. */
	if (ret == 0)
		rte_mempool_walk(mlx5_dev_mempool_register_cb, cdev);
	ret = 0;
	cdev->mr_scache.mp_cb_registered = 1;
exit:
	rte_rwlock_write_unlock(&cdev->mr_scache.mprwlock);
	return ret;
}

static void
mlx5_dev_mempool_unsubscribe(struct mlx5_common_device *cdev)
{
	int ret;

	if (!cdev->mr_scache.mp_cb_registered ||
	    !cdev->config.mr_mempool_reg_en)
		return;
	/* Stop watching for mempool events and unregister all mempools. */
	ret = rte_mempool_event_callback_unregister(mlx5_dev_mempool_event_cb,
						    cdev);
	if (ret == 0)
		rte_mempool_walk(mlx5_dev_mempool_unregister_cb, cdev);
}

/**
 * Callback for memory event.
 *
 * @param event_type
 *   Memory event type.
 * @param addr
 *   Address of memory.
 * @param len
 *   Size of memory.
 */
static void
mlx5_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
		     size_t len, void *arg __rte_unused)
{
	struct mlx5_common_device *cdev;

	/* Must be called from the primary process. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	switch (event_type) {
	case RTE_MEM_EVENT_FREE:
		pthread_mutex_lock(&devices_list_lock);
		/* Iterate all the existing mlx5 devices. */
		TAILQ_FOREACH(cdev, &devices_list, next)
			mlx5_free_mr_by_addr(&cdev->mr_scache,
					     mlx5_os_get_ctx_device_name(cdev->ctx),
					     addr, len);
		pthread_mutex_unlock(&devices_list_lock);
		break;
	case RTE_MEM_EVENT_ALLOC:
	default:
		break;
	}
}

/**
 * Uninitialize all HW global resources of the device context.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
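 *
 * Releases the Protection Domain and closes the device context, if any.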
 */
static void
mlx5_dev_hw_global_release(struct mlx5_common_device *cdev)
{
	if (cdev->pd != NULL) {
		claim_zero(mlx5_os_dealloc_pd(cdev->pd));
		cdev->pd = NULL;
	}
	if (cdev->ctx != NULL) {
		claim_zero(mlx5_glue->close_device(cdev->ctx));
		cdev->ctx = NULL;
	}
}

/**
 * Initialize all HW global resources of the device context.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
 * @param classes
 *   Chosen classes coming from the user device arguments.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_dev_hw_global_prepare(struct mlx5_common_device *cdev, uint32_t classes)
{
	int ret;

	/* Create the device context. */
	ret = mlx5_os_open_device(cdev, classes);
	if (ret < 0)
		return ret;
	/* Allocate Protection Domain object and extract its pdn. */
	ret = mlx5_os_pd_create(cdev);
	if (ret)
		goto error;
	/* All actions taken below are relevant only when DevX is supported. */
	if (cdev->config.devx == 0)
		return 0;
	/* Query HCA attributes. */
	ret = mlx5_devx_cmd_query_hca_attr(cdev->ctx, &cdev->config.hca_attr);
	if (ret) {
		DRV_LOG(ERR, "Unable to read HCA capabilities.");
		rte_errno = ENOTSUP;
		goto error;
	}
	return 0;
error:
	mlx5_dev_hw_global_release(cdev);
	return ret;
}

static void
mlx5_common_dev_release(struct mlx5_common_device *cdev)
{
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_REMOVE(&devices_list, cdev, next);
	pthread_mutex_unlock(&devices_list_lock);
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		if (TAILQ_EMPTY(&devices_list))
			rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
							  NULL);
		mlx5_dev_mempool_unsubscribe(cdev);
		mlx5_mr_release_cache(&cdev->mr_scache);
		mlx5_dev_hw_global_release(cdev);
	}
	rte_free(cdev);
}

static struct mlx5_common_device *
mlx5_common_dev_create(struct rte_device *eal_dev, uint32_t classes)
{
	struct mlx5_common_device *cdev;
	int ret;

	cdev = rte_zmalloc("mlx5_common_device", sizeof(*cdev), 0);
	if (!cdev) {
		DRV_LOG(ERR, "Device allocation failure.");
		rte_errno = ENOMEM;
		return NULL;
	}
	cdev->dev = eal_dev;
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		goto exit;
	/* Parse device parameters. */
	ret = mlx5_common_config_get(eal_dev->devargs, &cdev->config);
	if (ret < 0) {
		DRV_LOG(ERR, "Failed to process device arguments: %s",
			strerror(rte_errno));
		rte_free(cdev);
		return NULL;
	}
	mlx5_malloc_mem_select(cdev->config.sys_mem_en);
	/* Initialize all HW global resources of the device context. */
	ret = mlx5_dev_hw_global_prepare(cdev, classes);
	if (ret) {
		DRV_LOG(ERR, "Failed to initialize device context.");
		rte_free(cdev);
		return NULL;
	}
	/* Initialize global MR cache resources and update its functions. */
	ret = mlx5_mr_create_cache(&cdev->mr_scache, eal_dev->numa_node);
	if (ret) {
		DRV_LOG(ERR, "Failed to initialize global MR share cache.");
		mlx5_dev_hw_global_release(cdev);
		rte_free(cdev);
		return NULL;
	}
	/* Register callback function for global shared MR cache management. */
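	/*
	 * The callback is registered only once, when the first device is
	 * created (devices_list is still empty here); it serves all mlx5
	 * devices by walking devices_list.
	 */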
	if (TAILQ_EMPTY(&devices_list))
		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
						mlx5_mr_mem_event_cb, NULL);
exit:
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_INSERT_HEAD(&devices_list, cdev, next);
	pthread_mutex_unlock(&devices_list_lock);
	return cdev;
}

static int
drivers_remove(struct mlx5_common_device *cdev, uint32_t enabled_classes)
{
	struct mlx5_class_driver *driver;
	int local_ret = -ENODEV;
	unsigned int i = 0;
	int ret = 0;

	enabled_classes &= cdev->classes_loaded;
	while (enabled_classes) {
		driver = driver_get(RTE_BIT64(i));
		if (driver != NULL) {
			local_ret = driver->remove(cdev);
			if (local_ret == 0)
				cdev->classes_loaded &= ~RTE_BIT64(i);
			else if (ret == 0)
				ret = local_ret;
		}
		enabled_classes &= ~RTE_BIT64(i);
		i++;
	}
	if (local_ret != 0 && ret == 0)
		ret = local_ret;
	return ret;
}

static int
drivers_probe(struct mlx5_common_device *cdev, uint32_t user_classes)
{
	struct mlx5_class_driver *driver;
	uint32_t enabled_classes = 0;
	bool already_loaded;
	int ret;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((driver->drv_class & user_classes) == 0)
			continue;
		if (!mlx5_bus_match(driver, cdev->dev))
			continue;
		already_loaded = cdev->classes_loaded & driver->drv_class;
		if (already_loaded && driver->probe_again == 0) {
			DRV_LOG(ERR, "Device %s is already probed",
				cdev->dev->name);
			ret = -EEXIST;
			goto probe_err;
		}
		ret = driver->probe(cdev);
		if (ret < 0) {
			DRV_LOG(ERR, "Failed to load driver %s",
				driver->name);
			goto probe_err;
		}
		enabled_classes |= driver->drv_class;
	}
	cdev->classes_loaded |= enabled_classes;
	return 0;
probe_err:
	/* Only unload drivers which were enabled in this probe instance. */
	drivers_remove(cdev, enabled_classes);
	return ret;
}

int
mlx5_common_dev_probe(struct rte_device *eal_dev)
{
	struct mlx5_common_device *cdev;
	uint32_t classes = 0;
	bool new_device = false;
	int ret;

	DRV_LOG(INFO, "probe device \"%s\".", eal_dev->name);
	ret = parse_class_options(eal_dev->devargs);
	if (ret < 0) {
		DRV_LOG(ERR, "Unsupported mlx5 class type: %s",
			eal_dev->devargs->args);
		return ret;
	}
	classes = ret;
	if (classes == 0)
		/* Default to net class. */
		classes = MLX5_CLASS_ETH;
	cdev = to_mlx5_device(eal_dev);
	if (!cdev) {
		cdev = mlx5_common_dev_create(eal_dev, classes);
		if (!cdev)
			return -ENOMEM;
		new_device = true;
	}
	/*
	 * Validate the class combination here.
	 * For a new device, the classes_loaded field is 0, so only the
	 * classes given as user device arguments are checked.
	 */
	ret = is_valid_class_combination(classes | cdev->classes_loaded);
	if (ret != 0) {
		DRV_LOG(ERR, "Unsupported mlx5 classes combination.");
		goto class_err;
	}
	ret = drivers_probe(cdev, classes);
	if (ret)
		goto class_err;
	return 0;
class_err:
	if (new_device)
		mlx5_common_dev_release(cdev);
	return ret;
}

int
mlx5_common_dev_remove(struct rte_device *eal_dev)
{
	struct mlx5_common_device *cdev;
	int ret;

	cdev = to_mlx5_device(eal_dev);
	if (!cdev)
		return -ENODEV;
	/* Matching device found, cleanup and unload drivers. */
	ret = drivers_remove(cdev, cdev->classes_loaded);
	if (ret == 0)
		mlx5_common_dev_release(cdev);
	return ret;
}

/**
 * Callback to DMA map external memory to a device.
 *
 * @param rte_dev
 *   Pointer to the generic device.
 * @param addr
 *   Starting virtual address of memory to be mapped.
 * @param iova
 *   Starting IOVA address of memory to be mapped.
 * @param len
 *   Length of memory segment being mapped.
 *
 * @return
 *   0 on success, negative value on error.
 */
int
mlx5_common_dev_dma_map(struct rte_device *rte_dev, void *addr,
			uint64_t iova __rte_unused, size_t len)
{
	struct mlx5_common_device *dev;
	struct mlx5_mr *mr;

	dev = to_mlx5_device(rte_dev);
	if (!dev) {
		DRV_LOG(WARNING,
			"Unable to find matching mlx5 device to device %s",
			rte_dev->name);
		rte_errno = ENODEV;
		return -1;
	}
	mr = mlx5_create_mr_ext(dev->pd, (uintptr_t)addr, len,
				SOCKET_ID_ANY, dev->mr_scache.reg_mr_cb);
	if (!mr) {
		DRV_LOG(WARNING, "Device %s unable to DMA map", rte_dev->name);
		rte_errno = EINVAL;
		return -1;
	}
	rte_rwlock_write_lock(&dev->mr_scache.rwlock);
	LIST_INSERT_HEAD(&dev->mr_scache.mr_list, mr, mr);
	/* Insert to the global cache table. */
	mlx5_mr_insert_cache(&dev->mr_scache, mr);
	rte_rwlock_write_unlock(&dev->mr_scache.rwlock);
	return 0;
}

/**
 * Callback to DMA unmap external memory from a device.
 *
 * @param rte_dev
 *   Pointer to the generic device.
 * @param addr
 *   Starting virtual address of memory to be unmapped.
 * @param iova
 *   Starting IOVA address of memory to be unmapped.
 * @param len
 *   Length of memory segment being unmapped.
 *
 * @return
 *   0 on success, negative value on error.
 */
int
mlx5_common_dev_dma_unmap(struct rte_device *rte_dev, void *addr,
			  uint64_t iova __rte_unused, size_t len __rte_unused)
{
	struct mlx5_common_device *dev;
	struct mr_cache_entry entry;
	struct mlx5_mr *mr;

	dev = to_mlx5_device(rte_dev);
	if (!dev) {
		DRV_LOG(WARNING,
			"Unable to find matching mlx5 device to device %s.",
			rte_dev->name);
		rte_errno = ENODEV;
		return -1;
	}
	rte_rwlock_read_lock(&dev->mr_scache.rwlock);
	mr = mlx5_mr_lookup_list(&dev->mr_scache, &entry, (uintptr_t)addr);
	if (!mr) {
		rte_rwlock_read_unlock(&dev->mr_scache.rwlock);
		DRV_LOG(WARNING,
			"Address 0x%" PRIxPTR " wasn't registered to device %s",
			(uintptr_t)addr, rte_dev->name);
		rte_errno = EINVAL;
		return -1;
	}
	LIST_REMOVE(mr, mr);
	DRV_LOG(DEBUG, "MR(%p) is removed from list.", (void *)mr);
	mlx5_mr_free(mr, dev->mr_scache.dereg_mr_cb);
	mlx5_mr_rebuild_cache(&dev->mr_scache);
	/*
	 * No explicit wmb is needed after updating dev_gen due to
	 * store-release ordering in unlock that provides the
	 * implicit barrier at the software visible level.
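	 * Bumping dev_gen below broadcasts a local MR cache flush
	 * request to the data path.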
	 */
	++dev->mr_scache.dev_gen;
	DRV_LOG(DEBUG, "Broadcasting local cache flush, gen=%d.",
		dev->mr_scache.dev_gen);
	rte_rwlock_read_unlock(&dev->mr_scache.rwlock);
	return 0;
}

void
mlx5_class_driver_register(struct mlx5_class_driver *driver)
{
	mlx5_common_driver_on_register_pci(driver);
	TAILQ_INSERT_TAIL(&drivers_list, driver, next);
}

static void mlx5_common_driver_init(void)
{
	mlx5_common_pci_init();
#ifdef RTE_EXEC_ENV_LINUX
	mlx5_common_auxiliary_init();
#endif
}

static bool mlx5_common_initialized;

/**
 * One-time initialization routine for the run-time dependency on the glue
 * library shared by multiple PMDs. Each mlx5 PMD that depends on the
 * mlx5_common module must invoke it in its constructor.
 */
void
mlx5_common_init(void)
{
	if (mlx5_common_initialized)
		return;

	pthread_mutex_init(&devices_list_lock, NULL);
	mlx5_glue_constructor();
	mlx5_common_driver_init();
	mlx5_common_initialized = true;
}

/**
 * This function is responsible for initializing the variable
 * haswell_broadwell_cpu by checking if the CPU is Intel
 * and reading the data returned from mlx5_cpu_id().
 * Since Haswell and Broadwell CPUs do not gain performance
 * from relaxed ordering, we want to check the CPU type before
 * deciding whether to enable relaxed ordering or not.
 * If the CPU is Haswell or Broadwell the variable will be set to 1,
 * otherwise it will be 0.
 */
RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
{
#ifdef RTE_ARCH_X86_64
	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
	unsigned int i, model, family, brand_id, vendor;
	unsigned int signature_intel_ebx = 0x756e6547;
	unsigned int extended_model;
	unsigned int eax = 0;
	unsigned int ebx = 0;
	unsigned int ecx = 0;
	unsigned int edx = 0;
	int max_level;

	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
	vendor = ebx;
	max_level = eax;
	if (max_level < 1) {
		haswell_broadwell_cpu = 0;
		return;
	}
	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
	model = (eax >> 4) & 0x0f;
	family = (eax >> 8) & 0x0f;
	brand_id = ebx & 0xff;
	extended_model = (eax >> 12) & 0xf0;
	/* Check if the processor is Haswell or Broadwell. */
	if (vendor == signature_intel_ebx) {
		if (family == 0x06)
			model += extended_model;
		if (brand_id == 0 && family == 0x6) {
			for (i = 0; i < RTE_DIM(broadwell_models); i++)
				if (model == broadwell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
			for (i = 0; i < RTE_DIM(haswell_models); i++)
				if (model == haswell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
		}
	}
#endif
	haswell_broadwell_cpu = 0;
}

/**
 * Allocate the User Access Region with DevX on specified device.
 * This routine handles the following UAR allocation issues:
 *
 * - Try to allocate the UAR with the most appropriate memory mapping
 *   type from the ones supported by the host.
 *
 * - Try to allocate the UAR with a non-NULL base address. OFED 5.0.x and
 *   upstream rdma_core before v29 returned NULL as the UAR base address
 *   if the UAR was not the first object in the UAR page.
 *   It caused the PMD failure and we should try to get another UAR till
 *   we get the first one with a non-NULL base address returned.
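 *
 * The number of allocation attempts is limited by MLX5_ALLOC_UAR_RETRY.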
 *
 * @param [in] cdev
 *   Pointer to mlx5 device structure to perform allocation on its context.
 *
 * @return
 *   UAR object pointer on success, NULL otherwise and rte_errno is set.
 */
static void *
mlx5_devx_alloc_uar(struct mlx5_common_device *cdev)
{
	void *uar;
	uint32_t retry, uar_mapping;
	void *base_addr;

	for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		/* Control the mapping type according to the settings. */
		uar_mapping = (cdev->config.dbnc == MLX5_TXDB_NCACHED) ?
			      MLX5DV_UAR_ALLOC_TYPE_NC : MLX5DV_UAR_ALLOC_TYPE_BF;
#else
		/*
		 * It seems we have no way to control the memory mapping type
		 * for the UAR, the default "Write-Combining" type is assumed.
		 */
		uar_mapping = 0;
#endif
		uar = mlx5_glue->devx_alloc_uar(cdev->ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		if (!uar && uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
			/*
			 * In some environments, like a virtual machine, the
			 * Write-Combining mapping might not be supported and
			 * UAR allocation fails. Try the "Non-Cached" mapping
			 * in that case.
			 */
			DRV_LOG(DEBUG, "Failed to allocate DevX UAR (BF)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
			uar = mlx5_glue->devx_alloc_uar(cdev->ctx, uar_mapping);
		} else if (!uar && uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
			/*
			 * If Verbs/kernel does not support "Non-Cached",
			 * try the "Write-Combining" mapping.
			 */
			DRV_LOG(DEBUG, "Failed to allocate DevX UAR (NC)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
			uar = mlx5_glue->devx_alloc_uar(cdev->ctx, uar_mapping);
		}
#endif
		if (!uar) {
			DRV_LOG(ERR, "Failed to allocate DevX UAR (BF/NC)");
			rte_errno = ENOMEM;
			goto exit;
		}
		base_addr = mlx5_os_get_devx_uar_base_addr(uar);
		if (base_addr)
			break;
		/*
		 * The UARs are allocated by rdma_core within the
		 * IB device context, on context closure all UARs
		 * will be freed, so there should be no memory/object leakage.
		 */
		DRV_LOG(DEBUG, "Retrying to allocate DevX UAR");
		uar = NULL;
	}
	/* Check whether we finally succeeded with valid UAR allocation. */
	if (!uar) {
		DRV_LOG(ERR, "Failed to allocate DevX UAR (NULL base)");
		rte_errno = ENOMEM;
	}
	/*
	 * Returning void * instead of struct mlx5dv_devx_uar *
	 * is for compatibility with older rdma-core library headers.
1017 */ 1018 exit: 1019 return uar; 1020 } 1021 1022 void 1023 mlx5_devx_uar_release(struct mlx5_uar *uar) 1024 { 1025 if (uar->obj != NULL) 1026 mlx5_glue->devx_free_uar(uar->obj); 1027 memset(uar, 0, sizeof(*uar)); 1028 } 1029 1030 int 1031 mlx5_devx_uar_prepare(struct mlx5_common_device *cdev, struct mlx5_uar *uar) 1032 { 1033 off_t uar_mmap_offset; 1034 const size_t page_size = rte_mem_page_size(); 1035 void *base_addr; 1036 void *uar_obj; 1037 1038 if (page_size == (size_t)-1) { 1039 DRV_LOG(ERR, "Failed to get mem page size"); 1040 rte_errno = ENOMEM; 1041 return -1; 1042 } 1043 uar_obj = mlx5_devx_alloc_uar(cdev); 1044 if (uar_obj == NULL || mlx5_os_get_devx_uar_reg_addr(uar_obj) == NULL) { 1045 rte_errno = errno; 1046 DRV_LOG(ERR, "Failed to allocate UAR."); 1047 return -1; 1048 } 1049 uar->obj = uar_obj; 1050 uar_mmap_offset = mlx5_os_get_devx_uar_mmap_offset(uar_obj); 1051 base_addr = mlx5_os_get_devx_uar_base_addr(uar_obj); 1052 uar->dbnc = mlx5_db_map_type_get(uar_mmap_offset, page_size); 1053 uar->bf_db.db = mlx5_os_get_devx_uar_reg_addr(uar_obj); 1054 uar->cq_db.db = RTE_PTR_ADD(base_addr, MLX5_CQ_DOORBELL); 1055 #ifndef RTE_ARCH_64 1056 rte_spinlock_init(&uar->bf_sl); 1057 rte_spinlock_init(&uar->cq_sl); 1058 uar->bf_db.sl_p = &uar->bf_sl; 1059 uar->cq_db.sl_p = &uar->cq_sl; 1060 #endif /* RTE_ARCH_64 */ 1061 return 0; 1062 } 1063 1064 RTE_PMD_EXPORT_NAME(mlx5_common_driver, __COUNTER__); 1065