/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <string.h>
#include <stdio.h>

#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_class.h>
#include <rte_malloc.h>

#include "mlx5_common.h"
#include "mlx5_common_os.h"
#include "mlx5_common_log.h"
#include "mlx5_common_defs.h"
#include "mlx5_common_private.h"

uint8_t haswell_broadwell_cpu;

/* On x86_64 Intel processors, check whether relaxed ordering should be used. */
#ifdef RTE_ARCH_X86_64
/**
 * This function returns processor identification and feature information
 * into the registers.
 *
 * @param eax, ebx, ecx, edx
 *   Pointers to the registers that will hold CPU information.
 * @param level
 *   The main category of information returned.
 */
static inline void mlx5_cpu_id(unsigned int level,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	__asm__("cpuid\n\t"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (level));
}
#endif

RTE_LOG_REGISTER_DEFAULT(mlx5_common_logtype, NOTICE)

/* Head of list of drivers. */
static TAILQ_HEAD(mlx5_drivers, mlx5_class_driver) drivers_list =
				TAILQ_HEAD_INITIALIZER(drivers_list);

/* Head of devices. */
static TAILQ_HEAD(mlx5_devices, mlx5_common_device) devices_list =
				TAILQ_HEAD_INITIALIZER(devices_list);
static pthread_mutex_t devices_list_lock;

static const struct {
	const char *name;
	unsigned int drv_class;
} mlx5_classes[] = {
	{ .name = "vdpa", .drv_class = MLX5_CLASS_VDPA },
	{ .name = "eth", .drv_class = MLX5_CLASS_ETH },
	/* Keep class "net" for backward compatibility. */
	{ .name = "net", .drv_class = MLX5_CLASS_ETH },
	{ .name = "regex", .drv_class = MLX5_CLASS_REGEX },
	{ .name = "compress", .drv_class = MLX5_CLASS_COMPRESS },
	{ .name = "crypto", .drv_class = MLX5_CLASS_CRYPTO },
};

static int
class_name_to_value(const char *class_name)
{
	unsigned int i;

	for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
		if (strcmp(class_name, mlx5_classes[i].name) == 0)
			return mlx5_classes[i].drv_class;
	}
	return -EINVAL;
}

static struct mlx5_class_driver *
driver_get(uint32_t class)
{
	struct mlx5_class_driver *driver;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((uint32_t)driver->drv_class == class)
			return driver;
	}
	return NULL;
}

/**
 * Verify and store value for devargs.
 *
 * @param[in] key
 *   Key argument to verify.
 * @param[in] val
 *   Value associated with key.
 * @param opaque
 *   User data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_args_check_handler(const char *key, const char *val, void *opaque)
{
	struct mlx5_common_dev_config *config = opaque;
	signed long tmp;

	errno = 0;
	tmp = strtol(val, NULL, 0);
	if (errno) {
		rte_errno = errno;
		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
		return -rte_errno;
	}
	if (strcmp(key, "tx_db_nc") == 0) {
		if (tmp != MLX5_TXDB_CACHED &&
		    tmp != MLX5_TXDB_NCACHED &&
		    tmp != MLX5_TXDB_HEURISTIC) {
			DRV_LOG(ERR, "Invalid Tx doorbell mapping parameter.");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->dbnc = tmp;
	} else if (strcmp(key, "mr_ext_memseg_en") == 0) {
		config->mr_ext_memseg_en = !!tmp;
	} else if (strcmp(key, "mr_mempool_reg_en") == 0) {
		config->mr_mempool_reg_en = !!tmp;
	} else if (strcmp(key, "sys_mem_en") == 0) {
		config->sys_mem_en = !!tmp;
	}
	return 0;
}

/**
 * Parse common device parameters.
 *
 * @param devargs
 *   Device arguments structure.
 * @param config
 *   Pointer to device configuration structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_common_config_get(struct rte_devargs *devargs,
		       struct mlx5_common_dev_config *config)
{
	struct rte_kvargs *kvlist;
	int ret = 0;

	/* Set defaults. */
	config->mr_ext_memseg_en = 1;
	config->mr_mempool_reg_en = 1;
	config->sys_mem_en = 0;
	config->dbnc = MLX5_ARG_UNSET;
	if (devargs == NULL)
		return 0;
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	ret = rte_kvargs_process(kvlist, NULL, mlx5_common_args_check_handler,
				 config);
	if (ret)
		ret = -rte_errno;
	rte_kvargs_free(kvlist);
	DRV_LOG(DEBUG, "mr_ext_memseg_en is %u.", config->mr_ext_memseg_en);
	DRV_LOG(DEBUG, "mr_mempool_reg_en is %u.", config->mr_mempool_reg_en);
	DRV_LOG(DEBUG, "sys_mem_en is %u.", config->sys_mem_en);
	DRV_LOG(DEBUG, "Tx doorbell mapping parameter is %d.", config->dbnc);
	return ret;
}

static int
devargs_class_handler(__rte_unused const char *key,
		      const char *class_names, void *opaque)
{
	int *ret = opaque;
	int class_val;
	char *scratch;
	char *found;
	char *refstr = NULL;

	*ret = 0;
	scratch = strdup(class_names);
	if (scratch == NULL) {
		*ret = -ENOMEM;
		return *ret;
	}
	found = strtok_r(scratch, ":", &refstr);
	if (found == NULL)
		/* Empty string. */
		goto err;
	do {
		/* Extract each individual class name. Multiple
		 * classes can be supplied as class=net:regex:foo:bar.
		 */
		class_val = class_name_to_value(found);
		/* Check if it is a valid class. */
		if (class_val < 0) {
			*ret = -EINVAL;
			goto err;
		}
		*ret |= class_val;
		found = strtok_r(NULL, ":", &refstr);
	} while (found != NULL);
err:
	free(scratch);
	if (*ret < 0)
		DRV_LOG(ERR, "Invalid mlx5 class options: %s.\n", class_names);
	return *ret;
}

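/*
 * Illustrative devargs example (a sketch, not a definitive reference):
 * the common parameters checked by mlx5_common_args_check_handler() and
 * the "class" key handled above may be combined on a single device, e.g.
 *
 *	-a 0000:03:00.0,class=net:regex,mr_ext_memseg_en=0,sys_mem_en=1
 *
 * The PCI address and the chosen values are hypothetical placeholders;
 * parse_class_options() below turns the "class" value into the
 * MLX5_CLASS_* bit-mask consumed by drivers_probe().
 */
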
static int
parse_class_options(const struct rte_devargs *devargs)
{
	struct rte_kvargs *kvlist;
	int ret = 0;

	if (devargs == NULL)
		return 0;
	if (devargs->cls != NULL && devargs->cls->name != NULL)
		/* Global syntax, only one class type. */
		return class_name_to_value(devargs->cls->name);
	/* Legacy devargs support multiple classes. */
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL)
		return 0;
	rte_kvargs_process(kvlist, RTE_DEVARGS_KEY_CLASS,
			   devargs_class_handler, &ret);
	rte_kvargs_free(kvlist);
	return ret;
}

static const unsigned int mlx5_class_invalid_combinations[] = {
	MLX5_CLASS_ETH | MLX5_CLASS_VDPA,
	/* New class combination should be added here. */
};

static int
is_valid_class_combination(uint32_t user_classes)
{
	unsigned int i;

	/* Verify if user specified unsupported combination. */
	for (i = 0; i < RTE_DIM(mlx5_class_invalid_combinations); i++) {
		if ((mlx5_class_invalid_combinations[i] & user_classes) ==
		    mlx5_class_invalid_combinations[i])
			return -EINVAL;
	}
	/* No invalid class combination found. */
	return 0;
}

static bool
mlx5_bus_match(const struct mlx5_class_driver *drv,
	       const struct rte_device *dev)
{
	if (mlx5_dev_is_pci(dev))
		return mlx5_dev_pci_match(drv, dev);
	return true;
}

static struct mlx5_common_device *
to_mlx5_device(const struct rte_device *rte_dev)
{
	struct mlx5_common_device *cdev;

	TAILQ_FOREACH(cdev, &devices_list, next) {
		if (rte_dev == cdev->dev)
			return cdev;
	}
	return NULL;
}

int
mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size)
{
	struct rte_pci_addr pci_addr = { 0 };
	int ret;

	if (mlx5_dev_is_pci(dev)) {
		/* Input might be <BDF>, format PCI address to <DBDF>. */
		ret = rte_pci_addr_parse(dev->name, &pci_addr);
		if (ret != 0)
			return -ENODEV;
		rte_pci_device_name(&pci_addr, addr, size);
		return 0;
	}
#ifdef RTE_EXEC_ENV_LINUX
	return mlx5_auxiliary_get_pci_str(RTE_DEV_TO_AUXILIARY_CONST(dev),
					  addr, size);
#else
	rte_errno = ENODEV;
	return -rte_errno;
#endif
}

/**
 * Callback for memory event.
 *
 * @param event_type
 *   Memory event type.
 * @param addr
 *   Address of memory.
 * @param len
 *   Size of memory.
 */
static void
mlx5_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
		     size_t len, void *arg __rte_unused)
{
	struct mlx5_common_device *cdev;

	/* Must be called from the primary process. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	switch (event_type) {
	case RTE_MEM_EVENT_FREE:
		pthread_mutex_lock(&devices_list_lock);
		/* Iterate all the existing mlx5 devices. */
		TAILQ_FOREACH(cdev, &devices_list, next)
			mlx5_free_mr_by_addr(&cdev->mr_scache,
					     mlx5_os_get_ctx_device_name
								(cdev->ctx),
					     addr, len);
		pthread_mutex_unlock(&devices_list_lock);
		break;
	case RTE_MEM_EVENT_ALLOC:
	default:
		break;
	}
}

/**
 * Uninitialize all HW global resources of the device context.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
 */
static void
mlx5_dev_hw_global_release(struct mlx5_common_device *cdev)
{
	if (cdev->pd != NULL) {
		claim_zero(mlx5_os_dealloc_pd(cdev->pd));
		cdev->pd = NULL;
	}
	if (cdev->ctx != NULL) {
		claim_zero(mlx5_glue->close_device(cdev->ctx));
		cdev->ctx = NULL;
	}
}

/**
 * Initialize all HW global resources of the device context.
 *
 * @param cdev
 *   Pointer to mlx5 device structure.
 * @param classes
 *   Chosen classes coming from user device arguments.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_dev_hw_global_prepare(struct mlx5_common_device *cdev, uint32_t classes)
{
	int ret;

	/* Create device context. */
	ret = mlx5_os_open_device(cdev, classes);
	if (ret < 0)
		return ret;
	/* Allocate Protection Domain object and extract its pdn. */
	ret = mlx5_os_pd_create(cdev);
	if (ret)
		goto error;
	/* All actions taken below are relevant only when DevX is supported. */
	if (cdev->config.devx == 0)
		return 0;
	/* Query HCA attributes. */
	ret = mlx5_devx_cmd_query_hca_attr(cdev->ctx, &cdev->config.hca_attr);
	if (ret) {
		DRV_LOG(ERR, "Unable to read HCA capabilities.");
		rte_errno = ENOTSUP;
		goto error;
	}
	return 0;
error:
	mlx5_dev_hw_global_release(cdev);
	return ret;
}

static void
mlx5_common_dev_release(struct mlx5_common_device *cdev)
{
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_REMOVE(&devices_list, cdev, next);
	pthread_mutex_unlock(&devices_list_lock);
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		if (TAILQ_EMPTY(&devices_list))
			rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
							  NULL);
		mlx5_mr_release_cache(&cdev->mr_scache);
		mlx5_dev_hw_global_release(cdev);
	}
	rte_free(cdev);
}

static struct mlx5_common_device *
mlx5_common_dev_create(struct rte_device *eal_dev, uint32_t classes)
{
	struct mlx5_common_device *cdev;
	int ret;

	cdev = rte_zmalloc("mlx5_common_device", sizeof(*cdev), 0);
	if (!cdev) {
		DRV_LOG(ERR, "Device allocation failure.");
		rte_errno = ENOMEM;
		return NULL;
	}
	cdev->dev = eal_dev;
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		goto exit;
	/* Parse device parameters. */
	ret = mlx5_common_config_get(eal_dev->devargs, &cdev->config);
	if (ret < 0) {
		DRV_LOG(ERR, "Failed to process device arguments: %s",
			strerror(rte_errno));
		rte_free(cdev);
		return NULL;
	}
	mlx5_malloc_mem_select(cdev->config.sys_mem_en);
	/* Initialize all HW global resources of the device context. */
	ret = mlx5_dev_hw_global_prepare(cdev, classes);
	if (ret) {
		DRV_LOG(ERR, "Failed to initialize device context.");
		rte_free(cdev);
		return NULL;
	}
	/* Initialize global MR cache resources and update its functions. */
	ret = mlx5_mr_create_cache(&cdev->mr_scache, eal_dev->numa_node);
	if (ret) {
		DRV_LOG(ERR, "Failed to initialize global MR share cache.");
		mlx5_dev_hw_global_release(cdev);
		rte_free(cdev);
		return NULL;
	}
	/* Register callback function for global shared MR cache management. */
	if (TAILQ_EMPTY(&devices_list))
		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
						mlx5_mr_mem_event_cb, NULL);
exit:
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_INSERT_HEAD(&devices_list, cdev, next);
	pthread_mutex_unlock(&devices_list_lock);
	return cdev;
}

static int
drivers_remove(struct mlx5_common_device *cdev, uint32_t enabled_classes)
{
	struct mlx5_class_driver *driver;
	int local_ret = -ENODEV;
	unsigned int i = 0;
	int ret = 0;

	enabled_classes &= cdev->classes_loaded;
	/* Walk the enabled class bits and invoke each driver's remove(). */
	while (enabled_classes) {
		driver = driver_get(RTE_BIT64(i));
		if (driver != NULL) {
			local_ret = driver->remove(cdev);
			if (local_ret == 0)
				cdev->classes_loaded &= ~RTE_BIT64(i);
			else if (ret == 0)
				ret = local_ret;
		}
		enabled_classes &= ~RTE_BIT64(i);
		i++;
	}
	if (local_ret != 0 && ret == 0)
		ret = local_ret;
	return ret;
}

static int
drivers_probe(struct mlx5_common_device *cdev, uint32_t user_classes)
{
	struct mlx5_class_driver *driver;
	uint32_t enabled_classes = 0;
	bool already_loaded;
	int ret;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((driver->drv_class & user_classes) == 0)
			continue;
		if (!mlx5_bus_match(driver, cdev->dev))
			continue;
		already_loaded = cdev->classes_loaded & driver->drv_class;
		if (already_loaded && driver->probe_again == 0) {
			DRV_LOG(ERR, "Device %s is already probed",
				cdev->dev->name);
			ret = -EEXIST;
			goto probe_err;
		}
		ret = driver->probe(cdev);
		if (ret < 0) {
			DRV_LOG(ERR, "Failed to load driver %s",
				driver->name);
			goto probe_err;
		}
		enabled_classes |= driver->drv_class;
	}
	cdev->classes_loaded |= enabled_classes;
	return 0;
probe_err:
	/* Only unload drivers which were enabled in this probe instance. */
	drivers_remove(cdev, enabled_classes);
	return ret;
}

int
mlx5_common_dev_probe(struct rte_device *eal_dev)
{
	struct mlx5_common_device *cdev;
	uint32_t classes = 0;
	bool new_device = false;
	int ret;

	DRV_LOG(INFO, "probe device \"%s\".", eal_dev->name);
	ret = parse_class_options(eal_dev->devargs);
	if (ret < 0) {
		DRV_LOG(ERR, "Unsupported mlx5 class type: %s",
			eal_dev->devargs->args);
		return ret;
	}
	classes = ret;
	if (classes == 0)
		/* Default to net class. */
		classes = MLX5_CLASS_ETH;
	cdev = to_mlx5_device(eal_dev);
	if (!cdev) {
		cdev = mlx5_common_dev_create(eal_dev, classes);
		if (!cdev)
			return -ENOMEM;
		new_device = true;
	}
	/*
	 * Validate combination here.
	 * For a new device, the classes_loaded field is 0 and only the
	 * classes given as user device arguments are checked.
	 */
	ret = is_valid_class_combination(classes | cdev->classes_loaded);
	if (ret != 0) {
		DRV_LOG(ERR, "Unsupported mlx5 classes combination.");
		goto class_err;
	}
	ret = drivers_probe(cdev, classes);
	if (ret)
		goto class_err;
	return 0;
class_err:
	if (new_device)
		mlx5_common_dev_release(cdev);
	return ret;
}

int
mlx5_common_dev_remove(struct rte_device *eal_dev)
{
	struct mlx5_common_device *cdev;
	int ret;

	cdev = to_mlx5_device(eal_dev);
	if (!cdev)
		return -ENODEV;
	/* Matching device found, cleanup and unload drivers. */
	ret = drivers_remove(cdev, cdev->classes_loaded);
	if (ret == 0)
		mlx5_common_dev_release(cdev);
	return ret;
}

/**
 * Callback to DMA map external memory to a device.
 *
 * @param rte_dev
 *   Pointer to the generic device.
 * @param addr
 *   Starting virtual address of memory to be mapped.
 * @param iova
 *   Starting IOVA address of memory to be mapped.
 * @param len
 *   Length of memory segment being mapped.
 *
 * @return
 *   0 on success, negative value on error.
 */
int
mlx5_common_dev_dma_map(struct rte_device *rte_dev, void *addr,
			uint64_t iova __rte_unused, size_t len)
{
	struct mlx5_common_device *dev;
	struct mlx5_mr *mr;

	dev = to_mlx5_device(rte_dev);
	if (!dev) {
		DRV_LOG(WARNING,
			"Unable to find matching mlx5 device to device %s",
			rte_dev->name);
		rte_errno = ENODEV;
		return -1;
	}
	mr = mlx5_create_mr_ext(dev->pd, (uintptr_t)addr, len,
				SOCKET_ID_ANY, dev->mr_scache.reg_mr_cb);
	if (!mr) {
		DRV_LOG(WARNING, "Device %s unable to DMA map", rte_dev->name);
		rte_errno = EINVAL;
		return -1;
	}
	rte_rwlock_write_lock(&dev->mr_scache.rwlock);
	LIST_INSERT_HEAD(&dev->mr_scache.mr_list, mr, mr);
	/* Insert to the global cache table. */
	mlx5_mr_insert_cache(&dev->mr_scache, mr);
	rte_rwlock_write_unlock(&dev->mr_scache.rwlock);
	return 0;
}

/**
 * Callback to DMA unmap external memory from a device.
 *
 * @param rte_dev
 *   Pointer to the generic device.
 * @param addr
 *   Starting virtual address of memory to be unmapped.
 * @param iova
 *   Starting IOVA address of memory to be unmapped.
 * @param len
 *   Length of memory segment being unmapped.
 *
 * @return
 *   0 on success, negative value on error.
 */
int
mlx5_common_dev_dma_unmap(struct rte_device *rte_dev, void *addr,
			  uint64_t iova __rte_unused, size_t len __rte_unused)
{
	struct mlx5_common_device *dev;
	struct mr_cache_entry entry;
	struct mlx5_mr *mr;

	dev = to_mlx5_device(rte_dev);
	if (!dev) {
		DRV_LOG(WARNING,
			"Unable to find matching mlx5 device to device %s.",
			rte_dev->name);
		rte_errno = ENODEV;
		return -1;
	}
	rte_rwlock_read_lock(&dev->mr_scache.rwlock);
	mr = mlx5_mr_lookup_list(&dev->mr_scache, &entry, (uintptr_t)addr);
	if (!mr) {
		rte_rwlock_read_unlock(&dev->mr_scache.rwlock);
		DRV_LOG(WARNING,
			"Address 0x%" PRIxPTR " wasn't registered to device %s",
			(uintptr_t)addr, rte_dev->name);
		rte_errno = EINVAL;
		return -1;
	}
	LIST_REMOVE(mr, mr);
	DRV_LOG(DEBUG, "MR(%p) is removed from list.", (void *)mr);
	mlx5_mr_free(mr, dev->mr_scache.dereg_mr_cb);
	mlx5_mr_rebuild_cache(&dev->mr_scache);
	/*
	 * No explicit wmb is needed after updating dev_gen due to
	 * store-release ordering in unlock that provides the
	 * implicit barrier at the software visible level.
	 */
	++dev->mr_scache.dev_gen;
	DRV_LOG(DEBUG, "Broadcasting local cache flush, gen=%d.",
		dev->mr_scache.dev_gen);
	rte_rwlock_read_unlock(&dev->mr_scache.rwlock);
	return 0;
}

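/*
 * Illustrative sketch only (not part of this file): a class PMD is
 * expected to define a driver descriptor and register it from its
 * constructor, roughly as below. The names mlx5_foo_driver,
 * mlx5_foo_probe and mlx5_foo_remove are hypothetical placeholders.
 *
 *	static struct mlx5_class_driver mlx5_foo_driver = {
 *		.drv_class = MLX5_CLASS_ETH,
 *		.name = "mlx5_foo",
 *		.probe = mlx5_foo_probe,
 *		.remove = mlx5_foo_remove,
 *	};
 *
 *	RTE_INIT(mlx5_foo_init)
 *	{
 *		mlx5_common_init();
 *		mlx5_class_driver_register(&mlx5_foo_driver);
 *	}
 *
 * mlx5_common_init() must run first (see its description below);
 * registration appends the descriptor to drivers_list, which is walked
 * by drivers_probe() and drivers_remove() above.
 */
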
688 */ 689 ++dev->mr_scache.dev_gen; 690 DRV_LOG(DEBUG, "Broadcasting local cache flush, gen=%d.", 691 dev->mr_scache.dev_gen); 692 rte_rwlock_read_unlock(&dev->mr_scache.rwlock); 693 return 0; 694 } 695 696 void 697 mlx5_class_driver_register(struct mlx5_class_driver *driver) 698 { 699 mlx5_common_driver_on_register_pci(driver); 700 TAILQ_INSERT_TAIL(&drivers_list, driver, next); 701 } 702 703 static void mlx5_common_driver_init(void) 704 { 705 mlx5_common_pci_init(); 706 #ifdef RTE_EXEC_ENV_LINUX 707 mlx5_common_auxiliary_init(); 708 #endif 709 } 710 711 static bool mlx5_common_initialized; 712 713 /** 714 * One time innitialization routine for run-time dependency on glue library 715 * for multiple PMDs. Each mlx5 PMD that depends on mlx5_common module, 716 * must invoke in its constructor. 717 */ 718 void 719 mlx5_common_init(void) 720 { 721 if (mlx5_common_initialized) 722 return; 723 724 pthread_mutex_init(&devices_list_lock, NULL); 725 mlx5_glue_constructor(); 726 mlx5_common_driver_init(); 727 mlx5_common_initialized = true; 728 } 729 730 /** 731 * This function is responsible of initializing the variable 732 * haswell_broadwell_cpu by checking if the cpu is intel 733 * and reading the data returned from mlx5_cpu_id(). 734 * since haswell and broadwell cpus don't have improved performance 735 * when using relaxed ordering we want to check the cpu type before 736 * before deciding whether to enable RO or not. 737 * if the cpu is haswell or broadwell the variable will be set to 1 738 * otherwise it will be 0. 739 */ 740 RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG) 741 { 742 #ifdef RTE_ARCH_X86_64 743 unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56}; 744 unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46}; 745 unsigned int i, model, family, brand_id, vendor; 746 unsigned int signature_intel_ebx = 0x756e6547; 747 unsigned int extended_model; 748 unsigned int eax = 0; 749 unsigned int ebx = 0; 750 unsigned int ecx = 0; 751 unsigned int edx = 0; 752 int max_level; 753 754 mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx); 755 vendor = ebx; 756 max_level = eax; 757 if (max_level < 1) { 758 haswell_broadwell_cpu = 0; 759 return; 760 } 761 mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx); 762 model = (eax >> 4) & 0x0f; 763 family = (eax >> 8) & 0x0f; 764 brand_id = ebx & 0xff; 765 extended_model = (eax >> 12) & 0xf0; 766 /* Check if the processor is Haswell or Broadwell */ 767 if (vendor == signature_intel_ebx) { 768 if (family == 0x06) 769 model += extended_model; 770 if (brand_id == 0 && family == 0x6) { 771 for (i = 0; i < RTE_DIM(broadwell_models); i++) 772 if (model == broadwell_models[i]) { 773 haswell_broadwell_cpu = 1; 774 return; 775 } 776 for (i = 0; i < RTE_DIM(haswell_models); i++) 777 if (model == haswell_models[i]) { 778 haswell_broadwell_cpu = 1; 779 return; 780 } 781 } 782 } 783 #endif 784 haswell_broadwell_cpu = 0; 785 } 786 787 /** 788 * Allocate the User Access Region with DevX on specified device. 789 * 790 * @param [in] ctx 791 * Infiniband device context to perform allocation on. 792 * @param [in] mapping 793 * MLX5DV_UAR_ALLOC_TYPE_BF - allocate as cached memory with write-combining 794 * attributes (if supported by the host), the 795 * writes to the UAR registers must be followed 796 * by write memory barrier. 797 * MLX5DV_UAR_ALLOC_TYPE_NC - allocate as non-cached nenory, all writes are 798 * promoted to the registers immediately, no 799 * memory barriers needed. 
/**
 * Allocate the User Access Region with DevX on specified device.
 *
 * @param [in] ctx
 *   Infiniband device context to perform allocation on.
 * @param [in] mapping
 *   MLX5DV_UAR_ALLOC_TYPE_BF - allocate as cached memory with write-combining
 *				attributes (if supported by the host), the
 *				writes to the UAR registers must be followed
 *				by write memory barrier.
 *   MLX5DV_UAR_ALLOC_TYPE_NC - allocate as non-cached memory, all writes are
 *				promoted to the registers immediately, no
 *				memory barriers needed.
 *   mapping < 0 - the first attempt is performed with MLX5DV_UAR_ALLOC_TYPE_BF,
 *		   if this fails the next attempt with MLX5DV_UAR_ALLOC_TYPE_NC
 *		   is performed. The drivers specifying negative values should
 *		   always provide the write memory barrier operation after UAR
 *		   register writes.
 *   If there are no definitions for the MLX5DV_UAR_ALLOC_TYPE_xx (older rdma
 *   library headers), the caller can specify 0.
 *
 * @return
 *   UAR object pointer on success, NULL otherwise and rte_errno is set.
 */
void *
mlx5_devx_alloc_uar(void *ctx, int mapping)
{
	void *uar;
	uint32_t retry, uar_mapping;
	void *base_addr;

	for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		/* Control the mapping type according to the settings. */
		uar_mapping = (mapping < 0) ?
			      MLX5DV_UAR_ALLOC_TYPE_NC : mapping;
#else
		/*
		 * It seems we have no way to control the memory mapping type
		 * for the UAR, the default "Write-Combining" type is supposed.
		 */
		uar_mapping = 0;
		RTE_SET_USED(mapping);
#endif
		uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		if (!uar &&
		    mapping < 0 &&
		    uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
			/*
			 * In some environments like virtual machines the
			 * Write-Combining mapping might not be supported and
			 * UAR allocation fails. We try the "Non-Cached"
			 * mapping for this case.
			 */
			DRV_LOG(WARNING, "Failed to allocate DevX UAR (BF)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
			uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
		} else if (!uar &&
			   mapping < 0 &&
			   uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
			/*
			 * If Verbs/kernel does not support "Non-Cached",
			 * try the "Write-Combining".
			 */
			DRV_LOG(WARNING, "Failed to allocate DevX UAR (NC)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
			uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
		}
#endif
		if (!uar) {
			DRV_LOG(ERR, "Failed to allocate DevX UAR (BF/NC)");
			rte_errno = ENOMEM;
			goto exit;
		}
		base_addr = mlx5_os_get_devx_uar_base_addr(uar);
		if (base_addr)
			break;
		/*
		 * The UARs are allocated by rdma_core within the
		 * IB device context, on context closure all UARs
		 * will be freed, should be no memory/object leakage.
		 */
		DRV_LOG(WARNING, "Retrying to allocate DevX UAR");
		uar = NULL;
	}
	/* Check whether we finally succeeded with valid UAR allocation. */
	if (!uar) {
		DRV_LOG(ERR, "Failed to allocate DevX UAR (NULL base)");
		rte_errno = ENOMEM;
	}
	/*
	 * Returning void * instead of struct mlx5dv_devx_uar *
	 * is for compatibility with older rdma-core library headers.
	 */
exit:
	return uar;
}

RTE_PMD_EXPORT_NAME(mlx5_common_driver, __COUNTER__);