/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <string.h>
#include <stdio.h>

#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_class.h>
#include <rte_malloc.h>

#include "mlx5_common.h"
#include "mlx5_common_os.h"
#include "mlx5_common_log.h"
#include "mlx5_common_defs.h"
#include "mlx5_common_private.h"

uint8_t haswell_broadwell_cpu;

/* On x86_64 Intel processors, check whether relaxed ordering
 * should be used.
 */
#ifdef RTE_ARCH_X86_64
/**
 * This function returns processor identification and feature information
 * into the registers.
 *
 * @param eax, ebx, ecx, edx
 *   Pointers to the registers that will hold CPU information.
 * @param level
 *   The main category of information returned.
 */
static inline void mlx5_cpu_id(unsigned int level,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	__asm__("cpuid\n\t"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (level));
}
#endif

RTE_LOG_REGISTER_DEFAULT(mlx5_common_logtype, NOTICE)

/* Head of list of drivers. */
static TAILQ_HEAD(mlx5_drivers, mlx5_class_driver) drivers_list =
				TAILQ_HEAD_INITIALIZER(drivers_list);

/* Head of devices. */
static TAILQ_HEAD(mlx5_devices, mlx5_common_device) devices_list =
				TAILQ_HEAD_INITIALIZER(devices_list);
static pthread_mutex_t devices_list_lock;

static const struct {
	const char *name;
	unsigned int drv_class;
} mlx5_classes[] = {
	{ .name = "vdpa", .drv_class = MLX5_CLASS_VDPA },
	{ .name = "eth", .drv_class = MLX5_CLASS_ETH },
	/* Keep class "net" for backward compatibility. */
	{ .name = "net", .drv_class = MLX5_CLASS_ETH },
	{ .name = "regex", .drv_class = MLX5_CLASS_REGEX },
	{ .name = "compress", .drv_class = MLX5_CLASS_COMPRESS },
	{ .name = "crypto", .drv_class = MLX5_CLASS_CRYPTO },
};

static int
class_name_to_value(const char *class_name)
{
	unsigned int i;

	for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
		if (strcmp(class_name, mlx5_classes[i].name) == 0)
			return mlx5_classes[i].drv_class;
	}
	return -EINVAL;
}

static struct mlx5_class_driver *
driver_get(uint32_t class)
{
	struct mlx5_class_driver *driver;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((uint32_t)driver->drv_class == class)
			return driver;
	}
	return NULL;
}

static int
devargs_class_handler(__rte_unused const char *key,
		      const char *class_names, void *opaque)
{
	int *ret = opaque;
	int class_val;
	char *scratch;
	char *found;
	char *refstr = NULL;

	*ret = 0;
	scratch = strdup(class_names);
	if (scratch == NULL) {
		*ret = -ENOMEM;
		return *ret;
	}
	found = strtok_r(scratch, ":", &refstr);
	if (found == NULL)
		/* Empty string. */
		goto err;
	do {
		/* Extract each individual class name. Multiple
		 * classes can be supplied as class=net:regex:foo:bar.
		 */
		class_val = class_name_to_value(found);
		/* Check if it's a valid class. */
		if (class_val < 0) {
			*ret = -EINVAL;
			goto err;
		}
		*ret |= class_val;
		found = strtok_r(NULL, ":", &refstr);
	} while (found != NULL);
err:
	free(scratch);
	if (*ret < 0)
		DRV_LOG(ERR, "Invalid mlx5 class options: %s.\n", class_names);
	return *ret;
}

static int
parse_class_options(const struct rte_devargs *devargs)
{
	struct rte_kvargs *kvlist;
	int ret = 0;

	if (devargs == NULL)
		return 0;
	if (devargs->cls != NULL && devargs->cls->name != NULL)
		/* Global syntax, only one class type. */
		return class_name_to_value(devargs->cls->name);
	/* Legacy devargs support multiple classes. */
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL)
		return 0;
	rte_kvargs_process(kvlist, RTE_DEVARGS_KEY_CLASS,
			   devargs_class_handler, &ret);
	rte_kvargs_free(kvlist);
	return ret;
}

static const unsigned int mlx5_class_invalid_combinations[] = {
	MLX5_CLASS_ETH | MLX5_CLASS_VDPA,
	/* New class combination should be added here. */
};

static int
is_valid_class_combination(uint32_t user_classes)
{
	unsigned int i;

	/* Verify if user specified unsupported combination. */
	for (i = 0; i < RTE_DIM(mlx5_class_invalid_combinations); i++) {
		if ((mlx5_class_invalid_combinations[i] & user_classes) ==
		    mlx5_class_invalid_combinations[i])
			return -EINVAL;
	}
	/* No invalid class combination found. */
	return 0;
}
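
/*
 * Illustrative sketch (not compiled as part of the driver): how the helpers
 * above cooperate while parsing user options.  Class names are folded into a
 * single bit-mask of MLX5_CLASS_* values and the mask is then checked against
 * the invalid-combination table.  The variable name below is hypothetical.
 *
 *	int classes = class_name_to_value("eth") |
 *		      class_name_to_value("regex");
 *
 *	// "eth" and "regex" may be probed together on one device:
 *	RTE_ASSERT(is_valid_class_combination(classes) == 0);
 *	// "eth" and "vdpa" on the same device are rejected with -EINVAL:
 *	RTE_ASSERT(is_valid_class_combination(MLX5_CLASS_ETH |
 *					      MLX5_CLASS_VDPA) != 0);
 */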

static bool
device_class_enabled(const struct mlx5_common_device *device, uint32_t class)
{
	return (device->classes_loaded & class) > 0;
}

static bool
mlx5_bus_match(const struct mlx5_class_driver *drv,
	       const struct rte_device *dev)
{
	if (mlx5_dev_is_pci(dev))
		return mlx5_dev_pci_match(drv, dev);
	return true;
}

static struct mlx5_common_device *
to_mlx5_device(const struct rte_device *rte_dev)
{
	struct mlx5_common_device *dev;

	TAILQ_FOREACH(dev, &devices_list, next) {
		if (rte_dev == dev->dev)
			return dev;
	}
	return NULL;
}

int
mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size)
{
	struct rte_pci_addr pci_addr = { 0 };
	int ret;

	if (mlx5_dev_is_pci(dev)) {
		/* Input might be <BDF>, format PCI address to <DBDF>. */
		ret = rte_pci_addr_parse(dev->name, &pci_addr);
		if (ret != 0)
			return -ENODEV;
		rte_pci_device_name(&pci_addr, addr, size);
		return 0;
	}
#ifdef RTE_EXEC_ENV_LINUX
	return mlx5_auxiliary_get_pci_str(RTE_DEV_TO_AUXILIARY_CONST(dev),
					  addr, size);
#else
	rte_errno = ENODEV;
	return -rte_errno;
#endif
}

static void
dev_release(struct mlx5_common_device *dev)
{
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_REMOVE(&devices_list, dev, next);
	pthread_mutex_unlock(&devices_list_lock);
	rte_free(dev);
}

static int
drivers_remove(struct mlx5_common_device *dev, uint32_t enabled_classes)
{
	struct mlx5_class_driver *driver;
	int local_ret = -ENODEV;
	unsigned int i = 0;
	int ret = 0;

	enabled_classes &= dev->classes_loaded;
	while (enabled_classes) {
		driver = driver_get(RTE_BIT64(i));
		if (driver != NULL) {
			local_ret = driver->remove(dev);
			if (local_ret == 0)
				dev->classes_loaded &= ~RTE_BIT64(i);
			else if (ret == 0)
				ret = local_ret;
		}
		enabled_classes &= ~RTE_BIT64(i);
		i++;
	}
	if (local_ret != 0 && ret == 0)
		ret = local_ret;
	return ret;
}

static int
drivers_probe(struct mlx5_common_device *dev, uint32_t user_classes)
{
	struct mlx5_class_driver *driver;
	uint32_t enabled_classes = 0;
	bool already_loaded;
	int ret;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((driver->drv_class & user_classes) == 0)
			continue;
		if (!mlx5_bus_match(driver, dev->dev))
			continue;
		already_loaded = dev->classes_loaded & driver->drv_class;
		if (already_loaded && driver->probe_again == 0) {
			DRV_LOG(ERR, "Device %s is already probed",
				dev->dev->name);
			ret = -EEXIST;
			goto probe_err;
		}
		ret = driver->probe(dev);
		if (ret < 0) {
			DRV_LOG(ERR, "Failed to load driver %s",
				driver->name);
			goto probe_err;
		}
		enabled_classes |= driver->drv_class;
	}
	dev->classes_loaded |= enabled_classes;
	return 0;
probe_err:
	/* Only unload drivers which were enabled in this probe instance. */
	drivers_remove(dev, enabled_classes);
	return ret;
}
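
/*
 * Illustrative sketch (hypothetical names, not a real registration in this
 * file): drivers_probe() above calls the probe() callback only of class
 * drivers whose class bit was requested by the user and which match the bus;
 * a repeated probe of the same device is accepted only when the driver sets
 * probe_again.  A class driver descriptor therefore looks roughly like:
 *
 *	static struct mlx5_class_driver hypothetical_driver = {
 *		.drv_class = MLX5_CLASS_REGEX,
 *		.name = "hypothetical_regex",
 *		.probe = hypothetical_probe,
 *		.remove = hypothetical_remove,
 *		.probe_again = 1,	// tolerate repeated probing
 *	};
 */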

int
mlx5_common_dev_probe(struct rte_device *eal_dev)
{
	struct mlx5_common_device *dev;
	uint32_t classes = 0;
	bool new_device = false;
	int ret;

	DRV_LOG(INFO, "probe device \"%s\".", eal_dev->name);
	ret = parse_class_options(eal_dev->devargs);
	if (ret < 0) {
		DRV_LOG(ERR, "Unsupported mlx5 class type: %s",
			eal_dev->devargs->args);
		return ret;
	}
	classes = ret;
	if (classes == 0)
		/* Default to net class. */
		classes = MLX5_CLASS_ETH;
	dev = to_mlx5_device(eal_dev);
	if (!dev) {
		dev = rte_zmalloc("mlx5_common_device", sizeof(*dev), 0);
		if (!dev)
			return -ENOMEM;
		dev->dev = eal_dev;
		pthread_mutex_lock(&devices_list_lock);
		TAILQ_INSERT_HEAD(&devices_list, dev, next);
		pthread_mutex_unlock(&devices_list_lock);
		new_device = true;
	}
	/*
	 * Validate the combination here.
	 * For a new device, the classes_loaded field is 0, so only the
	 * classes given as user device arguments are checked.
	 */
	ret = is_valid_class_combination(classes | dev->classes_loaded);
	if (ret != 0) {
		DRV_LOG(ERR, "Unsupported mlx5 classes combination.");
		goto class_err;
	}
	ret = drivers_probe(dev, classes);
	if (ret)
		goto class_err;
	return 0;
class_err:
	if (new_device)
		dev_release(dev);
	return ret;
}

int
mlx5_common_dev_remove(struct rte_device *eal_dev)
{
	struct mlx5_common_device *dev;
	int ret;

	dev = to_mlx5_device(eal_dev);
	if (!dev)
		return -ENODEV;
	/* Matching device found, cleanup and unload drivers. */
	ret = drivers_remove(dev, dev->classes_loaded);
	if (ret == 0)
		dev_release(dev);
	return ret;
}

int
mlx5_common_dev_dma_map(struct rte_device *dev, void *addr, uint64_t iova,
			size_t len)
{
	struct mlx5_class_driver *driver = NULL;
	struct mlx5_class_driver *temp;
	struct mlx5_common_device *mdev;
	int ret = -EINVAL;

	mdev = to_mlx5_device(dev);
	if (!mdev)
		return -ENODEV;
	TAILQ_FOREACH(driver, &drivers_list, next) {
		if (!device_class_enabled(mdev, driver->drv_class) ||
		    driver->dma_map == NULL)
			continue;
		ret = driver->dma_map(dev, addr, iova, len);
		if (ret)
			goto map_err;
	}
	return ret;
map_err:
	TAILQ_FOREACH(temp, &drivers_list, next) {
		if (temp == driver)
			break;
		if (device_class_enabled(mdev, temp->drv_class) &&
		    temp->dma_map && temp->dma_unmap)
			temp->dma_unmap(dev, addr, iova, len);
	}
	return ret;
}

int
mlx5_common_dev_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova,
			  size_t len)
{
	struct mlx5_class_driver *driver;
	struct mlx5_common_device *mdev;
	int local_ret = -EINVAL;
	int ret = 0;

	mdev = to_mlx5_device(dev);
	if (!mdev)
		return -ENODEV;
	/* There is no unmap error recovery in current implementation. */
	TAILQ_FOREACH_REVERSE(driver, &drivers_list, mlx5_drivers, next) {
		if (!device_class_enabled(mdev, driver->drv_class) ||
		    driver->dma_unmap == NULL)
			continue;
		local_ret = driver->dma_unmap(dev, addr, iova, len);
		if (local_ret && (ret == 0))
			ret = local_ret;
	}
	if (local_ret)
		ret = local_ret;
	return ret;
}

void
mlx5_class_driver_register(struct mlx5_class_driver *driver)
{
	mlx5_common_driver_on_register_pci(driver);
	TAILQ_INSERT_TAIL(&drivers_list, driver, next);
}

static void mlx5_common_driver_init(void)
{
	mlx5_common_pci_init();
#ifdef RTE_EXEC_ENV_LINUX
	mlx5_common_auxiliary_init();
#endif
}

static bool mlx5_common_initialized;

/**
 * One-time initialization routine for the run-time dependency on the glue
 * library shared by multiple PMDs. Each mlx5 PMD that depends on the
 * mlx5_common module must invoke it in its constructor.
 */
void
mlx5_common_init(void)
{
	if (mlx5_common_initialized)
		return;

	pthread_mutex_init(&devices_list_lock, NULL);
	mlx5_glue_constructor();
	mlx5_common_driver_init();
	mlx5_common_initialized = true;
}
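
/*
 * Illustrative sketch (hypothetical names): a class PMD depending on
 * mlx5_common wires itself up from its own constructor, first calling
 * mlx5_common_init() and then registering its descriptor (see the sketch
 * after drivers_probe() above):
 *
 *	RTE_INIT(hypothetical_pmd_init)
 *	{
 *		mlx5_common_init();
 *		mlx5_class_driver_register(&hypothetical_driver);
 *	}
 */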

/**
 * This function is responsible for initializing the variable
 * haswell_broadwell_cpu by checking if the CPU is Intel
 * and reading the data returned from mlx5_cpu_id().
 * Since Haswell and Broadwell CPUs don't gain performance
 * from relaxed ordering, the CPU type is checked before
 * deciding whether to enable RO or not.
 * If the CPU is Haswell or Broadwell the variable is set to 1,
 * otherwise it is 0.
 */
RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
{
#ifdef RTE_ARCH_X86_64
	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
	unsigned int i, model, family, brand_id, vendor;
	unsigned int signature_intel_ebx = 0x756e6547;
	unsigned int extended_model;
	unsigned int eax = 0;
	unsigned int ebx = 0;
	unsigned int ecx = 0;
	unsigned int edx = 0;
	int max_level;

	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
	vendor = ebx;
	max_level = eax;
	if (max_level < 1) {
		haswell_broadwell_cpu = 0;
		return;
	}
	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
	model = (eax >> 4) & 0x0f;
	family = (eax >> 8) & 0x0f;
	brand_id = ebx & 0xff;
	extended_model = (eax >> 12) & 0xf0;
	/* Check if the processor is Haswell or Broadwell. */
	if (vendor == signature_intel_ebx) {
		if (family == 0x06)
			model += extended_model;
		if (brand_id == 0 && family == 0x6) {
			for (i = 0; i < RTE_DIM(broadwell_models); i++)
				if (model == broadwell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
			for (i = 0; i < RTE_DIM(haswell_models); i++)
				if (model == haswell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
		}
	}
#endif
	haswell_broadwell_cpu = 0;
}
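
/*
 * Illustrative note (the consuming code lives outside this file, names below
 * are hypothetical): other mlx5 modules read haswell_broadwell_cpu, filled in
 * by the constructor above, when deciding whether requesting PCIe relaxed
 * ordering is worthwhile, roughly:
 *
 *	attr.relaxed_ordering = user_requested_ro && !haswell_broadwell_cpu;
 */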

/**
 * Allocate the User Access Region with DevX on specified device.
 *
 * @param [in] ctx
 *   Infiniband device context to perform allocation on.
 * @param [in] mapping
 *   MLX5DV_UAR_ALLOC_TYPE_BF - allocate as cached memory with write-combining
 *				attributes (if supported by the host), the
 *				writes to the UAR registers must be followed
 *				by write memory barrier.
 *   MLX5DV_UAR_ALLOC_TYPE_NC - allocate as non-cached memory, all writes are
 *				promoted to the registers immediately, no
 *				memory barriers needed.
 *   mapping < 0 - the first attempt is performed with
 *		   MLX5DV_UAR_ALLOC_TYPE_BF, if this fails the next attempt
 *		   with MLX5DV_UAR_ALLOC_TYPE_NC is performed. The drivers
 *		   specifying negative values should always provide the write
 *		   memory barrier operation after UAR register writings.
 *   If there are no definitions for MLX5DV_UAR_ALLOC_TYPE_xx (older rdma-core
 *   library headers), the caller can specify 0.
 *
 * @return
 *   UAR object pointer on success, NULL otherwise and rte_errno is set.
 */
void *
mlx5_devx_alloc_uar(void *ctx, int mapping)
{
	void *uar;
	uint32_t retry, uar_mapping;
	void *base_addr;

	for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		/* Control the mapping type according to the settings. */
		uar_mapping = (mapping < 0) ?
			      MLX5DV_UAR_ALLOC_TYPE_NC : mapping;
#else
		/*
		 * It seems we have no way to control the memory mapping type
		 * for the UAR, the default "Write-Combining" type is assumed.
		 */
		uar_mapping = 0;
		RTE_SET_USED(mapping);
#endif
		uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		if (!uar &&
		    mapping < 0 &&
		    uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
			/*
			 * In some environments like virtual machines the
			 * Write-Combining mapping might not be supported and
			 * UAR allocation fails. We try the "Non-Cached"
			 * mapping for that case.
			 */
			DRV_LOG(WARNING, "Failed to allocate DevX UAR (BF)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
			uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
		} else if (!uar &&
			   mapping < 0 &&
			   uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
			/*
			 * If Verbs/kernel does not support "Non-Cached",
			 * try the "Write-Combining" mapping.
			 */
			DRV_LOG(WARNING, "Failed to allocate DevX UAR (NC)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
			uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
		}
#endif
		if (!uar) {
			DRV_LOG(ERR, "Failed to allocate DevX UAR (BF/NC)");
			rte_errno = ENOMEM;
			goto exit;
		}
		base_addr = mlx5_os_get_devx_uar_base_addr(uar);
		if (base_addr)
			break;
		/*
		 * The UARs are allocated by rdma_core within the
		 * IB device context, on context closure all UARs
		 * will be freed, should be no memory/object leakage.
		 */
		DRV_LOG(WARNING, "Retrying to allocate DevX UAR");
		uar = NULL;
	}
	/* Check whether we finally succeeded with valid UAR allocation. */
	if (!uar) {
		DRV_LOG(ERR, "Failed to allocate DevX UAR (NULL base)");
		rte_errno = ENOMEM;
	}
	/*
	 * Return void * instead of struct mlx5dv_devx_uar *
	 * for compatibility with older rdma-core library headers.
	 */
exit:
	return uar;
}

RTE_PMD_EXPORT_NAME(mlx5_common_driver, __COUNTER__);