/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <string.h>
#include <stdio.h>

#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_class.h>
#include <rte_malloc.h>

#include "mlx5_common.h"
#include "mlx5_common_os.h"
#include "mlx5_common_log.h"
#include "mlx5_common_private.h"

uint8_t haswell_broadwell_cpu;

/* In case this is an x86_64 Intel processor, check whether relaxed
 * ordering should be used.
 */
#ifdef RTE_ARCH_X86_64
/**
 * This function returns processor identification and feature information
 * into the registers.
 *
 * @param eax, ebx, ecx, edx
 *   Pointers to the registers that will hold CPU information.
 * @param level
 *   The main category of information returned.
 */
static inline void mlx5_cpu_id(unsigned int level,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	__asm__("cpuid\n\t"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (level));
}
#endif

RTE_LOG_REGISTER_DEFAULT(mlx5_common_logtype, NOTICE)

/* Head of list of drivers. */
static TAILQ_HEAD(mlx5_drivers, mlx5_class_driver) drivers_list =
				TAILQ_HEAD_INITIALIZER(drivers_list);

/* Head of devices. */
static TAILQ_HEAD(mlx5_devices, mlx5_common_device) devices_list =
				TAILQ_HEAD_INITIALIZER(devices_list);
static pthread_mutex_t devices_list_lock;

static const struct {
	const char *name;
	unsigned int drv_class;
} mlx5_classes[] = {
	{ .name = "vdpa", .drv_class = MLX5_CLASS_VDPA },
	{ .name = "eth", .drv_class = MLX5_CLASS_ETH },
	/* Keep class "net" for backward compatibility. */
	{ .name = "net", .drv_class = MLX5_CLASS_ETH },
	{ .name = "regex", .drv_class = MLX5_CLASS_REGEX },
	{ .name = "compress", .drv_class = MLX5_CLASS_COMPRESS },
	{ .name = "crypto", .drv_class = MLX5_CLASS_CRYPTO },
};

static int
class_name_to_value(const char *class_name)
{
	unsigned int i;

	for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
		if (strcmp(class_name, mlx5_classes[i].name) == 0)
			return mlx5_classes[i].drv_class;
	}
	return -EINVAL;
}

static struct mlx5_class_driver *
driver_get(uint32_t class)
{
	struct mlx5_class_driver *driver;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((uint32_t)driver->drv_class == class)
			return driver;
	}
	return NULL;
}

static int
devargs_class_handler(__rte_unused const char *key,
		      const char *class_names, void *opaque)
{
	int *ret = opaque;
	int class_val;
	char *scratch;
	char *found;
	char *refstr = NULL;

	*ret = 0;
	scratch = strdup(class_names);
	if (scratch == NULL) {
		*ret = -ENOMEM;
		return *ret;
	}
	found = strtok_r(scratch, ":", &refstr);
	if (found == NULL)
		/* Empty string. */
		goto err;
	do {
		/* Extract each individual class name. Multiple
		 * classes can be supplied as class=net:regex:foo:bar.
		 */
		class_val = class_name_to_value(found);
		/* Check if it's a valid class. */
		if (class_val < 0) {
			*ret = -EINVAL;
			goto err;
		}
		*ret |= class_val;
		found = strtok_r(NULL, ":", &refstr);
	} while (found != NULL);
err:
	free(scratch);
	if (*ret < 0)
		DRV_LOG(ERR, "Invalid mlx5 class options: %s.\n", class_names);
	return *ret;
}
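
/*
 * Illustrative sketch (kept as a comment, not compiled) of how the handler
 * above folds a legacy devargs string into a class mask. Probing a device
 * with, for example, "class=net:regex" makes rte_kvargs_process() call
 * devargs_class_handler() with class_names = "net:regex"; each ':'-separated
 * token is resolved by class_name_to_value() and ORed into the result:
 *
 *	*ret = MLX5_CLASS_ETH | MLX5_CLASS_REGEX;
 *
 * Any unknown token makes the whole parse fail with -EINVAL.
 */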

static int
parse_class_options(const struct rte_devargs *devargs)
{
	struct rte_kvargs *kvlist;
	int ret = 0;

	if (devargs == NULL)
		return 0;
	if (devargs->cls != NULL && devargs->cls->name != NULL)
		/* Global syntax, only one class type. */
		return class_name_to_value(devargs->cls->name);
	/* Legacy devargs support multiple classes. */
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL)
		return 0;
	rte_kvargs_process(kvlist, RTE_DEVARGS_KEY_CLASS,
			   devargs_class_handler, &ret);
	rte_kvargs_free(kvlist);
	return ret;
}

static const unsigned int mlx5_class_invalid_combinations[] = {
	MLX5_CLASS_ETH | MLX5_CLASS_VDPA,
	/* New class combination should be added here. */
};

static int
is_valid_class_combination(uint32_t user_classes)
{
	unsigned int i;

	/* Verify whether the user specified an unsupported combination. */
	for (i = 0; i < RTE_DIM(mlx5_class_invalid_combinations); i++) {
		if ((mlx5_class_invalid_combinations[i] & user_classes) ==
		    mlx5_class_invalid_combinations[i])
			return -EINVAL;
	}
	/* No invalid class combination found. */
	return 0;
}

static bool
device_class_enabled(const struct mlx5_common_device *device, uint32_t class)
{
	return (device->classes_loaded & class) > 0;
}

static bool
mlx5_bus_match(const struct mlx5_class_driver *drv,
	       const struct rte_device *dev)
{
	if (mlx5_dev_is_pci(dev))
		return mlx5_dev_pci_match(drv, dev);
	return true;
}

static struct mlx5_common_device *
to_mlx5_device(const struct rte_device *rte_dev)
{
	struct mlx5_common_device *dev;

	TAILQ_FOREACH(dev, &devices_list, next) {
		if (rte_dev == dev->dev)
			return dev;
	}
	return NULL;
}

int
mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size)
{
	struct rte_pci_addr pci_addr = { 0 };
	int ret;

	if (mlx5_dev_is_pci(dev)) {
		/* Input might be <BDF>, format PCI address to <DBDF>. */
		ret = rte_pci_addr_parse(dev->name, &pci_addr);
		if (ret != 0)
			return -ENODEV;
		rte_pci_device_name(&pci_addr, addr, size);
		return 0;
	}
#ifdef RTE_EXEC_ENV_LINUX
	return mlx5_auxiliary_get_pci_str(RTE_DEV_TO_AUXILIARY_CONST(dev),
					  addr, size);
#else
	rte_errno = ENODEV;
	return -rte_errno;
#endif
}
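
/*
 * Illustrative sketch (comment only, not compiled) of the helper above,
 * assuming a probed rte_device pointer "dev" and the PCI_PRI_STR_SIZE
 * constant from rte_pci.h:
 *
 *	char pci_str[PCI_PRI_STR_SIZE];
 *
 *	if (mlx5_dev_to_pci_str(dev, pci_str, sizeof(pci_str)) >= 0)
 *		DRV_LOG(DEBUG, "device %s -> PCI %s", dev->name, pci_str);
 *
 * A PCI name given as <BDF> ("08:00.0") comes back as <DBDF>
 * ("0000:08:00.0"); for an auxiliary device on Linux the string is taken
 * from the underlying PCI parent via mlx5_auxiliary_get_pci_str().
 */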

static void
dev_release(struct mlx5_common_device *dev)
{
	pthread_mutex_lock(&devices_list_lock);
	TAILQ_REMOVE(&devices_list, dev, next);
	pthread_mutex_unlock(&devices_list_lock);
	rte_free(dev);
}

static int
drivers_remove(struct mlx5_common_device *dev, uint32_t enabled_classes)
{
	struct mlx5_class_driver *driver;
	int local_ret = -ENODEV;
	unsigned int i = 0;
	int ret = 0;

	enabled_classes &= dev->classes_loaded;
	while (enabled_classes) {
		driver = driver_get(RTE_BIT64(i));
		if (driver != NULL) {
			local_ret = driver->remove(dev->dev);
			if (local_ret == 0)
				dev->classes_loaded &= ~RTE_BIT64(i);
			else if (ret == 0)
				ret = local_ret;
		}
		enabled_classes &= ~RTE_BIT64(i);
		i++;
	}
	if (local_ret != 0 && ret == 0)
		ret = local_ret;
	return ret;
}

static int
drivers_probe(struct mlx5_common_device *dev, uint32_t user_classes)
{
	struct mlx5_class_driver *driver;
	uint32_t enabled_classes = 0;
	bool already_loaded;
	int ret;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((driver->drv_class & user_classes) == 0)
			continue;
		if (!mlx5_bus_match(driver, dev->dev))
			continue;
		already_loaded = dev->classes_loaded & driver->drv_class;
		if (already_loaded && driver->probe_again == 0) {
			DRV_LOG(ERR, "Device %s is already probed",
				dev->dev->name);
			ret = -EEXIST;
			goto probe_err;
		}
		ret = driver->probe(dev->dev);
		if (ret < 0) {
			DRV_LOG(ERR, "Failed to load driver %s",
				driver->name);
			goto probe_err;
		}
		enabled_classes |= driver->drv_class;
	}
	dev->classes_loaded |= enabled_classes;
	return 0;
probe_err:
	/* Only unload drivers which were enabled in this probe instance. */
	drivers_remove(dev, enabled_classes);
	return ret;
}

int
mlx5_common_dev_probe(struct rte_device *eal_dev)
{
	struct mlx5_common_device *dev;
	uint32_t classes = 0;
	bool new_device = false;
	int ret;

	DRV_LOG(INFO, "probe device \"%s\".", eal_dev->name);
	ret = parse_class_options(eal_dev->devargs);
	if (ret < 0) {
		DRV_LOG(ERR, "Unsupported mlx5 class type: %s",
			eal_dev->devargs->args);
		return ret;
	}
	classes = ret;
	if (classes == 0)
		/* Default to net class. */
		classes = MLX5_CLASS_ETH;
	dev = to_mlx5_device(eal_dev);
	if (!dev) {
		dev = rte_zmalloc("mlx5_common_device", sizeof(*dev), 0);
		if (!dev)
			return -ENOMEM;
		dev->dev = eal_dev;
		pthread_mutex_lock(&devices_list_lock);
		TAILQ_INSERT_HEAD(&devices_list, dev, next);
		pthread_mutex_unlock(&devices_list_lock);
		new_device = true;
	}
	/*
	 * Validate the class combination here.
	 * For a new device, the classes_loaded field is 0 and only the
	 * classes given as user device arguments are checked.
	 */
	ret = is_valid_class_combination(classes | dev->classes_loaded);
	if (ret != 0) {
		DRV_LOG(ERR, "Unsupported mlx5 classes combination.");
		goto class_err;
	}
	ret = drivers_probe(dev, classes);
	if (ret)
		goto class_err;
	return 0;
class_err:
	if (new_device)
		dev_release(dev);
	return ret;
}
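
/*
 * Worked example (comment only) of the probe path above: if a device that
 * already has MLX5_CLASS_ETH loaded is probed again with "class=vdpa",
 * the validation step computes
 *
 *	is_valid_class_combination(MLX5_CLASS_VDPA | MLX5_CLASS_ETH)
 *
 * which matches the first entry of mlx5_class_invalid_combinations[] and the
 * probe is rejected with -EINVAL. On success, drivers_probe() ORs the newly
 * loaded classes into dev->classes_loaded so a later probe-again or remove
 * knows which class drivers currently own the device.
 */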

int
mlx5_common_dev_remove(struct rte_device *eal_dev)
{
	struct mlx5_common_device *dev;
	int ret;

	dev = to_mlx5_device(eal_dev);
	if (!dev)
		return -ENODEV;
	/* Matching device found, cleanup and unload drivers. */
	ret = drivers_remove(dev, dev->classes_loaded);
	if (ret == 0)
		dev_release(dev);
	return ret;
}

int
mlx5_common_dev_dma_map(struct rte_device *dev, void *addr, uint64_t iova,
			size_t len)
{
	struct mlx5_class_driver *driver = NULL;
	struct mlx5_class_driver *temp;
	struct mlx5_common_device *mdev;
	int ret = -EINVAL;

	mdev = to_mlx5_device(dev);
	if (!mdev)
		return -ENODEV;
	TAILQ_FOREACH(driver, &drivers_list, next) {
		if (!device_class_enabled(mdev, driver->drv_class) ||
		    driver->dma_map == NULL)
			continue;
		ret = driver->dma_map(dev, addr, iova, len);
		if (ret)
			goto map_err;
	}
	return ret;
map_err:
	TAILQ_FOREACH(temp, &drivers_list, next) {
		if (temp == driver)
			break;
		if (device_class_enabled(mdev, temp->drv_class) &&
		    temp->dma_map && temp->dma_unmap)
			temp->dma_unmap(dev, addr, iova, len);
	}
	return ret;
}

int
mlx5_common_dev_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova,
			  size_t len)
{
	struct mlx5_class_driver *driver;
	struct mlx5_common_device *mdev;
	int local_ret = -EINVAL;
	int ret = 0;

	mdev = to_mlx5_device(dev);
	if (!mdev)
		return -ENODEV;
	/* There is no unmap error recovery in current implementation. */
	TAILQ_FOREACH_REVERSE(driver, &drivers_list, mlx5_drivers, next) {
		if (!device_class_enabled(mdev, driver->drv_class) ||
		    driver->dma_unmap == NULL)
			continue;
		local_ret = driver->dma_unmap(dev, addr, iova, len);
		if (local_ret && (ret == 0))
			ret = local_ret;
	}
	if (local_ret)
		ret = local_ret;
	return ret;
}

void
mlx5_class_driver_register(struct mlx5_class_driver *driver)
{
	mlx5_common_driver_on_register_pci(driver);
	TAILQ_INSERT_TAIL(&drivers_list, driver, next);
}

static void mlx5_common_driver_init(void)
{
	mlx5_common_pci_init();
#ifdef RTE_EXEC_ENV_LINUX
	mlx5_common_auxiliary_init();
#endif
}

static bool mlx5_common_initialized;

/**
 * One-time initialization routine for run-time dependency on the glue library
 * for multiple PMDs. Each mlx5 PMD that depends on the mlx5_common module
 * must invoke it in its constructor.
 */
void
mlx5_common_init(void)
{
	if (mlx5_common_initialized)
		return;

	pthread_mutex_init(&devices_list_lock, NULL);
	mlx5_glue_constructor();
	mlx5_common_driver_init();
	mlx5_common_initialized = true;
}
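
/*
 * Illustrative sketch (comment only, not compiled) of the expected
 * constructor of an mlx5 class PMD; the mlx5_foo_* names are hypothetical:
 *
 *	static struct mlx5_class_driver mlx5_foo_driver = {
 *		.drv_class = MLX5_CLASS_REGEX,
 *		.name = "mlx5_foo",
 *		.probe = mlx5_foo_probe,
 *		.remove = mlx5_foo_remove,
 *	};
 *
 *	RTE_INIT(mlx5_foo_init)
 *	{
 *		mlx5_common_init();
 *		mlx5_class_driver_register(&mlx5_foo_driver);
 *	}
 *
 * mlx5_common_init() is guarded by mlx5_common_initialized, so every class
 * PMD can call it safely from its own constructor before registering.
 */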

/**
 * This function is responsible for initializing the variable
 * haswell_broadwell_cpu by checking whether the CPU is Intel
 * and reading the data returned from mlx5_cpu_id().
 * Since Haswell and Broadwell CPUs do not show improved performance
 * with relaxed ordering, the CPU type is checked before deciding
 * whether to enable RO or not.
 * If the CPU is Haswell or Broadwell the variable will be set to 1,
 * otherwise it will be 0.
 */
RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
{
#ifdef RTE_ARCH_X86_64
	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
	unsigned int i, model, family, brand_id, vendor;
	unsigned int signature_intel_ebx = 0x756e6547;
	unsigned int extended_model;
	unsigned int eax = 0;
	unsigned int ebx = 0;
	unsigned int ecx = 0;
	unsigned int edx = 0;
	int max_level;

	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
	vendor = ebx;
	max_level = eax;
	if (max_level < 1) {
		haswell_broadwell_cpu = 0;
		return;
	}
	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
	model = (eax >> 4) & 0x0f;
	family = (eax >> 8) & 0x0f;
	brand_id = ebx & 0xff;
	extended_model = (eax >> 12) & 0xf0;
	/* Check if the processor is Haswell or Broadwell. */
	if (vendor == signature_intel_ebx) {
		if (family == 0x06)
			model += extended_model;
		if (brand_id == 0 && family == 0x6) {
			for (i = 0; i < RTE_DIM(broadwell_models); i++)
				if (model == broadwell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
			for (i = 0; i < RTE_DIM(haswell_models); i++)
				if (model == haswell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
		}
	}
#endif
	haswell_broadwell_cpu = 0;
}
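
/*
 * Worked example of the decoding above: a CPU reporting EAX = 0x000306d4
 * for CPUID leaf 1 yields
 *
 *	model          = (0x306d4 >> 4) & 0x0f  = 0x0d
 *	family         = (0x306d4 >> 8) & 0x0f  = 0x06
 *	extended_model = (0x306d4 >> 12) & 0xf0 = 0x30
 *
 * and since the family is 6 the display model becomes 0x0d + 0x30 = 0x3d,
 * which is listed in broadwell_models[], so haswell_broadwell_cpu is set
 * to 1.
 */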
565 */ 566 DRV_LOG(WARNING, "Failed to allocate DevX UAR (BF)"); 567 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; 568 uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping); 569 } else if (!uar && 570 mapping < 0 && 571 uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) { 572 /* 573 * If Verbs/kernel does not support "Non-Cached" 574 * try the "Write-Combining". 575 */ 576 DRV_LOG(WARNING, "Failed to allocate DevX UAR (NC)"); 577 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF; 578 uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping); 579 } 580 #endif 581 if (!uar) { 582 DRV_LOG(ERR, "Failed to allocate DevX UAR (BF/NC)"); 583 rte_errno = ENOMEM; 584 goto exit; 585 } 586 base_addr = mlx5_os_get_devx_uar_base_addr(uar); 587 if (base_addr) 588 break; 589 /* 590 * The UARs are allocated by rdma_core within the 591 * IB device context, on context closure all UARs 592 * will be freed, should be no memory/object leakage. 593 */ 594 DRV_LOG(WARNING, "Retrying to allocate DevX UAR"); 595 uar = NULL; 596 } 597 /* Check whether we finally succeeded with valid UAR allocation. */ 598 if (!uar) { 599 DRV_LOG(ERR, "Failed to allocate DevX UAR (NULL base)"); 600 rte_errno = ENOMEM; 601 } 602 /* 603 * Return void * instead of struct mlx5dv_devx_uar * 604 * is for compatibility with older rdma-core library headers. 605 */ 606 exit: 607 return uar; 608 } 609 610 RTE_PMD_EXPORT_NAME(mlx5_common_driver, __COUNTER__); 611