/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <string.h>
#include <stdio.h>

#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_class.h>
#include <rte_malloc.h>

#include "mlx5_common.h"
#include "mlx5_common_os.h"
#include "mlx5_common_log.h"
#include "mlx5_common_private.h"

uint8_t haswell_broadwell_cpu;

/* On x86_64 Intel processors, check whether relaxed ordering
 * should be used.
 */
#ifdef RTE_ARCH_X86_64
/**
 * This function returns processor identification and feature information
 * into the registers.
 *
 * @param eax, ebx, ecx, edx
 *   Pointers to the registers that will hold CPU information.
 * @param level
 *   The main category of information returned.
 */
static inline void mlx5_cpu_id(unsigned int level,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	__asm__("cpuid\n\t"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (level));
}
#endif

RTE_LOG_REGISTER_DEFAULT(mlx5_common_logtype, NOTICE)

/* Head of list of drivers. */
static TAILQ_HEAD(mlx5_drivers, mlx5_class_driver) drivers_list =
				TAILQ_HEAD_INITIALIZER(drivers_list);

/* Head of devices. */
static TAILQ_HEAD(mlx5_devices, mlx5_common_device) devices_list =
				TAILQ_HEAD_INITIALIZER(devices_list);

static const struct {
	const char *name;
	unsigned int drv_class;
} mlx5_classes[] = {
	{ .name = "vdpa", .drv_class = MLX5_CLASS_VDPA },
	{ .name = "eth", .drv_class = MLX5_CLASS_ETH },
	/* Keep class "net" for backward compatibility. */
	{ .name = "net", .drv_class = MLX5_CLASS_ETH },
	{ .name = "regex", .drv_class = MLX5_CLASS_REGEX },
	{ .name = "compress", .drv_class = MLX5_CLASS_COMPRESS },
	{ .name = "crypto", .drv_class = MLX5_CLASS_CRYPTO },
};

static int
class_name_to_value(const char *class_name)
{
	unsigned int i;

	for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
		if (strcmp(class_name, mlx5_classes[i].name) == 0)
			return mlx5_classes[i].drv_class;
	}
	return -EINVAL;
}

static struct mlx5_class_driver *
driver_get(uint32_t class)
{
	struct mlx5_class_driver *driver;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((uint32_t)driver->drv_class == class)
			return driver;
	}
	return NULL;
}
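
/*
 * Illustrative usage (a sketch, not taken from this file): the "class"
 * device argument selects which class drivers to probe, either as a single
 * name or as a colon-separated list, for example:
 *
 *	-a <PCI BDF>,class=eth
 *	-a <PCI BDF>,class=regex:compress
 *
 * Each token is translated by class_name_to_value() above and OR-ed into
 * the resulting class mask by devargs_class_handler() below.
 */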

static int
devargs_class_handler(__rte_unused const char *key,
		      const char *class_names, void *opaque)
{
	int *ret = opaque;
	int class_val;
	char *scratch;
	char *found;
	char *refstr = NULL;

	*ret = 0;
	scratch = strdup(class_names);
	if (scratch == NULL) {
		*ret = -ENOMEM;
		return *ret;
	}
	found = strtok_r(scratch, ":", &refstr);
	if (found == NULL)
		/* Empty string. */
		goto err;
	do {
		/* Extract each individual class name. Multiple
		 * classes can be supplied as class=net:regex:foo:bar.
		 */
		class_val = class_name_to_value(found);
		/* Check if it is a valid class. */
		if (class_val < 0) {
			*ret = -EINVAL;
			goto err;
		}
		*ret |= class_val;
		found = strtok_r(NULL, ":", &refstr);
	} while (found != NULL);
err:
	free(scratch);
	if (*ret < 0)
		DRV_LOG(ERR, "Invalid mlx5 class options: %s.\n", class_names);
	return *ret;
}

static int
parse_class_options(const struct rte_devargs *devargs)
{
	struct rte_kvargs *kvlist;
	int ret = 0;

	if (devargs == NULL)
		return 0;
	if (devargs->cls != NULL && devargs->cls->name != NULL)
		/* Global syntax, only one class type. */
		return class_name_to_value(devargs->cls->name);
	/* Legacy devargs support multiple classes. */
	kvlist = rte_kvargs_parse(devargs->args, NULL);
	if (kvlist == NULL)
		return 0;
	rte_kvargs_process(kvlist, RTE_DEVARGS_KEY_CLASS,
			   devargs_class_handler, &ret);
	rte_kvargs_free(kvlist);
	return ret;
}

static const unsigned int mlx5_class_invalid_combinations[] = {
	MLX5_CLASS_ETH | MLX5_CLASS_VDPA,
	/* New invalid class combinations should be added here. */
};

static int
is_valid_class_combination(uint32_t user_classes)
{
	unsigned int i;

	/* Verify if the user specified an unsupported combination. */
	for (i = 0; i < RTE_DIM(mlx5_class_invalid_combinations); i++) {
		if ((mlx5_class_invalid_combinations[i] & user_classes) ==
		    mlx5_class_invalid_combinations[i])
			return -EINVAL;
	}
	/* No invalid class combination found. */
	return 0;
}

static bool
device_class_enabled(const struct mlx5_common_device *device, uint32_t class)
{
	return (device->classes_loaded & class) > 0;
}

static bool
mlx5_bus_match(const struct mlx5_class_driver *drv,
	       const struct rte_device *dev)
{
	if (mlx5_dev_is_pci(dev))
		return mlx5_dev_pci_match(drv, dev);
	return true;
}

static struct mlx5_common_device *
to_mlx5_device(const struct rte_device *rte_dev)
{
	struct mlx5_common_device *dev;

	TAILQ_FOREACH(dev, &devices_list, next) {
		if (rte_dev == dev->dev)
			return dev;
	}
	return NULL;
}
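
/*
 * Illustrative example (an assumption about a typical device name, not
 * taken from this file): an EAL device named "08:00.0" (<BDF>) is parsed
 * by rte_pci_addr_parse() and re-formatted by rte_pci_device_name() into
 * the full "0000:08:00.0" (<DBDF>) form.
 */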

int
mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size)
{
	struct rte_pci_addr pci_addr = { 0 };
	int ret;

	if (mlx5_dev_is_pci(dev)) {
		/* Input might be <BDF>, format PCI address to <DBDF>. */
		ret = rte_pci_addr_parse(dev->name, &pci_addr);
		if (ret != 0)
			return -ENODEV;
		rte_pci_device_name(&pci_addr, addr, size);
		return 0;
	}
#ifdef RTE_EXEC_ENV_LINUX
	return mlx5_auxiliary_get_pci_str(RTE_DEV_TO_AUXILIARY_CONST(dev),
					  addr, size);
#else
	rte_errno = ENODEV;
	return -rte_errno;
#endif
}

static void
dev_release(struct mlx5_common_device *dev)
{
	TAILQ_REMOVE(&devices_list, dev, next);
	rte_free(dev);
}

static int
drivers_remove(struct mlx5_common_device *dev, uint32_t enabled_classes)
{
	struct mlx5_class_driver *driver;
	int local_ret = -ENODEV;
	unsigned int i = 0;
	int ret = 0;

	enabled_classes &= dev->classes_loaded;
	while (enabled_classes) {
		driver = driver_get(RTE_BIT64(i));
		if (driver != NULL) {
			local_ret = driver->remove(dev->dev);
			if (local_ret == 0)
				dev->classes_loaded &= ~RTE_BIT64(i);
			else if (ret == 0)
				ret = local_ret;
		}
		enabled_classes &= ~RTE_BIT64(i);
		i++;
	}
	if (local_ret != 0 && ret == 0)
		ret = local_ret;
	return ret;
}

static int
drivers_probe(struct mlx5_common_device *dev, uint32_t user_classes)
{
	struct mlx5_class_driver *driver;
	uint32_t enabled_classes = 0;
	bool already_loaded;
	int ret;

	TAILQ_FOREACH(driver, &drivers_list, next) {
		if ((driver->drv_class & user_classes) == 0)
			continue;
		if (!mlx5_bus_match(driver, dev->dev))
			continue;
		already_loaded = dev->classes_loaded & driver->drv_class;
		if (already_loaded && driver->probe_again == 0) {
			DRV_LOG(ERR, "Device %s is already probed",
				dev->dev->name);
			ret = -EEXIST;
			goto probe_err;
		}
		ret = driver->probe(dev->dev);
		if (ret < 0) {
			DRV_LOG(ERR, "Failed to load driver %s",
				driver->name);
			goto probe_err;
		}
		enabled_classes |= driver->drv_class;
	}
	dev->classes_loaded |= enabled_classes;
	return 0;
probe_err:
	/* Only unload drivers which were enabled in this probe instance. */
	drivers_remove(dev, enabled_classes);
	return ret;
}
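
/**
 * Probe an EAL device with all requested mlx5 class drivers.
 *
 * Parses the "class" device arguments (defaulting to the eth class),
 * finds or allocates the common device entry, validates the requested
 * class combination and probes every matching registered class driver.
 * A device entry allocated by this call is released again on failure.
 *
 * @param eal_dev
 *   Pointer to the generic EAL device structure.
 * @return
 *   0 on success, a negative value otherwise.
 */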
int
mlx5_common_dev_probe(struct rte_device *eal_dev)
{
	struct mlx5_common_device *dev;
	uint32_t classes = 0;
	bool new_device = false;
	int ret;

	DRV_LOG(INFO, "probe device \"%s\".", eal_dev->name);
	ret = parse_class_options(eal_dev->devargs);
	if (ret < 0) {
		DRV_LOG(ERR, "Unsupported mlx5 class type: %s",
			eal_dev->devargs->args);
		return ret;
	}
	classes = ret;
	if (classes == 0)
		/* Default to net class. */
		classes = MLX5_CLASS_ETH;
	dev = to_mlx5_device(eal_dev);
	if (!dev) {
		dev = rte_zmalloc("mlx5_common_device", sizeof(*dev), 0);
		if (!dev)
			return -ENOMEM;
		dev->dev = eal_dev;
		TAILQ_INSERT_HEAD(&devices_list, dev, next);
		new_device = true;
	}
	/*
	 * Validate the class combination here.
	 * For a new device, the classes_loaded field is 0 and only the
	 * classes given as user device arguments are checked.
	 */
	ret = is_valid_class_combination(classes | dev->classes_loaded);
	if (ret != 0) {
		DRV_LOG(ERR, "Unsupported mlx5 classes combination.");
		goto class_err;
	}
	ret = drivers_probe(dev, classes);
	if (ret)
		goto class_err;
	return 0;
class_err:
	if (new_device)
		dev_release(dev);
	return ret;
}

int
mlx5_common_dev_remove(struct rte_device *eal_dev)
{
	struct mlx5_common_device *dev;
	int ret;

	dev = to_mlx5_device(eal_dev);
	if (!dev)
		return -ENODEV;
	/* Matching device found, cleanup and unload drivers. */
	ret = drivers_remove(dev, dev->classes_loaded);
	if (ret == 0)
		dev_release(dev);
	return ret;
}

int
mlx5_common_dev_dma_map(struct rte_device *dev, void *addr, uint64_t iova,
			size_t len)
{
	struct mlx5_class_driver *driver = NULL;
	struct mlx5_class_driver *temp;
	struct mlx5_common_device *mdev;
	int ret = -EINVAL;

	mdev = to_mlx5_device(dev);
	if (!mdev)
		return -ENODEV;
	TAILQ_FOREACH(driver, &drivers_list, next) {
		if (!device_class_enabled(mdev, driver->drv_class) ||
		    driver->dma_map == NULL)
			continue;
		ret = driver->dma_map(dev, addr, iova, len);
		if (ret)
			goto map_err;
	}
	return ret;
map_err:
	TAILQ_FOREACH(temp, &drivers_list, next) {
		if (temp == driver)
			break;
		if (device_class_enabled(mdev, temp->drv_class) &&
		    temp->dma_map && temp->dma_unmap)
			temp->dma_unmap(dev, addr, iova, len);
	}
	return ret;
}

int
mlx5_common_dev_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova,
			  size_t len)
{
	struct mlx5_class_driver *driver;
	struct mlx5_common_device *mdev;
	int local_ret = -EINVAL;
	int ret = 0;

	mdev = to_mlx5_device(dev);
	if (!mdev)
		return -ENODEV;
	/* There is no unmap error recovery in the current implementation. */
	TAILQ_FOREACH_REVERSE(driver, &drivers_list, mlx5_drivers, next) {
		if (!device_class_enabled(mdev, driver->drv_class) ||
		    driver->dma_unmap == NULL)
			continue;
		local_ret = driver->dma_unmap(dev, addr, iova, len);
		if (local_ret && (ret == 0))
			ret = local_ret;
	}
	if (local_ret)
		ret = local_ret;
	return ret;
}

void
mlx5_class_driver_register(struct mlx5_class_driver *driver)
{
	mlx5_common_driver_on_register_pci(driver);
	TAILQ_INSERT_TAIL(&drivers_list, driver, next);
}

static void mlx5_common_driver_init(void)
{
	mlx5_common_pci_init();
#ifdef RTE_EXEC_ENV_LINUX
	mlx5_common_auxiliary_init();
#endif
}

static bool mlx5_common_initialized;

/**
 * One-time initialization routine for the run-time dependency on the glue
 * library shared by multiple PMDs. Each mlx5 PMD that depends on the
 * mlx5_common module must invoke it in its constructor.
 */
void
mlx5_common_init(void)
{
	if (mlx5_common_initialized)
		return;

	mlx5_glue_constructor();
	mlx5_common_driver_init();
	mlx5_common_initialized = true;
}
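
/*
 * Usage sketch (hypothetical PMD; the names below are illustrative only,
 * not part of this library): a class driver built on top of mlx5_common
 * typically initializes the common layer and registers itself from an EAL
 * constructor, for example:
 *
 *	static struct mlx5_class_driver hypothetical_driver = {
 *		.drv_class = MLX5_CLASS_ETH,
 *		.name = "hypothetical_mlx5_pmd",
 *		.probe = hypothetical_probe,
 *		.remove = hypothetical_remove,
 *	};
 *
 *	RTE_INIT(hypothetical_mlx5_pmd_init)
 *	{
 *		mlx5_common_init();
 *		mlx5_class_driver_register(&hypothetical_driver);
 *	}
 */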

/**
 * This function is responsible for initializing the variable
 * haswell_broadwell_cpu by checking if the CPU is Intel
 * and reading the data returned from mlx5_cpu_id().
 * Since Haswell and Broadwell CPUs do not show improved performance
 * when using relaxed ordering, the CPU type is checked before
 * deciding whether to enable RO or not.
 * If the CPU is Haswell or Broadwell the variable is set to 1,
 * otherwise it is set to 0.
 */
RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
{
#ifdef RTE_ARCH_X86_64
	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
	unsigned int i, model, family, brand_id, vendor;
	unsigned int signature_intel_ebx = 0x756e6547;
	unsigned int extended_model;
	unsigned int eax = 0;
	unsigned int ebx = 0;
	unsigned int ecx = 0;
	unsigned int edx = 0;
	int max_level;

	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
	vendor = ebx;
	max_level = eax;
	if (max_level < 1) {
		haswell_broadwell_cpu = 0;
		return;
	}
	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
	model = (eax >> 4) & 0x0f;
	family = (eax >> 8) & 0x0f;
	brand_id = ebx & 0xff;
	extended_model = (eax >> 12) & 0xf0;
	/* Check if the processor is Haswell or Broadwell. */
	if (vendor == signature_intel_ebx) {
		if (family == 0x06)
			model += extended_model;
		if (brand_id == 0 && family == 0x6) {
			for (i = 0; i < RTE_DIM(broadwell_models); i++)
				if (model == broadwell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
			for (i = 0; i < RTE_DIM(haswell_models); i++)
				if (model == haswell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
		}
	}
#endif
	haswell_broadwell_cpu = 0;
}
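
/*
 * Worked example (hypothetical CPU values, assuming the standard CPUID
 * leaf 1 layout): a Broadwell server CPU reporting family 0x6, model
 * nibble 0xF (EAX bits [7:4]) and extended model 0x4 (EAX bits [19:16])
 * yields extended_model = 0x40 and model = 0xF + 0x40 = 0x4F in the
 * initializer above, which matches broadwell_models[] and sets
 * haswell_broadwell_cpu to 1.
 */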
559 */ 560 DRV_LOG(WARNING, "Failed to allocate DevX UAR (BF)"); 561 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; 562 uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping); 563 } else if (!uar && 564 mapping < 0 && 565 uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) { 566 /* 567 * If Verbs/kernel does not support "Non-Cached" 568 * try the "Write-Combining". 569 */ 570 DRV_LOG(WARNING, "Failed to allocate DevX UAR (NC)"); 571 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF; 572 uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping); 573 } 574 #endif 575 if (!uar) { 576 DRV_LOG(ERR, "Failed to allocate DevX UAR (BF/NC)"); 577 rte_errno = ENOMEM; 578 goto exit; 579 } 580 base_addr = mlx5_os_get_devx_uar_base_addr(uar); 581 if (base_addr) 582 break; 583 /* 584 * The UARs are allocated by rdma_core within the 585 * IB device context, on context closure all UARs 586 * will be freed, should be no memory/object leakage. 587 */ 588 DRV_LOG(WARNING, "Retrying to allocate DevX UAR"); 589 uar = NULL; 590 } 591 /* Check whether we finally succeeded with valid UAR allocation. */ 592 if (!uar) { 593 DRV_LOG(ERR, "Failed to allocate DevX UAR (NULL base)"); 594 rte_errno = ENOMEM; 595 } 596 /* 597 * Return void * instead of struct mlx5dv_devx_uar * 598 * is for compatibility with older rdma-core library headers. 599 */ 600 exit: 601 return uar; 602 } 603 604 RTE_PMD_EXPORT_NAME(mlx5_common_driver, __COUNTER__); 605