1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2019 Mellanox Technologies, Ltd 3 */ 4 5 #include <unistd.h> 6 #include <string.h> 7 #include <stdio.h> 8 #ifdef RTE_IBVERBS_LINK_DLOPEN 9 #include <dlfcn.h> 10 #endif 11 12 #include <rte_errno.h> 13 14 #include "mlx5_common.h" 15 #include "mlx5_common_utils.h" 16 #include "mlx5_glue.h" 17 18 19 int mlx5_common_logtype; 20 21 #ifdef MLX5_GLUE 22 const struct mlx5_glue *mlx5_glue; 23 #endif 24 25 uint8_t haswell_broadwell_cpu; 26 27 /** 28 * Get PCI information by sysfs device path. 29 * 30 * @param dev_path 31 * Pointer to device sysfs folder name. 32 * @param[out] pci_addr 33 * PCI bus address output buffer. 34 * 35 * @return 36 * 0 on success, a negative errno value otherwise and rte_errno is set. 37 */ 38 int 39 mlx5_dev_to_pci_addr(const char *dev_path, 40 struct rte_pci_addr *pci_addr) 41 { 42 FILE *file; 43 char line[32]; 44 MKSTR(path, "%s/device/uevent", dev_path); 45 46 file = fopen(path, "rb"); 47 if (file == NULL) { 48 rte_errno = errno; 49 return -rte_errno; 50 } 51 while (fgets(line, sizeof(line), file) == line) { 52 size_t len = strlen(line); 53 int ret; 54 55 /* Truncate long lines. */ 56 if (len == (sizeof(line) - 1)) 57 while (line[(len - 1)] != '\n') { 58 ret = fgetc(file); 59 if (ret == EOF) 60 break; 61 line[(len - 1)] = ret; 62 } 63 /* Extract information. */ 64 if (sscanf(line, 65 "PCI_SLOT_NAME=" 66 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 67 &pci_addr->domain, 68 &pci_addr->bus, 69 &pci_addr->devid, 70 &pci_addr->function) == 4) { 71 ret = 0; 72 break; 73 } 74 } 75 fclose(file); 76 return 0; 77 } 78 79 static int 80 mlx5_class_check_handler(__rte_unused const char *key, const char *value, 81 void *opaque) 82 { 83 enum mlx5_class *ret = opaque; 84 85 if (strcmp(value, "vdpa") == 0) { 86 *ret = MLX5_CLASS_VDPA; 87 } else if (strcmp(value, "net") == 0) { 88 *ret = MLX5_CLASS_NET; 89 } else { 90 DRV_LOG(ERR, "Invalid mlx5 class %s. Maybe typo in device" 91 " class argument setting?", value); 92 *ret = MLX5_CLASS_INVALID; 93 } 94 return 0; 95 } 96 97 enum mlx5_class 98 mlx5_class_get(struct rte_devargs *devargs) 99 { 100 struct rte_kvargs *kvlist; 101 const char *key = MLX5_CLASS_ARG_NAME; 102 enum mlx5_class ret = MLX5_CLASS_NET; 103 104 if (devargs == NULL) 105 return ret; 106 kvlist = rte_kvargs_parse(devargs->args, NULL); 107 if (kvlist == NULL) 108 return ret; 109 if (rte_kvargs_count(kvlist, key)) 110 rte_kvargs_process(kvlist, key, mlx5_class_check_handler, &ret); 111 rte_kvargs_free(kvlist); 112 return ret; 113 } 114 115 /** 116 * Extract port name, as a number, from sysfs or netlink information. 117 * 118 * @param[in] port_name_in 119 * String representing the port name. 120 * @param[out] port_info_out 121 * Port information, including port name as a number and port name 122 * type if recognized 123 * 124 * @return 125 * port_name field set according to recognized name format. 126 */ 127 void 128 mlx5_translate_port_name(const char *port_name_in, 129 struct mlx5_switch_info *port_info_out) 130 { 131 char pf_c1, pf_c2, vf_c1, vf_c2; 132 char *end; 133 int sc_items; 134 135 /* 136 * Check for port-name as a string of the form pf0vf0 137 * (support kernel ver >= 5.0 or OFED ver >= 4.6). 138 */ 139 sc_items = sscanf(port_name_in, "%c%c%d%c%c%d", 140 &pf_c1, &pf_c2, &port_info_out->pf_num, 141 &vf_c1, &vf_c2, &port_info_out->port_name); 142 if (sc_items == 6 && 143 pf_c1 == 'p' && pf_c2 == 'f' && 144 vf_c1 == 'v' && vf_c2 == 'f') { 145 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF; 146 return; 147 } 148 /* 149 * Check for port-name as a string of the form p0 150 * (support kernel ver >= 5.0, or OFED ver >= 4.6). 151 */ 152 sc_items = sscanf(port_name_in, "%c%d", 153 &pf_c1, &port_info_out->port_name); 154 if (sc_items == 2 && pf_c1 == 'p') { 155 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; 156 return; 157 } 158 /* Check for port-name as a number (support kernel ver < 5.0 */ 159 errno = 0; 160 port_info_out->port_name = strtol(port_name_in, &end, 0); 161 if (!errno && 162 (size_t)(end - port_name_in) == strlen(port_name_in)) { 163 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; 164 return; 165 } 166 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 167 return; 168 } 169 170 #ifdef MLX5_GLUE 171 172 /** 173 * Suffix RTE_EAL_PMD_PATH with "-glue". 174 * 175 * This function performs a sanity check on RTE_EAL_PMD_PATH before 176 * suffixing its last component. 177 * 178 * @param buf[out] 179 * Output buffer, should be large enough otherwise NULL is returned. 180 * @param size 181 * Size of @p out. 182 * 183 * @return 184 * Pointer to @p buf or @p NULL in case suffix cannot be appended. 185 */ 186 static char * 187 mlx5_glue_path(char *buf, size_t size) 188 { 189 static const char *const bad[] = { "/", ".", "..", NULL }; 190 const char *path = RTE_EAL_PMD_PATH; 191 size_t len = strlen(path); 192 size_t off; 193 int i; 194 195 while (len && path[len - 1] == '/') 196 --len; 197 for (off = len; off && path[off - 1] != '/'; --off) 198 ; 199 for (i = 0; bad[i]; ++i) 200 if (!strncmp(path + off, bad[i], (int)(len - off))) 201 goto error; 202 i = snprintf(buf, size, "%.*s-glue", (int)len, path); 203 if (i == -1 || (size_t)i >= size) 204 goto error; 205 return buf; 206 error: 207 RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of" 208 " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please" 209 " re-configure DPDK"); 210 return NULL; 211 } 212 213 static int 214 mlx5_glue_dlopen(void) 215 { 216 char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; 217 void *handle = NULL; 218 219 const char *path[] = { 220 /* 221 * A basic security check is necessary before trusting 222 * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH. 223 */ 224 (geteuid() == getuid() && getegid() == getgid() ? 225 getenv("MLX5_GLUE_PATH") : NULL), 226 /* 227 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed 228 * variant, otherwise let dlopen() look up libraries on its 229 * own. 230 */ 231 (*RTE_EAL_PMD_PATH ? 232 mlx5_glue_path(glue_path, sizeof(glue_path)) : ""), 233 }; 234 unsigned int i = 0; 235 void **sym; 236 const char *dlmsg; 237 238 while (!handle && i != RTE_DIM(path)) { 239 const char *end; 240 size_t len; 241 int ret; 242 243 if (!path[i]) { 244 ++i; 245 continue; 246 } 247 end = strpbrk(path[i], ":;"); 248 if (!end) 249 end = path[i] + strlen(path[i]); 250 len = end - path[i]; 251 ret = 0; 252 do { 253 char name[ret + 1]; 254 255 ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE, 256 (int)len, path[i], 257 (!len || *(end - 1) == '/') ? "" : "/"); 258 if (ret == -1) 259 break; 260 if (sizeof(name) != (size_t)ret + 1) 261 continue; 262 DRV_LOG(DEBUG, "Looking for rdma-core glue as " 263 "\"%s\"", name); 264 handle = dlopen(name, RTLD_LAZY); 265 break; 266 } while (1); 267 path[i] = end + 1; 268 if (!*end) 269 ++i; 270 } 271 if (!handle) { 272 rte_errno = EINVAL; 273 dlmsg = dlerror(); 274 if (dlmsg) 275 DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg); 276 goto glue_error; 277 } 278 sym = dlsym(handle, "mlx5_glue"); 279 if (!sym || !*sym) { 280 rte_errno = EINVAL; 281 dlmsg = dlerror(); 282 if (dlmsg) 283 DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg); 284 goto glue_error; 285 } 286 mlx5_glue = *sym; 287 return 0; 288 289 glue_error: 290 if (handle) 291 dlclose(handle); 292 return -1; 293 } 294 295 #endif 296 297 /* In case this is an x86_64 intel processor to check if 298 * we should use relaxed ordering. 299 */ 300 #ifdef RTE_ARCH_X86_64 301 /** 302 * This function returns processor identification and feature information 303 * into the registers. 304 * 305 * @param eax, ebx, ecx, edx 306 * Pointers to the registers that will hold cpu information. 307 * @param level 308 * The main category of information returned. 309 */ 310 static inline void mlx5_cpu_id(unsigned int level, 311 unsigned int *eax, unsigned int *ebx, 312 unsigned int *ecx, unsigned int *edx) 313 { 314 __asm__("cpuid\n\t" 315 : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) 316 : "0" (level)); 317 } 318 #endif 319 320 RTE_INIT_PRIO(mlx5_log_init, LOG) 321 { 322 mlx5_common_logtype = rte_log_register("pmd.common.mlx5"); 323 if (mlx5_common_logtype >= 0) 324 rte_log_set_level(mlx5_common_logtype, RTE_LOG_NOTICE); 325 } 326 327 /** 328 * Initialization routine for run-time dependency on rdma-core. 329 */ 330 RTE_INIT_PRIO(mlx5_glue_init, CLASS) 331 { 332 /* 333 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use 334 * huge pages. Calling ibv_fork_init() during init allows 335 * applications to use fork() safely for purposes other than 336 * using this PMD, which is not supported in forked processes. 337 */ 338 setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); 339 /* Match the size of Rx completion entry to the size of a cacheline. */ 340 if (RTE_CACHE_LINE_SIZE == 128) 341 setenv("MLX5_CQE_SIZE", "128", 0); 342 /* 343 * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to 344 * cleanup all the Verbs resources even when the device was removed. 345 */ 346 setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1); 347 348 #ifdef MLX5_GLUE 349 if (mlx5_glue_dlopen() != 0) 350 goto glue_error; 351 #endif 352 353 #ifdef RTE_LIBRTE_MLX5_DEBUG 354 /* Glue structure must not contain any NULL pointers. */ 355 { 356 unsigned int i; 357 358 for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i) 359 MLX5_ASSERT(((const void *const *)mlx5_glue)[i]); 360 } 361 #endif 362 if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) { 363 rte_errno = EINVAL; 364 DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is " 365 "required", mlx5_glue->version, MLX5_GLUE_VERSION); 366 goto glue_error; 367 } 368 mlx5_glue->fork_init(); 369 return; 370 371 glue_error: 372 DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing" 373 " run-time dependency on rdma-core libraries (libibverbs," 374 " libmlx5)"); 375 mlx5_glue = NULL; 376 return; 377 } 378 379 /** 380 * This function is responsible of initializing the variable 381 * haswell_broadwell_cpu by checking if the cpu is intel 382 * and reading the data returned from mlx5_cpu_id(). 383 * since haswell and broadwell cpus don't have improved performance 384 * when using relaxed ordering we want to check the cpu type before 385 * before deciding whether to enable RO or not. 386 * if the cpu is haswell or broadwell the variable will be set to 1 387 * otherwise it will be 0. 388 */ 389 RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG) 390 { 391 #ifdef RTE_ARCH_X86_64 392 unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56}; 393 unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46}; 394 unsigned int i, model, family, brand_id, vendor; 395 unsigned int signature_intel_ebx = 0x756e6547; 396 unsigned int extended_model; 397 unsigned int eax = 0; 398 unsigned int ebx = 0; 399 unsigned int ecx = 0; 400 unsigned int edx = 0; 401 int max_level; 402 403 mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx); 404 vendor = ebx; 405 max_level = eax; 406 if (max_level < 1) { 407 haswell_broadwell_cpu = 0; 408 return; 409 } 410 mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx); 411 model = (eax >> 4) & 0x0f; 412 family = (eax >> 8) & 0x0f; 413 brand_id = ebx & 0xff; 414 extended_model = (eax >> 12) & 0xf0; 415 /* Check if the processor is Haswell or Broadwell */ 416 if (vendor == signature_intel_ebx) { 417 if (family == 0x06) 418 model += extended_model; 419 if (brand_id == 0 && family == 0x6) { 420 for (i = 0; i < RTE_DIM(broadwell_models); i++) 421 if (model == broadwell_models[i]) { 422 haswell_broadwell_cpu = 1; 423 return; 424 } 425 for (i = 0; i < RTE_DIM(haswell_models); i++) 426 if (model == haswell_models[i]) { 427 haswell_broadwell_cpu = 1; 428 return; 429 } 430 } 431 } 432 #endif 433 haswell_broadwell_cpu = 0; 434 } 435