/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_COMMON_H_
#define RTE_PMD_MLX5_COMMON_H_

#include <stdio.h>

#include <rte_pci.h>
#include <rte_debug.h>
#include <rte_atomic.h>
#include <rte_rwlock.h>
#include <rte_log.h>
#include <rte_kvargs.h>
#include <rte_devargs.h>
#include <rte_bitops.h>
#include <rte_lcore.h>
#include <rte_spinlock.h>
#include <rte_os_shim.h>

#include "mlx5_prm.h"
#include "mlx5_devx_cmds.h"
#include "mlx5_common_os.h"
#include "mlx5_common_mr.h"

/* Reported driver name. */
#define MLX5_PCI_DRIVER_NAME "mlx5_pci"
#define MLX5_AUXILIARY_DRIVER_NAME "mlx5_auxiliary"

/* Bit-field manipulation. */
#define BITFIELD_DECLARE(bf, type, size) \
	type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \
		 !!((size_t)(size) % (sizeof(type) * CHAR_BIT)))]
#define BITFIELD_DEFINE(bf, type, size) \
	BITFIELD_DECLARE((bf), type, (size)) = { 0 }
#define BITFIELD_SET(bf, b) \
	(void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] |= \
	       ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))
#define BITFIELD_RESET(bf, b) \
	(void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] &= \
	       ~((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))
#define BITFIELD_ISSET(bf, b) \
	!!(((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] & \
	    ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT)))))

/*
 * Helper macros to work around __VA_ARGS__ limitations in a C99 compliant
 * manner.
 */
#define PMD_DRV_LOG_STRIP(a, b) a
#define PMD_DRV_LOG_OPAREN (
#define PMD_DRV_LOG_CPAREN )
#define PMD_DRV_LOG_COMMA ,

/* Return the file name part of a path. */
static inline const char *
pmd_drv_log_basename(const char *s)
{
	const char *n = s;

	while (*n)
		if (*(n++) == '/')
			s = n;
	return s;
}

#define PMD_DRV_LOG___(level, type, name, ...) \
	rte_log(RTE_LOG_ ## level, \
		type, \
		RTE_FMT(name ": " \
			RTE_FMT_HEAD(__VA_ARGS__,), \
			RTE_FMT_TAIL(__VA_ARGS__,)))

#ifdef RTE_LIBRTE_MLX5_DEBUG

#define PMD_DRV_LOG__(level, type, name, ...) \
	PMD_DRV_LOG___(level, type, name, "%s:%u: %s(): " __VA_ARGS__)
#define PMD_DRV_LOG_(level, type, name, s, ...) \
	PMD_DRV_LOG__(level, type, name,\
		s "\n" PMD_DRV_LOG_COMMA \
		pmd_drv_log_basename(__FILE__) PMD_DRV_LOG_COMMA \
		__LINE__ PMD_DRV_LOG_COMMA \
		__func__, \
		__VA_ARGS__)

#else /* RTE_LIBRTE_MLX5_DEBUG */
#define PMD_DRV_LOG__(level, type, name, ...) \
	PMD_DRV_LOG___(level, type, name, __VA_ARGS__)
#define PMD_DRV_LOG_(level, type, name, s, ...) \
	PMD_DRV_LOG__(level, type, name, s "\n", __VA_ARGS__)

#endif /* RTE_LIBRTE_MLX5_DEBUG */

/* claim_zero() does not perform any check when debugging is disabled. */
#ifdef RTE_LIBRTE_MLX5_DEBUG

#define MLX5_ASSERT(exp) RTE_VERIFY(exp)
#define claim_zero(...) MLX5_ASSERT((__VA_ARGS__) == 0)
#define claim_nonzero(...) MLX5_ASSERT((__VA_ARGS__) != 0)

#else /* RTE_LIBRTE_MLX5_DEBUG */

#define MLX5_ASSERT(exp) RTE_ASSERT(exp)
#define claim_zero(...) (__VA_ARGS__)
#define claim_nonzero(...) (__VA_ARGS__)

#endif /* RTE_LIBRTE_MLX5_DEBUG */
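
/*
 * Usage sketch for the assertion helpers above (illustration only; "fd" is a
 * hypothetical file descriptor):
 *
 *	MLX5_ASSERT(fd >= 0);
 *	claim_zero(close(fd));
 *
 * With RTE_LIBRTE_MLX5_DEBUG defined, claim_zero() aborts if close() does not
 * return zero; otherwise the expression is still evaluated but the result is
 * not checked.
 */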

/* Allocate a buffer on the stack and fill it with a printf format string. */
#define MKSTR(name, ...) \
	int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
	char name[mkstr_size_##name + 1]; \
	\
	memset(name, 0, mkstr_size_##name + 1); \
	snprintf(name, sizeof(name), "" __VA_ARGS__)

enum {
	PCI_VENDOR_ID_MELLANOX = 0x15b3,
};

enum {
	PCI_DEVICE_ID_MELLANOX_CONNECTX4 = 0x1013,
	PCI_DEVICE_ID_MELLANOX_CONNECTX4VF = 0x1014,
	PCI_DEVICE_ID_MELLANOX_CONNECTX4LX = 0x1015,
	PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5 = 0x1017,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5VF = 0x1018,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5BF = 0xa2d2,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF = 0xa2d3,
	PCI_DEVICE_ID_MELLANOX_CONNECTX6 = 0x101b,
	PCI_DEVICE_ID_MELLANOX_CONNECTX6VF = 0x101c,
	PCI_DEVICE_ID_MELLANOX_CONNECTX6DX = 0x101d,
	PCI_DEVICE_ID_MELLANOX_CONNECTXVF = 0x101e,
	PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF = 0xa2d6,
	PCI_DEVICE_ID_MELLANOX_CONNECTX6LX = 0x101f,
	PCI_DEVICE_ID_MELLANOX_CONNECTX7 = 0x1021,
	PCI_DEVICE_ID_MELLANOX_CONNECTX7BF = 0xa2dc,
};

/* Maximum number of simultaneous unicast MAC addresses. */
#define MLX5_MAX_UC_MAC_ADDRESSES 128
/* Maximum number of simultaneous Multicast MAC addresses. */
#define MLX5_MAX_MC_MAC_ADDRESSES 128
/* Maximum number of simultaneous MAC addresses. */
#define MLX5_MAX_MAC_ADDRESSES \
	(MLX5_MAX_UC_MAC_ADDRESSES + MLX5_MAX_MC_MAC_ADDRESSES)

/* Recognized Infiniband device physical port name types. */
enum mlx5_nl_phys_port_name_type {
	MLX5_PHYS_PORT_NAME_TYPE_NOTSET = 0, /* Not set. */
	MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* Legacy name, kernel ver < 5.0 */
	MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */
	MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */
	MLX5_PHYS_PORT_NAME_TYPE_PFHPF, /* pf0, kernel ver >= 5.7, HPF rep */
	MLX5_PHYS_PORT_NAME_TYPE_PFSF, /* pf0sf0, kernel ver >= 5.0 */
	MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */
};

/** Switch information returned by mlx5_nl_switch_info(). */
struct mlx5_switch_info {
	uint32_t master:1; /**< Master device. */
	uint32_t representor:1; /**< Representor device. */
	enum mlx5_nl_phys_port_name_type name_type; /**< Port name type. */
	int32_t ctrl_num; /**< Controller number (valid for c#pf#vf# format). */
	int32_t pf_num; /**< PF number (valid for pf#vf# format only). */
	int32_t port_name; /**< Representor port name. */
	uint64_t switch_id; /**< Switch identifier. */
};

/* CQE status. */
enum mlx5_cqe_status {
	MLX5_CQE_STATUS_SW_OWN = -1,
	MLX5_CQE_STATUS_HW_OWN = -2,
	MLX5_CQE_STATUS_ERR = -3,
};

/**
 * Check whether CQE is valid.
 *
 * @param cqe
 *   Pointer to CQE.
 * @param cqes_n
 *   Size of completion queue.
 * @param ci
 *   Consumer index.
 *
 * @return
 *   The CQE status.
 */
static __rte_always_inline enum mlx5_cqe_status
check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t cqes_n,
	  const uint16_t ci)
{
	const uint16_t idx = ci & cqes_n;
	const uint8_t op_own = cqe->op_own;
	const uint8_t op_owner = MLX5_CQE_OWNER(op_own);
	const uint8_t op_code = MLX5_CQE_OPCODE(op_own);

	if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
		return MLX5_CQE_STATUS_HW_OWN;
	rte_io_rmb();
	if (unlikely(op_code == MLX5_CQE_RESP_ERR ||
		     op_code == MLX5_CQE_REQ_ERR))
		return MLX5_CQE_STATUS_ERR;
	return MLX5_CQE_STATUS_SW_OWN;
}
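
/*
 * Polling sketch using check_cqe() (illustration only; "cq", "cqe_n" and
 * "cq_ci" are hypothetical caller-side variables, "cqe_n" being a power of
 * two):
 *
 *	volatile struct mlx5_cqe *cqe = &cq[cq_ci & (cqe_n - 1)];
 *
 *	switch (check_cqe(cqe, cqe_n, cq_ci)) {
 *	case MLX5_CQE_STATUS_SW_OWN:
 *		cq_ci++;	// CQE handed over to software, consume it.
 *		break;
 *	case MLX5_CQE_STATUS_ERR:
 *		// Completion with error, handle or recover here.
 *		break;
 *	default:
 *		break;		// Still owned by hardware, poll again later.
 *	}
 */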

/*
 * Get PCI address <DBDF> string from EAL device.
 *
 * @param[in] dev
 *   Pointer to the generic EAL device.
 * @param[out] addr
 *   The output address buffer string.
 * @param[in] size
 *   The output buffer size.
 * @return
 *   - 0 on success.
 *   - Negative value and rte_errno is set otherwise.
 */
int mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size);

/*
 * Get PCI address from sysfs of a PCI-related device.
 *
 * @param[in] dev_path
 *   The sysfs path should not point to the direct plain PCI device.
 *   Instead, the node "/device/" is used to access the real device.
 * @param[out] pci_addr
 *   Parsed PCI address.
 *
 * @return
 *   - 0 on success.
 *   - Negative value and rte_errno is set otherwise.
 */
__rte_internal
int mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr);

/*
 * Get kernel network interface name from sysfs IB device path.
 *
 * @param[in] ibdev_path
 *   The sysfs path to IB device.
 * @param[out] ifname
 *   Interface name output of size IF_NAMESIZE.
 *
 * @return
 *   - 0 on success.
 *   - Negative value and rte_errno is set otherwise.
 */
__rte_internal
int mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname);

__rte_internal
int mlx5_auxiliary_get_child_name(const char *dev, const char *node,
				  char *child, size_t size);

enum mlx5_class {
	MLX5_CLASS_INVALID,
	MLX5_CLASS_ETH = RTE_BIT64(0),
	MLX5_CLASS_VDPA = RTE_BIT64(1),
	MLX5_CLASS_REGEX = RTE_BIT64(2),
	MLX5_CLASS_COMPRESS = RTE_BIT64(3),
	MLX5_CLASS_CRYPTO = RTE_BIT64(4),
};

#define MLX5_DBR_SIZE RTE_CACHE_LINE_SIZE

/* DevX creation object. */
struct mlx5_devx_obj {
	void *obj; /* The DV object. */
	int id; /* The object ID. */
};

/* UMR memory buffer used to define 1 entry in indirect mkey. */
struct mlx5_klm {
	uint32_t byte_count;
	uint32_t mkey;
	uint64_t address;
};

/* All UAR arguments using doorbell register in datapath. */
struct mlx5_uar_data {
	uint64_t *db;
	/* The doorbell's virtual address mapped to the relevant HW UAR space. */
#ifndef RTE_ARCH_64
	rte_spinlock_t *sl_p;
	/* Pointer to UAR access lock required for 32bit implementations. */
#endif /* RTE_ARCH_64 */
};

/* DevX UAR control structure. */
struct mlx5_uar {
	struct mlx5_uar_data bf_db; /* UAR data for Blueflame register. */
	struct mlx5_uar_data cq_db; /* UAR data for CQ arm db register. */
	void *obj; /* DevX UAR object. */
	bool dbnc; /* Doorbell mapped to non-cached region. */
#ifndef RTE_ARCH_64
	rte_spinlock_t bf_sl;
	rte_spinlock_t cq_sl;
	/* UAR access locks required for 32bit implementations. */
#endif /* RTE_ARCH_64 */
};

/**
 * Ring a doorbell and flush the update if requested.
 *
 * @param uar
 *   Pointer to UAR data structure.
 * @param val
 *   Value to write in big endian format.
 * @param index
 *   Index of doorbell record.
 * @param db_rec
 *   Address of doorbell record.
 * @param flush
 *   Decide whether to flush the DB writing using a memory barrier.
 */
static __rte_always_inline void
mlx5_doorbell_ring(struct mlx5_uar_data *uar, uint64_t val, uint32_t index,
		   volatile uint32_t *db_rec, bool flush)
{
	rte_io_wmb();
	*db_rec = rte_cpu_to_be_32(index);
	/* Ensure ordering between DB record actual update and UAR access. */
	rte_wmb();
#ifdef RTE_ARCH_64
	*uar->db = val;
#else /* !RTE_ARCH_64 */
	rte_spinlock_lock(uar->sl_p);
	*(volatile uint32_t *)uar->db = val;
	rte_io_wmb();
	*((volatile uint32_t *)uar->db + 1) = val >> 32;
	rte_spinlock_unlock(uar->sl_p);
#endif
	if (flush)
		rte_wmb();
}
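
/*
 * Usage sketch for mlx5_doorbell_ring() (illustration only; "uar", "wqe",
 * "sq_pi" and "sq_dbrec" are hypothetical caller-side variables):
 *
 *	// Write the last WQE control segment to the BlueFlame register and
 *	// update the send queue doorbell record with the new producer index.
 *	mlx5_doorbell_ring(&uar.bf_db, *(volatile uint64_t *)wqe, sq_pi,
 *			   sq_dbrec, true);
 *
 * Passing true as the last argument forces a final rte_wmb() after the UAR
 * write, i.e. the doorbell update is flushed before the call returns.
 */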

/**
 * Get the doorbell register mapping type.
 *
 * @param uar_mmap_offset
 *   Mmap offset of Verbs/DevX UAR.
 * @param page_size
 *   System page size.
 *
 * @return
 *   1 for non-cached, 0 otherwise.
 */
static inline uint16_t
mlx5_db_map_type_get(off_t uar_mmap_offset, size_t page_size)
{
	off_t cmd = uar_mmap_offset / page_size;

	cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
	cmd &= MLX5_UAR_MMAP_CMD_MASK;
	if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
		return 1;
	return 0;
}

__rte_internal
void mlx5_translate_port_name(const char *port_name_in,
			      struct mlx5_switch_info *port_info_out);
void mlx5_glue_constructor(void);
extern uint8_t haswell_broadwell_cpu;

__rte_internal
void mlx5_common_init(void);

/*
 * Common Driver Interface
 *
 * ConnectX common driver supports multiple classes: net, vDPA, regex, crypto
 * and compress devices. This layer enables creating such multiple classes
 * on a single device by allowing to bind multiple class-specific device
 * drivers to attach to the common driver.
 *
 * ------------  -------------  --------------  -----------------  ------------
 * | mlx5 net |  | mlx5 vdpa |  | mlx5 regex |  | mlx5 compress |  | mlx5 ... |
 * |  driver  |  |  driver   |  |   driver   |  |    driver     |  | drivers  |
 * ------------  -------------  --------------  -----------------  ------------
 *                                    ||
 *                           -----------------
 *                           |      mlx5     |
 *                           | common driver |
 *                           -----------------
 *                             |           |
 *                  -----------       -----------------
 *                  |  mlx5   |       |      mlx5     |
 *                  | pci dev |       | auxiliary dev |
 *                  -----------       -----------------
 *
 * - The mlx5 PCI bus driver binds to mlx5 PCI devices defined by the PCI ID
 *   table of all related devices.
 * - Each mlx5 class driver, such as net, vDPA or regex, defines its specific
 *   PCI ID table, and the mlx5 bus driver probes the matching class drivers.
 * - The mlx5 common driver is the central place that validates supported
 *   class combinations.
 * - The mlx5 common driver hides the bus difference by resolving the device
 *   address from devargs, locating the target RDMA device and probing it.
 */

/*
 * Device configuration structure.
 *
 * Merged configuration from:
 *
 *  - Device capabilities,
 *  - User device parameters which disable features.
 */
struct mlx5_common_dev_config {
	struct mlx5_hca_attr hca_attr; /* HCA attributes. */
	int dbnc; /* Skip doorbell register write barrier. */
	unsigned int devx:1; /* Whether DevX interface is available or not. */
	unsigned int sys_mem_en:1; /* The default memory allocator. */
	unsigned int mr_mempool_reg_en:1;
	/* Allow/prevent implicit mempool memory registration. */
	unsigned int mr_ext_memseg_en:1;
	/* Whether memseg should be extended for MR creation. */
};

struct mlx5_common_device {
	struct rte_device *dev;
	TAILQ_ENTRY(mlx5_common_device) next;
	uint32_t classes_loaded;
	void *ctx; /* Verbs/DV/DevX context. */
	void *pd; /* Protection Domain. */
	uint32_t pdn; /* Protection Domain Number. */
	struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */
	struct mlx5_common_dev_config config; /* Device configuration. */
};
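
/*
 * Probe callback sketch (illustration only; "mlx5_foo_probe" and the class it
 * serves are hypothetical). A class driver receives the already opened common
 * device and may check the merged configuration before setting itself up,
 * matching the mlx5_class_driver_probe_t prototype declared below:
 *
 *	static int
 *	mlx5_foo_probe(struct mlx5_common_device *cdev)
 *	{
 *		if (!cdev->config.devx)
 *			return -ENOTSUP;	// This class requires DevX.
 *		// Create class-specific resources on cdev->ctx here.
 *		return 0;
 *	}
 */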

/**
 * Initialization function for the driver called during device probing.
 */
typedef int (mlx5_class_driver_probe_t)(struct mlx5_common_device *dev);

/**
 * Uninitialization function for the driver called during hot-unplugging.
 */
typedef int (mlx5_class_driver_remove_t)(struct mlx5_common_device *dev);

/** Device already probed can be probed again to check for new ports. */
#define MLX5_DRV_PROBE_AGAIN 0x0004

/**
 * A structure describing an mlx5 common class driver.
 */
struct mlx5_class_driver {
	TAILQ_ENTRY(mlx5_class_driver) next;
	enum mlx5_class drv_class; /**< Class of this driver. */
	const char *name; /**< Driver name. */
	mlx5_class_driver_probe_t *probe; /**< Device probe function. */
	mlx5_class_driver_remove_t *remove; /**< Device remove function. */
	const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
	uint32_t probe_again:1;
	/**< Device already probed can be probed again to check for a new device. */
	uint32_t intr_lsc:1; /**< Supports link state interrupt. */
	uint32_t intr_rmv:1; /**< Supports device remove interrupt. */
};

/**
 * Register an mlx5 device driver.
 *
 * @param driver
 *   A pointer to an mlx5_class_driver structure describing the driver
 *   to be registered.
 */
__rte_internal
void
mlx5_class_driver_register(struct mlx5_class_driver *driver);
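
/*
 * Registration sketch (illustration only; "mlx5_foo_driver", "mlx5_foo_probe",
 * "mlx5_foo_remove" and "mlx5_foo_pci_id_map" are hypothetical). A class
 * driver is typically defined statically and registered from a constructor so
 * that the common driver can bind it when a matching device is probed:
 *
 *	static struct mlx5_class_driver mlx5_foo_driver = {
 *		.drv_class = MLX5_CLASS_ETH,
 *		.name = "mlx5_foo",
 *		.id_table = mlx5_foo_pci_id_map,
 *		.probe = mlx5_foo_probe,
 *		.remove = mlx5_foo_remove,
 *		.probe_again = 1,
 *	};
 *
 *	RTE_INIT(mlx5_foo_init)
 *	{
 *		mlx5_common_init();
 *		mlx5_class_driver_register(&mlx5_foo_driver);
 *	}
 */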

/**
 * Test if the device is a PCI bus device.
 *
 * @param dev
 *   Pointer to device.
 *
 * @return
 *   - True if the device is on the PCI bus.
 *   - False otherwise.
 */
__rte_internal
bool
mlx5_dev_is_pci(const struct rte_device *dev);

__rte_internal
int
mlx5_dev_mempool_subscribe(struct mlx5_common_device *cdev);

__rte_internal
void
mlx5_dev_mempool_unregister(struct mlx5_common_device *cdev,
			    struct rte_mempool *mp);

__rte_internal
int
mlx5_devx_uar_prepare(struct mlx5_common_device *cdev, struct mlx5_uar *uar);

__rte_internal
void
mlx5_devx_uar_release(struct mlx5_uar *uar);

/* mlx5_common_os.c */

int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes);
int mlx5_os_pd_create(struct mlx5_common_device *cdev);

/* mlx5 PMD wrapped MR struct. */
struct mlx5_pmd_wrapped_mr {
	uint32_t lkey;
	void *addr;
	size_t len;
	void *obj; /* Verbs MR object or DevX UMEM object. */
	void *imkey; /* DevX indirect mkey object. */
};

__rte_internal
int
mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr,
			    size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr);

__rte_internal
void
mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr);

#endif /* RTE_PMD_MLX5_COMMON_H_ */