1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2019 Mellanox Technologies, Ltd 3 */ 4 5 #ifndef RTE_PMD_MLX5_COMMON_H_ 6 #define RTE_PMD_MLX5_COMMON_H_ 7 8 #include <stdio.h> 9 10 #include <rte_pci.h> 11 #include <rte_bus_pci.h> 12 #include <rte_debug.h> 13 #include <rte_atomic.h> 14 #include <rte_rwlock.h> 15 #include <rte_log.h> 16 #include <rte_kvargs.h> 17 #include <rte_devargs.h> 18 #include <rte_bitops.h> 19 #include <rte_lcore.h> 20 #include <rte_spinlock.h> 21 #include <rte_os_shim.h> 22 23 #include "mlx5_prm.h" 24 #include "mlx5_devx_cmds.h" 25 #include "mlx5_common_os.h" 26 #include "mlx5_common_mr.h" 27 28 /* Reported driver name. */ 29 #define MLX5_PCI_DRIVER_NAME "mlx5_pci" 30 #define MLX5_AUXILIARY_DRIVER_NAME "mlx5_auxiliary" 31 32 /* Bit-field manipulation. */ 33 #define BITFIELD_DECLARE(bf, type, size) \ 34 type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \ 35 !!((size_t)(size) % (sizeof(type) * CHAR_BIT)))] 36 #define BITFIELD_DEFINE(bf, type, size) \ 37 BITFIELD_DECLARE((bf), type, (size)) = { 0 } 38 #define BITFIELD_SET(bf, b) \ 39 (void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] |= \ 40 ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT)))) 41 #define BITFIELD_RESET(bf, b) \ 42 (void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] &= \ 43 ~((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT)))) 44 #define BITFIELD_ISSET(bf, b) \ 45 !!(((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] & \ 46 ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))) 47 48 /* 49 * Helper macros to work around __VA_ARGS__ limitations in a C99 compliant 50 * manner. 51 */ 52 #define PMD_DRV_LOG_STRIP(a, b) a 53 #define PMD_DRV_LOG_OPAREN ( 54 #define PMD_DRV_LOG_CPAREN ) 55 #define PMD_DRV_LOG_COMMA , 56 57 /* Return the file name part of a path. */ 58 static inline const char * 59 pmd_drv_log_basename(const char *s) 60 { 61 const char *n = s; 62 63 while (*n) 64 if (*(n++) == '/') 65 s = n; 66 return s; 67 } 68 69 #define PMD_DRV_LOG___(level, type, name, ...) \ 70 rte_log(RTE_LOG_ ## level, \ 71 type, \ 72 RTE_FMT(name ": " \ 73 RTE_FMT_HEAD(__VA_ARGS__,), \ 74 RTE_FMT_TAIL(__VA_ARGS__,))) 75 76 #ifdef RTE_LIBRTE_MLX5_DEBUG 77 78 #define PMD_DRV_LOG__(level, type, name, ...) \ 79 PMD_DRV_LOG___(level, type, name, "%s:%u: %s(): " __VA_ARGS__) 80 #define PMD_DRV_LOG_(level, type, name, s, ...) \ 81 PMD_DRV_LOG__(level, type, name,\ 82 s "\n" PMD_DRV_LOG_COMMA \ 83 pmd_drv_log_basename(__FILE__) PMD_DRV_LOG_COMMA \ 84 __LINE__ PMD_DRV_LOG_COMMA \ 85 __func__, \ 86 __VA_ARGS__) 87 88 #else /* RTE_LIBRTE_MLX5_DEBUG */ 89 #define PMD_DRV_LOG__(level, type, name, ...) \ 90 PMD_DRV_LOG___(level, type, name, __VA_ARGS__) 91 #define PMD_DRV_LOG_(level, type, name, s, ...) \ 92 PMD_DRV_LOG__(level, type, name, s "\n", __VA_ARGS__) 93 94 #endif /* RTE_LIBRTE_MLX5_DEBUG */ 95 96 /* claim_zero() does not perform any check when debugging is disabled. */ 97 #ifdef RTE_LIBRTE_MLX5_DEBUG 98 99 #define MLX5_ASSERT(exp) RTE_VERIFY(exp) 100 #define claim_zero(...) MLX5_ASSERT((__VA_ARGS__) == 0) 101 #define claim_nonzero(...) MLX5_ASSERT((__VA_ARGS__) != 0) 102 103 #else /* RTE_LIBRTE_MLX5_DEBUG */ 104 105 #define MLX5_ASSERT(exp) RTE_ASSERT(exp) 106 #define claim_zero(...) (__VA_ARGS__) 107 #define claim_nonzero(...) (__VA_ARGS__) 108 109 #endif /* RTE_LIBRTE_MLX5_DEBUG */ 110 111 /* Allocate a buffer on the stack and fill it with a printf format string. */ 112 #define MKSTR(name, ...) \ 113 int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \ 114 char name[mkstr_size_##name + 1]; \ 115 \ 116 memset(name, 0, mkstr_size_##name + 1); \ 117 snprintf(name, sizeof(name), "" __VA_ARGS__) 118 119 enum { 120 PCI_VENDOR_ID_MELLANOX = 0x15b3, 121 }; 122 123 enum { 124 PCI_DEVICE_ID_MELLANOX_CONNECTX4 = 0x1013, 125 PCI_DEVICE_ID_MELLANOX_CONNECTX4VF = 0x1014, 126 PCI_DEVICE_ID_MELLANOX_CONNECTX4LX = 0x1015, 127 PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016, 128 PCI_DEVICE_ID_MELLANOX_CONNECTX5 = 0x1017, 129 PCI_DEVICE_ID_MELLANOX_CONNECTX5VF = 0x1018, 130 PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019, 131 PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a, 132 PCI_DEVICE_ID_MELLANOX_CONNECTX5BF = 0xa2d2, 133 PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF = 0xa2d3, 134 PCI_DEVICE_ID_MELLANOX_CONNECTX6 = 0x101b, 135 PCI_DEVICE_ID_MELLANOX_CONNECTX6VF = 0x101c, 136 PCI_DEVICE_ID_MELLANOX_CONNECTX6DX = 0x101d, 137 PCI_DEVICE_ID_MELLANOX_CONNECTXVF = 0x101e, 138 PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF = 0xa2d6, 139 PCI_DEVICE_ID_MELLANOX_CONNECTX6LX = 0x101f, 140 PCI_DEVICE_ID_MELLANOX_CONNECTX7 = 0x1021, 141 PCI_DEVICE_ID_MELLANOX_CONNECTX7BF = 0Xa2dc, 142 }; 143 144 /* Maximum number of simultaneous unicast MAC addresses. */ 145 #define MLX5_MAX_UC_MAC_ADDRESSES 128 146 /* Maximum number of simultaneous Multicast MAC addresses. */ 147 #define MLX5_MAX_MC_MAC_ADDRESSES 128 148 /* Maximum number of simultaneous MAC addresses. */ 149 #define MLX5_MAX_MAC_ADDRESSES \ 150 (MLX5_MAX_UC_MAC_ADDRESSES + MLX5_MAX_MC_MAC_ADDRESSES) 151 152 /* Recognized Infiniband device physical port name types. */ 153 enum mlx5_nl_phys_port_name_type { 154 MLX5_PHYS_PORT_NAME_TYPE_NOTSET = 0, /* Not set. */ 155 MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* before kernel ver < 5.0 */ 156 MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */ 157 MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */ 158 MLX5_PHYS_PORT_NAME_TYPE_PFHPF, /* pf0, kernel ver >= 5.7, HPF rep */ 159 MLX5_PHYS_PORT_NAME_TYPE_PFSF, /* pf0sf0, kernel ver >= 5.0 */ 160 MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */ 161 }; 162 163 /** Switch information returned by mlx5_nl_switch_info(). */ 164 struct mlx5_switch_info { 165 uint32_t master:1; /**< Master device. */ 166 uint32_t representor:1; /**< Representor device. */ 167 enum mlx5_nl_phys_port_name_type name_type; /** < Port name type. */ 168 int32_t ctrl_num; /**< Controller number (valid for c#pf#vf# format). */ 169 int32_t pf_num; /**< PF number (valid for pfxvfx format only). */ 170 int32_t port_name; /**< Representor port name. */ 171 uint64_t switch_id; /**< Switch identifier. */ 172 }; 173 174 /* CQE status. */ 175 enum mlx5_cqe_status { 176 MLX5_CQE_STATUS_SW_OWN = -1, 177 MLX5_CQE_STATUS_HW_OWN = -2, 178 MLX5_CQE_STATUS_ERR = -3, 179 }; 180 181 /** 182 * Check whether CQE is valid. 183 * 184 * @param cqe 185 * Pointer to CQE. 186 * @param cqes_n 187 * Size of completion queue. 188 * @param ci 189 * Consumer index. 190 * 191 * @return 192 * The CQE status. 193 */ 194 static __rte_always_inline enum mlx5_cqe_status 195 check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t cqes_n, 196 const uint16_t ci) 197 { 198 const uint16_t idx = ci & cqes_n; 199 const uint8_t op_own = cqe->op_own; 200 const uint8_t op_owner = MLX5_CQE_OWNER(op_own); 201 const uint8_t op_code = MLX5_CQE_OPCODE(op_own); 202 203 if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID))) 204 return MLX5_CQE_STATUS_HW_OWN; 205 rte_io_rmb(); 206 if (unlikely(op_code == MLX5_CQE_RESP_ERR || 207 op_code == MLX5_CQE_REQ_ERR)) 208 return MLX5_CQE_STATUS_ERR; 209 return MLX5_CQE_STATUS_SW_OWN; 210 } 211 212 /* 213 * Get PCI address <DBDF> string from EAL device. 214 * 215 * @param[out] addr 216 * The output address buffer string 217 * @param[in] size 218 * The output buffer size 219 * @return 220 * - 0 on success. 221 * - Negative value and rte_errno is set otherwise. 222 */ 223 int mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size); 224 225 /* 226 * Get PCI address from sysfs of a PCI-related device. 227 * 228 * @param[in] dev_path 229 * The sysfs path should not point to the direct plain PCI device. 230 * Instead, the node "/device/" is used to access the real device. 231 * @param[out] pci_addr 232 * Parsed PCI address. 233 * 234 * @return 235 * - 0 on success. 236 * - Negative value and rte_errno is set otherwise. 237 */ 238 __rte_internal 239 int mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr); 240 241 /* 242 * Get kernel network interface name from sysfs IB device path. 243 * 244 * @param[in] ibdev_path 245 * The sysfs path to IB device. 246 * @param[out] ifname 247 * Interface name output of size IF_NAMESIZE. 248 * 249 * @return 250 * - 0 on success. 251 * - Negative value and rte_errno is set otherwise. 252 */ 253 __rte_internal 254 int mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname); 255 256 __rte_internal 257 int mlx5_auxiliary_get_child_name(const char *dev, const char *node, 258 char *child, size_t size); 259 260 enum mlx5_class { 261 MLX5_CLASS_INVALID, 262 MLX5_CLASS_ETH = RTE_BIT64(0), 263 MLX5_CLASS_VDPA = RTE_BIT64(1), 264 MLX5_CLASS_REGEX = RTE_BIT64(2), 265 MLX5_CLASS_COMPRESS = RTE_BIT64(3), 266 MLX5_CLASS_CRYPTO = RTE_BIT64(4), 267 }; 268 269 #define MLX5_DBR_SIZE RTE_CACHE_LINE_SIZE 270 271 /* devX creation object */ 272 struct mlx5_devx_obj { 273 void *obj; /* The DV object. */ 274 int id; /* The object ID. */ 275 }; 276 277 /* UMR memory buffer used to define 1 entry in indirect mkey. */ 278 struct mlx5_klm { 279 uint32_t byte_count; 280 uint32_t mkey; 281 uint64_t address; 282 }; 283 284 /** Control for key/values list. */ 285 struct mlx5_kvargs_ctrl { 286 struct rte_kvargs *kvlist; /* Structure containing list of key/values.*/ 287 bool is_used[RTE_KVARGS_MAX]; /* Indicator which devargs were used. */ 288 }; 289 290 /** 291 * Call a handler function for each key/value in the list of keys. 292 * 293 * For each key/value association that matches the given key, calls the 294 * handler function with the for a given arg_name passing the value on the 295 * dictionary for that key and a given extra argument. 296 * 297 * @param mkvlist 298 * The mlx5_kvargs structure. 299 * @param keys 300 * A list of keys to process (table of const char *, the last must be NULL). 301 * @param handler 302 * The function to call for each matching key. 303 * @param opaque_arg 304 * A pointer passed unchanged to the handler. 305 * 306 * @return 307 * - 0 on success 308 * - Negative on error 309 */ 310 __rte_internal 311 int 312 mlx5_kvargs_process(struct mlx5_kvargs_ctrl *mkvlist, const char *const keys[], 313 arg_handler_t handler, void *opaque_arg); 314 315 /* All UAR arguments using doorbell register in datapath. */ 316 struct mlx5_uar_data { 317 uint64_t *db; 318 /* The doorbell's virtual address mapped to the relevant HW UAR space.*/ 319 #ifndef RTE_ARCH_64 320 rte_spinlock_t *sl_p; 321 /* Pointer to UAR access lock required for 32bit implementations. */ 322 #endif /* RTE_ARCH_64 */ 323 }; 324 325 /* DevX UAR control structure. */ 326 struct mlx5_uar { 327 struct mlx5_uar_data bf_db; /* UAR data for Blueflame register. */ 328 struct mlx5_uar_data cq_db; /* UAR data for CQ arm db register. */ 329 void *obj; /* DevX UAR object. */ 330 bool dbnc; /* Doorbell mapped to non-cached region. */ 331 #ifndef RTE_ARCH_64 332 rte_spinlock_t bf_sl; 333 rte_spinlock_t cq_sl; 334 /* UAR access locks required for 32bit implementations. */ 335 #endif /* RTE_ARCH_64 */ 336 }; 337 338 /** 339 * Ring a doorbell and flush the update if requested. 340 * 341 * @param uar 342 * Pointer to UAR data structure. 343 * @param val 344 * value to write in big endian format. 345 * @param index 346 * Index of doorbell record. 347 * @param db_rec 348 * Address of doorbell record. 349 * @param flash 350 * Decide whether to flush the DB writing using a memory barrier. 351 */ 352 static __rte_always_inline void 353 mlx5_doorbell_ring(struct mlx5_uar_data *uar, uint64_t val, uint32_t index, 354 volatile uint32_t *db_rec, bool flash) 355 { 356 rte_io_wmb(); 357 *db_rec = rte_cpu_to_be_32(index); 358 /* Ensure ordering between DB record actual update and UAR access. */ 359 rte_wmb(); 360 #ifdef RTE_ARCH_64 361 *uar->db = val; 362 #else /* !RTE_ARCH_64 */ 363 rte_spinlock_lock(uar->sl_p); 364 *(volatile uint32_t *)uar->db = val; 365 rte_io_wmb(); 366 *((volatile uint32_t *)uar->db + 1) = val >> 32; 367 rte_spinlock_unlock(uar->sl_p); 368 #endif 369 if (flash) 370 rte_wmb(); 371 } 372 373 /** 374 * Get the doorbell register mapping type. 375 * 376 * @param uar_mmap_offset 377 * Mmap offset of Verbs/DevX UAR. 378 * @param page_size 379 * System page size 380 * 381 * @return 382 * 1 for non-cached, 0 otherwise. 383 */ 384 static inline uint16_t 385 mlx5_db_map_type_get(off_t uar_mmap_offset, size_t page_size) 386 { 387 off_t cmd = uar_mmap_offset / page_size; 388 389 cmd >>= MLX5_UAR_MMAP_CMD_SHIFT; 390 cmd &= MLX5_UAR_MMAP_CMD_MASK; 391 if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD) 392 return 1; 393 return 0; 394 } 395 396 __rte_internal 397 void mlx5_translate_port_name(const char *port_name_in, 398 struct mlx5_switch_info *port_info_out); 399 void mlx5_glue_constructor(void); 400 extern uint8_t haswell_broadwell_cpu; 401 402 __rte_internal 403 void mlx5_common_init(void); 404 405 /* 406 * Common Driver Interface 407 * 408 * ConnectX common driver supports multiple classes: net, vDPA, regex, crypto 409 * and compress devices. This layer enables creating such multiple classes 410 * on a single device by allowing to bind multiple class-specific device 411 * drivers to attach to the common driver. 412 * 413 * ------------ ------------- -------------- ----------------- ------------ 414 * | mlx5 net | | mlx5 vdpa | | mlx5 regex | | mlx5 compress | | mlx5 ... | 415 * | driver | | driver | | driver | | driver | | drivers | 416 * ------------ ------------- -------------- ----------------- ------------ 417 * || 418 * ----------------- 419 * | mlx5 | 420 * | common driver | 421 * ----------------- 422 * | | 423 * ----------- ----------------- 424 * | mlx5 | | mlx5 | 425 * | pci dev | | auxiliary dev | 426 * ----------- ----------------- 427 * 428 * - mlx5 PCI bus driver binds to mlx5 PCI devices defined by PCI ID table 429 * of all related devices. 430 * - mlx5 class driver such as net, vDPA, regex defines its specific 431 * PCI ID table and mlx5 bus driver probes matching class drivers. 432 * - mlx5 common driver is central place that validates supported 433 * class combinations. 434 * - mlx5 common driver hides bus difference by resolving device address 435 * from devargs, locating target RDMA device and probing with it. 436 */ 437 438 /* 439 * Device configuration structure. 440 * 441 * Merged configuration from: 442 * 443 * - Device capabilities, 444 * - User device parameters disabled features. 445 */ 446 struct mlx5_common_dev_config { 447 struct mlx5_hca_attr hca_attr; /* HCA attributes. */ 448 int dbnc; /* Skip doorbell register write barrier. */ 449 unsigned int devx:1; /* Whether devx interface is available or not. */ 450 unsigned int sys_mem_en:1; /* The default memory allocator. */ 451 unsigned int mr_mempool_reg_en:1; 452 /* Allow/prevent implicit mempool memory registration. */ 453 unsigned int mr_ext_memseg_en:1; 454 /* Whether memseg should be extended for MR creation. */ 455 }; 456 457 struct mlx5_common_device { 458 struct rte_device *dev; 459 TAILQ_ENTRY(mlx5_common_device) next; 460 uint32_t classes_loaded; 461 void *ctx; /* Verbs/DV/DevX context. */ 462 void *pd; /* Protection Domain. */ 463 uint32_t pdn; /* Protection Domain Number. */ 464 struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */ 465 struct mlx5_common_dev_config config; /* Device configuration. */ 466 }; 467 468 /** 469 * Initialization function for the driver called during device probing. 470 */ 471 typedef int (mlx5_class_driver_probe_t)(struct mlx5_common_device *cdev, 472 struct mlx5_kvargs_ctrl *mkvlist); 473 474 /** 475 * Uninitialization function for the driver called during hot-unplugging. 476 */ 477 typedef int (mlx5_class_driver_remove_t)(struct mlx5_common_device *cdev); 478 479 /** Device already probed can be probed again to check for new ports. */ 480 #define MLX5_DRV_PROBE_AGAIN 0x0004 481 482 /** 483 * A structure describing a mlx5 common class driver. 484 */ 485 struct mlx5_class_driver { 486 TAILQ_ENTRY(mlx5_class_driver) next; 487 enum mlx5_class drv_class; /**< Class of this driver. */ 488 const char *name; /**< Driver name. */ 489 mlx5_class_driver_probe_t *probe; /**< Device probe function. */ 490 mlx5_class_driver_remove_t *remove; /**< Device remove function. */ 491 const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ 492 uint32_t probe_again:1; 493 /**< Device already probed can be probed again to check new device. */ 494 uint32_t intr_lsc:1; /**< Supports link state interrupt. */ 495 uint32_t intr_rmv:1; /**< Supports device remove interrupt. */ 496 }; 497 498 /** 499 * Register a mlx5 device driver. 500 * 501 * @param driver 502 * A pointer to a mlx5_driver structure describing the driver 503 * to be registered. 504 */ 505 __rte_internal 506 void 507 mlx5_class_driver_register(struct mlx5_class_driver *driver); 508 509 /** 510 * Test device is a PCI bus device. 511 * 512 * @param dev 513 * Pointer to device. 514 * 515 * @return 516 * - True on device devargs is a PCI bus device. 517 * - False otherwise. 518 */ 519 __rte_internal 520 bool 521 mlx5_dev_is_pci(const struct rte_device *dev); 522 523 /** 524 * Test PCI device is a VF device. 525 * 526 * @param pci_dev 527 * Pointer to PCI device. 528 * 529 * @return 530 * - True on PCI device is a VF device. 531 * - False otherwise. 532 */ 533 __rte_internal 534 bool 535 mlx5_dev_is_vf_pci(struct rte_pci_device *pci_dev); 536 537 __rte_internal 538 int 539 mlx5_dev_mempool_subscribe(struct mlx5_common_device *cdev); 540 541 __rte_internal 542 void 543 mlx5_dev_mempool_unregister(struct mlx5_common_device *cdev, 544 struct rte_mempool *mp); 545 546 __rte_internal 547 int 548 mlx5_devx_uar_prepare(struct mlx5_common_device *cdev, struct mlx5_uar *uar); 549 550 __rte_internal 551 void 552 mlx5_devx_uar_release(struct mlx5_uar *uar); 553 554 /* mlx5_common_os.c */ 555 556 int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes); 557 int mlx5_os_pd_create(struct mlx5_common_device *cdev); 558 559 /* mlx5 PMD wrapped MR struct. */ 560 struct mlx5_pmd_wrapped_mr { 561 uint32_t lkey; 562 void *addr; 563 size_t len; 564 void *obj; /* verbs mr object or devx umem object. */ 565 void *imkey; /* DevX indirect mkey object. */ 566 }; 567 568 __rte_internal 569 int 570 mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr, 571 size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr); 572 573 __rte_internal 574 void 575 mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr); 576 577 #endif /* RTE_PMD_MLX5_COMMON_H_ */ 578