/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_COMMON_H_
#define RTE_PMD_MLX5_COMMON_H_

#include <stdio.h>

#include <rte_pci.h>
#include <rte_debug.h>
#include <rte_atomic.h>
#include <rte_rwlock.h>
#include <rte_log.h>
#include <rte_kvargs.h>
#include <rte_devargs.h>
#include <rte_bitops.h>
#include <rte_lcore.h>
#include <rte_spinlock.h>
#include <rte_os_shim.h>

#include "mlx5_prm.h"
#include "mlx5_devx_cmds.h"
#include "mlx5_common_os.h"
#include "mlx5_common_mr.h"

/* Reported driver name. */
#define MLX5_PCI_DRIVER_NAME "mlx5_pci"
#define MLX5_AUXILIARY_DRIVER_NAME "mlx5_auxiliary"

/* Bit-field manipulation. */
#define BITFIELD_DECLARE(bf, type, size) \
	type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \
		!!((size_t)(size) % (sizeof(type) * CHAR_BIT)))]
#define BITFIELD_DEFINE(bf, type, size) \
	BITFIELD_DECLARE((bf), type, (size)) = { 0 }
#define BITFIELD_SET(bf, b) \
	(void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] |= \
		((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))
#define BITFIELD_RESET(bf, b) \
	(void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] &= \
		~((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))
#define BITFIELD_ISSET(bf, b) \
	!!(((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] & \
		((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT)))))

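/*
 * Usage sketch for the bit-field helpers (illustrative only): a driver can
 * track which MAC address slots it owns with a word-based bit-field sized by
 * MLX5_MAX_MAC_ADDRESSES (defined below). Set a slot when a MAC address is
 * added, test it on lookup and reset it on removal:
 *
 *	BITFIELD_DEFINE(mac_own, uint64_t, MLX5_MAX_MAC_ADDRESSES);
 *
 *	BITFIELD_SET(mac_own, idx);
 *	if (BITFIELD_ISSET(mac_own, idx))
 *		BITFIELD_RESET(mac_own, idx);
 */
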
/*
 * Helper macros to work around __VA_ARGS__ limitations in a C99-compliant
 * manner.
 */
#define PMD_DRV_LOG_STRIP(a, b) a
#define PMD_DRV_LOG_OPAREN (
#define PMD_DRV_LOG_CPAREN )
#define PMD_DRV_LOG_COMMA ,

/* Return the file name part of a path. */
static inline const char *
pmd_drv_log_basename(const char *s)
{
	const char *n = s;

	while (*n)
		if (*(n++) == '/')
			s = n;
	return s;
}

#define PMD_DRV_LOG___(level, type, name, ...) \
	rte_log(RTE_LOG_ ## level, \
		type, \
		RTE_FMT(name ": " \
			RTE_FMT_HEAD(__VA_ARGS__,), \
		RTE_FMT_TAIL(__VA_ARGS__,)))

#ifdef RTE_LIBRTE_MLX5_DEBUG

#define PMD_DRV_LOG__(level, type, name, ...) \
	PMD_DRV_LOG___(level, type, name, "%s:%u: %s(): " __VA_ARGS__)
#define PMD_DRV_LOG_(level, type, name, s, ...) \
	PMD_DRV_LOG__(level, type, name, \
		s "\n" PMD_DRV_LOG_COMMA \
		pmd_drv_log_basename(__FILE__) PMD_DRV_LOG_COMMA \
		__LINE__ PMD_DRV_LOG_COMMA \
		__func__, \
		__VA_ARGS__)

#else /* RTE_LIBRTE_MLX5_DEBUG */
#define PMD_DRV_LOG__(level, type, name, ...) \
	PMD_DRV_LOG___(level, type, name, __VA_ARGS__)
#define PMD_DRV_LOG_(level, type, name, s, ...) \
	PMD_DRV_LOG__(level, type, name, s "\n", __VA_ARGS__)

#endif /* RTE_LIBRTE_MLX5_DEBUG */

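/*
 * A class driver normally wraps the helpers above into its own logging macro.
 * A minimal sketch (illustrative; the log type variable and the prefix string
 * belong to the consuming driver, not to this header):
 *
 *	extern int mlx5_logtype;
 *
 *	#define DRV_LOG(level, ...) \
 *		PMD_DRV_LOG_(level, mlx5_logtype, "mlx5_net", \
 *			__VA_ARGS__ PMD_DRV_LOG_STRIP PMD_DRV_LOG_OPAREN \
 *			PMD_DRV_LOG_CPAREN)
 *
 *	DRV_LOG(DEBUG, "port %u started", port_id);
 */
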
/* claim_zero() does not perform any check when debugging is disabled. */
#ifdef RTE_LIBRTE_MLX5_DEBUG

#define MLX5_ASSERT(exp) RTE_VERIFY(exp)
#define claim_zero(...) MLX5_ASSERT((__VA_ARGS__) == 0)
#define claim_nonzero(...) MLX5_ASSERT((__VA_ARGS__) != 0)

#else /* RTE_LIBRTE_MLX5_DEBUG */

#define MLX5_ASSERT(exp) RTE_ASSERT(exp)
#define claim_zero(...) (__VA_ARGS__)
#define claim_nonzero(...) (__VA_ARGS__)

#endif /* RTE_LIBRTE_MLX5_DEBUG */

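/*
 * Typical use of claim_zero() is to wrap cleanup calls that are expected to
 * succeed: the return value is verified in debug builds and simply evaluated
 * (and discarded) otherwise. Illustrative example (the destroyed object is
 * the caller's own):
 *
 *	claim_zero(mlx5_glue->destroy_cq(cq));
 */
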
/* Allocate a buffer on the stack and fill it from a printf()-style format. */
#define MKSTR(name, ...) \
	int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
	char name[mkstr_size_##name + 1]; \
	\
	memset(name, 0, mkstr_size_##name + 1); \
	snprintf(name, sizeof(name), "" __VA_ARGS__)

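/*
 * MKSTR() declares the named buffer in the current scope as a VLA sized by a
 * first snprintf() pass, then formats into it. Usage sketch (illustrative;
 * the path and its arguments are the caller's own):
 *
 *	MKSTR(path, "%s/device/uevent", ibdev_path);
 *
 *	fd = open(path, O_RDONLY);
 */
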
enum {
	PCI_VENDOR_ID_MELLANOX = 0x15b3,
};

enum {
	PCI_DEVICE_ID_MELLANOX_CONNECTX4 = 0x1013,
	PCI_DEVICE_ID_MELLANOX_CONNECTX4VF = 0x1014,
	PCI_DEVICE_ID_MELLANOX_CONNECTX4LX = 0x1015,
	PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5 = 0x1017,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5VF = 0x1018,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5BF = 0xa2d2,
	PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF = 0xa2d3,
	PCI_DEVICE_ID_MELLANOX_CONNECTX6 = 0x101b,
	PCI_DEVICE_ID_MELLANOX_CONNECTX6VF = 0x101c,
	PCI_DEVICE_ID_MELLANOX_CONNECTX6DX = 0x101d,
	PCI_DEVICE_ID_MELLANOX_CONNECTXVF = 0x101e,
	PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF = 0xa2d6,
	PCI_DEVICE_ID_MELLANOX_CONNECTX6LX = 0x101f,
	PCI_DEVICE_ID_MELLANOX_CONNECTX7 = 0x1021,
	PCI_DEVICE_ID_MELLANOX_CONNECTX7BF = 0xa2dc,
};

/* Maximum number of simultaneous unicast MAC addresses. */
#define MLX5_MAX_UC_MAC_ADDRESSES 128
/* Maximum number of simultaneous multicast MAC addresses. */
#define MLX5_MAX_MC_MAC_ADDRESSES 128
/* Maximum number of simultaneous MAC addresses. */
#define MLX5_MAX_MAC_ADDRESSES \
	(MLX5_MAX_UC_MAC_ADDRESSES + MLX5_MAX_MC_MAC_ADDRESSES)

/* Recognized InfiniBand device physical port name types. */
enum mlx5_nl_phys_port_name_type {
	MLX5_PHYS_PORT_NAME_TYPE_NOTSET = 0, /* Not set. */
	MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* kernel ver < 5.0 */
	MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */
	MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */
	MLX5_PHYS_PORT_NAME_TYPE_PFHPF, /* pf0, kernel ver >= 5.7, HPF rep */
	MLX5_PHYS_PORT_NAME_TYPE_PFSF, /* pf0sf0, kernel ver >= 5.0 */
	MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */
};

/** Switch information returned by mlx5_nl_switch_info(). */
struct mlx5_switch_info {
	uint32_t master:1; /**< Master device. */
	uint32_t representor:1; /**< Representor device. */
	enum mlx5_nl_phys_port_name_type name_type; /**< Port name type. */
	int32_t ctrl_num; /**< Controller number (valid for c#pf#vf# format). */
	int32_t pf_num; /**< PF number (valid for pf#vf# format only). */
	int32_t port_name; /**< Representor port name. */
	uint64_t switch_id; /**< Switch identifier. */
};

/* CQE status. */
enum mlx5_cqe_status {
	MLX5_CQE_STATUS_SW_OWN = -1,
	MLX5_CQE_STATUS_HW_OWN = -2,
	MLX5_CQE_STATUS_ERR = -3,
};

/**
 * Check whether CQE is valid.
 *
 * @param cqe
 *   Pointer to CQE.
 * @param cqes_n
 *   Size of completion queue.
 * @param ci
 *   Consumer index.
 *
 * @return
 *   The CQE status.
 */
static __rte_always_inline enum mlx5_cqe_status
check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t cqes_n,
	  const uint16_t ci)
{
	const uint16_t idx = ci & cqes_n;
	const uint8_t op_own = cqe->op_own;
	const uint8_t op_owner = MLX5_CQE_OWNER(op_own);
	const uint8_t op_code = MLX5_CQE_OPCODE(op_own);

	if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
		return MLX5_CQE_STATUS_HW_OWN;
	rte_io_rmb();
	if (unlikely(op_code == MLX5_CQE_RESP_ERR ||
		     op_code == MLX5_CQE_REQ_ERR))
		return MLX5_CQE_STATUS_ERR;
	return MLX5_CQE_STATUS_SW_OWN;
}

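/*
 * Polling sketch for check_cqe() (illustrative; cq, cq_size and cq_ci are the
 * caller's own CQE ring, its size in entries and the consumer index):
 *
 *	volatile struct mlx5_cqe *cqe = &cq[cq_ci & (cq_size - 1)];
 *
 *	switch (check_cqe(cqe, cq_size, cq_ci)) {
 *	case MLX5_CQE_STATUS_SW_OWN:
 *		... process the completion, then advance cq_ci ...
 *		break;
 *	case MLX5_CQE_STATUS_ERR:
 *		... handle the error CQE ...
 *		break;
 *	default:
 *		... MLX5_CQE_STATUS_HW_OWN, nothing new yet ...
 *		break;
 *	}
 */
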
/*
 * Get the PCI address <DBDF> string from an EAL device.
 *
 * @param[in] dev
 *	Pointer to the EAL device.
 * @param[out] addr
 *	The output address buffer string.
 * @param[in] size
 *	The output buffer size.
 * @return
 *   - 0 on success.
 *   - Negative value and rte_errno is set otherwise.
 */
int mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size);

/*
 * Get PCI address from sysfs of a PCI-related device.
 *
 * @param[in] dev_path
 *   The sysfs path. It should not point directly to the plain PCI device;
 *   the "/device/" node under this path is used to access the real device.
 * @param[out] pci_addr
 *   Parsed PCI address.
 *
 * @return
 *   - 0 on success.
 *   - Negative value and rte_errno is set otherwise.
 */
__rte_internal
int mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr);

/*
 * Get kernel network interface name from sysfs IB device path.
 *
 * @param[in] ibdev_path
 *   The sysfs path to IB device.
 * @param[out] ifname
 *   Interface name output of size IF_NAMESIZE.
 *
 * @return
 *   - 0 on success.
 *   - Negative value and rte_errno is set otherwise.
 */
__rte_internal
int mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname);

__rte_internal
int mlx5_auxiliary_get_child_name(const char *dev, const char *node,
				  char *child, size_t size);

enum mlx5_class {
	MLX5_CLASS_INVALID,
	MLX5_CLASS_ETH = RTE_BIT64(0),
	MLX5_CLASS_VDPA = RTE_BIT64(1),
	MLX5_CLASS_REGEX = RTE_BIT64(2),
	MLX5_CLASS_COMPRESS = RTE_BIT64(3),
	MLX5_CLASS_CRYPTO = RTE_BIT64(4),
};

#define MLX5_DBR_SIZE RTE_CACHE_LINE_SIZE

/* DevX creation object. */
struct mlx5_devx_obj {
	void *obj; /* The DV object. */
	int id; /* The object ID. */
};

/* UMR memory buffer used to define one entry in an indirect mkey. */
struct mlx5_klm {
	uint32_t byte_count;
	uint32_t mkey;
	uint64_t address;
};

/* UAR data used for doorbell register access in the datapath. */
struct mlx5_uar_data {
	uint64_t *db;
	/* The doorbell's virtual address mapped to the relevant HW UAR space. */
#ifndef RTE_ARCH_64
	rte_spinlock_t *sl_p;
	/* Pointer to UAR access lock required for 32-bit implementations. */
#endif /* RTE_ARCH_64 */
};

/* DevX UAR control structure. */
struct mlx5_uar {
	struct mlx5_uar_data bf_db; /* UAR data for BlueFlame register. */
	struct mlx5_uar_data cq_db; /* UAR data for CQ arm doorbell register. */
	void *obj; /* DevX UAR object. */
	bool dbnc; /* Doorbell mapped to non-cached region. */
#ifndef RTE_ARCH_64
	rte_spinlock_t bf_sl;
	rte_spinlock_t cq_sl;
	/* UAR access locks required for 32-bit implementations. */
#endif /* RTE_ARCH_64 */
};

/**
 * Ring a doorbell and flush the update if requested.
 *
 * @param uar
 *   Pointer to UAR data structure.
 * @param val
 *   Value to write, in big-endian format.
 * @param index
 *   Index of doorbell record.
 * @param db_rec
 *   Address of doorbell record.
 * @param flush
 *   Decide whether to flush the doorbell write using a memory barrier.
 */
static __rte_always_inline void
mlx5_doorbell_ring(struct mlx5_uar_data *uar, uint64_t val, uint32_t index,
		   volatile uint32_t *db_rec, bool flush)
{
	rte_io_wmb();
	*db_rec = rte_cpu_to_be_32(index);
	/* Ensure ordering between DB record actual update and UAR access. */
	rte_wmb();
#ifdef RTE_ARCH_64
	*uar->db = val;
#else /* !RTE_ARCH_64 */
	rte_spinlock_lock(uar->sl_p);
	*(volatile uint32_t *)uar->db = val;
	rte_io_wmb();
	*((volatile uint32_t *)uar->db + 1) = val >> 32;
	rte_spinlock_unlock(uar->sl_p);
#endif
	if (flush)
		rte_wmb();
}

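/*
 * Usage sketch (illustrative; last_wqe, sq_pi and sq_db_rec are the caller's
 * own last posted WQE, producer index and doorbell record): after posting
 * WQEs, ring the send doorbell with the first 8 bytes of the last WQE and
 * request a flush unless the UAR is mapped non-cached.
 *
 *	mlx5_doorbell_ring(&uar->bf_db, *(volatile uint64_t *)last_wqe,
 *			   sq_pi, sq_db_rec, !uar->dbnc);
 */
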
/**
 * Get the doorbell register mapping type.
 *
 * @param uar_mmap_offset
 *   Mmap offset of Verbs/DevX UAR.
 * @param page_size
 *   System page size.
 *
 * @return
 *   1 for non-cached, 0 otherwise.
 */
static inline uint16_t
mlx5_db_map_type_get(off_t uar_mmap_offset, size_t page_size)
{
	off_t cmd = uar_mmap_offset / page_size;

	cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
	cmd &= MLX5_UAR_MMAP_CMD_MASK;
	if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
		return 1;
	return 0;
}

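/*
 * Parse an IB physical port name into a struct mlx5_switch_info name type and
 * numbers. Illustrative examples of recognized formats (see
 * enum mlx5_nl_phys_port_name_type above):
 *
 *	"p0"     -> MLX5_PHYS_PORT_NAME_TYPE_UPLINK, port_name = 0
 *	"pf0vf1" -> MLX5_PHYS_PORT_NAME_TYPE_PFVF, pf_num = 0, port_name = 1
 *	"pf0sf2" -> MLX5_PHYS_PORT_NAME_TYPE_PFSF, pf_num = 0, port_name = 2
 */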
__rte_internal
void mlx5_translate_port_name(const char *port_name_in,
			      struct mlx5_switch_info *port_info_out);
void mlx5_glue_constructor(void);
extern uint8_t haswell_broadwell_cpu;

__rte_internal
void mlx5_common_init(void);

/*
 * Common Driver Interface
 *
 * The ConnectX common driver supports multiple classes: net, vDPA, regex,
 * crypto and compress devices. This layer enables creating several such
 * classes on a single device by allowing multiple class-specific device
 * drivers to attach to the common driver.
 *
 * ------------  -------------  --------------  -----------------  ------------
 * | mlx5 net |  | mlx5 vdpa |  | mlx5 regex |  | mlx5 compress |  | mlx5 ... |
 * |  driver  |  |  driver   |  |   driver   |  |     driver    |  |  drivers |
 * ------------  -------------  --------------  -----------------  ------------
 *                               ||
 *                        -----------------
 *                        |     mlx5      |
 *                        | common driver |
 *                        -----------------
 *                          |          |
 *                 -----------        -----------------
 *                 |   mlx5  |        |   mlx5        |
 *                 | pci dev |        | auxiliary dev |
 *                 -----------        -----------------
 *
 * - The mlx5 PCI bus driver binds to mlx5 PCI devices defined by the PCI ID
 *   table of all related devices.
 * - Each mlx5 class driver, such as net, vDPA or regex, defines its specific
 *   PCI ID table, and the mlx5 bus driver probes the matching class drivers.
 * - The mlx5 common driver is the central place that validates the supported
 *   class combinations.
 * - The mlx5 common driver hides bus differences by resolving the device
 *   address from devargs, locating the target RDMA device and probing it.
 */

/*
 * Device configuration structure.
 *
 * Merged configuration from:
 *
 *  - Device capabilities,
 *  - User device parameters that disable features.
 */
struct mlx5_common_dev_config {
	struct mlx5_hca_attr hca_attr; /* HCA attributes. */
	int dbnc; /* Skip doorbell register write barrier. */
	unsigned int devx:1; /* Whether the DevX interface is available or not. */
	unsigned int sys_mem_en:1; /* The default memory allocator. */
	unsigned int mr_mempool_reg_en:1;
	/* Allow/prevent implicit mempool memory registration. */
	unsigned int mr_ext_memseg_en:1;
	/* Whether memseg should be extended for MR creation. */
};

struct mlx5_common_device {
	struct rte_device *dev;
	TAILQ_ENTRY(mlx5_common_device) next;
	uint32_t classes_loaded;
	void *ctx; /* Verbs/DV/DevX context. */
	void *pd; /* Protection Domain. */
	uint32_t pdn; /* Protection Domain Number. */
	struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */
	struct mlx5_common_dev_config config; /* Device configuration. */
};

/**
 * Initialization function for the driver called during device probing.
 */
typedef int (mlx5_class_driver_probe_t)(struct mlx5_common_device *dev);

/**
 * Uninitialization function for the driver called during hot-unplugging.
 */
typedef int (mlx5_class_driver_remove_t)(struct mlx5_common_device *dev);

/** Device already probed can be probed again to check for new ports. */
#define MLX5_DRV_PROBE_AGAIN 0x0004

/**
 * A structure describing an mlx5 common class driver.
 */
struct mlx5_class_driver {
	TAILQ_ENTRY(mlx5_class_driver) next;
	enum mlx5_class drv_class;            /**< Class of this driver. */
	const char *name;                     /**< Driver name. */
	mlx5_class_driver_probe_t *probe;     /**< Device probe function. */
	mlx5_class_driver_remove_t *remove;   /**< Device remove function. */
	const struct rte_pci_id *id_table;    /**< ID table, NULL terminated. */
	uint32_t probe_again:1;
	/**< Device already probed can be probed again to check for new ports. */
	uint32_t intr_lsc:1; /**< Supports link state interrupt. */
	uint32_t intr_rmv:1; /**< Supports device remove interrupt. */
};

/**
 * Register an mlx5 device driver.
 *
 * @param driver
 *   A pointer to a mlx5_class_driver structure describing the driver
 *   to be registered.
 */
__rte_internal
void
mlx5_class_driver_register(struct mlx5_class_driver *driver);

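/*
 * A class driver typically registers itself from a constructor. A minimal
 * sketch (illustrative; the ID table and the probe/remove callbacks are the
 * class driver's own):
 *
 *	static struct mlx5_class_driver mlx5_net_driver = {
 *		.drv_class = MLX5_CLASS_ETH,
 *		.name = "mlx5_eth",
 *		.id_table = mlx5_pci_id_map,
 *		.probe = mlx5_net_probe,
 *		.remove = mlx5_net_remove,
 *		.probe_again = 1,
 *		.intr_lsc = 1,
 *		.intr_rmv = 1,
 *	};
 *
 *	RTE_INIT(mlx5_net_driver_init)
 *	{
 *		mlx5_common_init();
 *		mlx5_class_driver_register(&mlx5_net_driver);
 *	}
 */
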
/**
 * Test whether the device is a PCI bus device.
 *
 * @param dev
 *   Pointer to device.
 *
 * @return
 *   - True if the device is on a PCI bus.
 *   - False otherwise.
 */
__rte_internal
bool
mlx5_dev_is_pci(const struct rte_device *dev);

__rte_internal
int
mlx5_dev_mempool_subscribe(struct mlx5_common_device *cdev);

__rte_internal
void
mlx5_dev_mempool_unregister(struct mlx5_common_device *cdev,
			    struct rte_mempool *mp);

__rte_internal
int
mlx5_devx_uar_prepare(struct mlx5_common_device *cdev, struct mlx5_uar *uar);

__rte_internal
void
mlx5_devx_uar_release(struct mlx5_uar *uar);

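/*
 * UAR lifecycle sketch (illustrative): a class driver prepares the UAR once
 * per device, uses uar.bf_db/uar.cq_db with mlx5_doorbell_ring() in the
 * datapath, and releases it when the device is closed.
 *
 *	struct mlx5_uar uar;
 *
 *	if (mlx5_devx_uar_prepare(cdev, &uar) != 0)
 *		... handle the error ...
 *	...
 *	mlx5_devx_uar_release(&uar);
 */
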
/* mlx5_common_os.c */

int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes);
int mlx5_os_pd_create(struct mlx5_common_device *cdev);

/* mlx5 PMD wrapped MR struct. */
struct mlx5_pmd_wrapped_mr {
	uint32_t	     lkey;
	void		     *addr;
	size_t		     len;
	void		     *obj; /* Verbs MR object or DevX umem object. */
	void		     *imkey; /* DevX indirect mkey object. */
};

__rte_internal
int
mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr,
			    size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr);

__rte_internal
void
mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr);

#endif /* RTE_PMD_MLX5_COMMON_H_ */