/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"
#include "spdk/queue.h"
#include "spdk/barrier.h"
#include "spdk/likely.h"

#include <infiniband/mlx5dv.h>
#include "spdk_internal/mlx5.h"

/**
 * Low level CQ representation, suitable for direct polling
 */
struct mlx5_hw_cq {
	uint64_t cq_addr;
	uint32_t cqe_cnt;
	uint32_t cqe_size;
	uint32_t ci;
	uint32_t cq_num;
};

/**
 * Low level QP representation, suitable for WQE submission.
 * Only the send queue is supported; the receive queue is omitted since it is not used right now
 */
struct mlx5_hw_qp {
	uint64_t dbr_addr;
	uint64_t sq_addr;
	uint64_t sq_bf_addr;
	uint32_t sq_wqe_cnt;
	uint16_t sq_pi;
	uint32_t sq_tx_db_nc;
	uint32_t qp_num;
};

/* qp_num is 24 bits. The 2D lookup table uses the upper and lower 12 bits to find a QP by qp_num */
#define SPDK_MLX5_QP_NUM_UPPER_SHIFT (12)
#define SPDK_MLX5_QP_NUM_LOWER_MASK ((1 << SPDK_MLX5_QP_NUM_UPPER_SHIFT) - 1)
#define SPDK_MLX5_QP_NUM_LUT_SIZE (1 << SPDK_MLX5_QP_NUM_UPPER_SHIFT)
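
/*
 * Worked example (illustrative): qp_num 0x123456 is split as
 *   upper = 0x123456 >> SPDK_MLX5_QP_NUM_UPPER_SHIFT = 0x123 -> index into spdk_mlx5_cq::qps[]
 *   lower = 0x123456 & SPDK_MLX5_QP_NUM_LOWER_MASK   = 0x456 -> index into qps[upper].table[]
 */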

struct spdk_mlx5_cq {
	struct mlx5_hw_cq hw;
	struct {
		struct spdk_mlx5_qp **table;
		uint32_t count;
	} qps[SPDK_MLX5_QP_NUM_LUT_SIZE];
	struct ibv_cq *verbs_cq;
	uint32_t qps_count;
};

struct mlx5_qp_sq_completion {
	uint64_t wr_id;
	/* Number of unsignaled completions before this one. Used to track QP overflow */
	uint32_t completions;
};

struct spdk_mlx5_qp {
	struct mlx5_hw_qp hw;
	struct mlx5_qp_sq_completion *completions;
	/* Pointer to the last WQE control segment written to the SQ */
	struct mlx5_wqe_ctrl_seg *ctrl;
	struct spdk_mlx5_cq *cq;
	struct ibv_qp *verbs_qp;
	/* Number of WQEs submitted to HW which won't produce a CQE */
	uint16_t nonsignaled_outstanding;
	uint16_t max_send_sge;
	/* Number of WQEs available for submission */
	uint16_t tx_available;
	uint16_t last_pi;
	uint8_t sigmode;
};

enum {
	/* Default mode, use flags passed by the user */
	SPDK_MLX5_QP_SIG_NONE = 0,
	/* Enable completion for every control WQE segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_ALL = 1,
	/* Enable completion only for the last control WQE segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_LAST = 2,
};

/**
 * Completion and Event mode (SPDK_MLX5_WQE_CTRL_CE_*).
 * Maps the internal completion events configuration to PRM values.
 * The second index X in g_mlx5_ce_map[][X] is (fm_ce_se >> 2) & 0x3.
 */
static uint8_t g_mlx5_ce_map[3][4] = {
	/* SPDK_MLX5_QP_SIG_NONE */
	[0] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	},
	/* SPDK_MLX5_QP_SIG_ALL */
	[1] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	},
	/* SPDK_MLX5_QP_SIG_LAST */
	[2] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	}
};

struct mlx5_crypto_bsf_seg {
	uint8_t		size_type;
	uint8_t		enc_order;
	uint8_t		rsvd0;
	uint8_t		enc_standard;
	__be32		raw_data_size;
	uint8_t		crypto_block_size_pointer;
	uint8_t		rsvd1[7];
	uint8_t		xts_initial_tweak[16];
	__be32		dek_pointer;
	uint8_t		rsvd2[4];
	uint8_t		keytag[8];
	uint8_t		rsvd3[16];
};

struct mlx5_sig_bsf_inl {
	__be16 vld_refresh;
	__be16 dif_apptag;
	__be32 dif_reftag;
	uint8_t sig_type;
	uint8_t rp_inv_seed;
	uint8_t rsvd[3];
	uint8_t dif_inc_ref_guard_check;
	__be16 dif_app_bitmask_check;
};

struct mlx5_sig_bsf_seg {
	struct mlx5_sig_bsf_basic {
		uint8_t bsf_size_sbs;
		uint8_t check_byte_mask;
		union {
			uint8_t copy_byte_mask;
			uint8_t bs_selector;
			uint8_t rsvd_wflags;
		} wire;
		union {
			uint8_t bs_selector;
			uint8_t rsvd_mflags;
		} mem;
		__be32 raw_data_size;
		__be32 w_bfs_psv;
		__be32 m_bfs_psv;
	} basic;
	struct mlx5_sig_bsf_ext {
		__be32 t_init_gen_pro_size;
		__be32 rsvd_epi_size;
		__be32 w_tfs_psv;
		__be32 m_tfs_psv;
	} ext;
	struct mlx5_sig_bsf_inl w_inl;
	struct mlx5_sig_bsf_inl m_inl;
};

struct mlx5_wqe_set_psv_seg {
	__be32 psv_index;
	__be16 syndrome;
	uint8_t reserved[2];
	__be64 transient_signature;
};

static inline uint8_t
mlx5_qp_fm_ce_se_update(struct spdk_mlx5_qp *qp, uint8_t fm_ce_se)
{
	uint8_t ce = (fm_ce_se >> 2) & 0x3;

	assert((ce & (~0x3)) == 0);
	fm_ce_se &= ~SPDK_MLX5_WQE_CTRL_CE_MASK;
	fm_ce_se |= g_mlx5_ce_map[qp->sigmode][ce];

	return fm_ce_se;
}

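/*
 * A minimal usage sketch (illustrative, not part of this file): with
 * sigmode == SPDK_MLX5_QP_SIG_ALL, a WQE built with fm_ce_se == 0 (no
 * completion requested) has its CE bits rewritten to request a CQE:
 *
 *   uint8_t fm_ce_se = mlx5_qp_fm_ce_se_update(qp, 0);
 *
 *   assert((fm_ce_se & SPDK_MLX5_WQE_CTRL_CE_MASK) == SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
 */
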
static inline void *
mlx5_qp_get_wqe_bb(struct mlx5_hw_qp *hw_qp)
{
	return (void *)hw_qp->sq_addr + (hw_qp->sq_pi & (hw_qp->sq_wqe_cnt - 1)) * MLX5_SEND_WQE_BB;
}

static inline void *
mlx5_qp_get_next_wqebb(struct mlx5_hw_qp *hw_qp, uint32_t *to_end, void *cur)
{
	*to_end -= MLX5_SEND_WQE_BB;
	if (*to_end == 0) { /* wqe buffer wraps around */
		*to_end = hw_qp->sq_wqe_cnt * MLX5_SEND_WQE_BB;
		return (void *)(uintptr_t)hw_qp->sq_addr;
	}

	return ((char *)cur) + MLX5_SEND_WQE_BB;
}

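/*
 * Sketch of the intended iteration pattern (an assumption based on the
 * helpers above, not code from this file): to_end starts as the number
 * of bytes from the current producer index to the end of the SQ buffer,
 * and mlx5_qp_get_next_wqebb() wraps to the start of the ring when it
 * reaches zero:
 *
 *   struct mlx5_hw_qp *hw = &qp->hw;
 *   uint32_t to_end = (hw->sq_wqe_cnt - (hw->sq_pi & (hw->sq_wqe_cnt - 1))) * MLX5_SEND_WQE_BB;
 *   void *bb = mlx5_qp_get_wqe_bb(hw);
 *
 *   // write the first WQEBB at bb, then advance to the next one
 *   bb = mlx5_qp_get_next_wqebb(hw, &to_end, bb);
 */
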
static inline void
mlx5_qp_set_comp(struct spdk_mlx5_qp *qp, uint16_t pi,
		 uint64_t wr_id, uint32_t fm_ce_se, uint32_t n_bb)
{
	qp->completions[pi].wr_id = wr_id;
	if ((fm_ce_se & SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) != SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) {
		/* non-signaled WQE, accumulate it in outstanding */
		qp->nonsignaled_outstanding += n_bb;
		qp->completions[pi].completions = 0;
		return;
	}

	/* Store number of previous nonsignaled WQEs */
	qp->completions[pi].completions = qp->nonsignaled_outstanding + n_bb;
	qp->nonsignaled_outstanding = 0;
}

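/*
 * Worked example (illustrative): after three non-signaled 1-BB WQEs,
 * nonsignaled_outstanding == 3. A fourth, signaled 1-BB WQE then stores
 * completions[pi].completions = 3 + 1 = 4, so its single CQE accounts
 * for all four WQEBBs when tx_available is replenished.
 */
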
#if defined(__aarch64__)
#define spdk_memory_bus_store_fence()  asm volatile("dmb oshst" ::: "memory")
#elif defined(__i386__) || defined(__x86_64__)
#define spdk_memory_bus_store_fence() spdk_wmb()
#endif

static inline void
mlx5_update_tx_db(struct spdk_mlx5_qp *qp)
{
	/*
	 * Use a CPU barrier to prevent code reordering
	 */
	spdk_smp_wmb();

	((uint32_t *)qp->hw.dbr_addr)[MLX5_SND_DBR] = htobe32(qp->hw.sq_pi);
}

static inline void
mlx5_flush_tx_db(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl)
{
	*(uint64_t *)(qp->hw.sq_bf_addr) = *(uint64_t *)ctrl;
}

static inline void
mlx5_ring_tx_db(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl)
{
	/* 8.9.3.1  Posting a Work Request to Work Queue
	 * 1. Write the WQE to the WQE buffer sequentially after the
	 *    previously-posted WQE (on WQEBB granularity)
	 *
	 * 2. Update the Doorbell Record associated with that queue by writing
	 *    the sq_wqebb_counter or wqe_counter for the SQ and RQ respectively
	 */
	mlx5_update_tx_db(qp);

	/* Make sure that the doorbell record is written before ringing the doorbell */
	spdk_memory_bus_store_fence();

	/* 3. For a send request, ring the doorbell by writing to the Doorbell
	 *    Register field in the UAR associated with that queue
	 */
	mlx5_flush_tx_db(qp, ctrl);

	/* If the UAR is mapped as WC (write combining) we need another fence to
	 * force the write out. Otherwise it may take a long time.
	 * On BF1/BF2 the UAR is mapped as NC (non-combining) and the fence is
	 * not needed here.
	 */
#if !defined(__aarch64__)
	if (!qp->hw.sq_tx_db_nc) {
		spdk_memory_bus_store_fence();
	}
#endif
}

#ifdef DEBUG
void mlx5_qp_dump_wqe(struct spdk_mlx5_qp *qp, int n_wqe_bb);
#else
#define mlx5_qp_dump_wqe(...) do { } while (0)
#endif

static inline void
mlx5_qp_wqe_submit(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, uint16_t n_wqe_bb,
		   uint16_t ctrlr_pi)
{
	mlx5_qp_dump_wqe(qp, n_wqe_bb);

	/* Delay ringing the doorbell */
	qp->hw.sq_pi += n_wqe_bb;
	qp->last_pi = ctrlr_pi;
	qp->ctrl = ctrl;
}

static inline void
mlx5_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *ctrl, uint16_t pi,
		  uint8_t opcode, uint8_t opmod, uint32_t qp_num,
		  uint8_t fm_ce_se, uint8_t ds,
		  uint8_t signature, uint32_t imm)
{
	/* Zero the third dword of the control segment (signature, reserved
	 * bytes, fm_ce_se) before mlx5dv_set_ctrl_seg() fills the segment */
	*(uint32_t *)((void *)ctrl + 8) = 0;
	mlx5dv_set_ctrl_seg(ctrl, pi, opcode, opmod, qp_num,
			    fm_ce_se, ds, signature, imm);
}

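/*
 * End-to-end posting sketch (illustrative, under the assumptions that a
 * NOP WQE occupies a single WQEBB and consists of the control segment
 * alone, i.e. ds == 1; wr_id is the caller's completion cookie):
 *
 *   uint16_t pi = qp->hw.sq_pi & (qp->hw.sq_wqe_cnt - 1);
 *   struct mlx5_wqe_ctrl_seg *ctrl = mlx5_qp_get_wqe_bb(&qp->hw);
 *   uint8_t fm_ce_se = mlx5_qp_fm_ce_se_update(qp, SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
 *   uint64_t wr_id = 0;
 *
 *   mlx5_set_ctrl_seg(ctrl, qp->hw.sq_pi, MLX5_OPCODE_NOP, 0, qp->hw.qp_num, fm_ce_se, 1, 0, 0);
 *   mlx5_qp_set_comp(qp, pi, wr_id, fm_ce_se, 1);
 *   mlx5_qp_wqe_submit(qp, ctrl, 1, pi);
 *   mlx5_ring_tx_db(qp, ctrl);
 */
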
static inline struct spdk_mlx5_qp *
mlx5_cq_find_qp(struct spdk_mlx5_cq *cq, uint32_t qp_num)
{
	uint32_t qpn_upper = qp_num >> SPDK_MLX5_QP_NUM_UPPER_SHIFT;
	uint32_t qpn_lower = qp_num & SPDK_MLX5_QP_NUM_LOWER_MASK;

	if (spdk_unlikely(!cq->qps[qpn_upper].count)) {
		return NULL;
	}
	return cq->qps[qpn_upper].table[qpn_lower];
}

static inline int
mlx5_get_pd_id(struct ibv_pd *pd, uint32_t *pd_id)
{
	struct mlx5dv_pd pd_info;
	struct mlx5dv_obj obj;
	int rc;

	if (!pd) {
		return -EINVAL;
	}
	obj.pd.in = pd;
	obj.pd.out = &pd_info;
	rc = mlx5dv_init_obj(&obj, MLX5DV_OBJ_PD);
	if (rc) {
		return rc;
	}
	*pd_id = pd_info.pdn;

	return 0;
}
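
/*
 * Usage sketch (illustrative): translate a protection domain into the
 * PD number expected by PRM objects, e.g. when creating DEKs or mkeys:
 *
 *   uint32_t pd_id;
 *   int rc = mlx5_get_pd_id(pd, &pd_id);
 *   if (rc) {
 *           // pd was NULL or mlx5dv_init_obj() failed
 *   }
 */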