xref: /spdk/lib/mlx5/mlx5_priv.h (revision e3da0e84f767081099c2ef6beea972a6793eee7c)
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"
#include "spdk/queue.h"
#include "spdk/barrier.h"
#include "spdk/likely.h"

#include <infiniband/mlx5dv.h>
#include "spdk_internal/mlx5.h"

/**
 * Low level CQ representation, suitable for direct polling
 */
struct mlx5_hw_cq {
	uint64_t cq_addr;
	uint32_t cqe_cnt;
	uint32_t cqe_size;
	uint32_t ci;
	uint32_t cq_num;
};

/**
 * Low level QP representation, suitable for WQE submission.
 * Only the send queue is supported; the receive queue is omitted since it is not used right now
 */
struct mlx5_hw_qp {
	uint64_t dbr_addr;
	uint64_t sq_addr;
	uint64_t sq_bf_addr;
	uint32_t sq_wqe_cnt;
	uint16_t sq_pi;
	uint32_t sq_tx_db_nc;
	uint32_t qp_num;
};

/* qp_num is 24 bits. The 2D lookup table uses the upper and lower 12 bits to find a QP by qp_num */
#define SPDK_MLX5_QP_NUM_UPPER_SHIFT (12)
#define SPDK_MLX5_QP_NUM_LOWER_MASK ((1 << SPDK_MLX5_QP_NUM_UPPER_SHIFT) - 1)
#define SPDK_MLX5_QP_NUM_LUT_SIZE (1 << 12)

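/*
 * Worked example (illustrative, not from the original header): for qp_num
 * 0x00ABC123 the upper index is 0x00ABC123 >> 12 = 0xABC and the lower index
 * is 0x00ABC123 & 0xFFF = 0x123, so the QP is reachable as
 * cq->qps[0xABC].table[0x123]; see mlx5_cq_find_qp() below.
 */
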
struct spdk_mlx5_cq {
	struct mlx5_hw_cq hw;
	struct {
		struct spdk_mlx5_qp **table;
		uint32_t count;
	} qps[SPDK_MLX5_QP_NUM_LUT_SIZE];
	struct ibv_cq *verbs_cq;
	uint32_t qps_count;
};

struct mlx5_qp_sq_completion {
	uint64_t wr_id;
	/* Number of unsignaled completions before this one. Used to track qp overflow */
	uint32_t completions;
};

struct spdk_mlx5_qp {
	struct mlx5_hw_qp hw;
	struct mlx5_qp_sq_completion *completions;
	/* Pointer to the last WQE control segment written to the SQ */
	struct mlx5_wqe_ctrl_seg *ctrl;
	struct spdk_mlx5_cq *cq;
	struct ibv_qp *verbs_qp;
	/* Number of WQEs submitted to HW which won't produce a CQE */
	uint16_t nonsignaled_outstanding;
	uint16_t max_send_sge;
	/* Number of WQEs available for submission */
	uint16_t tx_available;
	uint16_t last_pi;
	uint8_t sigmode;
};

enum {
	/* Default mode, use flags passed by the user */
	SPDK_MLX5_QP_SIG_NONE = 0,
	/* Enable completion for every control WQE segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_ALL = 1,
	/* Enable completion only for the last control WQE segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_LAST = 2,
};

/**
 * Completion and Event mode (SPDK_MLX5_WQE_CTRL_CE_*).
 * Maps the internal representation of the completion events configuration to PRM values.
 * g_mlx5_ce_map[sigmode][X], where X is (fm_ce_se >> 2) & 0x3
 */
static uint8_t g_mlx5_ce_map[3][4] = {
	/* SPDK_MLX5_QP_SIG_NONE */
	[0] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	},
	/* SPDK_MLX5_QP_SIG_ALL */
	[1] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	},
	/* SPDK_MLX5_QP_SIG_LAST */
	[2] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	}
};

static inline uint8_t
mlx5_qp_fm_ce_se_update(struct spdk_mlx5_qp *qp, uint8_t fm_ce_se)
{
	uint8_t ce = (fm_ce_se >> 2) & 0x3;

	assert((ce & (~0x3)) == 0);
	fm_ce_se &= ~SPDK_MLX5_WQE_CTRL_CE_MASK;
	fm_ce_se |= g_mlx5_ce_map[qp->sigmode][ce];

	return fm_ce_se;
}

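/*
 * Worked example (illustrative): with qp->sigmode == SPDK_MLX5_QP_SIG_ALL and
 * caller flags whose CE bits are 0 (no completion requested), the CE bits are
 * rewritten to SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE per g_mlx5_ce_map[1][0], so the
 * WQE generates a CQE regardless of what the caller asked for.
 */
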
static inline void *
mlx5_qp_get_wqe_bb(struct mlx5_hw_qp *hw_qp)
{
	return (void *)hw_qp->sq_addr + (hw_qp->sq_pi & (hw_qp->sq_wqe_cnt - 1)) * MLX5_SEND_WQE_BB;
}

static inline void *
mlx5_qp_get_next_wqebb(struct mlx5_hw_qp *hw_qp, uint32_t *to_end, void *cur)
{
	*to_end -= MLX5_SEND_WQE_BB;
	if (*to_end == 0) { /* WQE buffer wrap around */
		*to_end = hw_qp->sq_wqe_cnt * MLX5_SEND_WQE_BB;
		return (void *)(uintptr_t)hw_qp->sq_addr;
	}

	return ((char *)cur) + MLX5_SEND_WQE_BB;
}

static inline void
mlx5_qp_set_comp(struct spdk_mlx5_qp *qp, uint16_t pi,
		 uint64_t wr_id, uint32_t fm_ce_se, uint32_t n_bb)
{
	qp->completions[pi].wr_id = wr_id;
	if ((fm_ce_se & SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) != SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) {
		/* non-signaled WQE, accumulate it in outstanding */
		qp->nonsignaled_outstanding += n_bb;
		qp->completions[pi].completions = 0;
		return;
	}

	/* Store number of previous nonsignaled WQEs */
	qp->completions[pi].completions = qp->nonsignaled_outstanding + n_bb;
	qp->nonsignaled_outstanding = 0;
}

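/*
 * Minimal sketch of the consuming side (hypothetical helper, not part of this
 * header): when a signaled CQE for this QP is polled, the slot written by
 * mlx5_qp_set_comp() also covers the unsignaled WQEBBs accumulated before it,
 * so the whole batch can be returned to tx_available in one step. Indexing by
 * the CQE wqe_counter masked with sq_wqe_cnt - 1 is an assumption here.
 */
static inline uint64_t
mlx5_qp_sq_consume_comp_sketch(struct spdk_mlx5_qp *qp, uint16_t wqe_counter)
{
	uint16_t idx = wqe_counter & (qp->hw.sq_wqe_cnt - 1);

	/* Replenish the signaled WQE plus all unsignaled WQEBBs before it */
	qp->tx_available += qp->completions[idx].completions;

	return qp->completions[idx].wr_id;
}
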
#if defined(__aarch64__)
#define spdk_memory_bus_store_fence()  asm volatile("dmb oshst" ::: "memory")
#elif defined(__i386__) || defined(__x86_64__)
#define spdk_memory_bus_store_fence() spdk_wmb()
#endif

static inline void
mlx5_update_tx_db(struct spdk_mlx5_qp *qp)
{
	/*
	 * Use a CPU barrier to prevent code reordering
	 */
	spdk_smp_wmb();

	((uint32_t *)qp->hw.dbr_addr)[MLX5_SND_DBR] = htobe32(qp->hw.sq_pi);
}

static inline void
mlx5_flush_tx_db(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl)
{
	*(uint64_t *)(qp->hw.sq_bf_addr) = *(uint64_t *)ctrl;
}

static inline void
mlx5_ring_tx_db(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl)
{
	/* 8.9.3.1  Posting a Work Request to Work Queue
	 * 1. Write WQE to the WQE buffer sequentially to previously-posted
	 *    WQE (on WQEBB granularity)
	 *
	 * 2. Update Doorbell Record associated with that queue by writing
	 *    the sq_wqebb_counter or wqe_counter for send and RQ respectively
	 */
	mlx5_update_tx_db(qp);

	/* Make sure that doorbell record is written before ringing the doorbell */
	spdk_memory_bus_store_fence();

	/* 3. For send request ring DoorBell by writing to the Doorbell
	 *    Register field in the UAR associated with that queue
	 */
	mlx5_flush_tx_db(qp, ctrl);

	/* If the UAR is mapped as WC (write-combined) we need another fence to
	 * force the write out. Otherwise it may take a long time.
	 * On BF2/1 the UAR is mapped as NC (non-cached) and the fence is not
	 * needed here.
	 */
#if !defined(__aarch64__)
	if (!qp->hw.sq_tx_db_nc) {
		spdk_memory_bus_store_fence();
	}
#endif
}

#ifdef DEBUG
void mlx5_qp_dump_wqe(struct spdk_mlx5_qp *qp, int n_wqe_bb);
#else
#define mlx5_qp_dump_wqe(...) do { } while (0)
#endif

static inline void
mlx5_qp_wqe_submit(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, uint16_t n_wqe_bb,
		   uint16_t ctrlr_pi)
{
	mlx5_qp_dump_wqe(qp, n_wqe_bb);

	/* Delay ringing the doorbell */
	qp->hw.sq_pi += n_wqe_bb;
	qp->last_pi = ctrlr_pi;
	qp->ctrl = ctrl;
}

static inline void
mlx5_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *ctrl, uint16_t pi,
		  uint8_t opcode, uint8_t opmod, uint32_t qp_num,
		  uint8_t fm_ce_se, uint8_t ds,
		  uint8_t signature, uint32_t imm)
{
	/* Clear the third dword (signature, reserved bytes, fm_ce_se) before it is filled in */
	*(uint32_t *)((void *)ctrl + 8) = 0;
	mlx5dv_set_ctrl_seg(ctrl, pi, opcode, opmod, qp_num,
			    fm_ce_se, ds, signature, imm);
}

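/*
 * End-to-end sketch (illustrative only): posting a single NOP WQE with the
 * helpers above. The helper name mlx5_qp_post_nop_sketch and treating a NOP as
 * one WQEBB with ds == 1 are assumptions for illustration; MLX5_OPCODE_NOP
 * comes from mlx5dv.h. A real caller may batch several WQEs and ring the
 * doorbell only once at the end.
 */
static inline int
mlx5_qp_post_nop_sketch(struct spdk_mlx5_qp *qp, uint64_t wr_id, uint8_t fm_ce_se)
{
	struct mlx5_wqe_ctrl_seg *ctrl;
	uint16_t pi = qp->hw.sq_pi & (qp->hw.sq_wqe_cnt - 1);
	uint16_t n_bb = 1; /* a bare NOP occupies a single WQEBB */

	if (spdk_unlikely(qp->tx_available < n_bb)) {
		return -ENOMEM;
	}

	ctrl = (struct mlx5_wqe_ctrl_seg *)mlx5_qp_get_wqe_bb(&qp->hw);
	/* Translate the caller's completion flags according to qp->sigmode */
	fm_ce_se = mlx5_qp_fm_ce_se_update(qp, fm_ce_se);
	/* ds == 1: the control segment is the only 16-byte segment of this WQE */
	mlx5_set_ctrl_seg(ctrl, qp->hw.sq_pi, MLX5_OPCODE_NOP, 0, qp->hw.qp_num,
			  fm_ce_se, 1, 0, 0);
	mlx5_qp_set_comp(qp, pi, wr_id, fm_ce_se, n_bb);
	mlx5_qp_wqe_submit(qp, ctrl, n_bb, pi);
	qp->tx_available -= n_bb;

	/* For this single-WQE sketch ring the doorbell immediately */
	mlx5_ring_tx_db(qp, ctrl);

	return 0;
}
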
static inline struct spdk_mlx5_qp *
mlx5_cq_find_qp(struct spdk_mlx5_cq *cq, uint32_t qp_num)
{
	uint32_t qpn_upper = qp_num >> SPDK_MLX5_QP_NUM_UPPER_SHIFT;
	uint32_t qpn_mask = qp_num & SPDK_MLX5_QP_NUM_LOWER_MASK;

	if (spdk_unlikely(!cq->qps[qpn_upper].count)) {
		return NULL;
	}
	return cq->qps[qpn_upper].table[qpn_mask];
}