/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 NVIDIA Corporation & Affiliates
 */

#include "mlx5dr_internal.h"

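/* Reserve the next slot in the SQ dependent-WQE ring and clear its match tag.
 * The WQE is only queued here; it is posted later by mlx5dr_send_all_dep_wqe().
 */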
struct mlx5dr_send_ring_dep_wqe *
mlx5dr_send_add_new_dep_wqe(struct mlx5dr_send_engine *queue)
{
	struct mlx5dr_send_ring_sq *send_sq = &queue->send_ring->send_sq;
	unsigned int idx = send_sq->head_dep_idx++ & (queue->num_entries - 1);

	memset(&send_sq->dep_wqe[idx].wqe_data.tag, 0, MLX5DR_MATCH_TAG_SZ);

	return &send_sq->dep_wqe[idx];
}

void mlx5dr_send_abort_new_dep_wqe(struct mlx5dr_send_engine *queue)
{
	queue->send_ring->send_sq.head_dep_idx--;
}

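/* Post all pending dependent WQEs to the send ring as GTA "activate" WQEs.
 * The first WQE carries a small fence to order it after previously posted
 * WQEs; only the last one requests a HW completion notification.
 */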
void mlx5dr_send_all_dep_wqe(struct mlx5dr_send_engine *queue)
{
	struct mlx5dr_send_ring_sq *send_sq = &queue->send_ring->send_sq;
	struct mlx5dr_send_ste_attr ste_attr = {0};
	struct mlx5dr_send_ring_dep_wqe *dep_wqe;

	ste_attr.send_attr.opmod = MLX5DR_WQE_GTA_OPMOD_STE;
	ste_attr.send_attr.opcode = MLX5DR_WQE_OPCODE_TBL_ACCESS;
	ste_attr.send_attr.len = MLX5DR_WQE_SZ_GTA_CTRL + MLX5DR_WQE_SZ_GTA_DATA;
	ste_attr.gta_opcode = MLX5DR_WQE_GTA_OP_ACTIVATE;

	/* Fence the first WQE to order it after previously posted dependent WQEs */
	ste_attr.send_attr.fence = 1;

	while (send_sq->head_dep_idx != send_sq->tail_dep_idx) {
		dep_wqe = &send_sq->dep_wqe[send_sq->tail_dep_idx++ & (queue->num_entries - 1)];

		/* Notify HW on the last WQE */
		ste_attr.send_attr.notify_hw = (send_sq->tail_dep_idx == send_sq->head_dep_idx);
		ste_attr.send_attr.user_data = dep_wqe->user_data;
		ste_attr.send_attr.rule = dep_wqe->rule;

		ste_attr.rtc_0 = dep_wqe->rtc_0;
		ste_attr.rtc_1 = dep_wqe->rtc_1;
		ste_attr.retry_rtc_0 = dep_wqe->retry_rtc_0;
		ste_attr.retry_rtc_1 = dep_wqe->retry_rtc_1;
		ste_attr.used_id_rtc_0 = &dep_wqe->rule->rtc_0;
		ste_attr.used_id_rtc_1 = &dep_wqe->rule->rtc_1;
		ste_attr.wqe_ctrl = &dep_wqe->wqe_ctrl;
		ste_attr.wqe_data = &dep_wqe->wqe_data;
		ste_attr.direct_index = dep_wqe->direct_index;

		mlx5dr_send_ste(queue, &ste_attr);

		/* Fencing is done only on the first WQE */
		ste_attr.send_attr.fence = 0;
	}
}

struct mlx5dr_send_engine_post_ctrl
mlx5dr_send_engine_post_start(struct mlx5dr_send_engine *queue)
{
	struct mlx5dr_send_engine_post_ctrl ctrl;

	ctrl.queue = queue;
	/* Currently only one send ring is supported */
	ctrl.send_ring = &queue->send_ring[0];
	ctrl.num_wqebbs = 0;

	return ctrl;
}

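/* Hand out a pointer to the next WQE basic block (WQEBB) of the WQE being
 * built. The first WQEBB reserves room for the control segment, so the
 * returned buffer and length are adjusted past it.
 */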
void mlx5dr_send_engine_post_req_wqe(struct mlx5dr_send_engine_post_ctrl *ctrl,
				     char **buf, size_t *len)
{
	struct mlx5dr_send_ring_sq *send_sq = &ctrl->send_ring->send_sq;
	unsigned int idx;

	idx = (send_sq->cur_post + ctrl->num_wqebbs) & send_sq->buf_mask;

	*buf = send_sq->buf + (idx << MLX5_SEND_WQE_SHIFT);
	*len = MLX5_SEND_WQE_BB;

	if (!ctrl->num_wqebbs) {
		*buf += sizeof(struct mlx5dr_wqe_ctrl_seg);
		*len -= sizeof(struct mlx5dr_wqe_ctrl_seg);
	}

	ctrl->num_wqebbs++;
}

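/* Ring the SQ doorbell: update the doorbell record with the new producer
 * index, then write the WQE control segment to the UAR register, with
 * barriers ordering the WQE memory writes against the doorbell.
 */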
static void mlx5dr_send_engine_post_ring(struct mlx5dr_send_ring_sq *sq,
					 struct mlx5dv_devx_uar *uar,
					 struct mlx5dr_wqe_ctrl_seg *wqe_ctrl)
{
	rte_compiler_barrier();
	sq->db[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->cur_post);

	rte_wmb();
	mlx5dr_uar_write64_relaxed(*((uint64_t *)wqe_ctrl), uar->reg_addr);
	rte_wmb();
}

static void
mlx5dr_send_wqe_set_tag(struct mlx5dr_wqe_gta_data_seg_ste *wqe_data,
			struct mlx5dr_rule_match_tag *tag,
			bool is_jumbo)
{
	if (is_jumbo) {
		/* Clear previous possibly dirty control */
		memset(wqe_data, 0, MLX5DR_STE_CTRL_SZ);
		memcpy(wqe_data->jumbo, tag->jumbo, MLX5DR_JUMBO_TAG_SZ);
	} else {
		/* Clear previous possibly dirty control and actions */
		memset(wqe_data, 0, MLX5DR_STE_CTRL_SZ + MLX5DR_ACTIONS_SZ);
		memcpy(wqe_data->tag, tag->match, MLX5DR_MATCH_TAG_SZ);
	}
}

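/* Finalize a WQE built through post_start()/post_req_wqe(): fill in the
 * control segment (opcode, producer index, QP number, flags), record the
 * per-WQE private data used at completion time, advance the producer index
 * and, if requested, ring the doorbell.
 */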
void mlx5dr_send_engine_post_end(struct mlx5dr_send_engine_post_ctrl *ctrl,
				 struct mlx5dr_send_engine_post_attr *attr)
{
	struct mlx5dr_wqe_ctrl_seg *wqe_ctrl;
	struct mlx5dr_send_ring_sq *sq;
	uint32_t flags = 0;
	unsigned int idx;

	sq = &ctrl->send_ring->send_sq;
	idx = sq->cur_post & sq->buf_mask;
	sq->last_idx = idx;

	wqe_ctrl = (void *)(sq->buf + (idx << MLX5_SEND_WQE_SHIFT));

	wqe_ctrl->opmod_idx_opcode =
		rte_cpu_to_be_32((attr->opmod << 24) |
				 ((sq->cur_post & 0xffff) << 8) |
				 attr->opcode);
	wqe_ctrl->qpn_ds =
		rte_cpu_to_be_32((attr->len + sizeof(struct mlx5dr_wqe_ctrl_seg)) / 16 |
				 sq->sqn << 8);

	wqe_ctrl->imm = rte_cpu_to_be_32(attr->id);

	flags |= attr->notify_hw ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
	flags |= attr->fence ? MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE : 0;
	wqe_ctrl->flags = rte_cpu_to_be_32(flags);

	sq->wr_priv[idx].id = attr->id;
	sq->wr_priv[idx].retry_id = attr->retry_id;

	sq->wr_priv[idx].rule = attr->rule;
	sq->wr_priv[idx].user_data = attr->user_data;
	sq->wr_priv[idx].num_wqebbs = ctrl->num_wqebbs;

	if (attr->rule) {
		sq->wr_priv[idx].rule->pending_wqes++;
		sq->wr_priv[idx].used_id = attr->used_id;
	}

	sq->cur_post += ctrl->num_wqebbs;

	if (attr->notify_hw)
		mlx5dr_send_engine_post_ring(sq, ctrl->queue->uar, wqe_ctrl);
}

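/* Build and post a single GTA WQE (one control WQEBB plus one data WQEBB):
 * set the GTA opcode and direct index, copy the STC indices, and fill the
 * data segment either from a prebuilt buffer or from the rule match tag.
 */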
static void mlx5dr_send_wqe(struct mlx5dr_send_engine *queue,
			    struct mlx5dr_send_engine_post_attr *send_attr,
			    struct mlx5dr_wqe_gta_ctrl_seg *send_wqe_ctrl,
			    void *send_wqe_data,
			    void *send_wqe_tag,
			    bool is_jumbo,
			    uint8_t gta_opcode,
			    uint32_t direct_index)
{
	struct mlx5dr_wqe_gta_data_seg_ste *wqe_data;
	struct mlx5dr_wqe_gta_ctrl_seg *wqe_ctrl;
	struct mlx5dr_send_engine_post_ctrl ctrl;
	size_t wqe_len;

	ctrl = mlx5dr_send_engine_post_start(queue);
	mlx5dr_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
	mlx5dr_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len);

	wqe_ctrl->op_dirix = htobe32(gta_opcode << 28 | direct_index);
	memcpy(wqe_ctrl->stc_ix, send_wqe_ctrl->stc_ix, sizeof(send_wqe_ctrl->stc_ix));

	if (send_wqe_data)
		memcpy(wqe_data, send_wqe_data, sizeof(*wqe_data));
	else
		mlx5dr_send_wqe_set_tag(wqe_data, send_wqe_tag, is_jumbo);

	mlx5dr_send_engine_post_end(&ctrl, send_attr);
}

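/* Post the STE WQE(s) for a rule. When both RTCs are in use two WQEs are
 * posted: the fence is applied only to the first one and the HW completion
 * notification only to the last one. The caller's notify_hw/fence values
 * are restored before returning.
 */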
void mlx5dr_send_ste(struct mlx5dr_send_engine *queue,
		     struct mlx5dr_send_ste_attr *ste_attr)
{
	struct mlx5dr_send_engine_post_attr *send_attr = &ste_attr->send_attr;
	uint8_t notify_hw = send_attr->notify_hw;
	uint8_t fence = send_attr->fence;

	if (ste_attr->rtc_1) {
		send_attr->id = ste_attr->rtc_1;
		send_attr->used_id = ste_attr->used_id_rtc_1;
		send_attr->retry_id = ste_attr->retry_rtc_1;
		send_attr->fence = fence;
		send_attr->notify_hw = notify_hw && !ste_attr->rtc_0;
		mlx5dr_send_wqe(queue, send_attr,
				ste_attr->wqe_ctrl,
				ste_attr->wqe_data,
				ste_attr->wqe_tag,
				ste_attr->wqe_tag_is_jumbo,
				ste_attr->gta_opcode,
				ste_attr->direct_index);
	}

	if (ste_attr->rtc_0) {
		send_attr->id = ste_attr->rtc_0;
		send_attr->used_id = ste_attr->used_id_rtc_0;
		send_attr->retry_id = ste_attr->retry_rtc_0;
		send_attr->fence = fence && !ste_attr->rtc_1;
		send_attr->notify_hw = notify_hw;
		mlx5dr_send_wqe(queue, send_attr,
				ste_attr->wqe_ctrl,
				ste_attr->wqe_data,
				ste_attr->wqe_tag,
				ste_attr->wqe_tag_is_jumbo,
				ste_attr->gta_opcode,
				ste_attr->direct_index);
	}

	/* Restore the original requested values */
	send_attr->notify_hw = notify_hw;
	send_attr->fence = fence;
}

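/* Write a single STE through the FW GENERATE_WQE command instead of the
 * hardware SQ. The WQE control, GTA control and GTA data segments are built
 * on the stack and the returned CQE is checked inline; on failure, one retry
 * is attempted using the retry RTC id.
 */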
static
int mlx5dr_send_wqe_fw(struct ibv_context *ibv_ctx,
		       uint32_t pd_num,
		       struct mlx5dr_send_engine_post_attr *send_attr,
		       struct mlx5dr_wqe_gta_ctrl_seg *send_wqe_ctrl,
		       void *send_wqe_match_data,
		       void *send_wqe_match_tag,
		       void *send_wqe_range_data,
		       void *send_wqe_range_tag,
		       bool is_jumbo,
		       uint8_t gta_opcode)
{
	bool has_range = send_wqe_range_data || send_wqe_range_tag;
	bool has_match = send_wqe_match_data || send_wqe_match_tag;
	struct mlx5dr_wqe_gta_data_seg_ste gta_wqe_data0 = {0};
	struct mlx5dr_wqe_gta_data_seg_ste gta_wqe_data1 = {0};
	struct mlx5dr_wqe_gta_ctrl_seg gta_wqe_ctrl = {0};
	struct mlx5dr_cmd_generate_wqe_attr attr = {0};
	struct mlx5dr_wqe_ctrl_seg wqe_ctrl = {0};
	struct mlx5_cqe64 cqe;
	uint32_t flags = 0;
	int ret;

	/* Set WQE control */
	wqe_ctrl.opmod_idx_opcode =
		rte_cpu_to_be_32((send_attr->opmod << 24) | send_attr->opcode);
	wqe_ctrl.qpn_ds =
		rte_cpu_to_be_32((send_attr->len + sizeof(struct mlx5dr_wqe_ctrl_seg)) / 16);
	flags |= send_attr->notify_hw ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
	wqe_ctrl.flags = rte_cpu_to_be_32(flags);
	wqe_ctrl.imm = rte_cpu_to_be_32(send_attr->id);

	/* Set GTA WQE CTRL */
	memcpy(gta_wqe_ctrl.stc_ix, send_wqe_ctrl->stc_ix, sizeof(send_wqe_ctrl->stc_ix));
	gta_wqe_ctrl.op_dirix = htobe32(gta_opcode << 28);

	/* Set GTA match WQE DATA */
	if (has_match) {
		if (send_wqe_match_data)
			memcpy(&gta_wqe_data0, send_wqe_match_data, sizeof(gta_wqe_data0));
		else
			mlx5dr_send_wqe_set_tag(&gta_wqe_data0, send_wqe_match_tag, is_jumbo);

		gta_wqe_data0.rsvd1_definer = htobe32(send_attr->match_definer_id << 8);
		attr.gta_data_0 = (uint8_t *)&gta_wqe_data0;
	}

	/* Set GTA range WQE DATA */
	if (has_range) {
		if (send_wqe_range_data)
			memcpy(&gta_wqe_data1, send_wqe_range_data, sizeof(gta_wqe_data1));
		else
			mlx5dr_send_wqe_set_tag(&gta_wqe_data1, send_wqe_range_tag, false);

		gta_wqe_data1.rsvd1_definer = htobe32(send_attr->range_definer_id << 8);
		attr.gta_data_1 = (uint8_t *)&gta_wqe_data1;
	}

	attr.pdn = pd_num;
	attr.wqe_ctrl = (uint8_t *)&wqe_ctrl;
	attr.gta_ctrl = (uint8_t *)&gta_wqe_ctrl;

send_wqe:
	ret = mlx5dr_cmd_generate_wqe(ibv_ctx, &attr, &cqe);
	if (ret) {
		DR_LOG(ERR, "Failed to write WQE using command");
		return ret;
	}

	if ((mlx5dv_get_cqe_opcode(&cqe) == MLX5_CQE_REQ) &&
	    (rte_be_to_cpu_32(cqe.byte_cnt) >> 31 == 0)) {
		*send_attr->used_id = send_attr->id;
		return 0;
	}

	/* Retry if rule failed */
	if (send_attr->retry_id) {
		wqe_ctrl.imm = rte_cpu_to_be_32(send_attr->retry_id);
		send_attr->id = send_attr->retry_id;
		send_attr->retry_id = 0;
		goto send_wqe;
	}

	return -1;
}

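/* FW-command counterpart of mlx5dr_send_ste(): the STE WQE(s) are written
 * via GENERATE_WQE rather than the hardware SQ. Since FW writes cannot be
 * HW fenced, a fenced request first drains the queue synchronously, and the
 * completion is generated in SW.
 */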
void mlx5dr_send_stes_fw(struct mlx5dr_send_engine *queue,
			 struct mlx5dr_send_ste_attr *ste_attr)
{
	struct mlx5dr_send_engine_post_attr *send_attr = &ste_attr->send_attr;
	struct mlx5dr_rule *rule = send_attr->rule;
	struct ibv_context *ibv_ctx;
	struct mlx5dr_context *ctx;
	uint16_t queue_id;
	uint32_t pdn;
	int ret;

	ctx = rule->matcher->tbl->ctx;
	queue_id = queue - ctx->send_queue;
	ibv_ctx = ctx->ibv_ctx;
	pdn = ctx->pd_num;

	/* Writing through FW cannot be HW fenced, therefore drain the queue */
	if (send_attr->fence)
		mlx5dr_send_queue_action(ctx,
					 queue_id,
					 MLX5DR_SEND_QUEUE_ACTION_DRAIN_SYNC);

	if (ste_attr->rtc_1) {
		send_attr->id = ste_attr->rtc_1;
		send_attr->used_id = ste_attr->used_id_rtc_1;
		send_attr->retry_id = ste_attr->retry_rtc_1;
		ret = mlx5dr_send_wqe_fw(ibv_ctx, pdn, send_attr,
					 ste_attr->wqe_ctrl,
					 ste_attr->wqe_data,
					 ste_attr->wqe_tag,
					 ste_attr->range_wqe_data,
					 ste_attr->range_wqe_tag,
					 ste_attr->wqe_tag_is_jumbo,
					 ste_attr->gta_opcode);
		if (ret)
			goto fail_rule;
	}

	if (ste_attr->rtc_0) {
		send_attr->id = ste_attr->rtc_0;
		send_attr->used_id = ste_attr->used_id_rtc_0;
		send_attr->retry_id = ste_attr->retry_rtc_0;
		ret = mlx5dr_send_wqe_fw(ibv_ctx, pdn, send_attr,
					 ste_attr->wqe_ctrl,
					 ste_attr->wqe_data,
					 ste_attr->wqe_tag,
					 ste_attr->range_wqe_data,
					 ste_attr->range_wqe_tag,
					 ste_attr->wqe_tag_is_jumbo,
					 ste_attr->gta_opcode);
		if (ret)
			goto fail_rule;
	}

	/* Increment the status. This only works on the good-flow path, since
	 * the enum values are ordered: creating -> created -> deleting -> deleted.
	 */
	rule->status++;
	mlx5dr_send_engine_gen_comp(queue, send_attr->user_data, RTE_FLOW_OP_SUCCESS);
	return;

fail_rule:
	rule->status = !rule->rtc_0 && !rule->rtc_1 ?
		MLX5DR_RULE_STATUS_FAILED : MLX5DR_RULE_STATUS_FAILING;
	mlx5dr_send_engine_gen_comp(queue, send_attr->user_data, RTE_FLOW_OP_ERROR);
}

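/* Repost a failed rule WQE using its retry RTC id. The GTA control and data
 * segments are copied from the original WQE, which is still present in the
 * SQ buffer at the failing wqe_cnt index.
 */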
static void mlx5dr_send_engine_retry_post_send(struct mlx5dr_send_engine *queue,
					       struct mlx5dr_send_ring_priv *priv,
					       uint16_t wqe_cnt)
{
	struct mlx5dr_send_engine_post_attr send_attr = {0};
	struct mlx5dr_wqe_gta_data_seg_ste *wqe_data;
	struct mlx5dr_wqe_gta_ctrl_seg *wqe_ctrl;
	struct mlx5dr_send_engine_post_ctrl ctrl;
	struct mlx5dr_send_ring_sq *send_sq;
	unsigned int idx;
	size_t wqe_len;
	char *p;

	send_attr.rule = priv->rule;
	send_attr.opcode = MLX5DR_WQE_OPCODE_TBL_ACCESS;
	send_attr.opmod = MLX5DR_WQE_GTA_OPMOD_STE;
	send_attr.len = MLX5_SEND_WQE_BB * 2 - sizeof(struct mlx5dr_wqe_ctrl_seg);
	send_attr.notify_hw = 1;
	send_attr.fence = 0;
	send_attr.user_data = priv->user_data;
	send_attr.id = priv->retry_id;
	send_attr.used_id = priv->used_id;

	ctrl = mlx5dr_send_engine_post_start(queue);
	mlx5dr_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
	mlx5dr_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len);

	send_sq = &ctrl.send_ring->send_sq;
	idx = wqe_cnt & send_sq->buf_mask;
	p = send_sq->buf + (idx << MLX5_SEND_WQE_SHIFT);

	/* Copy old gta ctrl */
	memcpy(wqe_ctrl, p + sizeof(struct mlx5dr_wqe_ctrl_seg),
	       MLX5_SEND_WQE_BB - sizeof(struct mlx5dr_wqe_ctrl_seg));

	idx = (wqe_cnt + 1) & send_sq->buf_mask;
	p = send_sq->buf + (idx << MLX5_SEND_WQE_SHIFT);

	/* Copy old gta data */
	memcpy(wqe_data, p, MLX5_SEND_WQE_BB);

	mlx5dr_send_engine_post_end(&ctrl, &send_attr);
}

void mlx5dr_send_engine_flush_queue(struct mlx5dr_send_engine *queue)
{
	struct mlx5dr_send_ring_sq *sq = &queue->send_ring[0].send_sq;
	struct mlx5dr_wqe_ctrl_seg *wqe_ctrl;

	wqe_ctrl = (void *)(sq->buf + (sq->last_idx << MLX5_SEND_WQE_SHIFT));

	wqe_ctrl->flags |= rte_cpu_to_be_32(MLX5_WQE_CTRL_CQ_UPDATE);

	mlx5dr_send_engine_post_ring(sq, queue->uar, wqe_ctrl);
}

static void
mlx5dr_send_engine_update_rule_resize(struct mlx5dr_send_engine *queue,
				      struct mlx5dr_send_ring_priv *priv,
				      enum rte_flow_op_status *status)
{
	switch (priv->rule->resize_info->state) {
	case MLX5DR_RULE_RESIZE_STATE_WRITING:
		if (priv->rule->status == MLX5DR_RULE_STATUS_FAILING) {
			/* Backup original RTCs */
			uint32_t orig_rtc_0 = priv->rule->resize_info->rtc_0;
			uint32_t orig_rtc_1 = priv->rule->resize_info->rtc_1;

			/* Delete partially failed move rule using resize_info */
			priv->rule->resize_info->rtc_0 = priv->rule->rtc_0;
			priv->rule->resize_info->rtc_1 = priv->rule->rtc_1;

			/* Move rule to original RTC for future delete */
			priv->rule->rtc_0 = orig_rtc_0;
			priv->rule->rtc_1 = orig_rtc_1;
		}
		/* Clean leftovers */
		mlx5dr_rule_move_hws_remove(priv->rule, queue, priv->user_data);
		break;

	case MLX5DR_RULE_RESIZE_STATE_DELETING:
		if (priv->rule->status == MLX5DR_RULE_STATUS_FAILING) {
			*status = RTE_FLOW_OP_ERROR;
		} else {
			*status = RTE_FLOW_OP_SUCCESS;
			priv->rule->matcher = priv->rule->matcher->resize_dst;
		}
		priv->rule->resize_info->state = MLX5DR_RULE_RESIZE_STATE_IDLE;
		priv->rule->status = MLX5DR_RULE_STATUS_CREATED;
		break;

	default:
		break;
	}
}

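/* Process a completion for one rule WQE: on error, retry once through the
 * retry RTC if one was provided, otherwise mark the rule as failing. When
 * the last pending WQE of the rule completes, advance the rule status and
 * release action STEs and resize info as needed.
 */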
static void mlx5dr_send_engine_update_rule(struct mlx5dr_send_engine *queue,
					   struct mlx5dr_send_ring_priv *priv,
					   uint16_t wqe_cnt,
					   enum rte_flow_op_status *status)
{
	priv->rule->pending_wqes--;

	if (*status == RTE_FLOW_OP_ERROR) {
		if (priv->retry_id) {
			mlx5dr_send_engine_retry_post_send(queue, priv, wqe_cnt);
			return;
		}
		/* Some part of the rule failed */
		priv->rule->status = MLX5DR_RULE_STATUS_FAILING;
		*priv->used_id = 0;
	} else {
		*priv->used_id = priv->id;
	}

	/* Update rule status for the last completion */
	if (!priv->rule->pending_wqes) {
		if (unlikely(mlx5dr_rule_move_in_progress(priv->rule))) {
			mlx5dr_send_engine_update_rule_resize(queue, priv, status);
			return;
		}

		if (unlikely(priv->rule->status == MLX5DR_RULE_STATUS_FAILING)) {
			/* Rule completely failed and doesn't require cleanup */
			if (!priv->rule->rtc_0 && !priv->rule->rtc_1)
				priv->rule->status = MLX5DR_RULE_STATUS_FAILED;

			*status = RTE_FLOW_OP_ERROR;
		} else {
			/* Increment the status. This only works on the good-flow
			 * path, since the enum values are ordered:
			 * creating -> created -> deleting -> deleted.
			 */
			priv->rule->status++;
			*status = RTE_FLOW_OP_SUCCESS;
			/* The rule was deleted, so action STEs can safely be
			 * released and the resize info cleared.
			 */
			if (priv->rule->status == MLX5DR_RULE_STATUS_DELETED) {
				mlx5dr_rule_free_action_ste_idx(priv->rule);
				mlx5dr_rule_clear_resize_info(priv->rule);
			}
		}
	}
}

static void mlx5dr_send_engine_update(struct mlx5dr_send_engine *queue,
				      struct mlx5_cqe64 *cqe,
				      struct mlx5dr_send_ring_priv *priv,
				      struct rte_flow_op_result res[],
				      int64_t *i,
				      uint32_t res_nb,
				      uint16_t wqe_cnt)
{
	enum rte_flow_op_status status;

	if (!cqe || (likely(rte_be_to_cpu_32(cqe->byte_cnt) >> 31 == 0) &&
	    likely(mlx5dv_get_cqe_opcode(cqe) == MLX5_CQE_REQ))) {
		status = RTE_FLOW_OP_SUCCESS;
	} else {
		status = RTE_FLOW_OP_ERROR;
	}

	if (priv->user_data) {
		if (priv->rule) {
			mlx5dr_send_engine_update_rule(queue, priv, wqe_cnt, &status);
			/* Completion is provided on the last rule WQE */
			if (priv->rule->pending_wqes)
				return;
		}

		if (*i < res_nb) {
			res[*i].user_data = priv->user_data;
			res[*i].status = status;
			(*i)++;
			mlx5dr_send_engine_dec_rule(queue);
		} else {
			mlx5dr_send_engine_gen_comp(queue, priv->user_data, status);
		}
	}
}

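/* Poll one CQE from the ring's CQ. A single CQE may acknowledge several
 * WQEs (only the last WQE of a batch requests a completion), so all WQEs up
 * to the reported wqe_counter are completed without a CQE before the CQE
 * owner itself is processed and the consumer index is advanced.
 */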
static void mlx5dr_send_engine_poll_cq(struct mlx5dr_send_engine *queue,
				       struct mlx5dr_send_ring *send_ring,
				       struct rte_flow_op_result res[],
				       int64_t *i,
				       uint32_t res_nb)
{
	struct mlx5dr_send_ring_cq *cq = &send_ring->send_cq;
	struct mlx5dr_send_ring_sq *sq = &send_ring->send_sq;
	uint32_t cq_idx = cq->cons_index & cq->ncqe_mask;
	struct mlx5dr_send_ring_priv *priv;
	struct mlx5_cqe64 *cqe;
	uint32_t offset_cqe64;
	uint8_t cqe_opcode;
	uint8_t cqe_owner;
	uint16_t wqe_cnt;
	uint8_t sw_own;

	offset_cqe64 = RTE_CACHE_LINE_SIZE - sizeof(struct mlx5_cqe64);
	cqe = (void *)(cq->buf + (cq_idx << cq->cqe_log_sz) + offset_cqe64);

	sw_own = (cq->cons_index & cq->ncqe) ? 1 : 0;
	cqe_opcode = mlx5dv_get_cqe_opcode(cqe);
	cqe_owner = mlx5dv_get_cqe_owner(cqe);

	if (cqe_opcode == MLX5_CQE_INVALID ||
	    cqe_owner != sw_own)
		return;

	if (unlikely(cqe_opcode != MLX5_CQE_REQ)) {
		struct mlx5_error_cqe *err_cqe = (struct mlx5_error_cqe *)cqe;

		DR_LOG(ERR, "CQE ERR:0x%x, Vendor_ERR:0x%x, OP:0x%x, QPN:0x%x, WQE_CNT:0x%x",
			err_cqe->syndrome, err_cqe->vendor_err_synd, cqe_opcode,
			(rte_be_to_cpu_32(err_cqe->s_wqe_opcode_qpn) & 0xffffff),
			rte_be_to_cpu_16(err_cqe->wqe_counter));
		queue->err = true;
	}

	rte_io_rmb();

	wqe_cnt = be16toh(cqe->wqe_counter) & sq->buf_mask;

	while (cq->poll_wqe != wqe_cnt) {
		priv = &sq->wr_priv[cq->poll_wqe];
		mlx5dr_send_engine_update(queue, NULL, priv, res, i, res_nb, 0);
		cq->poll_wqe = (cq->poll_wqe + priv->num_wqebbs) & sq->buf_mask;
	}

	priv = &sq->wr_priv[wqe_cnt];
	cq->poll_wqe = (wqe_cnt + priv->num_wqebbs) & sq->buf_mask;
	mlx5dr_send_engine_update(queue, cqe, priv, res, i, res_nb, wqe_cnt);
	cq->cons_index++;
	*cq->db = htobe32(cq->cons_index & 0xffffff);
}

static void mlx5dr_send_engine_poll_cqs(struct mlx5dr_send_engine *queue,
					struct rte_flow_op_result res[],
					int64_t *polled,
					uint32_t res_nb)
{
	int j;

	for (j = 0; j < MLX5DR_NUM_SEND_RINGS; j++)
		mlx5dr_send_engine_poll_cq(queue, &queue->send_ring[j],
					   res, polled, res_nb);
}

static void mlx5dr_send_engine_poll_list(struct mlx5dr_send_engine *queue,
					 struct rte_flow_op_result res[],
					 int64_t *polled,
					 uint32_t res_nb)
{
	struct mlx5dr_completed_poll *comp = &queue->completed;

	while (comp->ci != comp->pi) {
		if (*polled < res_nb) {
			res[*polled].status =
				comp->entries[comp->ci].status;
			res[*polled].user_data =
				comp->entries[comp->ci].user_data;
			(*polled)++;
			comp->ci = (comp->ci + 1) & comp->mask;
			mlx5dr_send_engine_dec_rule(queue);
		} else {
			return;
		}
	}
}

static int mlx5dr_send_engine_poll(struct mlx5dr_send_engine *queue,
				   struct rte_flow_op_result res[],
				   uint32_t res_nb)
{
	int64_t polled = 0;

	mlx5dr_send_engine_poll_list(queue, res, &polled, res_nb);

	if (polled >= res_nb)
		return polled;

	mlx5dr_send_engine_poll_cqs(queue, res, &polled, res_nb);

	return polled;
}

int mlx5dr_send_queue_poll(struct mlx5dr_context *ctx,
			   uint16_t queue_id,
			   struct rte_flow_op_result res[],
			   uint32_t res_nb)
{
	return mlx5dr_send_engine_poll(&ctx->send_queue[queue_id],
				       res, res_nb);
}

static int mlx5dr_send_ring_create_sq_obj(struct mlx5dr_context *ctx,
					  struct mlx5dr_send_engine *queue,
					  struct mlx5dr_send_ring_sq *sq,
					  struct mlx5dr_send_ring_cq *cq,
					  size_t log_wq_sz)
{
	struct mlx5dr_cmd_sq_create_attr attr = {0};
	int err;

	attr.cqn = cq->cqn;
	attr.pdn = ctx->pd_num;
	attr.page_id = queue->uar->page_id;
	attr.dbr_id = sq->db_umem->umem_id;
	attr.wq_id = sq->buf_umem->umem_id;
	attr.log_wq_sz = log_wq_sz;
	if (ctx->caps->sq_ts_format == MLX5_HCA_CAP_TIMESTAMP_FORMAT_FR)
		attr.ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING;
	else
		attr.ts_format = MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT;

	sq->obj = mlx5dr_cmd_sq_create(ctx->ibv_ctx, &attr);
	if (!sq->obj)
		return rte_errno;

	sq->sqn = sq->obj->id;

	err = mlx5dr_cmd_sq_modify_rdy(sq->obj);
	if (err)
		goto free_sq;

	return 0;

free_sq:
	mlx5dr_cmd_destroy_obj(sq->obj);

	return err;
}

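/* Allocate the SQ resources: a page-aligned WQE buffer sized for num_entries
 * rules (each rule may need up to MAX_WQES_PER_RULE WQEs), a doorbell record,
 * their DevX umem registrations, the SQ object itself, and the per-WQE
 * private and dependent-WQE arrays.
 */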
static int mlx5dr_send_ring_open_sq(struct mlx5dr_context *ctx,
				    struct mlx5dr_send_engine *queue,
				    struct mlx5dr_send_ring_sq *sq,
				    struct mlx5dr_send_ring_cq *cq)
{
	size_t sq_log_buf_sz;
	size_t buf_aligned;
	size_t sq_buf_sz;
	size_t page_size;
	size_t buf_sz;
	int err;

	buf_sz = queue->num_entries * MAX_WQES_PER_RULE;
	sq_log_buf_sz = log2above(buf_sz);
	sq_buf_sz = 1 << (sq_log_buf_sz + log2above(MLX5_SEND_WQE_BB));

	page_size = sysconf(_SC_PAGESIZE);
	buf_aligned = align(sq_buf_sz, page_size);
	err = posix_memalign((void **)&sq->buf, page_size, buf_aligned);
	if (err) {
		rte_errno = ENOMEM;
		return err;
	}
	memset(sq->buf, 0, buf_aligned);

	err = posix_memalign((void **)&sq->db, 8, 8);
	if (err)
		goto free_buf;

	sq->buf_umem = mlx5_glue->devx_umem_reg(ctx->ibv_ctx, sq->buf, sq_buf_sz, 0);

	if (!sq->buf_umem) {
		err = errno;
		goto free_db;
	}

	sq->db_umem = mlx5_glue->devx_umem_reg(ctx->ibv_ctx, sq->db, 8, 0);
	if (!sq->db_umem) {
		err = errno;
		goto free_buf_umem;
	}

	err = mlx5dr_send_ring_create_sq_obj(ctx, queue, sq, cq, sq_log_buf_sz);

	if (err)
		goto free_db_umem;

	sq->wr_priv = simple_malloc(sizeof(*sq->wr_priv) * buf_sz);
	if (!sq->wr_priv) {
		err = ENOMEM;
		goto destroy_sq_obj;
	}

	sq->dep_wqe = simple_calloc(queue->num_entries, sizeof(*sq->dep_wqe));
	if (!sq->dep_wqe) {
		err = ENOMEM;
		goto destroy_wr_priv;
	}

	sq->buf_mask = buf_sz - 1;

	return 0;

destroy_wr_priv:
	simple_free(sq->wr_priv);
destroy_sq_obj:
	mlx5dr_cmd_destroy_obj(sq->obj);
free_db_umem:
	mlx5_glue->devx_umem_dereg(sq->db_umem);
free_buf_umem:
	mlx5_glue->devx_umem_dereg(sq->buf_umem);
free_db:
	free(sq->db);
free_buf:
	free(sq->buf);
	rte_errno = err;
	return err;
}

static void mlx5dr_send_ring_close_sq(struct mlx5dr_send_ring_sq *sq)
{
	simple_free(sq->dep_wqe);
	mlx5dr_cmd_destroy_obj(sq->obj);
	mlx5_glue->devx_umem_dereg(sq->db_umem);
	mlx5_glue->devx_umem_dereg(sq->buf_umem);
	simple_free(sq->wr_priv);
	free(sq->db);
	free(sq->buf);
}

static int mlx5dr_send_ring_open_cq(struct mlx5dr_context *ctx,
				    struct mlx5dr_send_engine *queue,
				    struct mlx5dr_send_ring_cq *cq)
{
	struct mlx5dv_cq mlx5_cq = {0};
	struct mlx5dv_obj obj;
	struct ibv_cq *ibv_cq;
	size_t cq_size;
	int err;

	cq_size = queue->num_entries;
	ibv_cq = mlx5_glue->create_cq(ctx->ibv_ctx, cq_size, NULL, NULL, 0);
	if (!ibv_cq) {
		DR_LOG(ERR, "Failed to create CQ");
		rte_errno = errno;
		return rte_errno;
	}

	obj.cq.in = ibv_cq;
	obj.cq.out = &mlx5_cq;
	err = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
	if (err) {
		err = errno;
		goto close_cq;
	}

	cq->buf = mlx5_cq.buf;
	cq->db = mlx5_cq.dbrec;
	cq->ncqe = mlx5_cq.cqe_cnt;
	cq->cqe_sz = mlx5_cq.cqe_size;
	cq->cqe_log_sz = log2above(cq->cqe_sz);
	cq->ncqe_mask = cq->ncqe - 1;
	cq->buf_sz = cq->cqe_sz * cq->ncqe;
	cq->cqn = mlx5_cq.cqn;
	cq->ibv_cq = ibv_cq;

	return 0;

close_cq:
	mlx5_glue->destroy_cq(ibv_cq);
	rte_errno = err;
	return err;
}

static void mlx5dr_send_ring_close_cq(struct mlx5dr_send_ring_cq *cq)
{
	mlx5_glue->destroy_cq(cq->ibv_cq);
}

static void mlx5dr_send_ring_close(struct mlx5dr_send_ring *ring)
{
	mlx5dr_send_ring_close_sq(&ring->send_sq);
	mlx5dr_send_ring_close_cq(&ring->send_cq);
}

static int mlx5dr_send_ring_open(struct mlx5dr_context *ctx,
				 struct mlx5dr_send_engine *queue,
				 struct mlx5dr_send_ring *ring)
{
	int err;

	err = mlx5dr_send_ring_open_cq(ctx, queue, &ring->send_cq);
	if (err)
		return err;

	err = mlx5dr_send_ring_open_sq(ctx, queue, &ring->send_sq, &ring->send_cq);
	if (err)
		goto close_cq;

	return err;

close_cq:
	mlx5dr_send_ring_close_cq(&ring->send_cq);

	return err;
}

static void __mlx5dr_send_rings_close(struct mlx5dr_send_engine *queue,
				      uint16_t i)
{
	while (i--)
		mlx5dr_send_ring_close(&queue->send_ring[i]);
}

static void mlx5dr_send_rings_close(struct mlx5dr_send_engine *queue)
{
	__mlx5dr_send_rings_close(queue, queue->rings);
}

static int mlx5dr_send_rings_open(struct mlx5dr_context *ctx,
				  struct mlx5dr_send_engine *queue)
{
	uint16_t i;
	int err;

	for (i = 0; i < queue->rings; i++) {
		err = mlx5dr_send_ring_open(ctx, queue, &queue->send_ring[i]);
		if (err)
			goto free_rings;
	}

	return 0;

free_rings:
	__mlx5dr_send_rings_close(queue, i);

	return err;
}

void mlx5dr_send_queue_close(struct mlx5dr_send_engine *queue)
{
	mlx5dr_send_rings_close(queue);
	simple_free(queue->completed.entries);
	mlx5_glue->devx_free_uar(queue->uar);
}

int mlx5dr_send_queue_open(struct mlx5dr_context *ctx,
			   struct mlx5dr_send_engine *queue,
			   uint16_t queue_size)
{
	struct mlx5dv_devx_uar *uar;
	int err;

#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
	uar = mlx5_glue->devx_alloc_uar(ctx->ibv_ctx, MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC);
	if (!uar) {
		rte_errno = errno;
		return rte_errno;
	}
#else
	uar = NULL;
	rte_errno = ENOTSUP;
	return rte_errno;
#endif

	queue->uar = uar;
	queue->rings = MLX5DR_NUM_SEND_RINGS;
	queue->num_entries = roundup_pow_of_two(queue_size);
	queue->used_entries = 0;
	queue->th_entries = queue->num_entries;

	queue->completed.entries = simple_calloc(queue->num_entries,
						 sizeof(queue->completed.entries[0]));
	if (!queue->completed.entries) {
		rte_errno = ENOMEM;
		goto free_uar;
	}
	queue->completed.pi = 0;
	queue->completed.ci = 0;
	queue->completed.mask = queue->num_entries - 1;

	err = mlx5dr_send_rings_open(ctx, queue);
	if (err)
		goto free_completed_entries;

	return 0;

free_completed_entries:
	simple_free(queue->completed.entries);
free_uar:
	mlx5_glue->devx_free_uar(uar);
	return rte_errno;
}

static void __mlx5dr_send_queues_close(struct mlx5dr_context *ctx, uint16_t queues)
{
	while (queues--)
		mlx5dr_send_queue_close(&ctx->send_queue[queues]);
}

static int mlx5dr_bwc_send_queues_init(struct mlx5dr_context *ctx)
{
	int bwc_queues = ctx->queues - 1;
	int i;

	if (!mlx5dr_context_bwc_supported(ctx))
		return 0;

	ctx->queues += bwc_queues;

	ctx->bwc_send_queue_locks = simple_calloc(bwc_queues,
						  sizeof(*ctx->bwc_send_queue_locks));
	if (!ctx->bwc_send_queue_locks) {
		rte_errno = ENOMEM;
		return rte_errno;
	}

	for (i = 0; i < bwc_queues; i++)
		rte_spinlock_init(&ctx->bwc_send_queue_locks[i]);

	return 0;
}

static void mlx5dr_send_queues_bwc_locks_destroy(struct mlx5dr_context *ctx)
{
	if (!mlx5dr_context_bwc_supported(ctx))
		return;

	simple_free(ctx->bwc_send_queue_locks);
}

void mlx5dr_send_queues_close(struct mlx5dr_context *ctx)
{
	__mlx5dr_send_queues_close(ctx, ctx->queues);
	simple_free(ctx->send_queue);
	mlx5dr_send_queues_bwc_locks_destroy(ctx);
}

int mlx5dr_send_queues_open(struct mlx5dr_context *ctx,
			    uint16_t queues,
			    uint16_t queue_size)
{
	int err = 0;
	uint32_t i;

	/* Open one extra queue for the control path */
	ctx->queues = queues + 1;

	/* Open a separate set of queues and locks for the BWC API */
	err = mlx5dr_bwc_send_queues_init(ctx);
	if (err)
		return err;

	ctx->send_queue = simple_calloc(ctx->queues, sizeof(*ctx->send_queue));
	if (!ctx->send_queue) {
		rte_errno = ENOMEM;
		err = rte_errno;
		goto free_bwc_locks;
	}

	for (i = 0; i < ctx->queues; i++) {
		err = mlx5dr_send_queue_open(ctx, &ctx->send_queue[i], queue_size);
		if (err)
			goto close_send_queues;
	}

	return 0;

close_send_queues:
	__mlx5dr_send_queues_close(ctx, i);

	simple_free(ctx->send_queue);

free_bwc_locks:
	mlx5dr_send_queues_bwc_locks_destroy(ctx);

	return err;
}

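/* Drain a send queue: post any pending dependent WQEs (or, if there are
 * none, force a completion on the last posted WQE) and, for the synchronous
 * action, poll the CQs until the queue is empty.
 */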
int mlx5dr_send_queue_action(struct mlx5dr_context *ctx,
			     uint16_t queue_id,
			     uint32_t actions)
{
	struct mlx5dr_send_ring_sq *send_sq;
	struct mlx5dr_send_engine *queue;
	bool wait_comp = false;
	int64_t polled = 0;

	queue = &ctx->send_queue[queue_id];
	send_sq = &queue->send_ring->send_sq;

	switch (actions) {
	case MLX5DR_SEND_QUEUE_ACTION_DRAIN_SYNC:
		wait_comp = true;
		/* FALLTHROUGH */
	case MLX5DR_SEND_QUEUE_ACTION_DRAIN_ASYNC:
		if (send_sq->head_dep_idx != send_sq->tail_dep_idx)
			/* Send dependent WQEs to drain the queue */
			mlx5dr_send_all_dep_wqe(queue);
		else
			/* Signal on the last posted WQE */
			mlx5dr_send_engine_flush_queue(queue);

		/* Poll queue until empty */
		while (wait_comp && !mlx5dr_send_engine_empty(queue))
			mlx5dr_send_engine_poll_cqs(queue, NULL, &polled, 0);

		break;
	default:
		rte_errno = EINVAL;
		return -rte_errno;
	}

	return 0;
}