xref: /dpdk/drivers/net/mlx5/mlx5_rxtx.c (revision a5d7a3f77ddc3c3ae18bce04d7555b458360cc65)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2015 6WIND S.A.
5  *   Copyright 2015 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <assert.h>
35 #include <stdint.h>
36 #include <string.h>
37 #include <stdlib.h>
38 
39 /* Verbs header. */
40 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
41 #ifdef PEDANTIC
42 #pragma GCC diagnostic ignored "-pedantic"
43 #endif
44 #include <infiniband/verbs.h>
45 #include <infiniband/mlx5_hw.h>
46 #include <infiniband/arch.h>
47 #ifdef PEDANTIC
48 #pragma GCC diagnostic error "-pedantic"
49 #endif
50 
51 /* DPDK headers don't like -pedantic. */
52 #ifdef PEDANTIC
53 #pragma GCC diagnostic ignored "-pedantic"
54 #endif
55 #include <rte_mbuf.h>
56 #include <rte_mempool.h>
57 #include <rte_prefetch.h>
58 #include <rte_common.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_ether.h>
61 #ifdef PEDANTIC
62 #pragma GCC diagnostic error "-pedantic"
63 #endif
64 
65 #include "mlx5.h"
66 #include "mlx5_utils.h"
67 #include "mlx5_rxtx.h"
68 #include "mlx5_autoconf.h"
69 #include "mlx5_defs.h"
70 #include "mlx5_prm.h"
71 
72 #ifndef NDEBUG
73 
74 /**
75  * Verify or set magic value in CQE.
76  *
77  * @param cqe
78  *   Pointer to CQE.
79  *
80  * @return
81  *   0 the first time, 1 if the magic value is already present.
82  */
83 static inline int
84 check_cqe64_seen(volatile struct mlx5_cqe64 *cqe)
85 {
86 	static const uint8_t magic[] = "seen";
87 	volatile uint8_t (*buf)[sizeof(cqe->rsvd40)] = &cqe->rsvd40;
88 	int ret = 1;
89 	unsigned int i;
90 
91 	for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i)
92 		if (!ret || (*buf)[i] != magic[i]) {
93 			ret = 0;
94 			(*buf)[i] = magic[i];
95 		}
96 	return ret;
97 }
98 
99 #endif /* NDEBUG */
100 
101 static inline int
102 check_cqe64(volatile struct mlx5_cqe64 *cqe,
103 	    unsigned int cqes_n, const uint16_t ci)
104 	    __attribute__((always_inline));
105 
106 /**
107  * Check whether CQE is valid.
108  *
109  * @param cqe
110  *   Pointer to CQE.
111  * @param cqes_n
112  *   Size of completion queue.
113  * @param ci
114  *   Consumer index.
115  *
116  * @return
117  *   0 on success, 1 on failure.
118  */
119 static inline int
120 check_cqe64(volatile struct mlx5_cqe64 *cqe,
121 		unsigned int cqes_n, const uint16_t ci)
122 {
123 	uint16_t idx = ci & cqes_n;
124 	uint8_t op_own = cqe->op_own;
125 	uint8_t op_owner = MLX5_CQE_OWNER(op_own);
126 	uint8_t op_code = MLX5_CQE_OPCODE(op_own);
127 
128 	if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
129 		return 1; /* No CQE. */
130 #ifndef NDEBUG
131 	if ((op_code == MLX5_CQE_RESP_ERR) ||
132 	    (op_code == MLX5_CQE_REQ_ERR)) {
133 		volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe;
134 		uint8_t syndrome = err_cqe->syndrome;
135 
136 		if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
137 		    (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
138 			return 0;
139 		if (!check_cqe64_seen(cqe))
140 			ERROR("unexpected CQE error %u (0x%02x)"
141 			      " syndrome 0x%02x",
142 			      op_code, op_code, syndrome);
143 		return 1;
144 	} else if ((op_code != MLX5_CQE_RESP_SEND) &&
145 		   (op_code != MLX5_CQE_REQ)) {
146 		if (!check_cqe64_seen(cqe))
147 			ERROR("unexpected CQE opcode %u (0x%02x)",
148 			      op_code, op_code);
149 		return 1;
150 	}
151 #endif /* NDEBUG */
152 	return 0;
153 }
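/*
 * Note on the ownership test above: cqes_n is a power of two, so
 * (ci & cqes_n) isolates the bit that toggles on every ring wrap and must
 * match the owner bit written by hardware. As an illustrative sketch for a
 * 256-entry CQ:
 *
 *   sw_owned = (MLX5_CQE_OWNER(cqe->op_own) == !!(ci & 256));
 */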
154 
155 /**
156  * Manage TX completions.
157  *
158  * When sending a burst, mlx5_tx_burst() posts several WRs; their buffers are freed here once a completion is received.
159  *
160  * @param txq
161  *   Pointer to TX queue structure.
162  */
163 static void
164 txq_complete(struct txq *txq)
165 {
166 	const unsigned int elts_n = txq->elts_n;
167 	const unsigned int cqe_n = txq->cqe_n;
168 	const unsigned int cqe_cnt = cqe_n - 1;
169 	uint16_t elts_free = txq->elts_tail;
170 	uint16_t elts_tail;
171 	uint16_t cq_ci = txq->cq_ci;
172 	volatile struct mlx5_cqe64 *cqe = NULL;
173 	volatile union mlx5_wqe *wqe;
174 
175 	do {
176 		volatile struct mlx5_cqe64 *tmp;
177 
178 		tmp = &(*txq->cqes)[cq_ci & cqe_cnt].cqe64;
179 		if (check_cqe64(tmp, cqe_n, cq_ci))
180 			break;
181 		cqe = tmp;
182 #ifndef NDEBUG
183 		if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
184 			if (!check_cqe64_seen(cqe))
185 				ERROR("unexpected compressed CQE, TX stopped");
186 			return;
187 		}
188 		if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
189 		    (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
190 			if (!check_cqe64_seen(cqe))
191 				ERROR("unexpected error CQE, TX stopped");
192 			return;
193 		}
194 #endif /* NDEBUG */
195 		++cq_ci;
196 	} while (1);
197 	if (unlikely(cqe == NULL))
198 		return;
199 	wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)];
200 	elts_tail = wqe->wqe.ctrl.data[3];
201 	assert(elts_tail < txq->wqe_n);
202 	/* Free buffers. */
203 	while (elts_free != elts_tail) {
204 		struct rte_mbuf *elt = (*txq->elts)[elts_free];
205 		unsigned int elts_free_next =
206 			(elts_free + 1) & (elts_n - 1);
207 		struct rte_mbuf *elt_next = (*txq->elts)[elts_free_next];
208 
209 #ifndef NDEBUG
210 		/* Poisoning. */
211 		memset(&(*txq->elts)[elts_free],
212 		       0x66,
213 		       sizeof((*txq->elts)[elts_free]));
214 #endif
215 		RTE_MBUF_PREFETCH_TO_FREE(elt_next);
216 		/* Only one segment needs to be freed. */
217 		rte_pktmbuf_free_seg(elt);
218 		elts_free = elts_free_next;
219 	}
220 	txq->cq_ci = cq_ci;
221 	txq->elts_tail = elts_tail;
222 	/* Update the consumer index. */
223 	rte_wmb();
224 	*txq->cq_db = htonl(cq_ci);
225 }
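/*
 * Note: completions are requested at most once every MLX5_TX_COMP_THRESH
 * packets (see mlx5_tx_burst()), so a single CQE releases a whole batch of
 * mbufs. The batch boundary is recovered from ctrl.data[3], where the TX
 * burst stored elts_head. Illustrative count of entries freed by one
 * completion, assuming elts_n is a power of two:
 *
 *   freed = (elts_tail - txq->elts_tail) & (elts_n - 1);
 */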
226 
227 /**
228  * Get Memory Pool (MP) from mbuf. If the mbuf is indirect, the pool of the
229  * underlying direct mbuf (where its data resides) is returned instead.
230  *
231  * @param buf
232  *   Pointer to mbuf.
233  *
234  * @return
235  *   Memory pool where data is located for given mbuf.
236  */
237 static struct rte_mempool *
238 txq_mb2mp(struct rte_mbuf *buf)
239 {
240 	if (unlikely(RTE_MBUF_INDIRECT(buf)))
241 		return rte_mbuf_from_indirect(buf)->pool;
242 	return buf->pool;
243 }
244 
245 static inline uint32_t
246 txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
247 	__attribute__((always_inline));
248 
249 /**
250  * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[].
251  * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
252  * remove an entry first.
253  *
254  * @param txq
255  *   Pointer to TX queue structure.
256  * @param[in] mp
257  *   Memory Pool for which a Memory Region lkey must be returned.
258  *
259  * @return
260  *   mr->lkey on success, (uint32_t)-1 on failure.
261  */
262 static inline uint32_t
263 txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
264 {
265 	unsigned int i;
266 	uint32_t lkey = (uint32_t)-1;
267 
268 	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
269 		if (unlikely(txq->mp2mr[i].mp == NULL)) {
270 			/* Unknown MP, add a new MR for it. */
271 			break;
272 		}
273 		if (txq->mp2mr[i].mp == mp) {
274 			assert(txq->mp2mr[i].lkey != (uint32_t)-1);
275 			assert(htonl(txq->mp2mr[i].mr->lkey) ==
276 			       txq->mp2mr[i].lkey);
277 			lkey = txq->mp2mr[i].lkey;
278 			break;
279 		}
280 	}
281 	if (unlikely(lkey == (uint32_t)-1))
282 		lkey = txq_mp2mr_reg(txq, mp, i);
283 	return lkey;
284 }
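/*
 * Note: mp2mr[] is a small linear cache of mempool to memory region
 * translations; a miss falls through to txq_mp2mr_reg(), which registers
 * the pool (evicting the oldest entry when the table is full). Typical
 * datapath use, as an illustrative sketch only:
 *
 *   uint32_t lkey = txq_mp2mr(txq, txq_mb2mp(buf));
 *   dseg->lkey = lkey; (the cached value is already in network byte order)
 */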
285 
286 /**
287  * Write a regular WQE.
288  *
289  * @param txq
290  *   Pointer to TX queue structure.
291  * @param wqe
292  *   Pointer to the WQE to fill.
293  * @param addr
294  *   Buffer data address.
295  * @param length
296  *   Packet length.
297  * @param lkey
298  *   Memory region lkey.
299  */
300 static inline void
301 mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
302 	       uintptr_t addr, uint32_t length, uint32_t lkey)
303 {
304 	wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
305 	wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
306 	wqe->wqe.ctrl.data[2] = 0;
307 	wqe->wqe.ctrl.data[3] = 0;
308 	wqe->inl.eseg.rsvd0 = 0;
309 	wqe->inl.eseg.rsvd1 = 0;
310 	wqe->inl.eseg.mss = 0;
311 	wqe->inl.eseg.rsvd2 = 0;
312 	wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_INLINE_HEADER_SIZE);
313 	/* Copy the first 16 bytes into inline header. */
314 	rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start,
315 		   (uint8_t *)(uintptr_t)addr,
316 		   MLX5_ETH_INLINE_HEADER_SIZE);
317 	addr += MLX5_ETH_INLINE_HEADER_SIZE;
318 	length -= MLX5_ETH_INLINE_HEADER_SIZE;
319 	/* Store remaining data in data segment. */
320 	wqe->wqe.dseg.byte_count = htonl(length);
321 	wqe->wqe.dseg.lkey = lkey;
322 	wqe->wqe.dseg.addr = htonll(addr);
323 	/* Increment consumer index. */
324 	++txq->wqe_ci;
325 }
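/*
 * Note: ctrl.data[1] combines the QP number (qp_num_8s, pre-shifted left
 * by 8) with the WQE size in 16-byte units; the value 4 above covers the
 * control, Ethernet and data segments of a single-segment send (64 bytes
 * total). Multi-segment packets later patch the low 6 bits of this field
 * in mlx5_tx_burst().
 */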
326 
327 /**
328  * Write a regular WQE with VLAN.
329  *
330  * @param txq
331  *   Pointer to TX queue structure.
332  * @param wqe
333  *   Pointer to the WQE to fill.
334  * @param addr
335  *   Buffer data address.
336  * @param length
337  *   Packet length.
338  * @param lkey
339  *   Memory region lkey.
340  * @param vlan_tci
341  *   VLAN field to insert in packet.
342  */
343 static inline void
344 mlx5_wqe_write_vlan(struct txq *txq, volatile union mlx5_wqe *wqe,
345 		    uintptr_t addr, uint32_t length, uint32_t lkey,
346 		    uint16_t vlan_tci)
347 {
348 	uint32_t vlan = htonl(0x81000000 | vlan_tci);
349 
350 	wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
351 	wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
352 	wqe->wqe.ctrl.data[2] = 0;
353 	wqe->wqe.ctrl.data[3] = 0;
354 	wqe->inl.eseg.rsvd0 = 0;
355 	wqe->inl.eseg.rsvd1 = 0;
356 	wqe->inl.eseg.mss = 0;
357 	wqe->inl.eseg.rsvd2 = 0;
358 	wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_VLAN_INLINE_HEADER_SIZE);
359 	/*
360 	 * Copy 12 bytes of source & destination MAC address.
361 	 * Copy 4 bytes of VLAN.
362 	 * Copy 2 bytes of Ether type.
363 	 */
364 	rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start,
365 		   (uint8_t *)(uintptr_t)addr, 12);
366 	rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 12),
367 		   &vlan, sizeof(vlan));
368 	rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 16),
369 		   (uint8_t *)((uintptr_t)addr + 12), 2);
370 	addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
371 	length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
372 	/* Store remaining data in data segment. */
373 	wqe->wqe.dseg.byte_count = htonl(length);
374 	wqe->wqe.dseg.lkey = lkey;
375 	wqe->wqe.dseg.addr = htonll(addr);
376 	/* Increment consumer index. */
377 	++txq->wqe_ci;
378 }
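/*
 * Note: VLAN insertion is performed in software: an 802.1Q tag with TPID
 * 0x8100 is built from vlan_tci and spliced between the MAC addresses and
 * the EtherType of the inlined header, which is why only
 * MLX5_ETH_VLAN_INLINE_HEADER_SIZE - 4 bytes are consumed from the mbuf.
 */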
379 
380 /**
381  * Write an inline WQE.
382  *
383  * @param txq
384  *   Pointer to TX queue structure.
385  * @param wqe
386  *   Pointer to the WQE to fill.
387  * @param addr
388  *   Buffer data address.
389  * @param length
390  *   Packet length.
393  */
394 static inline void
395 mlx5_wqe_write_inline(struct txq *txq, volatile union mlx5_wqe *wqe,
396 		      uintptr_t addr, uint32_t length)
397 {
398 	uint32_t size;
399 	uint16_t wqe_cnt = txq->wqe_n - 1;
400 	uint16_t wqe_ci = txq->wqe_ci + 1;
401 
402 	/* Copy the first 16 bytes into inline header. */
403 	rte_memcpy((void *)(uintptr_t)wqe->inl.eseg.inline_hdr_start,
404 		   (void *)(uintptr_t)addr,
405 		   MLX5_ETH_INLINE_HEADER_SIZE);
406 	addr += MLX5_ETH_INLINE_HEADER_SIZE;
407 	length -= MLX5_ETH_INLINE_HEADER_SIZE;
408 	size = 3 + ((4 + length + 15) / 16);
409 	wqe->inl.byte_cnt = htonl(length | MLX5_INLINE_SEG);
410 	rte_memcpy((void *)(uintptr_t)&wqe->inl.data[0],
411 		   (void *)addr, MLX5_WQE64_INL_DATA);
412 	addr += MLX5_WQE64_INL_DATA;
413 	length -= MLX5_WQE64_INL_DATA;
414 	while (length) {
415 		volatile union mlx5_wqe *wqe_next =
416 			&(*txq->wqes)[wqe_ci & wqe_cnt];
417 		uint32_t copy_bytes = (length > sizeof(*wqe)) ?
418 				      sizeof(*wqe) :
419 				      length;
420 
421 		rte_mov64((uint8_t *)(uintptr_t)&wqe_next->data[0],
422 			  (uint8_t *)addr);
423 		addr += copy_bytes;
424 		length -= copy_bytes;
425 		++wqe_ci;
426 	}
427 	assert(size < 64);
428 	wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
429 	wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size);
430 	wqe->inl.ctrl.data[2] = 0;
431 	wqe->inl.ctrl.data[3] = 0;
432 	wqe->inl.eseg.rsvd0 = 0;
433 	wqe->inl.eseg.rsvd1 = 0;
434 	wqe->inl.eseg.mss = 0;
435 	wqe->inl.eseg.rsvd2 = 0;
436 	wqe->inl.eseg.inline_hdr_sz = htons(MLX5_ETH_INLINE_HEADER_SIZE);
437 	/* Increment consumer index. */
438 	txq->wqe_ci = wqe_ci;
439 }
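/*
 * Note: inline data that does not fit in the first WQE spills into the
 * following ring entries, copied 64 bytes at a time with rte_mov64(). The
 * size stored in ctrl.data[1] is counted in 16-byte units: 3 units for the
 * control and Ethernet segments plus the rounded-up inline segment (4-byte
 * byte count header followed by the payload), hence:
 *
 *   size = 3 + ((4 + length + 15) / 16);
 */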
440 
441 /**
442  * Write an inline WQE with VLAN.
443  *
444  * @param txq
445  *   Pointer to TX queue structure.
446  * @param wqe
447  *   Pointer to the WQE to fill.
448  * @param addr
449  *   Buffer data address.
450  * @param length
451  *   Packet length.
454  * @param vlan_tci
455  *   VLAN field to insert in packet.
456  */
457 static inline void
458 mlx5_wqe_write_inline_vlan(struct txq *txq, volatile union mlx5_wqe *wqe,
459 			   uintptr_t addr, uint32_t length, uint16_t vlan_tci)
460 {
461 	uint32_t size;
462 	uint32_t wqe_cnt = txq->wqe_n - 1;
463 	uint16_t wqe_ci = txq->wqe_ci + 1;
464 	uint32_t vlan = htonl(0x81000000 | vlan_tci);
465 
466 	/*
467 	 * Copy 12 bytes of source & destination MAC address.
468 	 * Copy 4 bytes of VLAN.
469 	 * Copy 2 bytes of Ether type.
470 	 */
471 	rte_memcpy((uint8_t *)(uintptr_t)wqe->inl.eseg.inline_hdr_start,
472 		   (uint8_t *)addr, 12);
473 	rte_memcpy((uint8_t *)(uintptr_t)wqe->inl.eseg.inline_hdr_start + 12,
474 		   &vlan, sizeof(vlan));
475 	rte_memcpy((uint8_t *)((uintptr_t)wqe->inl.eseg.inline_hdr_start + 16),
476 		   (uint8_t *)(addr + 12), 2);
477 	addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
478 	length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
479 	size = (sizeof(wqe->inl.ctrl.ctrl) +
480 		sizeof(wqe->inl.eseg) +
481 		sizeof(wqe->inl.byte_cnt) +
482 		length + 15) / 16;
483 	wqe->inl.byte_cnt = htonl(length | MLX5_INLINE_SEG);
484 	rte_memcpy((void *)(uintptr_t)&wqe->inl.data[0],
485 		   (void *)addr, MLX5_WQE64_INL_DATA);
486 	addr += MLX5_WQE64_INL_DATA;
487 	length -= MLX5_WQE64_INL_DATA;
488 	while (length) {
489 		volatile union mlx5_wqe *wqe_next =
490 			&(*txq->wqes)[wqe_ci & wqe_cnt];
491 		uint32_t copy_bytes = (length > sizeof(*wqe)) ?
492 				      sizeof(*wqe) :
493 				      length;
494 
495 		rte_mov64((uint8_t *)(uintptr_t)&wqe_next->data[0],
496 			  (uint8_t *)addr);
497 		addr += copy_bytes;
498 		length -= copy_bytes;
499 		++wqe_ci;
500 	}
501 	assert(size < 64);
502 	wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
503 	wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size);
504 	wqe->inl.ctrl.data[2] = 0;
505 	wqe->inl.ctrl.data[3] = 0;
506 	wqe->inl.eseg.rsvd0 = 0;
507 	wqe->inl.eseg.rsvd1 = 0;
508 	wqe->inl.eseg.mss = 0;
509 	wqe->inl.eseg.rsvd2 = 0;
510 	wqe->inl.eseg.inline_hdr_sz = htons(MLX5_ETH_VLAN_INLINE_HEADER_SIZE);
511 	/* Increment consumer index. */
512 	txq->wqe_ci = wqe_ci;
513 }
514 
515 /**
516  * Ring TX queue doorbell.
517  *
518  * @param txq
519  *   Pointer to TX queue structure.
520  */
521 static inline void
522 mlx5_tx_dbrec(struct txq *txq)
523 {
524 	uint8_t *dst = (uint8_t *)((uintptr_t)txq->bf_reg + txq->bf_offset);
525 	uint32_t data[4] = {
526 		htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
527 		htonl(txq->qp_num_8s),
528 		0,
529 		0,
530 	};
531 	rte_wmb();
532 	*txq->qp_db = htonl(txq->wqe_ci);
533 	/* Ensure ordering between DB record and BF copy. */
534 	rte_wmb();
535 	rte_mov16(dst, (uint8_t *)data);
536 	txq->bf_offset ^= txq->bf_buf_size;
537 }
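/*
 * Note: the first rte_wmb() makes the WQE stores visible before the
 * doorbell record update, the second orders that update against the
 * BlueFlame copy of the first 16 bytes of the WQE. bf_offset alternates
 * between the two halves of the BlueFlame buffer, presumably so that
 * back-to-back doorbells do not overwrite a copy still in flight.
 */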
538 
539 /**
540  * Prefetch a CQE.
541  *
542  * @param txq
543  *   Pointer to TX queue structure.
544  * @param ci
545  *   CQE consumer index.
546  */
547 static inline void
548 tx_prefetch_cqe(struct txq *txq, uint16_t ci)
549 {
550 	volatile struct mlx5_cqe64 *cqe;
551 
552 	cqe = &(*txq->cqes)[ci & (txq->cqe_n - 1)].cqe64;
553 	rte_prefetch0(cqe);
554 }
555 
556 /**
557  * Prefetch a WQE.
558  *
559  * @param txq
560  *   Pointer to TX queue structure.
561  * @param ci
562  *   WQE consumer index.
563  */
564 static inline void
565 tx_prefetch_wqe(struct txq *txq, uint16_t ci)
566 {
567 	volatile union mlx5_wqe *wqe;
568 
569 	wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)];
570 	rte_prefetch0(wqe);
571 }
572 
573 /**
574  * DPDK callback for TX.
575  *
576  * @param dpdk_txq
577  *   Generic pointer to TX queue structure.
578  * @param[in] pkts
579  *   Packets to transmit.
580  * @param pkts_n
581  *   Number of packets in array.
582  *
583  * @return
584  *   Number of packets successfully transmitted (<= pkts_n).
585  */
586 uint16_t
587 mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
588 {
589 	struct txq *txq = (struct txq *)dpdk_txq;
590 	uint16_t elts_head = txq->elts_head;
591 	const unsigned int elts_n = txq->elts_n;
592 	unsigned int i = 0;
593 	unsigned int j = 0;
594 	unsigned int max;
595 	unsigned int comp;
596 	volatile union mlx5_wqe *wqe = NULL;
597 
598 	if (unlikely(!pkts_n))
599 		return 0;
600 	/* Prefetch first packet cacheline. */
601 	tx_prefetch_cqe(txq, txq->cq_ci);
602 	tx_prefetch_cqe(txq, txq->cq_ci + 1);
603 	rte_prefetch0(*pkts);
604 	/* Start processing. */
605 	txq_complete(txq);
606 	max = (elts_n - (elts_head - txq->elts_tail));
607 	if (max > elts_n)
608 		max -= elts_n;
609 	do {
610 		struct rte_mbuf *buf = *(pkts++);
611 		unsigned int elts_head_next;
612 		uintptr_t addr;
613 		uint32_t length;
614 		uint32_t lkey;
615 		unsigned int segs_n = buf->nb_segs;
616 		volatile struct mlx5_wqe_data_seg *dseg;
617 		unsigned int ds = sizeof(*wqe) / 16;
618 
619 		/*
620 		 * Make sure there is enough room to store this packet and
621 		 * that one ring entry remains unused.
622 		 */
623 		assert(segs_n);
624 		if (max < segs_n + 1)
625 			break;
626 		max -= segs_n;
627 		--pkts_n;
628 		elts_head_next = (elts_head + 1) & (elts_n - 1);
629 		wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
630 		dseg = &wqe->wqe.dseg;
631 		rte_prefetch0(wqe);
632 		if (pkts_n)
633 			rte_prefetch0(*pkts);
634 		/* Retrieve buffer information. */
635 		addr = rte_pktmbuf_mtod(buf, uintptr_t);
636 		length = DATA_LEN(buf);
637 		/* Update element. */
638 		(*txq->elts)[elts_head] = buf;
639 		/* Prefetch next buffer data. */
640 		if (pkts_n)
641 			rte_prefetch0(rte_pktmbuf_mtod(*pkts,
642 						       volatile void *));
643 		/* Retrieve Memory Region key for this memory pool. */
644 		lkey = txq_mp2mr(txq, txq_mb2mp(buf));
645 		if (buf->ol_flags & PKT_TX_VLAN_PKT)
646 			mlx5_wqe_write_vlan(txq, wqe, addr, length, lkey,
647 					    buf->vlan_tci);
648 		else
649 			mlx5_wqe_write(txq, wqe, addr, length, lkey);
650 		/* Should we enable HW CKSUM offload? */
651 		if (buf->ol_flags &
652 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
653 			wqe->wqe.eseg.cs_flags =
654 				MLX5_ETH_WQE_L3_CSUM |
655 				MLX5_ETH_WQE_L4_CSUM;
656 		} else {
657 			wqe->wqe.eseg.cs_flags = 0;
658 		}
659 		while (--segs_n) {
660 			/*
661 			 * Spill on next WQE when the current one does not have
662 			 * enough room left. Size of WQE must be a multiple
663 			 * of data segment size.
664 			 */
665 			assert(!(sizeof(*wqe) % sizeof(*dseg)));
666 			if (!(ds % (sizeof(*wqe) / 16)))
667 				dseg = (volatile void *)
668 					&(*txq->wqes)[txq->wqe_ci++ &
669 						      (txq->wqe_n - 1)];
670 			else
671 				++dseg;
672 			++ds;
673 			buf = buf->next;
674 			assert(buf);
675 			/* Store segment information. */
676 			dseg->byte_count = htonl(DATA_LEN(buf));
677 			dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf));
678 			dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
679 			(*txq->elts)[elts_head_next] = buf;
680 			elts_head_next = (elts_head_next + 1) & (elts_n - 1);
681 #ifdef MLX5_PMD_SOFT_COUNTERS
682 			length += DATA_LEN(buf);
683 #endif
684 			++j;
685 		}
686 		/* Update DS field in WQE. */
687 		wqe->wqe.ctrl.data[1] &= htonl(0xffffffc0);
688 		wqe->wqe.ctrl.data[1] |= htonl(ds & 0x3f);
689 		elts_head = elts_head_next;
690 #ifdef MLX5_PMD_SOFT_COUNTERS
691 		/* Increment sent bytes counter. */
692 		txq->stats.obytes += length;
693 #endif
695 		++i;
696 	} while (pkts_n);
697 	/* Take a shortcut if nothing must be sent. */
698 	if (unlikely(i == 0))
699 		return 0;
700 	/* Check whether completion threshold has been reached. */
701 	comp = txq->elts_comp + i + j;
702 	if (comp >= MLX5_TX_COMP_THRESH) {
703 		/* Request completion on last WQE. */
704 		wqe->wqe.ctrl.data[2] = htonl(8);
705 		/* Save elts_head in unused "immediate" field of WQE. */
706 		wqe->wqe.ctrl.data[3] = elts_head;
707 		txq->elts_comp = 0;
708 	} else {
709 		txq->elts_comp = comp;
710 	}
711 #ifdef MLX5_PMD_SOFT_COUNTERS
712 	/* Increment sent packets counter. */
713 	txq->stats.opackets += i;
714 #endif
715 	/* Ring QP doorbell. */
716 	mlx5_tx_dbrec(txq);
717 	txq->elts_head = elts_head;
718 	return i;
719 }
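/*
 * Usage note (illustrative, not part of this file): mlx5_tx_burst() and
 * its variants below are installed as the ethdev tx_pkt_burst callback and
 * reached through the generic API, e.g.:
 *
 *   uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, n);
 */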
720 
721 /**
722  * DPDK callback for TX with inline support.
723  *
724  * @param dpdk_txq
725  *   Generic pointer to TX queue structure.
726  * @param[in] pkts
727  *   Packets to transmit.
728  * @param pkts_n
729  *   Number of packets in array.
730  *
731  * @return
732  *   Number of packets successfully transmitted (<= pkts_n).
733  */
734 uint16_t
735 mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
736 {
737 	struct txq *txq = (struct txq *)dpdk_txq;
738 	uint16_t elts_head = txq->elts_head;
739 	const unsigned int elts_n = txq->elts_n;
740 	unsigned int i = 0;
741 	unsigned int j = 0;
742 	unsigned int max;
743 	unsigned int comp;
744 	volatile union mlx5_wqe *wqe = NULL;
745 	unsigned int max_inline = txq->max_inline;
746 
747 	if (unlikely(!pkts_n))
748 		return 0;
749 	/* Prefetch first packet cacheline. */
750 	tx_prefetch_cqe(txq, txq->cq_ci);
751 	tx_prefetch_cqe(txq, txq->cq_ci + 1);
752 	rte_prefetch0(*pkts);
753 	/* Start processing. */
754 	txq_complete(txq);
755 	max = (elts_n - (elts_head - txq->elts_tail));
756 	if (max > elts_n)
757 		max -= elts_n;
758 	do {
759 		struct rte_mbuf *buf = *(pkts++);
760 		unsigned int elts_head_next;
761 		uintptr_t addr;
762 		uint32_t length;
763 		uint32_t lkey;
764 		unsigned int segs_n = buf->nb_segs;
765 		volatile struct mlx5_wqe_data_seg *dseg;
766 		unsigned int ds = sizeof(*wqe) / 16;
767 
768 		/*
769 		 * Make sure there is enough room to store this packet and
770 		 * that one ring entry remains unused.
771 		 */
772 		assert(segs_n);
773 		if (max < segs_n + 1)
774 			break;
775 		max -= segs_n;
776 		--pkts_n;
777 		elts_head_next = (elts_head + 1) & (elts_n - 1);
778 		wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
779 		dseg = &wqe->wqe.dseg;
780 		tx_prefetch_wqe(txq, txq->wqe_ci);
781 		tx_prefetch_wqe(txq, txq->wqe_ci + 1);
782 		if (pkts_n)
783 			rte_prefetch0(*pkts);
784 		/* Should we enable HW CKSUM offload? */
785 		if (buf->ol_flags &
786 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
787 			wqe->inl.eseg.cs_flags =
788 				MLX5_ETH_WQE_L3_CSUM |
789 				MLX5_ETH_WQE_L4_CSUM;
790 		} else {
791 			wqe->inl.eseg.cs_flags = 0;
792 		}
793 		/* Retrieve buffer information. */
794 		addr = rte_pktmbuf_mtod(buf, uintptr_t);
795 		length = DATA_LEN(buf);
796 		/* Update element. */
797 		(*txq->elts)[elts_head] = buf;
798 		/* Prefetch next buffer data. */
799 		if (pkts_n)
800 			rte_prefetch0(rte_pktmbuf_mtod(*pkts,
801 						       volatile void *));
802 		if ((length <= max_inline) && (segs_n == 1)) {
803 			if (buf->ol_flags & PKT_TX_VLAN_PKT)
804 				mlx5_wqe_write_inline_vlan(txq, wqe,
805 							   addr, length,
806 							   buf->vlan_tci);
807 			else
808 				mlx5_wqe_write_inline(txq, wqe, addr, length);
809 			goto skip_segs;
810 		} else {
811 			/* Retrieve Memory Region key for this memory pool. */
812 			lkey = txq_mp2mr(txq, txq_mb2mp(buf));
813 			if (buf->ol_flags & PKT_TX_VLAN_PKT)
814 				mlx5_wqe_write_vlan(txq, wqe, addr, length,
815 						    lkey, buf->vlan_tci);
816 			else
817 				mlx5_wqe_write(txq, wqe, addr, length, lkey);
818 		}
819 		while (--segs_n) {
820 			/*
821 			 * Spill on next WQE when the current one does not have
822 			 * enough room left. Size of WQE must be a multiple
823 			 * of data segment size.
824 			 */
825 			assert(!(sizeof(*wqe) % sizeof(*dseg)));
826 			if (!(ds % (sizeof(*wqe) / 16)))
827 				dseg = (volatile void *)
828 					&(*txq->wqes)[txq->wqe_ci++ &
829 						      (txq->wqe_n - 1)];
830 			else
831 				++dseg;
832 			++ds;
833 			buf = buf->next;
834 			assert(buf);
835 			/* Store segment information. */
836 			dseg->byte_count = htonl(DATA_LEN(buf));
837 			dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf));
838 			dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
839 			(*txq->elts)[elts_head_next] = buf;
840 			elts_head_next = (elts_head_next + 1) & (elts_n - 1);
841 #ifdef MLX5_PMD_SOFT_COUNTERS
842 			length += DATA_LEN(buf);
843 #endif
844 			++j;
845 		}
846 		/* Update DS field in WQE. */
847 		wqe->inl.ctrl.data[1] &= htonl(0xffffffc0);
848 		wqe->inl.ctrl.data[1] |= htonl(ds & 0x3f);
849 skip_segs:
850 		elts_head = elts_head_next;
851 #ifdef MLX5_PMD_SOFT_COUNTERS
852 		/* Increment sent bytes counter. */
853 		txq->stats.obytes += length;
854 #endif
855 		++i;
856 	} while (pkts_n);
857 	/* Take a shortcut if nothing must be sent. */
858 	if (unlikely(i == 0))
859 		return 0;
860 	/* Check whether completion threshold has been reached. */
861 	comp = txq->elts_comp + i + j;
862 	if (comp >= MLX5_TX_COMP_THRESH) {
863 		/* Request completion on last WQE. */
864 		wqe->inl.ctrl.data[2] = htonl(8);
865 		/* Save elts_head in unused "immediate" field of WQE. */
866 		wqe->inl.ctrl.data[3] = elts_head;
867 		txq->elts_comp = 0;
868 	} else {
869 		txq->elts_comp = comp;
870 	}
871 #ifdef MLX5_PMD_SOFT_COUNTERS
872 	/* Increment sent packets counter. */
873 	txq->stats.opackets += i;
874 #endif
875 	/* Ring QP doorbell. */
876 	mlx5_tx_dbrec(txq);
877 	txq->elts_head = elts_head;
878 	return i;
879 }
880 
881 /**
882  * Open a MPW session.
883  *
884  * @param txq
885  *   Pointer to TX queue structure.
886  * @param mpw
887  *   Pointer to MPW session structure.
888  * @param length
889  *   Packet length.
890  */
891 static inline void
892 mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
893 {
894 	uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
895 	volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
896 		(volatile struct mlx5_wqe_data_seg (*)[])
897 		(uintptr_t)&(*txq->wqes)[(idx + 1) & (txq->wqe_n - 1)];
898 
899 	mpw->state = MLX5_MPW_STATE_OPENED;
900 	mpw->pkts_n = 0;
901 	mpw->len = length;
902 	mpw->total_len = 0;
903 	mpw->wqe = &(*txq->wqes)[idx];
904 	mpw->wqe->mpw.eseg.mss = htons(length);
905 	mpw->wqe->mpw.eseg.inline_hdr_sz = 0;
906 	mpw->wqe->mpw.eseg.rsvd0 = 0;
907 	mpw->wqe->mpw.eseg.rsvd1 = 0;
908 	mpw->wqe->mpw.eseg.rsvd2 = 0;
909 	mpw->wqe->mpw.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
910 					   (txq->wqe_ci << 8) |
911 					   MLX5_OPCODE_LSO_MPW);
912 	mpw->wqe->mpw.ctrl.data[2] = 0;
913 	mpw->wqe->mpw.ctrl.data[3] = 0;
914 	mpw->data.dseg[0] = &mpw->wqe->mpw.dseg[0];
915 	mpw->data.dseg[1] = &mpw->wqe->mpw.dseg[1];
916 	mpw->data.dseg[2] = &(*dseg)[0];
917 	mpw->data.dseg[3] = &(*dseg)[1];
918 	mpw->data.dseg[4] = &(*dseg)[2];
919 }
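/*
 * Note: a multi-packet WQE (MPW) shares one control and one Ethernet
 * segment among up to MLX5_MPW_DSEG_MAX packets of identical length; the
 * common length is advertised through eseg.mss and each packet contributes
 * one data segment. The first two data segments live in the opening ring
 * entry, the remaining ones in the next entry, hence dseg[2..4] above.
 */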
920 
921 /**
922  * Close a MPW session.
923  *
924  * @param txq
925  *   Pointer to TX queue structure.
926  * @param mpw
927  *   Pointer to MPW session structure.
928  */
929 static inline void
930 mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
931 {
932 	unsigned int num = mpw->pkts_n;
933 
934 	/*
935 	 * Store size in multiple of 16 bytes. Control and Ethernet segments
936 	 * count as 2.
937 	 */
938 	mpw->wqe->mpw.ctrl.data[1] = htonl(txq->qp_num_8s | (2 + num));
939 	mpw->state = MLX5_MPW_STATE_CLOSED;
940 	if (num < 3)
941 		++txq->wqe_ci;
942 	else
943 		txq->wqe_ci += 2;
944 	tx_prefetch_wqe(txq, txq->wqe_ci);
945 	tx_prefetch_wqe(txq, txq->wqe_ci + 1);
946 }
947 
948 /**
949  * DPDK callback for TX with MPW support.
950  *
951  * @param dpdk_txq
952  *   Generic pointer to TX queue structure.
953  * @param[in] pkts
954  *   Packets to transmit.
955  * @param pkts_n
956  *   Number of packets in array.
957  *
958  * @return
959  *   Number of packets successfully transmitted (<= pkts_n).
960  */
961 uint16_t
962 mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
963 {
964 	struct txq *txq = (struct txq *)dpdk_txq;
965 	uint16_t elts_head = txq->elts_head;
966 	const unsigned int elts_n = txq->elts_n;
967 	unsigned int i = 0;
968 	unsigned int j = 0;
969 	unsigned int max;
970 	unsigned int comp;
971 	struct mlx5_mpw mpw = {
972 		.state = MLX5_MPW_STATE_CLOSED,
973 	};
974 
975 	if (unlikely(!pkts_n))
976 		return 0;
977 	/* Prefetch first packet cacheline. */
978 	tx_prefetch_cqe(txq, txq->cq_ci);
979 	tx_prefetch_wqe(txq, txq->wqe_ci);
980 	tx_prefetch_wqe(txq, txq->wqe_ci + 1);
981 	/* Start processing. */
982 	txq_complete(txq);
983 	max = (elts_n - (elts_head - txq->elts_tail));
984 	if (max > elts_n)
985 		max -= elts_n;
986 	do {
987 		struct rte_mbuf *buf = *(pkts++);
988 		unsigned int elts_head_next;
989 		uint32_t length;
990 		unsigned int segs_n = buf->nb_segs;
991 		uint32_t cs_flags = 0;
992 
993 		/*
994 		 * Make sure there is enough room to store this packet and
995 		 * that one ring entry remains unused.
996 		 */
997 		assert(segs_n);
998 		if (max < segs_n + 1)
999 			break;
1000 		/* Do not bother with large packets MPW cannot handle. */
1001 		if (segs_n > MLX5_MPW_DSEG_MAX)
1002 			break;
1003 		max -= segs_n;
1004 		--pkts_n;
1005 		/* Should we enable HW CKSUM offload? */
1006 		if (buf->ol_flags &
1007 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
1008 			cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
1009 		/* Retrieve packet information. */
1010 		length = PKT_LEN(buf);
1011 		assert(length);
1012 		/* Start new session if packet differs. */
1013 		if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
1014 		    ((mpw.len != length) ||
1015 		     (segs_n != 1) ||
1016 		     (mpw.wqe->mpw.eseg.cs_flags != cs_flags)))
1017 			mlx5_mpw_close(txq, &mpw);
1018 		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
1019 			mlx5_mpw_new(txq, &mpw, length);
1020 			mpw.wqe->mpw.eseg.cs_flags = cs_flags;
1021 		}
1022 		/* Multi-segment packets must be alone in their MPW. */
1023 		assert((segs_n == 1) || (mpw.pkts_n == 0));
1024 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1025 		length = 0;
1026 #endif
1027 		do {
1028 			volatile struct mlx5_wqe_data_seg *dseg;
1029 			uintptr_t addr;
1030 
1031 			elts_head_next = (elts_head + 1) & (elts_n - 1);
1032 			assert(buf);
1033 			(*txq->elts)[elts_head] = buf;
1034 			dseg = mpw.data.dseg[mpw.pkts_n];
1035 			addr = rte_pktmbuf_mtod(buf, uintptr_t);
1036 			*dseg = (struct mlx5_wqe_data_seg){
1037 				.byte_count = htonl(DATA_LEN(buf)),
1038 				.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
1039 				.addr = htonll(addr),
1040 			};
1041 			elts_head = elts_head_next;
1042 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1043 			length += DATA_LEN(buf);
1044 #endif
1045 			buf = buf->next;
1046 			++mpw.pkts_n;
1047 			++j;
1048 		} while (--segs_n);
1049 		assert(length == mpw.len);
1050 		if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
1051 			mlx5_mpw_close(txq, &mpw);
1052 		elts_head = elts_head_next;
1053 #ifdef MLX5_PMD_SOFT_COUNTERS
1054 		/* Increment sent bytes counter. */
1055 		txq->stats.obytes += length;
1056 #endif
1057 		++i;
1058 	} while (pkts_n);
1059 	/* Take a shortcut if nothing must be sent. */
1060 	if (unlikely(i == 0))
1061 		return 0;
1062 	/* Check whether completion threshold has been reached. */
1063 	/* "j" includes both packets and segments. */
1064 	comp = txq->elts_comp + j;
1065 	if (comp >= MLX5_TX_COMP_THRESH) {
1066 		volatile union mlx5_wqe *wqe = mpw.wqe;
1067 
1068 		/* Request completion on last WQE. */
1069 		wqe->mpw.ctrl.data[2] = htonl(8);
1070 		/* Save elts_head in unused "immediate" field of WQE. */
1071 		wqe->mpw.ctrl.data[3] = elts_head;
1072 		txq->elts_comp = 0;
1073 	} else {
1074 		txq->elts_comp = comp;
1075 	}
1076 #ifdef MLX5_PMD_SOFT_COUNTERS
1077 	/* Increment sent packets counter. */
1078 	txq->stats.opackets += i;
1079 #endif
1080 	/* Ring QP doorbell. */
1081 	if (mpw.state == MLX5_MPW_STATE_OPENED)
1082 		mlx5_mpw_close(txq, &mpw);
1083 	mlx5_tx_dbrec(txq);
1084 	txq->elts_head = elts_head;
1085 	return i;
1086 }
1087 
1088 /**
1089  * Open a MPW inline session.
1090  *
1091  * @param txq
1092  *   Pointer to TX queue structure.
1093  * @param mpw
1094  *   Pointer to MPW session structure.
1095  * @param length
1096  *   Packet length.
1097  */
1098 static inline void
1099 mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
1100 {
1101 	uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
1102 
1103 	mpw->state = MLX5_MPW_INL_STATE_OPENED;
1104 	mpw->pkts_n = 0;
1105 	mpw->len = length;
1106 	mpw->total_len = 0;
1107 	mpw->wqe = &(*txq->wqes)[idx];
1108 	mpw->wqe->mpw_inl.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
1109 					       (txq->wqe_ci << 8) |
1110 					       MLX5_OPCODE_LSO_MPW);
1111 	mpw->wqe->mpw_inl.ctrl.data[2] = 0;
1112 	mpw->wqe->mpw_inl.ctrl.data[3] = 0;
1113 	mpw->wqe->mpw_inl.eseg.mss = htons(length);
1114 	mpw->wqe->mpw_inl.eseg.inline_hdr_sz = 0;
1115 	mpw->wqe->mpw_inl.eseg.cs_flags = 0;
1116 	mpw->wqe->mpw_inl.eseg.rsvd0 = 0;
1117 	mpw->wqe->mpw_inl.eseg.rsvd1 = 0;
1118 	mpw->wqe->mpw_inl.eseg.rsvd2 = 0;
1119 	mpw->data.raw = &mpw->wqe->mpw_inl.data[0];
1120 }
1121 
1122 /**
1123  * Close a MPW inline session.
1124  *
1125  * @param txq
1126  *   Pointer to TX queue structure.
1127  * @param mpw
1128  *   Pointer to MPW session structure.
1129  */
1130 static inline void
1131 mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
1132 {
1133 	unsigned int size;
1134 
1135 	size = sizeof(*mpw->wqe) - MLX5_MWQE64_INL_DATA + mpw->total_len;
1136 	/*
1137 	 * Store size in multiple of 16 bytes. Control and Ethernet segments
1138 	 * count as 2.
1139 	 */
1140 	mpw->wqe->mpw_inl.ctrl.data[1] =
1141 		htonl(txq->qp_num_8s | ((size + 15) / 16));
1142 	mpw->state = MLX5_MPW_STATE_CLOSED;
1143 	mpw->wqe->mpw_inl.byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG);
1144 	txq->wqe_ci += (size + (sizeof(*mpw->wqe) - 1)) / sizeof(*mpw->wqe);
1145 }
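/*
 * Note: in an inline MPW session the packet bytes are copied into the WQE
 * ring itself (mpw->data.raw) instead of being referenced by data
 * segments; byte_cnt is tagged with MLX5_INLINE_SEG and wqe_ci advances by
 * as many 64-byte ring entries as the inlined data consumed.
 */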
1146 
1147 /**
1148  * DPDK callback for TX with MPW inline support.
1149  *
1150  * @param dpdk_txq
1151  *   Generic pointer to TX queue structure.
1152  * @param[in] pkts
1153  *   Packets to transmit.
1154  * @param pkts_n
1155  *   Number of packets in array.
1156  *
1157  * @return
1158  *   Number of packets successfully transmitted (<= pkts_n).
1159  */
1160 uint16_t
1161 mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
1162 			 uint16_t pkts_n)
1163 {
1164 	struct txq *txq = (struct txq *)dpdk_txq;
1165 	uint16_t elts_head = txq->elts_head;
1166 	const unsigned int elts_n = txq->elts_n;
1167 	unsigned int i = 0;
1168 	unsigned int j = 0;
1169 	unsigned int max;
1170 	unsigned int comp;
1171 	unsigned int inline_room = txq->max_inline;
1172 	struct mlx5_mpw mpw = {
1173 		.state = MLX5_MPW_STATE_CLOSED,
1174 	};
1175 
1176 	if (unlikely(!pkts_n))
1177 		return 0;
1178 	/* Prefetch first packet cacheline. */
1179 	tx_prefetch_cqe(txq, txq->cq_ci);
1180 	tx_prefetch_wqe(txq, txq->wqe_ci);
1181 	tx_prefetch_wqe(txq, txq->wqe_ci + 1);
1182 	/* Start processing. */
1183 	txq_complete(txq);
1184 	max = (elts_n - (elts_head - txq->elts_tail));
1185 	if (max > elts_n)
1186 		max -= elts_n;
1187 	do {
1188 		struct rte_mbuf *buf = *(pkts++);
1189 		unsigned int elts_head_next;
1190 		uintptr_t addr;
1191 		uint32_t length;
1192 		unsigned int segs_n = buf->nb_segs;
1193 		uint32_t cs_flags = 0;
1194 
1195 		/*
1196 		 * Make sure there is enough room to store this packet and
1197 		 * that one ring entry remains unused.
1198 		 */
1199 		assert(segs_n);
1200 		if (max < segs_n + 1)
1201 			break;
1202 		/* Do not bother with large packets MPW cannot handle. */
1203 		if (segs_n > MLX5_MPW_DSEG_MAX)
1204 			break;
1205 		max -= segs_n;
1206 		--pkts_n;
1207 		/* Should we enable HW CKSUM offload? */
1208 		if (buf->ol_flags &
1209 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
1210 			cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
1211 		/* Retrieve packet information. */
1212 		length = PKT_LEN(buf);
1213 		/* Start new session if packet differs. */
1214 		if (mpw.state == MLX5_MPW_STATE_OPENED) {
1215 			if ((mpw.len != length) ||
1216 			    (segs_n != 1) ||
1217 			    (mpw.wqe->mpw.eseg.cs_flags != cs_flags))
1218 				mlx5_mpw_close(txq, &mpw);
1219 		} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
1220 			if ((mpw.len != length) ||
1221 			    (segs_n != 1) ||
1222 			    (length > inline_room) ||
1223 			    (mpw.wqe->mpw_inl.eseg.cs_flags != cs_flags)) {
1224 				mlx5_mpw_inline_close(txq, &mpw);
1225 				inline_room = txq->max_inline;
1226 			}
1227 		}
1228 		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
1229 			if ((segs_n != 1) ||
1230 			    (length > inline_room)) {
1231 				mlx5_mpw_new(txq, &mpw, length);
1232 				mpw.wqe->mpw.eseg.cs_flags = cs_flags;
1233 			} else {
1234 				mlx5_mpw_inline_new(txq, &mpw, length);
1235 				mpw.wqe->mpw_inl.eseg.cs_flags = cs_flags;
1236 			}
1237 		}
1238 		/* Multi-segment packets must be alone in their MPW. */
1239 		assert((segs_n == 1) || (mpw.pkts_n == 0));
1240 		if (mpw.state == MLX5_MPW_STATE_OPENED) {
1241 			assert(inline_room == txq->max_inline);
1242 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1243 			length = 0;
1244 #endif
1245 			do {
1246 				volatile struct mlx5_wqe_data_seg *dseg;
1247 
1248 				elts_head_next =
1249 					(elts_head + 1) & (elts_n - 1);
1250 				assert(buf);
1251 				(*txq->elts)[elts_head] = buf;
1252 				dseg = mpw.data.dseg[mpw.pkts_n];
1253 				addr = rte_pktmbuf_mtod(buf, uintptr_t);
1254 				*dseg = (struct mlx5_wqe_data_seg){
1255 					.byte_count = htonl(DATA_LEN(buf)),
1256 					.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
1257 					.addr = htonll(addr),
1258 				};
1259 				elts_head = elts_head_next;
1260 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1261 				length += DATA_LEN(buf);
1262 #endif
1263 				buf = buf->next;
1264 				++mpw.pkts_n;
1265 				++j;
1266 			} while (--segs_n);
1267 			assert(length == mpw.len);
1268 			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
1269 				mlx5_mpw_close(txq, &mpw);
1270 		} else {
1271 			unsigned int max;
1272 
1273 			assert(mpw.state == MLX5_MPW_INL_STATE_OPENED);
1274 			assert(length <= inline_room);
1275 			assert(length == DATA_LEN(buf));
1276 			elts_head_next = (elts_head + 1) & (elts_n - 1);
1277 			addr = rte_pktmbuf_mtod(buf, uintptr_t);
1278 			(*txq->elts)[elts_head] = buf;
1279 			/* Maximum number of bytes before wrapping. */
1280 			max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] -
1281 			       (uintptr_t)mpw.data.raw);
1282 			if (length > max) {
1283 				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
1284 					   (void *)addr,
1285 					   max);
1286 				mpw.data.raw =
1287 					(volatile void *)&(*txq->wqes)[0];
1288 				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
1289 					   (void *)(addr + max),
1290 					   length - max);
1291 				mpw.data.raw += length - max;
1292 			} else {
1293 				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
1294 					   (void *)addr,
1295 					   length);
1296 				mpw.data.raw += length;
1297 			}
1298 			if ((uintptr_t)mpw.data.raw ==
1299 			    (uintptr_t)&(*txq->wqes)[txq->wqe_n])
1300 				mpw.data.raw =
1301 					(volatile void *)&(*txq->wqes)[0];
1302 			++mpw.pkts_n;
1303 			++j;
1304 			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {
1305 				mlx5_mpw_inline_close(txq, &mpw);
1306 				inline_room = txq->max_inline;
1307 			} else {
1308 				inline_room -= length;
1309 			}
1310 		}
1311 		mpw.total_len += length;
1312 		elts_head = elts_head_next;
1313 #ifdef MLX5_PMD_SOFT_COUNTERS
1314 		/* Increment sent bytes counter. */
1315 		txq->stats.obytes += length;
1316 #endif
1317 		++i;
1318 	} while (pkts_n);
1319 	/* Take a shortcut if nothing must be sent. */
1320 	if (unlikely(i == 0))
1321 		return 0;
1322 	/* Check whether completion threshold has been reached. */
1323 	/* "j" includes both packets and segments. */
1324 	comp = txq->elts_comp + j;
1325 	if (comp >= MLX5_TX_COMP_THRESH) {
1326 		volatile union mlx5_wqe *wqe = mpw.wqe;
1327 
1328 		/* Request completion on last WQE. */
1329 		wqe->mpw_inl.ctrl.data[2] = htonl(8);
1330 		/* Save elts_head in unused "immediate" field of WQE. */
1331 		wqe->mpw_inl.ctrl.data[3] = elts_head;
1332 		txq->elts_comp = 0;
1333 	} else {
1334 		txq->elts_comp = comp;
1335 	}
1336 #ifdef MLX5_PMD_SOFT_COUNTERS
1337 	/* Increment sent packets counter. */
1338 	txq->stats.opackets += i;
1339 #endif
1340 	/* Ring QP doorbell. */
1341 	if (mpw.state == MLX5_MPW_INL_STATE_OPENED)
1342 		mlx5_mpw_inline_close(txq, &mpw);
1343 	else if (mpw.state == MLX5_MPW_STATE_OPENED)
1344 		mlx5_mpw_close(txq, &mpw);
1345 	mlx5_tx_dbrec(txq);
1346 	txq->elts_head = elts_head;
1347 	return i;
1348 }
1349 
1350 /**
1351  * Translate RX completion flags to packet type.
1352  *
1353  * @param[in] cqe
1354  *   Pointer to CQE.
1355  *
1356  * @note: fix mlx5_dev_supported_ptypes_get() if anything changes here.
1357  *
1358  * @return
1359  *   Packet type for struct rte_mbuf.
1360  */
1361 static inline uint32_t
1362 rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe)
1363 {
1364 	uint32_t pkt_type;
1365 	uint8_t flags = cqe->l4_hdr_type_etc;
1366 	uint8_t info = cqe->rsvd0[0];
1367 
1368 	if (info & IBV_EXP_CQ_RX_TUNNEL_PACKET)
1369 		pkt_type =
1370 			TRANSPOSE(flags,
1371 				  IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
1372 				  RTE_PTYPE_L3_IPV4) |
1373 			TRANSPOSE(flags,
1374 				  IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
1375 				  RTE_PTYPE_L3_IPV6) |
1376 			TRANSPOSE(flags,
1377 				  IBV_EXP_CQ_RX_IPV4_PACKET,
1378 				  RTE_PTYPE_INNER_L3_IPV4) |
1379 			TRANSPOSE(flags,
1380 				  IBV_EXP_CQ_RX_IPV6_PACKET,
1381 				  RTE_PTYPE_INNER_L3_IPV6);
1382 	else
1383 		pkt_type =
1384 			TRANSPOSE(flags,
1385 				  MLX5_CQE_L3_HDR_TYPE_IPV6,
1386 				  RTE_PTYPE_L3_IPV6) |
1387 			TRANSPOSE(flags,
1388 				  MLX5_CQE_L3_HDR_TYPE_IPV4,
1389 				  RTE_PTYPE_L3_IPV4);
1390 	return pkt_type;
1391 }
1392 
1393 /**
1394  * Get size of the next packet for a given CQE. For compressed CQEs, the
1395  * consumer index is updated only once all packets of the current one have
1396  * been processed.
1397  *
1398  * @param rxq
1399  *   Pointer to RX queue.
1400  * @param cqe
1401  *   CQE to process.
1402  *
1403  * @return
1404  *   Packet size in bytes (0 if there is none), -1 in case of completion
1405  *   with error.
1406  */
1407 static inline int
1408 mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe,
1409 		 uint16_t cqe_cnt)
1410 {
1411 	struct rxq_zip *zip = &rxq->zip;
1412 	uint16_t cqe_n = cqe_cnt + 1;
1413 	int len = 0;
1414 
1415 	/* Process compressed data in the CQE and mini arrays. */
1416 	if (zip->ai) {
1417 		volatile struct mlx5_mini_cqe8 (*mc)[8] =
1418 			(volatile struct mlx5_mini_cqe8 (*)[8])
1419 			(uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].cqe64);
1420 
1421 		len = ntohl((*mc)[zip->ai & 7].byte_cnt);
1422 		if ((++zip->ai & 7) == 0) {
1423 			/*
1424 			 * Increment consumer index to skip the number of
1425 			 * CQEs consumed. Hardware leaves holes in the CQ
1426 			 * ring for software use.
1427 			 */
1428 			zip->ca = zip->na;
1429 			zip->na += 8;
1430 		}
1431 		if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
1432 			uint16_t idx = rxq->cq_ci;
1433 			uint16_t end = zip->cq_ci;
1434 
1435 			while (idx != end) {
1436 				(*rxq->cqes)[idx & cqe_cnt].cqe64.op_own =
1437 					MLX5_CQE_INVALIDATE;
1438 				++idx;
1439 			}
1440 			rxq->cq_ci = zip->cq_ci;
1441 			zip->ai = 0;
1442 		}
1443 	/* No compressed data, get next CQE and verify if it is compressed. */
1444 	} else {
1445 		int ret;
1446 		int8_t op_own;
1447 
1448 		ret = check_cqe64(cqe, cqe_n, rxq->cq_ci);
1449 		if (unlikely(ret == 1))
1450 			return 0;
1451 		++rxq->cq_ci;
1452 		op_own = cqe->op_own;
1453 		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
1454 			volatile struct mlx5_mini_cqe8 (*mc)[8] =
1455 				(volatile struct mlx5_mini_cqe8 (*)[8])
1456 				(uintptr_t)(&(*rxq->cqes)[rxq->cq_ci &
1457 							  cqe_cnt].cqe64);
1458 
1459 			/* Fix endianness. */
1460 			zip->cqe_cnt = ntohl(cqe->byte_cnt);
1461 			/*
1462 			 * Current mini array position is the one returned by
1463 			 * check_cqe64().
1464 			 *
1465 			 * If completion comprises several mini arrays, as a
1466 			 * special case the second one is located 7 CQEs after
1467 			 * the initial CQE instead of 8 for subsequent ones.
1468 			 */
1469 			zip->ca = rxq->cq_ci & cqe_cnt;
1470 			zip->na = zip->ca + 7;
1471 			/* Compute the next non compressed CQE. */
1472 			--rxq->cq_ci;
1473 			zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
1474 			/* Get packet size to return. */
1475 			len = ntohl((*mc)[0].byte_cnt);
1476 			zip->ai = 1;
1477 		} else {
1478 			len = ntohl(cqe->byte_cnt);
1479 		}
1480 		/* Error while receiving packet. */
1481 		if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
1482 			return -1;
1483 	}
1484 	return len;
1485 }
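/*
 * Note: a compressed "title" CQE announces in byte_cnt how many mini-CQEs
 * follow; mini-CQE arrays hold 8 entries each, indexed with (zip->ai & 7),
 * and the regular CQE slots they replace are invalidated and skipped only
 * once the whole compressed session has been drained. Illustrative read of
 * the next compressed packet length:
 *
 *   len = ntohl((*mc)[zip->ai & 7].byte_cnt);
 */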
1486 
1487 /**
1488  * Translate RX completion flags to offload flags.
1489  *
1490  * @param[in] rxq
1491  *   Pointer to RX queue structure.
1492  * @param[in] cqe
1493  *   Pointer to CQE.
1494  *
1495  * @return
1496  *   Offload flags (ol_flags) for struct rte_mbuf.
1497  */
1498 static inline uint32_t
1499 rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe)
1500 {
1501 	uint32_t ol_flags = 0;
1502 	uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK;
1503 	uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK;
1504 	uint8_t info = cqe->rsvd0[0];
1505 
1506 	if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) ||
1507 	    (l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6))
1508 		ol_flags |=
1509 			(!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) *
1510 			 PKT_RX_IP_CKSUM_BAD);
1511 	if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) ||
1512 	    (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) ||
1513 	    (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) ||
1514 	    (l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP))
1515 		ol_flags |=
1516 			(!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) *
1517 			 PKT_RX_L4_CKSUM_BAD);
1518 	/*
1519 	 * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
1520 	 * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
1521 	 * (its value is 0).
1522 	 */
1523 	if ((info & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
1524 		ol_flags |=
1525 			TRANSPOSE(~cqe->l4_hdr_type_etc,
1526 				  IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
1527 				  PKT_RX_IP_CKSUM_BAD) |
1528 			TRANSPOSE(~cqe->l4_hdr_type_etc,
1529 				  IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
1530 				  PKT_RX_L4_CKSUM_BAD);
1531 	return ol_flags;
1532 }
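/*
 * Note: the (!(...) * PKT_RX_*_CKSUM_BAD) and TRANSPOSE() constructs above
 * build ol_flags branchlessly: a cleared "checksum OK" bit multiplies into
 * the corresponding error flag instead of being tested with a conditional.
 */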
1533 
1534 /**
1535  * DPDK callback for RX.
1536  *
1537  * @param dpdk_rxq
1538  *   Generic pointer to RX queue structure.
1539  * @param[out] pkts
1540  *   Array to store received packets.
1541  * @param pkts_n
1542  *   Maximum number of packets in array.
1543  *
1544  * @return
1545  *   Number of packets successfully received (<= pkts_n).
1546  */
1547 uint16_t
1548 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
1549 {
1550 	struct rxq *rxq = dpdk_rxq;
1551 	const unsigned int wqe_cnt = rxq->elts_n - 1;
1552 	const unsigned int cqe_cnt = rxq->cqe_n - 1;
1553 	const unsigned int sges_n = rxq->sges_n;
1554 	struct rte_mbuf *pkt = NULL;
1555 	struct rte_mbuf *seg = NULL;
1556 	volatile struct mlx5_cqe64 *cqe =
1557 		&(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64;
1558 	unsigned int i = 0;
1559 	unsigned int rq_ci = rxq->rq_ci << sges_n;
1560 	int len;
1561 
1562 	while (pkts_n) {
1563 		unsigned int idx = rq_ci & wqe_cnt;
1564 		volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
1565 		struct rte_mbuf *rep = (*rxq->elts)[idx];
1566 
1567 		if (pkt)
1568 			NEXT(seg) = rep;
1569 		seg = rep;
1570 		rte_prefetch0(seg);
1571 		rte_prefetch0(cqe);
1572 		rte_prefetch0(wqe);
1573 		rep = rte_mbuf_raw_alloc(rxq->mp);
1574 		if (unlikely(rep == NULL)) {
1575 			while (pkt != seg) {
1576 				assert(pkt != (*rxq->elts)[idx]);
1577 				seg = NEXT(pkt);
1578 				rte_mbuf_refcnt_set(pkt, 0);
1579 				__rte_mbuf_raw_free(pkt);
1580 				pkt = seg;
1581 			}
1582 			++rxq->stats.rx_nombuf;
1583 			break;
1584 		}
1585 		if (!pkt) {
1586 			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64;
1587 			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt);
1588 			if (len == 0) {
1589 				rte_mbuf_refcnt_set(rep, 0);
1590 				__rte_mbuf_raw_free(rep);
1591 				break;
1592 			}
1593 			if (unlikely(len == -1)) {
1594 				/* RX error, packet is likely too large. */
1595 				rte_mbuf_refcnt_set(rep, 0);
1596 				__rte_mbuf_raw_free(rep);
1597 				++rxq->stats.idropped;
1598 				goto skip;
1599 			}
1600 			pkt = seg;
1601 			assert(len >= (rxq->crc_present << 2));
1602 			/* Update packet information. */
1603 			pkt->packet_type = 0;
1604 			pkt->ol_flags = 0;
1605 			if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
1606 			    rxq->crc_present) {
1607 				if (rxq->csum) {
1608 					pkt->packet_type =
1609 						rxq_cq_to_pkt_type(cqe);
1610 					pkt->ol_flags =
1611 						rxq_cq_to_ol_flags(rxq, cqe);
1612 				}
1613 				if (cqe->l4_hdr_type_etc &
1614 				    MLX5_CQE_VLAN_STRIPPED) {
1615 					pkt->ol_flags |= PKT_RX_VLAN_PKT |
1616 						PKT_RX_VLAN_STRIPPED;
1617 					pkt->vlan_tci = ntohs(cqe->vlan_info);
1618 				}
1619 				if (rxq->crc_present)
1620 					len -= ETHER_CRC_LEN;
1621 			}
1622 			PKT_LEN(pkt) = len;
1623 		}
1624 		DATA_LEN(rep) = DATA_LEN(seg);
1625 		PKT_LEN(rep) = PKT_LEN(seg);
1626 		SET_DATA_OFF(rep, DATA_OFF(seg));
1627 		NB_SEGS(rep) = NB_SEGS(seg);
1628 		PORT(rep) = PORT(seg);
1629 		NEXT(rep) = NULL;
1630 		(*rxq->elts)[idx] = rep;
1631 		/*
1632 		 * Fill NIC descriptor with the new buffer.  The lkey and size
1633 		 * of the buffers are already known, only the buffer address
1634 		 * changes.
1635 		 */
1636 		wqe->addr = htonll(rte_pktmbuf_mtod(rep, uintptr_t));
1637 		if (len > DATA_LEN(seg)) {
1638 			len -= DATA_LEN(seg);
1639 			++NB_SEGS(pkt);
1640 			++rq_ci;
1641 			continue;
1642 		}
1643 		DATA_LEN(seg) = len;
1644 #ifdef MLX5_PMD_SOFT_COUNTERS
1645 		/* Increment bytes counter. */
1646 		rxq->stats.ibytes += PKT_LEN(pkt);
1647 #endif
1648 		/* Return packet. */
1649 		*(pkts++) = pkt;
1650 		pkt = NULL;
1651 		--pkts_n;
1652 		++i;
1653 skip:
1654 		/* Align consumer index to the next stride. */
1655 		rq_ci >>= sges_n;
1656 		++rq_ci;
1657 		rq_ci <<= sges_n;
1658 	}
1659 	if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci)))
1660 		return 0;
1661 	/* Update the consumer index. */
1662 	rxq->rq_ci = rq_ci >> sges_n;
1663 	rte_wmb();
1664 	*rxq->cq_db = htonl(rxq->cq_ci);
1665 	rte_wmb();
1666 	*rxq->rq_db = htonl(rxq->rq_ci);
1667 #ifdef MLX5_PMD_SOFT_COUNTERS
1668 	/* Increment packets counter. */
1669 	rxq->stats.ipackets += i;
1670 #endif
1671 	return i;
1672 }
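/*
 * Usage note (illustrative, not part of this file): this function is
 * installed as the ethdev rx_pkt_burst callback and reached through
 * rte_eth_rx_burst(), e.g.:
 *
 *   uint16_t nb = rte_eth_rx_burst(port_id, queue_id, pkts, MAX_PKT_BURST);
 *
 * where MAX_PKT_BURST is an application-defined constant.
 */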
1673 
1674 /**
1675  * Dummy DPDK callback for TX.
1676  *
1677  * This function is used to temporarily replace the real callback during
1678  * unsafe control operations on the queue, or in case of error.
1679  *
1680  * @param dpdk_txq
1681  *   Generic pointer to TX queue structure.
1682  * @param[in] pkts
1683  *   Packets to transmit.
1684  * @param pkts_n
1685  *   Number of packets in array.
1686  *
1687  * @return
1688  *   Number of packets successfully transmitted (<= pkts_n).
1689  */
1690 uint16_t
1691 removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
1692 {
1693 	(void)dpdk_txq;
1694 	(void)pkts;
1695 	(void)pkts_n;
1696 	return 0;
1697 }
1698 
1699 /**
1700  * Dummy DPDK callback for RX.
1701  *
1702  * This function is used to temporarily replace the real callback during
1703  * unsafe control operations on the queue, or in case of error.
1704  *
1705  * @param dpdk_rxq
1706  *   Generic pointer to RX queue structure.
1707  * @param[out] pkts
1708  *   Array to store received packets.
1709  * @param pkts_n
1710  *   Maximum number of packets in array.
1711  *
1712  * @return
1713  *   Number of packets successfully received (<= pkts_n).
1714  */
1715 uint16_t
1716 removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
1717 {
1718 	(void)dpdk_rxq;
1719 	(void)pkts;
1720 	(void)pkts_n;
1721 	return 0;
1722 }
1723