/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 Cavium, Inc
 */

#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_errno.h>
#include <ethdev_driver.h>
#include <rte_ether.h>
#include <rte_log.h>
#include <rte_mbuf.h>
#include <rte_prefetch.h>

#include "base/nicvf_plat.h"

#include "nicvf_ethdev.h"
#include "nicvf_rxtx.h"
#include "nicvf_logs.h"

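/*
 * Build the SQ header sub-descriptor for @pkt in a local copy and publish it
 * with a single 64-bit store. Checksum offload fields are filled in only when
 * the mbuf carries flags covered by NICVF_TX_OFFLOAD_MASK.
 */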
static inline void __rte_hot
fill_sq_desc_header(union sq_entry_t *entry, struct rte_mbuf *pkt)
{
	/* Local variable sqe to avoid read from sq desc memory */
	union sq_entry_t sqe;
	uint64_t ol_flags;

	/* Fill SQ header descriptor */
	sqe.buff[0] = 0;
	sqe.hdr.subdesc_type = SQ_DESC_TYPE_HEADER;
	/* Number of sub-descriptors following this one */
	sqe.hdr.subdesc_cnt = pkt->nb_segs;
	sqe.hdr.tot_len = pkt->pkt_len;

	ol_flags = pkt->ol_flags & NICVF_TX_OFFLOAD_MASK;
	if (unlikely(ol_flags)) {
		/* L4 cksum */
		uint64_t l4_flags = ol_flags & RTE_MBUF_F_TX_L4_MASK;

		if (l4_flags == RTE_MBUF_F_TX_TCP_CKSUM)
			sqe.hdr.csum_l4 = SEND_L4_CSUM_TCP;
		else if (l4_flags == RTE_MBUF_F_TX_UDP_CKSUM)
			sqe.hdr.csum_l4 = SEND_L4_CSUM_UDP;
		else
			sqe.hdr.csum_l4 = SEND_L4_CSUM_DISABLE;

		sqe.hdr.l3_offset = pkt->l2_len;
		sqe.hdr.l4_offset = pkt->l3_len + pkt->l2_len;

		/* L3 cksum */
		if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
			sqe.hdr.csum_l3 = 1;
	}

	entry->buff[0] = sqe.buff[0];
}

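/*
 * Same as fill_sq_desc_header(), but also clears the second word of the
 * entry; used by the multi-segment transmit path.
 */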
static inline void __rte_hot
fill_sq_desc_header_zero_w1(union sq_entry_t *entry,
				struct rte_mbuf *pkt)
{
	fill_sq_desc_header(entry, pkt);
	entry->buff[1] = 0ULL;
}

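/*
 * Reclaim transmitted mbufs when all of them come from a single mempool:
 * walk the SQ from the software head up to the hardware head and return
 * the collected buffers to the pool in one bulk operation.
 */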
void __rte_hot
nicvf_single_pool_free_xmited_buffers(struct nicvf_txq *sq)
{
	int j = 0;
	uint32_t curr_head;
	uint32_t head = sq->head;
	struct rte_mbuf **txbuffs = sq->txbuffs;
	alignas(RTE_CACHE_LINE_SIZE) void *obj_p[NICVF_MAX_TX_FREE_THRESH];

	curr_head = nicvf_addr_read(sq->sq_head) >> 4;
	while (head != curr_head) {
		if (txbuffs[head])
			obj_p[j++] = txbuffs[head];

		head = (head + 1) & sq->qlen_mask;
	}

	rte_mempool_put_bulk(sq->pool, obj_p, j);
	sq->head = curr_head;
	sq->xmit_bufs -= j;
	NICVF_TX_ASSERT(sq->xmit_bufs >= 0);
}

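/*
 * Reclaim transmitted mbufs when they may belong to different mempools:
 * each segment is freed individually with rte_pktmbuf_free_seg().
 */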
void __rte_hot
nicvf_multi_pool_free_xmited_buffers(struct nicvf_txq *sq)
{
	uint32_t n = 0;
	uint32_t curr_head;
	uint32_t head = sq->head;
	struct rte_mbuf **txbuffs = sq->txbuffs;

	curr_head = nicvf_addr_read(sq->sq_head) >> 4;
	while (head != curr_head) {
		if (txbuffs[head]) {
			rte_pktmbuf_free_seg(txbuffs[head]);
			n++;
		}

		head = (head + 1) & sq->qlen_mask;
	}

	sq->head = curr_head;
	sq->xmit_bufs -= n;
	NICVF_TX_ASSERT(sq->xmit_bufs >= 0);
}

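/* Number of free SQ descriptors, keeping one slot unused to tell full from empty */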
static inline uint32_t __rte_hot
nicvf_free_tx_desc(struct nicvf_txq *sq)
{
	return ((sq->head - sq->tail - 1) & sq->qlen_mask);
}

/* Send Header + Packet */
#define TX_DESC_PER_PKT 2

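/*
 * Make room for a new burst: if the ring cannot hold @nb_pkts packets, or the
 * number of outstanding buffers exceeds tx_free_thresh, run the configured
 * pool_free() handler and recompute the free descriptor count.
 */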
static inline uint32_t __rte_hot
nicvf_free_xmitted_buffers(struct nicvf_txq *sq, struct rte_mbuf **tx_pkts,
			    uint16_t nb_pkts)
{
	uint32_t free_desc = nicvf_free_tx_desc(sq);

	if (free_desc < nb_pkts * TX_DESC_PER_PKT ||
			sq->xmit_bufs > sq->tx_free_thresh) {
		if (unlikely(sq->pool == NULL))
			sq->pool = tx_pkts[0]->pool;

		sq->pool_free(sq);
		/* Freed now, let's see the number of free descs again */
		free_desc = nicvf_free_tx_desc(sq);
	}
	return free_desc;
}

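/*
 * Single-segment burst transmit. Each packet consumes TX_DESC_PER_PKT
 * descriptors (one header and one gather sub-descriptor); the doorbell is
 * rung once for the whole burst after a write barrier.
 */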
uint16_t __rte_hot
nicvf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	int i;
	uint32_t free_desc;
	uint32_t tail;
	struct nicvf_txq *sq = tx_queue;
	union sq_entry_t *desc_ptr = sq->desc;
	struct rte_mbuf **txbuffs = sq->txbuffs;
	struct rte_mbuf *pkt;
	uint32_t qlen_mask = sq->qlen_mask;

	tail = sq->tail;
	free_desc = nicvf_free_xmitted_buffers(sq, tx_pkts, nb_pkts);

	for (i = 0; i < nb_pkts && (int)free_desc >= TX_DESC_PER_PKT; i++) {
		pkt = tx_pkts[i];

		txbuffs[tail] = NULL;
		fill_sq_desc_header(desc_ptr + tail, pkt);
		tail = (tail + 1) & qlen_mask;

		txbuffs[tail] = pkt;
		fill_sq_desc_gather(desc_ptr + tail, pkt);
		tail = (tail + 1) & qlen_mask;
		free_desc -= TX_DESC_PER_PKT;
	}

	if (likely(i)) {
		sq->tail = tail;
		sq->xmit_bufs += i;
		rte_wmb();

		/* Inform HW to xmit the packets */
		nicvf_addr_write(sq->sq_door, i * TX_DESC_PER_PKT);
	}
	return i;
}

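/*
 * Multi-segment burst transmit: one header sub-descriptor plus one gather
 * sub-descriptor per segment. The burst is cut short as soon as a packet no
 * longer fits in the free descriptor budget.
 */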
uint16_t __rte_hot
nicvf_xmit_pkts_multiseg(void *tx_queue, struct rte_mbuf **tx_pkts,
			 uint16_t nb_pkts)
{
	int i, k;
	uint32_t used_desc, next_used_desc, used_bufs, free_desc, tail;
	struct nicvf_txq *sq = tx_queue;
	union sq_entry_t *desc_ptr = sq->desc;
	struct rte_mbuf **txbuffs = sq->txbuffs;
	struct rte_mbuf *pkt, *seg;
	uint32_t qlen_mask = sq->qlen_mask;
	uint16_t nb_segs;

	tail = sq->tail;
	used_desc = 0;
	used_bufs = 0;

	free_desc = nicvf_free_xmitted_buffers(sq, tx_pkts, nb_pkts);

	for (i = 0; i < nb_pkts; i++) {
		pkt = tx_pkts[i];

		nb_segs = pkt->nb_segs;

		next_used_desc = used_desc + nb_segs + 1;
		if (next_used_desc > free_desc)
			break;
		used_desc = next_used_desc;
		used_bufs += nb_segs;

		txbuffs[tail] = NULL;
		fill_sq_desc_header_zero_w1(desc_ptr + tail, pkt);
		tail = (tail + 1) & qlen_mask;

		txbuffs[tail] = pkt;
		fill_sq_desc_gather(desc_ptr + tail, pkt);
		tail = (tail + 1) & qlen_mask;

		seg = pkt->next;
		for (k = 1; k < nb_segs; k++) {
			txbuffs[tail] = seg;
			fill_sq_desc_gather(desc_ptr + tail, seg);
			tail = (tail + 1) & qlen_mask;
			seg = seg->next;
		}
	}

	if (likely(used_desc)) {
		sq->tail = tail;
		sq->xmit_bufs += used_bufs;
		rte_wmb();

		/* Inform HW to xmit the packets */
		nicvf_addr_write(sq->sq_door, used_desc);
	}
	return i;
}

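/*
 * CQE L3/L4 type pair to rte_mbuf packet type lookup table, indexed by the
 * l3_type and l4_type fields of CQE_RX word0 (see nicvf_rx_classify_pkt()).
 */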
static const alignas(RTE_CACHE_LINE_SIZE) uint32_t ptype_table[16][16] = {
	[L3_NONE][L4_NONE] = RTE_PTYPE_UNKNOWN,
	[L3_NONE][L4_IPSEC_ESP] = RTE_PTYPE_UNKNOWN,
	[L3_NONE][L4_IPFRAG] = RTE_PTYPE_L4_FRAG,
	[L3_NONE][L4_IPCOMP] = RTE_PTYPE_UNKNOWN,
	[L3_NONE][L4_TCP] = RTE_PTYPE_L4_TCP,
	[L3_NONE][L4_UDP_PASS1] = RTE_PTYPE_L4_UDP,
	[L3_NONE][L4_GRE] = RTE_PTYPE_TUNNEL_GRE,
	[L3_NONE][L4_UDP_PASS2] = RTE_PTYPE_L4_UDP,
	[L3_NONE][L4_UDP_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE,
	[L3_NONE][L4_UDP_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
	[L3_NONE][L4_NVGRE] = RTE_PTYPE_TUNNEL_NVGRE,

	[L3_IPV4][L4_NONE] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_UNKNOWN,
	[L3_IPV4][L4_IPSEC_ESP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV4,
	[L3_IPV4][L4_IPFRAG] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_FRAG,
	[L3_IPV4][L4_IPCOMP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_UNKNOWN,
	[L3_IPV4][L4_TCP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
	[L3_IPV4][L4_UDP_PASS1] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
	[L3_IPV4][L4_GRE] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_GRE,
	[L3_IPV4][L4_UDP_PASS2] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
	[L3_IPV4][L4_UDP_GENEVE] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_GENEVE,
	[L3_IPV4][L4_UDP_VXLAN] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_VXLAN,
	[L3_IPV4][L4_NVGRE] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_NVGRE,

	[L3_IPV4_OPT][L4_NONE] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_UNKNOWN,
	[L3_IPV4_OPT][L4_IPSEC_ESP] = RTE_PTYPE_L3_IPV4_EXT |
				RTE_PTYPE_L3_IPV4,
	[L3_IPV4_OPT][L4_IPFRAG] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_FRAG,
	[L3_IPV4_OPT][L4_IPCOMP] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_UNKNOWN,
	[L3_IPV4_OPT][L4_TCP] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
	[L3_IPV4_OPT][L4_UDP_PASS1] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
	[L3_IPV4_OPT][L4_GRE] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_GRE,
	[L3_IPV4_OPT][L4_UDP_PASS2] = RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
	[L3_IPV4_OPT][L4_UDP_GENEVE] = RTE_PTYPE_L3_IPV4_EXT |
				RTE_PTYPE_TUNNEL_GENEVE,
	[L3_IPV4_OPT][L4_UDP_VXLAN] = RTE_PTYPE_L3_IPV4_EXT |
				RTE_PTYPE_TUNNEL_VXLAN,
	[L3_IPV4_OPT][L4_NVGRE] = RTE_PTYPE_L3_IPV4_EXT |
				RTE_PTYPE_TUNNEL_NVGRE,

	[L3_IPV6][L4_NONE] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_UNKNOWN,
	[L3_IPV6][L4_IPSEC_ESP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L3_IPV4,
	[L3_IPV6][L4_IPFRAG] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_FRAG,
	[L3_IPV6][L4_IPCOMP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_UNKNOWN,
	[L3_IPV6][L4_TCP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
	[L3_IPV6][L4_UDP_PASS1] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
	[L3_IPV6][L4_GRE] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_TUNNEL_GRE,
	[L3_IPV6][L4_UDP_PASS2] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
	[L3_IPV6][L4_UDP_GENEVE] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_TUNNEL_GENEVE,
	[L3_IPV6][L4_UDP_VXLAN] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_TUNNEL_VXLAN,
	[L3_IPV6][L4_NVGRE] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_TUNNEL_NVGRE,

	[L3_IPV6_OPT][L4_NONE] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_UNKNOWN,
	[L3_IPV6_OPT][L4_IPSEC_ESP] = RTE_PTYPE_L3_IPV6_EXT |
					RTE_PTYPE_L3_IPV4,
	[L3_IPV6_OPT][L4_IPFRAG] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_FRAG,
	[L3_IPV6_OPT][L4_IPCOMP] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_UNKNOWN,
	[L3_IPV6_OPT][L4_TCP] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
	[L3_IPV6_OPT][L4_UDP_PASS1] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
	[L3_IPV6_OPT][L4_GRE] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_TUNNEL_GRE,
	[L3_IPV6_OPT][L4_UDP_PASS2] = RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
	[L3_IPV6_OPT][L4_UDP_GENEVE] = RTE_PTYPE_L3_IPV6_EXT |
					RTE_PTYPE_TUNNEL_GENEVE,
	[L3_IPV6_OPT][L4_UDP_VXLAN] = RTE_PTYPE_L3_IPV6_EXT |
					RTE_PTYPE_TUNNEL_VXLAN,
	[L3_IPV6_OPT][L4_NVGRE] = RTE_PTYPE_L3_IPV6_EXT |
					RTE_PTYPE_TUNNEL_NVGRE,

	[L3_ET_STOP][L4_NONE] = RTE_PTYPE_UNKNOWN,
	[L3_ET_STOP][L4_IPSEC_ESP] = RTE_PTYPE_UNKNOWN,
	[L3_ET_STOP][L4_IPFRAG] = RTE_PTYPE_L4_FRAG,
	[L3_ET_STOP][L4_IPCOMP] = RTE_PTYPE_UNKNOWN,
	[L3_ET_STOP][L4_TCP] = RTE_PTYPE_L4_TCP,
	[L3_ET_STOP][L4_UDP_PASS1] = RTE_PTYPE_L4_UDP,
	[L3_ET_STOP][L4_GRE] = RTE_PTYPE_TUNNEL_GRE,
	[L3_ET_STOP][L4_UDP_PASS2] = RTE_PTYPE_L4_UDP,
	[L3_ET_STOP][L4_UDP_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE,
	[L3_ET_STOP][L4_UDP_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
	[L3_ET_STOP][L4_NVGRE] = RTE_PTYPE_TUNNEL_NVGRE,

	[L3_OTHER][L4_NONE] = RTE_PTYPE_UNKNOWN,
	[L3_OTHER][L4_IPSEC_ESP] = RTE_PTYPE_UNKNOWN,
	[L3_OTHER][L4_IPFRAG] = RTE_PTYPE_L4_FRAG,
	[L3_OTHER][L4_IPCOMP] = RTE_PTYPE_UNKNOWN,
	[L3_OTHER][L4_TCP] = RTE_PTYPE_L4_TCP,
	[L3_OTHER][L4_UDP_PASS1] = RTE_PTYPE_L4_UDP,
	[L3_OTHER][L4_GRE] = RTE_PTYPE_TUNNEL_GRE,
	[L3_OTHER][L4_UDP_PASS2] = RTE_PTYPE_L4_UDP,
	[L3_OTHER][L4_UDP_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE,
	[L3_OTHER][L4_UDP_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
	[L3_OTHER][L4_NVGRE] = RTE_PTYPE_TUNNEL_NVGRE,
};

static inline uint32_t __rte_hot
nicvf_rx_classify_pkt(cqe_rx_word0_t cqe_rx_w0)
{
	return ptype_table[cqe_rx_w0.l3_type][cqe_rx_w0.l4_type];
}

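/*
 * Translate the CQE error opcode into Rx checksum ol_flags. Index 0 is the
 * no-error case, index 1 an IP checksum error, index 2 an L4 checksum error.
 */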
static inline uint64_t __rte_hot
nicvf_set_olflags(const cqe_rx_word0_t cqe_rx_w0)
{
	static const alignas(RTE_CACHE_LINE_SIZE) uint64_t flag_table[3] = {
		RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD,
		RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN,
		RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
	};

	const uint8_t idx = (cqe_rx_w0.err_opcode == CQE_RX_ERR_L4_CHK) << 1 |
		(cqe_rx_w0.err_opcode == CQE_RX_ERR_IP_CHK);
	return flag_table[idx];
}

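/*
 * Refill the receive buffer descriptor ring with @to_fill buffers from the
 * Rx mempool. The RBDR is shared (rxq->shared_rbdr), so the tail range is
 * reserved atomically and published in order before ringing the doorbell.
 * Returns the number of buffers posted, or 0 on mempool allocation failure.
 */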
static inline int __rte_hot
nicvf_fill_rbdr(struct nicvf_rxq *rxq, int to_fill)
{
	int i;
	uint32_t ltail, next_tail;
	struct nicvf_rbdr *rbdr = rxq->shared_rbdr;
	uint64_t mbuf_phys_off = rxq->mbuf_phys_off;
	struct rbdr_entry_t *desc = rbdr->desc;
	uint32_t qlen_mask = rbdr->qlen_mask;
	uintptr_t door = rbdr->rbdr_door;
	alignas(RTE_CACHE_LINE_SIZE) void *obj_p[NICVF_MAX_RX_FREE_THRESH];

	if (unlikely(rte_mempool_get_bulk(rxq->pool, obj_p, to_fill) < 0)) {
		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
			to_fill;
		return 0;
	}

	NICVF_RX_ASSERT((unsigned int)to_fill <= (qlen_mask -
		(nicvf_addr_read(rbdr->rbdr_status) & NICVF_RBDR_COUNT_MASK)));

	next_tail = rte_atomic_fetch_add_explicit(&rbdr->next_tail, to_fill,
					rte_memory_order_acquire);
	ltail = next_tail;
	for (i = 0; i < to_fill; i++) {
		struct rbdr_entry_t *entry = desc + (ltail & qlen_mask);

		entry->full_addr = nicvf_mbuff_virt2phy((uintptr_t)obj_p[i],
							mbuf_phys_off);
		ltail++;
	}

	rte_wait_until_equal_32((uint32_t *)(uintptr_t)&rbdr->tail, next_tail,
			rte_memory_order_relaxed);

	rte_atomic_store_explicit(&rbdr->tail, ltail, rte_memory_order_release);
	nicvf_addr_write(door, to_fill);
	return to_fill;
}

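/*
 * Clamp the burst size to the number of pending CQEs; the cached count is
 * refreshed from the CQ status register only when it looks insufficient.
 */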
static inline int32_t __rte_hot
nicvf_rx_pkts_to_process(struct nicvf_rxq *rxq, uint16_t nb_pkts,
			 int32_t available_space)
{
	if (unlikely(available_space < nb_pkts))
		rxq->available_space = nicvf_addr_read(rxq->cq_status)
						& NICVF_CQ_CQE_COUNT_MASK;

	return RTE_MIN(nb_pkts, available_space);
}

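/* Fill in the RSS hash from CQE word2 when the hardware reports an RSS algorithm */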
static inline void __rte_hot
nicvf_rx_offload(cqe_rx_word0_t cqe_rx_w0, cqe_rx_word2_t cqe_rx_w2,
		 struct rte_mbuf *pkt)
{
	if (likely(cqe_rx_w0.rss_alg)) {
		pkt->hash.rss = cqe_rx_w2.rss_tag;
		pkt->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
	}
}

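/*
 * Common single-segment receive body. @flag selects, at compile time, which
 * offloads the specialised nicvf_recv_pkts_* wrappers below enable. Consumed
 * buffers are accounted in recv_buffers and the RBDR is refilled once the
 * rx_free_thresh watermark is crossed.
 */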
static __rte_always_inline uint16_t
nicvf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
		const uint32_t flag)
{
	uint32_t i, to_process;
	struct cqe_rx_t *cqe_rx;
	struct rte_mbuf *pkt;
	cqe_rx_word0_t cqe_rx_w0;
	cqe_rx_word1_t cqe_rx_w1;
	cqe_rx_word2_t cqe_rx_w2;
	cqe_rx_word3_t cqe_rx_w3;
	struct nicvf_rxq *rxq = rx_queue;
	union cq_entry_t *desc = rxq->desc;
	const uint64_t cqe_mask = rxq->qlen_mask;
	uint64_t rb0_ptr, mbuf_phys_off = rxq->mbuf_phys_off;
	const uint64_t mbuf_init = rxq->mbuf_initializer.value;
	uint32_t cqe_head = rxq->head & cqe_mask;
	int32_t available_space = rxq->available_space;
	const uint8_t rbptr_offset = rxq->rbptr_offset;

	to_process = nicvf_rx_pkts_to_process(rxq, nb_pkts, available_space);

	for (i = 0; i < to_process; i++) {
		rte_prefetch_non_temporal(&desc[cqe_head + 2]);
		cqe_rx = (struct cqe_rx_t *)&desc[cqe_head];
		NICVF_RX_ASSERT(((struct cq_entry_type_t *)cqe_rx)->cqe_type
						 == CQE_TYPE_RX);

		NICVF_LOAD_PAIR(cqe_rx_w0.u64, cqe_rx_w1.u64, cqe_rx);
		NICVF_LOAD_PAIR(cqe_rx_w2.u64, cqe_rx_w3.u64, &cqe_rx->word2);
		rb0_ptr = *((uint64_t *)cqe_rx + rbptr_offset);
		pkt = (struct rte_mbuf *)nicvf_mbuff_phy2virt
				(rb0_ptr - cqe_rx_w1.align_pad, mbuf_phys_off);

		if (flag & NICVF_RX_OFFLOAD_NONE)
			pkt->ol_flags = 0;
		if (flag & NICVF_RX_OFFLOAD_CKSUM)
			pkt->ol_flags = nicvf_set_olflags(cqe_rx_w0);
		if (flag & NICVF_RX_OFFLOAD_VLAN_STRIP) {
			if (unlikely(cqe_rx_w0.vlan_stripped)) {
				pkt->ol_flags |= RTE_MBUF_F_RX_VLAN
							| RTE_MBUF_F_RX_VLAN_STRIPPED;
				pkt->vlan_tci =
					rte_cpu_to_be_16(cqe_rx_w2.vlan_tci);
			}
		}
		pkt->data_len = cqe_rx_w3.rb0_sz;
		pkt->pkt_len = cqe_rx_w3.rb0_sz;
		pkt->packet_type = nicvf_rx_classify_pkt(cqe_rx_w0);
		nicvf_mbuff_init_update(pkt, mbuf_init, cqe_rx_w1.align_pad);
		nicvf_rx_offload(cqe_rx_w0, cqe_rx_w2, pkt);
		rx_pkts[i] = pkt;
		cqe_head = (cqe_head + 1) & cqe_mask;
		nicvf_prefetch_store_keep(pkt);
	}

	if (likely(to_process)) {
		rxq->available_space -= to_process;
		rxq->head = cqe_head;
		nicvf_addr_write(rxq->cq_door, to_process);
		rxq->recv_buffers += to_process;
	}
	if (rxq->recv_buffers > rxq->rx_free_thresh) {
		rxq->recv_buffers -= nicvf_fill_rbdr(rxq, rxq->rx_free_thresh);
		NICVF_RX_ASSERT(rxq->recv_buffers >= 0);
	}

	return to_process;
}

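/*
 * Single-segment Rx burst entry points; each wrapper instantiates
 * nicvf_recv_pkts() with a constant offload flag combination so the
 * per-offload branches compile away.
 */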
uint16_t __rte_hot
nicvf_recv_pkts_no_offload(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	return nicvf_recv_pkts(rx_queue, rx_pkts, nb_pkts,
			NICVF_RX_OFFLOAD_NONE);
}

uint16_t __rte_hot
nicvf_recv_pkts_cksum(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	return nicvf_recv_pkts(rx_queue, rx_pkts, nb_pkts,
			NICVF_RX_OFFLOAD_CKSUM);
}

uint16_t __rte_hot
nicvf_recv_pkts_vlan_strip(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	return nicvf_recv_pkts(rx_queue, rx_pkts, nb_pkts,
			NICVF_RX_OFFLOAD_NONE | NICVF_RX_OFFLOAD_VLAN_STRIP);
}

uint16_t __rte_hot
nicvf_recv_pkts_cksum_vlan_strip(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	return nicvf_recv_pkts(rx_queue, rx_pkts, nb_pkts,
			NICVF_RX_OFFLOAD_CKSUM | NICVF_RX_OFFLOAD_VLAN_STRIP);
}

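/*
 * Convert one multi-segment CQE into an mbuf chain: the first receive buffer
 * becomes the head mbuf and the remaining rb_ptr[] entries are linked as
 * segments. Returns the number of receive buffers consumed by this packet.
 */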
static __rte_always_inline uint16_t __rte_hot
nicvf_process_cq_mseg_entry(struct cqe_rx_t *cqe_rx,
			uint64_t mbuf_phys_off,
			struct rte_mbuf **rx_pkt, uint8_t rbptr_offset,
			uint64_t mbuf_init, const uint32_t flag)
{
	struct rte_mbuf *pkt, *seg, *prev;
	cqe_rx_word0_t cqe_rx_w0;
	cqe_rx_word1_t cqe_rx_w1;
	cqe_rx_word2_t cqe_rx_w2;
	uint16_t *rb_sz, nb_segs, seg_idx;
	uint64_t *rb_ptr;

	NICVF_LOAD_PAIR(cqe_rx_w0.u64, cqe_rx_w1.u64, cqe_rx);
	NICVF_RX_ASSERT(cqe_rx_w0.cqe_type == CQE_TYPE_RX);
	cqe_rx_w2 = cqe_rx->word2;
	rb_sz = &cqe_rx->word3.rb0_sz;
	rb_ptr = (uint64_t *)cqe_rx + rbptr_offset;
	nb_segs = cqe_rx_w0.rb_cnt;
	pkt = (struct rte_mbuf *)nicvf_mbuff_phy2virt
			(rb_ptr[0] - cqe_rx_w1.align_pad, mbuf_phys_off);

	pkt->pkt_len = cqe_rx_w1.pkt_len;
	pkt->data_len = rb_sz[nicvf_frag_num(0)];
	nicvf_mbuff_init_mseg_update(
				pkt, mbuf_init, cqe_rx_w1.align_pad, nb_segs);
	pkt->packet_type = nicvf_rx_classify_pkt(cqe_rx_w0);
	if (flag & NICVF_RX_OFFLOAD_NONE)
		pkt->ol_flags = 0;
	if (flag & NICVF_RX_OFFLOAD_CKSUM)
		pkt->ol_flags = nicvf_set_olflags(cqe_rx_w0);
	if (flag & NICVF_RX_OFFLOAD_VLAN_STRIP) {
		if (unlikely(cqe_rx_w0.vlan_stripped)) {
			pkt->ol_flags |= RTE_MBUF_F_RX_VLAN
				| RTE_MBUF_F_RX_VLAN_STRIPPED;
			pkt->vlan_tci = rte_cpu_to_be_16(cqe_rx_w2.vlan_tci);
		}
	}
	nicvf_rx_offload(cqe_rx_w0, cqe_rx_w2, pkt);

	*rx_pkt = pkt;
	prev = pkt;
	for (seg_idx = 1; seg_idx < nb_segs; seg_idx++) {
		seg = (struct rte_mbuf *)nicvf_mbuff_phy2virt
			(rb_ptr[seg_idx], mbuf_phys_off);

		prev->next = seg;
		seg->data_len = rb_sz[nicvf_frag_num(seg_idx)];
		nicvf_mbuff_init_update(seg, mbuf_init, 0);

		prev = seg;
	}
	prev->next = NULL;
	return nb_segs;
}

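/*
 * Common multi-segment receive body: one CQE may consume several receive
 * buffers, so RBDR refill accounting uses buffers_consumed rather than the
 * number of completed packets.
 */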
static __rte_always_inline uint16_t __rte_hot
nicvf_recv_pkts_multiseg(void *rx_queue, struct rte_mbuf **rx_pkts,
			 uint16_t nb_pkts, const uint32_t flag)
{
	union cq_entry_t *cq_entry;
	struct cqe_rx_t *cqe_rx;
	struct nicvf_rxq *rxq = rx_queue;
	union cq_entry_t *desc = rxq->desc;
	const uint64_t cqe_mask = rxq->qlen_mask;
	uint64_t mbuf_phys_off = rxq->mbuf_phys_off;
	uint32_t i, to_process, cqe_head, buffers_consumed = 0;
	int32_t available_space = rxq->available_space;
	uint16_t nb_segs;
	const uint64_t mbuf_init = rxq->mbuf_initializer.value;
	const uint8_t rbptr_offset = rxq->rbptr_offset;

	cqe_head = rxq->head & cqe_mask;
	to_process = nicvf_rx_pkts_to_process(rxq, nb_pkts, available_space);

	for (i = 0; i < to_process; i++) {
		rte_prefetch_non_temporal(&desc[cqe_head + 2]);
		cq_entry = &desc[cqe_head];
		cqe_rx = (struct cqe_rx_t *)cq_entry;
		nb_segs = nicvf_process_cq_mseg_entry(cqe_rx, mbuf_phys_off,
			rx_pkts + i, rbptr_offset, mbuf_init, flag);
		buffers_consumed += nb_segs;
		cqe_head = (cqe_head + 1) & cqe_mask;
		nicvf_prefetch_store_keep(rx_pkts[i]);
	}

	if (likely(to_process)) {
		rxq->available_space -= to_process;
		rxq->head = cqe_head;
		nicvf_addr_write(rxq->cq_door, to_process);
		rxq->recv_buffers += buffers_consumed;
	}
	if (rxq->recv_buffers > rxq->rx_free_thresh) {
		rxq->recv_buffers -= nicvf_fill_rbdr(rxq, rxq->rx_free_thresh);
		NICVF_RX_ASSERT(rxq->recv_buffers >= 0);
	}

	return to_process;
}

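/*
 * Multi-segment Rx burst entry points, mirroring the single-segment wrappers
 * above with constant offload flag combinations.
 */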
uint16_t __rte_hot
nicvf_recv_pkts_multiseg_no_offload(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	return nicvf_recv_pkts_multiseg(rx_queue, rx_pkts, nb_pkts,
			NICVF_RX_OFFLOAD_NONE);
}

uint16_t __rte_hot
nicvf_recv_pkts_multiseg_cksum(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	return nicvf_recv_pkts_multiseg(rx_queue, rx_pkts, nb_pkts,
			NICVF_RX_OFFLOAD_CKSUM);
}

uint16_t __rte_hot
nicvf_recv_pkts_multiseg_vlan_strip(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	return nicvf_recv_pkts_multiseg(rx_queue, rx_pkts, nb_pkts,
			NICVF_RX_OFFLOAD_NONE | NICVF_RX_OFFLOAD_VLAN_STRIP);
}

uint16_t __rte_hot
nicvf_recv_pkts_multiseg_cksum_vlan_strip(void *rx_queue,
		struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	return nicvf_recv_pkts_multiseg(rx_queue, rx_pkts, nb_pkts,
			NICVF_RX_OFFLOAD_CKSUM | NICVF_RX_OFFLOAD_VLAN_STRIP);
}

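/* eth_dev rx_queue_count callback: number of CQEs currently pending */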
uint32_t
nicvf_dev_rx_queue_count(void *rx_queue)
{
	struct nicvf_rxq *rxq;

	rxq = rx_queue;
	return nicvf_addr_read(rxq->cq_status) & NICVF_CQ_CQE_COUNT_MASK;
}

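/*
 * Drain the pending recv_buffers count by refilling the RBDR in chunks of at
 * most NICVF_MAX_RX_FREE_THRESH buffers; returns the number of buffers that
 * were outstanding when the call was made.
 */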
uint32_t
nicvf_dev_rbdr_refill(struct rte_eth_dev *dev, uint16_t queue_idx)
{
	struct nicvf_rxq *rxq;
	uint32_t to_process;
	uint32_t rx_free;

	rxq = dev->data->rx_queues[queue_idx];
	to_process = rxq->recv_buffers;
	while (rxq->recv_buffers > 0) {
		rx_free = RTE_MIN(rxq->recv_buffers, NICVF_MAX_RX_FREE_THRESH);
		rxq->recv_buffers -= nicvf_fill_rbdr(rxq, rx_free);
	}

	assert(rxq->recv_buffers == 0);
	return to_process;
}
678