xref: /dpdk/drivers/net/hns3/hns3_rxtx_vec.c (revision 02d36ef6a9528e0f4a3403956e66bcea5fadbf8c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2020-2021 HiSilicon Limited.
3  */
4 
5 #include <rte_io.h>
6 #include <ethdev_driver.h>
7 
8 #include "hns3_ethdev.h"
9 #include "hns3_rxtx.h"
10 #include "hns3_rxtx_vec.h"
11 
12 #if defined RTE_ARCH_ARM64
13 #include "hns3_rxtx_vec_neon.h"
14 #endif
15 
16 int
17 hns3_tx_check_vec_support(struct rte_eth_dev *dev)
18 {
19 	struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
20 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
21 
22 	/* Only support RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */
23 	if (txmode->offloads != RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
24 		return -ENOTSUP;
25 
26 	/*
27 	 * PTP function requires the cooperation of Rx and Tx.
28 	 * Tx vector isn't supported if RTE_ETH_RX_OFFLOAD_TIMESTAMP is set
29 	 * in Rx offloads.
30 	 */
31 	if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
32 		return -ENOTSUP;
33 
34 	return 0;
35 }
36 
37 uint16_t
38 hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
39 {
40 	struct hns3_tx_queue *txq = (struct hns3_tx_queue *)tx_queue;
41 	uint16_t nb_tx = 0;
42 
43 	while (nb_pkts) {
44 		uint16_t ret, new_burst;
45 
46 		new_burst = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
47 		ret = hns3_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
48 						new_burst);
49 		nb_tx += ret;
50 		nb_pkts -= ret;
51 		if (ret < new_burst)
52 			break;
53 	}
54 
55 	return nb_tx;
56 }
57 
58 static inline void
59 hns3_rxq_rearm_mbuf(struct hns3_rx_queue *rxq)
60 {
61 #define REARM_LOOP_STEP_NUM	4
62 	struct hns3_entry *rxep = &rxq->sw_ring[rxq->rx_rearm_start];
63 	struct hns3_desc *rxdp = rxq->rx_ring + rxq->rx_rearm_start;
64 	uint64_t dma_addr;
65 	int i;
66 
67 	if (unlikely(rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
68 					  HNS3_DEFAULT_RXQ_REARM_THRESH) < 0)) {
69 		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
70 		return;
71 	}
72 
73 	for (i = 0; i < HNS3_DEFAULT_RXQ_REARM_THRESH; i += REARM_LOOP_STEP_NUM,
74 		rxep += REARM_LOOP_STEP_NUM, rxdp += REARM_LOOP_STEP_NUM) {
75 		if (likely(i <
76 			HNS3_DEFAULT_RXQ_REARM_THRESH - REARM_LOOP_STEP_NUM)) {
77 			rte_prefetch_non_temporal(rxep[4].mbuf);
78 			rte_prefetch_non_temporal(rxep[5].mbuf);
79 			rte_prefetch_non_temporal(rxep[6].mbuf);
80 			rte_prefetch_non_temporal(rxep[7].mbuf);
81 		}
82 
83 		dma_addr = rte_mbuf_data_iova_default(rxep[0].mbuf);
84 		rxdp[0].addr = rte_cpu_to_le_64(dma_addr);
85 		rxdp[0].rx.bd_base_info = 0;
86 
87 		dma_addr = rte_mbuf_data_iova_default(rxep[1].mbuf);
88 		rxdp[1].addr = rte_cpu_to_le_64(dma_addr);
89 		rxdp[1].rx.bd_base_info = 0;
90 
91 		dma_addr = rte_mbuf_data_iova_default(rxep[2].mbuf);
92 		rxdp[2].addr = rte_cpu_to_le_64(dma_addr);
93 		rxdp[2].rx.bd_base_info = 0;
94 
95 		dma_addr = rte_mbuf_data_iova_default(rxep[3].mbuf);
96 		rxdp[3].addr = rte_cpu_to_le_64(dma_addr);
97 		rxdp[3].rx.bd_base_info = 0;
98 	}
99 
100 	rxq->rx_rearm_start += HNS3_DEFAULT_RXQ_REARM_THRESH;
101 	if (rxq->rx_rearm_start >= rxq->nb_rx_desc)
102 		rxq->rx_rearm_start = 0;
103 
104 	rxq->rx_rearm_nb -= HNS3_DEFAULT_RXQ_REARM_THRESH;
105 
106 	hns3_write_reg_opt(rxq->io_head_reg, HNS3_DEFAULT_RXQ_REARM_THRESH);
107 }
108 
109 uint16_t
110 hns3_recv_pkts_vec(void *__restrict rx_queue,
111 		   struct rte_mbuf **__restrict rx_pkts,
112 		   uint16_t nb_pkts)
113 {
114 	struct hns3_rx_queue *rxq = rx_queue;
115 	struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use];
116 	uint64_t pkt_err_mask;  /* bit mask indicate whick pkts is error */
117 	uint16_t nb_rx;
118 
119 	rte_prefetch_non_temporal(rxdp);
120 
121 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP);
122 
123 	if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
124 		hns3_rxq_rearm_mbuf(rxq);
125 
126 	if (unlikely(!(rxdp->rx.bd_base_info &
127 			rte_cpu_to_le_32(1u << HNS3_RXD_VLD_B))))
128 		return 0;
129 
130 	rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 0].mbuf);
131 	rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 1].mbuf);
132 	rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 2].mbuf);
133 	rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 3].mbuf);
134 
135 	if (likely(nb_pkts <= HNS3_DEFAULT_RX_BURST)) {
136 		pkt_err_mask = 0;
137 		nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts,
138 					    &pkt_err_mask);
139 		nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, pkt_err_mask);
140 		return nb_rx;
141 	}
142 
143 	nb_rx = 0;
144 	while (nb_pkts > 0) {
145 		uint16_t ret, n;
146 
147 		n = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
148 		pkt_err_mask = 0;
149 		ret = hns3_recv_burst_vec(rxq, &rx_pkts[nb_rx], n,
150 					  &pkt_err_mask);
151 		nb_pkts -= ret;
152 		nb_rx += hns3_rx_reassemble_pkts(&rx_pkts[nb_rx], ret,
153 						 pkt_err_mask);
154 		if (ret < n)
155 			break;
156 
157 		if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
158 			hns3_rxq_rearm_mbuf(rxq);
159 	}
160 
161 	return nb_rx;
162 }
163 
164 static void
165 hns3_rxq_vec_setup_rearm_data(struct hns3_rx_queue *rxq)
166 {
167 	uintptr_t p;
168 	struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
169 
170 	mb_def.nb_segs = 1;
171 	mb_def.data_off = RTE_PKTMBUF_HEADROOM;
172 	mb_def.port = rxq->port_id;
173 	rte_mbuf_refcnt_set(&mb_def, 1);
174 
175 	/* compile-time verifies the rearm_data first 8bytes */
176 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) <
177 			 offsetof(struct rte_mbuf, rearm_data));
178 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, refcnt) <
179 			 offsetof(struct rte_mbuf, rearm_data));
180 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, nb_segs) <
181 			 offsetof(struct rte_mbuf, rearm_data));
182 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, port) <
183 			 offsetof(struct rte_mbuf, rearm_data));
184 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) -
185 			 offsetof(struct rte_mbuf, rearm_data) > 6);
186 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, refcnt) -
187 			 offsetof(struct rte_mbuf, rearm_data) > 6);
188 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, nb_segs) -
189 			 offsetof(struct rte_mbuf, rearm_data) > 6);
190 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, port) -
191 			 offsetof(struct rte_mbuf, rearm_data) > 6);
192 
193 	/* prevent compiler reordering: rearm_data covers previous fields */
194 	rte_compiler_barrier();
195 	p = (uintptr_t)&mb_def.rearm_data;
196 	rxq->mbuf_initializer = *(uint64_t *)p;
197 }
198 
199 void
200 hns3_rxq_vec_setup(struct hns3_rx_queue *rxq)
201 {
202 	struct hns3_entry *sw_ring = &rxq->sw_ring[rxq->nb_rx_desc];
203 	unsigned int i;
204 
205 	memset(&rxq->rx_ring[rxq->nb_rx_desc], 0,
206 		sizeof(struct hns3_desc) * HNS3_DEFAULT_RX_BURST);
207 
208 	memset(&rxq->fake_mbuf, 0, sizeof(rxq->fake_mbuf));
209 	for (i = 0; i < HNS3_DEFAULT_RX_BURST; i++)
210 		sw_ring[i].mbuf = &rxq->fake_mbuf;
211 
212 	hns3_rxq_vec_setup_rearm_data(rxq);
213 
214 	memset(rxq->offset_table, 0, sizeof(rxq->offset_table));
215 }
216 
217 static int
218 hns3_rxq_vec_check(struct hns3_rx_queue *rxq, void *arg)
219 {
220 	uint32_t min_vec_bds = HNS3_DEFAULT_RXQ_REARM_THRESH +
221 				HNS3_DEFAULT_RX_BURST;
222 
223 	if (rxq->nb_rx_desc < min_vec_bds)
224 		return -ENOTSUP;
225 
226 	if (rxq->nb_rx_desc % HNS3_DEFAULT_RXQ_REARM_THRESH)
227 		return -ENOTSUP;
228 
229 	RTE_SET_USED(arg);
230 	return 0;
231 }
232 
233 int
234 hns3_rx_check_vec_support(struct rte_eth_dev *dev)
235 {
236 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
237 	uint64_t offloads_mask = RTE_ETH_RX_OFFLOAD_TCP_LRO |
238 				 RTE_ETH_RX_OFFLOAD_VLAN |
239 				 RTE_ETH_RX_OFFLOAD_TIMESTAMP;
240 
241 	if (dev->data->scattered_rx)
242 		return -ENOTSUP;
243 
244 	if (rxmode->offloads & offloads_mask)
245 		return -ENOTSUP;
246 
247 	if (hns3_rxq_iterate(dev, hns3_rxq_vec_check, NULL) != 0)
248 		return -ENOTSUP;
249 
250 	return 0;
251 }
252