1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020-2021 HiSilicon Limited.
3 */
4
5 #include <rte_io.h>
6 #include <ethdev_driver.h>
7
8 #include "hns3_ethdev.h"
9 #include "hns3_rxtx.h"
10 #include "hns3_rxtx_vec.h"
11
12 #if defined RTE_ARCH_ARM64
13 #include "hns3_rxtx_vec_neon.h"
14 #endif
15
16 int
hns3_tx_check_vec_support(struct rte_eth_dev * dev)17 hns3_tx_check_vec_support(struct rte_eth_dev *dev)
18 {
19 struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
20 struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
21
22 /* Only support RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */
23 if (txmode->offloads != RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
24 return -ENOTSUP;
25
26 /*
27 * PTP function requires the cooperation of Rx and Tx.
28 * Tx vector isn't supported if RTE_ETH_RX_OFFLOAD_TIMESTAMP is set
29 * in Rx offloads.
30 */
31 if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
32 return -ENOTSUP;
33
34 return 0;
35 }
36
37 uint16_t
hns3_xmit_pkts_vec(void * tx_queue,struct rte_mbuf ** tx_pkts,uint16_t nb_pkts)38 hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
39 {
40 struct hns3_tx_queue *txq = (struct hns3_tx_queue *)tx_queue;
41 uint16_t nb_tx = 0;
42
43 while (nb_pkts) {
44 uint16_t ret, new_burst;
45
46 new_burst = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
47 ret = hns3_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
48 new_burst);
49 nb_tx += ret;
50 nb_pkts -= ret;
51 if (ret < new_burst)
52 break;
53 }
54
55 return nb_tx;
56 }
57
58 uint16_t
hns3_recv_pkts_vec(void * __restrict rx_queue,struct rte_mbuf ** __restrict rx_pkts,uint16_t nb_pkts)59 hns3_recv_pkts_vec(void *__restrict rx_queue,
60 struct rte_mbuf **__restrict rx_pkts,
61 uint16_t nb_pkts)
62 {
63 struct hns3_rx_queue *rxq = rx_queue;
64 struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use];
65 uint64_t pkt_err_mask; /* bit mask indicate whick pkts is error */
66 uint16_t nb_rx;
67
68 rte_prefetch_non_temporal(rxdp);
69
70 nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP);
71
72 if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
73 hns3_rxq_rearm_mbuf(rxq);
74
75 if (unlikely(!(rxdp->rx.bd_base_info &
76 rte_cpu_to_le_32(1u << HNS3_RXD_VLD_B))))
77 return 0;
78
79 rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 0].mbuf);
80 rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 1].mbuf);
81 rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 2].mbuf);
82 rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 3].mbuf);
83
84 if (likely(nb_pkts <= HNS3_DEFAULT_RX_BURST)) {
85 pkt_err_mask = 0;
86 nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts,
87 &pkt_err_mask);
88 nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, pkt_err_mask);
89 return nb_rx;
90 }
91
92 nb_rx = 0;
93 while (nb_pkts > 0) {
94 uint16_t ret, n;
95
96 n = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
97 pkt_err_mask = 0;
98 ret = hns3_recv_burst_vec(rxq, &rx_pkts[nb_rx], n,
99 &pkt_err_mask);
100 nb_pkts -= ret;
101 nb_rx += hns3_rx_reassemble_pkts(&rx_pkts[nb_rx], ret,
102 pkt_err_mask);
103 if (ret < n)
104 break;
105
106 if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
107 hns3_rxq_rearm_mbuf(rxq);
108 }
109
110 return nb_rx;
111 }
112
113 static void
hns3_rxq_vec_setup_rearm_data(struct hns3_rx_queue * rxq)114 hns3_rxq_vec_setup_rearm_data(struct hns3_rx_queue *rxq)
115 {
116 uintptr_t p;
117 struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
118
119 mb_def.nb_segs = 1;
120 mb_def.data_off = RTE_PKTMBUF_HEADROOM;
121 mb_def.port = rxq->port_id;
122 rte_mbuf_refcnt_set(&mb_def, 1);
123
124 /* compile-time verifies the rearm_data first 8bytes */
125 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) <
126 offsetof(struct rte_mbuf, rearm_data));
127 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, refcnt) <
128 offsetof(struct rte_mbuf, rearm_data));
129 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, nb_segs) <
130 offsetof(struct rte_mbuf, rearm_data));
131 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, port) <
132 offsetof(struct rte_mbuf, rearm_data));
133 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) -
134 offsetof(struct rte_mbuf, rearm_data) > 6);
135 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, refcnt) -
136 offsetof(struct rte_mbuf, rearm_data) > 6);
137 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, nb_segs) -
138 offsetof(struct rte_mbuf, rearm_data) > 6);
139 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, port) -
140 offsetof(struct rte_mbuf, rearm_data) > 6);
141
142 /* prevent compiler reordering: rearm_data covers previous fields */
143 rte_compiler_barrier();
144 p = (uintptr_t)&mb_def.rearm_data;
145 rxq->mbuf_initializer = *(uint64_t *)p;
146 }
147
148 void
hns3_rxq_vec_setup(struct hns3_rx_queue * rxq)149 hns3_rxq_vec_setup(struct hns3_rx_queue *rxq)
150 {
151 struct hns3_entry *sw_ring = &rxq->sw_ring[rxq->nb_rx_desc];
152 unsigned int i;
153
154 memset(&rxq->rx_ring[rxq->nb_rx_desc], 0,
155 sizeof(struct hns3_desc) * HNS3_DEFAULT_RX_BURST);
156
157 memset(&rxq->fake_mbuf, 0, sizeof(rxq->fake_mbuf));
158 for (i = 0; i < HNS3_DEFAULT_RX_BURST; i++)
159 sw_ring[i].mbuf = &rxq->fake_mbuf;
160
161 hns3_rxq_vec_setup_rearm_data(rxq);
162
163 memset(rxq->offset_table, 0, sizeof(rxq->offset_table));
164 }
165
166 static int
hns3_rxq_vec_check(struct hns3_rx_queue * rxq,void * arg)167 hns3_rxq_vec_check(struct hns3_rx_queue *rxq, void *arg)
168 {
169 uint32_t min_vec_bds = HNS3_DEFAULT_RXQ_REARM_THRESH +
170 HNS3_DEFAULT_RX_BURST;
171
172 if (rxq->nb_rx_desc < min_vec_bds)
173 return -ENOTSUP;
174
175 if (rxq->nb_rx_desc % HNS3_DEFAULT_RXQ_REARM_THRESH)
176 return -ENOTSUP;
177
178 RTE_SET_USED(arg);
179 return 0;
180 }
181
182 int
hns3_rx_check_vec_support(struct rte_eth_dev * dev)183 hns3_rx_check_vec_support(struct rte_eth_dev *dev)
184 {
185 struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
186 uint64_t offloads_mask = RTE_ETH_RX_OFFLOAD_TCP_LRO |
187 RTE_ETH_RX_OFFLOAD_VLAN |
188 RTE_ETH_RX_OFFLOAD_TIMESTAMP;
189
190 if (dev->data->scattered_rx)
191 return -ENOTSUP;
192
193 if (rxmode->offloads & offloads_mask)
194 return -ENOTSUP;
195
196 if (hns3_rxq_iterate(dev, hns3_rxq_vec_check, NULL) != 0)
197 return -ENOTSUP;
198
199 return 0;
200 }
201