/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2023 Marvell.
 */

#include <rte_vect.h>

#include "otx_ep_common.h"
#include "otx2_ep_vf.h"
#include "otx_ep_rxtx.h"

#define CNXK_EP_OQ_DESC_PER_LOOP_SSE 4
#define CNXK_EP_OQ_DESC_PER_LOOP_AVX 8

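/* Allocate 'count' mbufs from the DROQ mempool and program their default
 * data IOVA into the descriptor ring starting at the current refill index.
 * Callers keep 'count' within the ring boundary; a wrap-around refill is
 * issued as a second call (see cnxk_ep_rx_refill()).
 */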
static inline int
cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
{
	struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
	struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
	uint32_t refill_idx = droq->refill_idx;
	struct rte_mbuf *buf;
	uint32_t i;
	int rc;

	rc = rte_mempool_get_bulk(droq->mpool, (void **)&recv_buf_list[refill_idx], count);
	if (unlikely(rc)) {
		droq->stats.rx_alloc_failure++;
		return rc;
	}

	for (i = 0; i < count; i++) {
		rte_prefetch_non_temporal(&desc_ring[(refill_idx + 1) & 3]);
		if (i < count - 1)
			rte_prefetch_non_temporal(recv_buf_list[refill_idx + 1]);
		buf = recv_buf_list[refill_idx];
		desc_ring[refill_idx].buffer_ptr = rte_mbuf_data_iova_default(buf);
		refill_idx++;
	}

	droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, droq->nb_desc);
	droq->refill_count -= count;

	return 0;
}

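/* Refill all descriptors consumed so far, from refill_idx up to read_idx,
 * splitting the refill in two when the range wraps past the end of the
 * ring, and return the refilled credits to hardware.
 */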
static inline void
cnxk_ep_rx_refill(struct otx_ep_droq *droq)
{
	const uint32_t nb_desc = droq->nb_desc;
	uint32_t refill_idx = droq->refill_idx;
	uint32_t desc_refilled = 0, count;
	int rc;

	if (unlikely(droq->read_idx == refill_idx))
		return;

	if (refill_idx < droq->read_idx) {
		count = droq->read_idx - refill_idx;
		rc = cnxk_ep_rx_refill_mbuf(droq, count);
		if (unlikely(rc)) {
			droq->stats.rx_alloc_failure++;
			return;
		}
		desc_refilled = count;
	} else {
		count = nb_desc - refill_idx;
		rc = cnxk_ep_rx_refill_mbuf(droq, count);
		if (unlikely(rc)) {
			droq->stats.rx_alloc_failure++;
			return;
		}

		desc_refilled = count;
		count = droq->read_idx;
		rc = cnxk_ep_rx_refill_mbuf(droq, count);
		if (unlikely(rc)) {
			droq->stats.rx_alloc_failure++;
			return;
		}
		desc_refilled += count;
	}

	/* Flush the droq descriptor data to memory to be sure
	 * that when we update the credits the data in memory is
	 * accurate.
	 */
	rte_io_wmb();
	rte_write32(desc_refilled, droq->pkts_credit_reg);
}

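/* Check for newly received packets using the ISM memory location that
 * hardware keeps updated with the output queue packet count, avoiding a
 * PCIe register read in the fast path.
 */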
static inline uint32_t
cnxk_ep_check_rx_ism_mem(void *rx_queue)
{
	struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
	uint32_t new_pkts;
	uint32_t val;

	/* Batch subtractions from the HW counter to reduce PCIe traffic.
	 * This adds an extra local variable, but almost halves the
	 * number of PCIe writes.
	 */
	val = rte_atomic_load_explicit(droq->pkts_sent_ism, rte_memory_order_relaxed);

	new_pkts = val - droq->pkts_sent_prev;
	droq->pkts_sent_prev = val;

	if (val > RTE_BIT32(31)) {
		/* Only subtract the packet count in the HW counter
		 * when the count is above halfway to saturation.
		 */
		rte_write64((uint64_t)val, droq->pkts_sent_reg);
		rte_mb();

		rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
		while (rte_atomic_load_explicit(droq->pkts_sent_ism,
						rte_memory_order_relaxed) >= val) {
			rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
			rte_mb();
		}
		droq->pkts_sent_prev = 0;
	}

	rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);

	return new_pkts;
}

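/* Check for newly received packets by reading the output queue packet
 * count register directly over PCIe; used when ISM is not enabled.
 */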
static inline uint32_t
cnxk_ep_check_rx_pkt_reg(void *rx_queue)
{
	struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
	uint32_t new_pkts;
	uint32_t val;

	val = rte_read32(droq->pkts_sent_reg);

	new_pkts = val - droq->pkts_sent_prev;
	droq->pkts_sent_prev = val;

	if (val > RTE_BIT32(31)) {
		/* Only subtract the packet count in the HW counter
		 * when the count is above halfway to saturation.
		 */
		rte_write64((uint64_t)val, droq->pkts_sent_reg);
		rte_mb();
		droq->pkts_sent_prev = 0;
	}

	return new_pkts;
}

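/* Return the number of packets to process in this burst, topping up the
 * pending count from hardware (ISM memory or the packet count register,
 * selected by droq->ism_ena) only when fewer than nb_pkts are pending.
 */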
static inline int16_t __rte_hot
cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
{
	const otx_ep_check_pkt_count_t cnxk_rx_pkt_count[2] = { cnxk_ep_check_rx_pkt_reg,
								cnxk_ep_check_rx_ism_mem };

	if (droq->pkts_pending < nb_pkts)
		droq->pkts_pending += cnxk_rx_pkt_count[droq->ism_ena](droq);

	return RTE_MIN(nb_pkts, droq->pkts_pending);
}

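/* Like rte_pktmbuf_mtod(), but always points at the fixed headroom offset
 * rather than the mbuf's current data_off; the receive DMA lands at that
 * offset since cnxk_ep_rx_refill_mbuf() programs rte_mbuf_data_iova_default()
 * into the descriptor, so the DROQ info header can be read from there.
 */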
#define cnxk_pktmbuf_mtod(m, t) ((t)(void *)((char *)(m)->buf_addr + RTE_PKTMBUF_HEADROOM))

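/* Scalar receive path: walk the descriptor ring, extract the packet length
 * from each buffer's DROQ info header, initialize the mbuf fields from the
 * precomputed rearm data and place the mbufs in the caller's array.
 */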
static __rte_always_inline void
cnxk_ep_process_pkts_scalar(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)
{
	struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
	uint32_t bytes_rsvd = 0, read_idx = droq->read_idx;
	uint16_t nb_desc = droq->nb_desc;
	uint16_t pkts;

	for (pkts = 0; pkts < new_pkts; pkts++) {
		struct otx_ep_droq_info *info;
		struct rte_mbuf *mbuf;
		uint16_t pkt_len;

		rte_prefetch0(recv_buf_list[otx_ep_incr_index(read_idx, 2, nb_desc)]);
		rte_prefetch0(rte_pktmbuf_mtod(recv_buf_list[otx_ep_incr_index(read_idx,
						2, nb_desc)], void *));

		mbuf = recv_buf_list[read_idx];
		info = cnxk_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
		read_idx = otx_ep_incr_index(read_idx, 1, nb_desc);
		pkt_len = rte_bswap16(info->length >> 48);
		mbuf->pkt_len = pkt_len;
		mbuf->data_len = pkt_len;

		*(uint64_t *)&mbuf->rearm_data = droq->rearm_data;
		rx_pkts[pkts] = mbuf;
		bytes_rsvd += pkt_len;
	}
	droq->read_idx = read_idx;

	droq->refill_count += new_pkts;
	droq->pkts_pending -= new_pkts;
	/* Stats */
	droq->stats.pkts_received += new_pkts;
	droq->stats.bytes_received += bytes_rsvd;
}