xref: /dpdk/drivers/net/octeon_ep/cnxk_ep_tx.c (revision f665790a5dbad7b645ff46f31d65e977324e7bfc)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2023 Marvell.
 */

#include "cnxk_ep_vf.h"
#include "otx_ep_rxtx.h"

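/* Return the current TX read index by sampling the instruction count that
 * the hardware reflects into host memory (ISM). Subtractions from the HW
 * counter are batched and only written back once the counter passes the
 * halfway point, to limit PCIe traffic.
 */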
static inline uint32_t
cnxk_ep_check_tx_ism_mem(void *tx_queue)
{
	struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
	uint32_t val;

	/* Batch subtractions from the HW counter to reduce PCIe traffic.
	 * This adds an extra local variable, but almost halves the
	 * number of PCIe writes.
	 */
	val = rte_atomic_load_explicit(iq->inst_cnt_ism, rte_memory_order_relaxed);
	iq->inst_cnt += val - iq->inst_cnt_prev;
	iq->inst_cnt_prev = val;

	if (val > (uint32_t)(1 << 31)) {
		/* Only subtract the packet count in the HW counter
		 * when the count is above halfway to saturation.
		 */
		rte_write64((uint64_t)val, iq->inst_cnt_reg);
		rte_mb();

		rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
		while (rte_atomic_load_explicit(iq->inst_cnt_ism,
				rte_memory_order_relaxed) >= val) {
			rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
			rte_mb();
		}

		iq->inst_cnt_prev = 0;
	}
	rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);

	/* Modulo of the running instruction count with the IQ size gives
	 * the new read index.
	 */
	return iq->inst_cnt & (iq->nb_desc - 1);
}

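/* Return the current TX read index by reading the instruction count
 * register directly over PCIe; used when ISM is disabled.
 */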
static inline uint32_t
cnxk_ep_check_tx_pkt_reg(void *tx_queue)
{
	struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
	uint32_t val;

	val = rte_read32(iq->inst_cnt_reg);
	iq->inst_cnt += val - iq->inst_cnt_prev;
	iq->inst_cnt_prev = val;

	if (val > (uint32_t)(1 << 31)) {
		/* Only subtract the packet count in the HW counter
		 * when the count is above halfway to saturation.
		 */
		rte_write64((uint64_t)val, iq->inst_cnt_reg);
		rte_mb();

		iq->inst_cnt_prev = 0;
	}

	/* Modulo of the running instruction count with the IQ size gives
	 * the new read index.
	 */
	return iq->inst_cnt & (iq->nb_desc - 1);
}

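/* Reclaim completed TX descriptors: refresh the read index from the HW
 * count and free the transmitted mbufs between flush_index and the new
 * read index, handling ring wrap-around in two steps.
 */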
static inline void
cnxk_ep_flush_iq(struct otx_ep_instr_queue *iq)
{
	const otx_ep_check_pkt_count_t cnxk_tx_pkt_count[2] = { cnxk_ep_check_tx_pkt_reg,
								cnxk_ep_check_tx_ism_mem };

	uint32_t instr_processed = 0;
	uint32_t cnt = 0;

	iq->otx_read_index = cnxk_tx_pkt_count[iq->ism_ena](iq);

	if (unlikely(iq->flush_index == iq->otx_read_index))
		return;

	if (iq->flush_index < iq->otx_read_index) {
		instr_processed = iq->otx_read_index - iq->flush_index;
		rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
		iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
	} else {
		cnt = iq->nb_desc - iq->flush_index;
		rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], cnt);
		iq->flush_index = otx_ep_incr_index(iq->flush_index, cnt, iq->nb_desc);

		instr_processed = iq->otx_read_index;
		rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
		iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);

		instr_processed += cnt;
	}

	iq->stats.instr_processed = instr_processed;
	iq->instr_pending -= instr_processed;
}

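/* Store the segment length in the scatter-gather entry at the given
 * position, accounting for the byte order of the 64-bit size word.
 */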
static inline void
set_sg_size(struct otx_ep_sg_entry *sg_entry, uint16_t size, uint32_t pos)
{
#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
	sg_entry->u.size[pos] = size;
#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
	sg_entry->u.size[(OTX_EP_NUM_SG_PTRS - 1) - pos] = size;
#endif
}

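/* Fast path for single-segment packets: fill one 32B instruction per mbuf
 * with the packet length and data IOVA, then ring the doorbell once for
 * the whole burst.
 */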
static __rte_always_inline void
cnxk_ep_xmit_pkts_scalar(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq, uint16_t nb_pkts)
{
	struct cnxk_ep_instr_32B *iqcmd;
	struct rte_mbuf *m;
	uint32_t pkt_len;
	uint32_t tx_bytes = 0;
	uint32_t write_idx = iq->host_write_index;
	uint16_t pkts, nb_desc = iq->nb_desc;
	uint8_t desc_size = iq->desc_size;

	for (pkts = 0; pkts < nb_pkts; pkts++) {
		m = tx_pkts[pkts];
		iq->mbuf_list[write_idx] = m;
		pkt_len = rte_pktmbuf_data_len(m);

		iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
		iqcmd->ih.u64 = iq->partial_ih | pkt_len;
		iqcmd->dptr = rte_mbuf_data_iova(m); /* dptr */
		tx_bytes += pkt_len;

		/* Increment the host write index */
		write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
	}
	iq->host_write_index = write_idx;

	/* Ring the doorbell for the posted instructions. */
	rte_io_wmb();
	rte_write64(pkts, iq->doorbell_reg);
	iq->instr_pending += pkts;
	iq->stats.tx_pkts += pkts;
	iq->stats.tx_bytes += tx_bytes;
}

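/* Multi-segment TX path: build a gather list in the per-descriptor free
 * info, point the instruction's dptr at it and encode the fragment count
 * in the instruction header. Returns the number of packets posted.
 */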
static __rte_always_inline uint16_t
cnxk_ep_xmit_pkts_scalar_mseg(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq,
			      uint16_t nb_pkts)
{
	uint16_t frags, num_sg, mask = OTX_EP_NUM_SG_PTRS - 1;
	struct otx_ep_buf_free_info *finfo;
	struct cnxk_ep_instr_32B *iqcmd;
	struct rte_mbuf *m;
	uint32_t pkt_len, tx_bytes = 0;
	uint32_t write_idx = iq->host_write_index;
	uint16_t pkts, nb_desc = iq->nb_desc;
	uint8_t desc_size = iq->desc_size;

	for (pkts = 0; pkts < nb_pkts; pkts++) {
		uint16_t j = 0;

		m = tx_pkts[pkts];
		frags = m->nb_segs;

		pkt_len = rte_pktmbuf_pkt_len(m);
		num_sg = (frags + mask) / OTX_EP_NUM_SG_PTRS;

		if (unlikely(pkt_len > OTX_EP_MAX_PKT_SZ || num_sg > OTX_EP_MAX_SG_LISTS)) {
			otx_ep_err("Failed to xmit the pkt, pkt_len is too high or the pkt has too many segments");
			goto exit;
		}

		finfo = &iq->req_list[write_idx].finfo;

		iq->mbuf_list[write_idx] = m;
		iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
		iqcmd->dptr = rte_mem_virt2iova(finfo->g.sg);
		iqcmd->ih.u64 = iq->partial_ih | (1ULL << 62) | ((uint64_t)frags << 48) | pkt_len;

		while (frags--) {
			finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
			set_sg_size(&finfo->g.sg[(j >> 2)], m->data_len, (j & mask));
			j++;
			m = m->next;
		}

		/* Increment the host write index */
		write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
		tx_bytes += pkt_len;
	}
exit:
	iq->host_write_index = write_idx;

	/* Ring the doorbell for the posted instructions. */
	rte_io_wmb();
	rte_write64(pkts, iq->doorbell_reg);
	iq->instr_pending += pkts;
	iq->stats.tx_pkts += pkts;
	iq->stats.tx_bytes += tx_bytes;

	return pkts;
}

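/* Burst TX entry point for single-segment mbufs. Posts at most as many
 * packets as there are free descriptors and reclaims completed ones once
 * the pending count reaches OTX_EP_MAX_INSTR.
 */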
uint16_t __rte_noinline __rte_hot
cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
	uint16_t pkts;

	pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);

	cnxk_ep_xmit_pkts_scalar(tx_pkts, iq, pkts);

	if (iq->instr_pending >= OTX_EP_MAX_INSTR)
		cnxk_ep_flush_iq(iq);

	/* Return the number of instructions posted successfully. */
	return pkts;
}

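/* Burst TX entry point for multi-segment mbufs; same flow as
 * cnxk_ep_xmit_pkts() but builds gather lists for chained mbufs.
 */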
uint16_t __rte_noinline __rte_hot
cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
	uint16_t pkts;

	pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);

	pkts = cnxk_ep_xmit_pkts_scalar_mseg(tx_pkts, iq, pkts);

	if (iq->instr_pending >= OTX_EP_MAX_INSTR)
		cnxk_ep_flush_iq(iq);

	/* Return the number of instructions posted successfully. */
	return pkts;
}