xref: /dpdk/drivers/net/octeon_ep/otx_ep_rxtx.c (revision 3da59f30a23f2e795d2315f3d949e1b3e0ce0c3d)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2021 Marvell.
3  */
4 
5 #include <unistd.h>
6 #include <assert.h>
7 #include <rte_eal.h>
8 #include <rte_mempool.h>
9 #include <rte_mbuf.h>
10 #include <rte_io.h>
11 #include <rte_net.h>
12 #include <ethdev_pci.h>
13 
14 #include "otx_ep_common.h"
15 #include "otx_ep_vf.h"
16 #include "otx_ep_rxtx.h"
17 
18 static void
19 otx_ep_dmazone_free(const struct rte_memzone *mz)
20 {
21 	const struct rte_memzone *mz_tmp;
22 	int ret = 0;
23 
24 	if (mz == NULL) {
25 		otx_ep_err("Memzone: NULL\n");
26 		return;
27 	}
28 
29 	mz_tmp = rte_memzone_lookup(mz->name);
30 	if (mz_tmp == NULL) {
31 		otx_ep_err("Memzone %s Not Found\n", mz->name);
32 		return;
33 	}
34 
35 	ret = rte_memzone_free(mz);
36 	if (ret)
37 		otx_ep_err("Memzone free failed : ret = %d\n", ret);
38 }
39 
40 /* Free IQ resources */
41 int
42 otx_ep_delete_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no)
43 {
44 	struct otx_ep_instr_queue *iq;
45 	uint32_t i;
46 
47 	iq = otx_ep->instr_queue[iq_no];
48 	if (iq == NULL) {
49 		otx_ep_err("Invalid IQ[%d]\n", iq_no);
50 		return -EINVAL;
51 	}
52 
53 	if (iq->req_list) {
54 		for (i = 0; i < iq->nb_desc; i++)
55 			rte_free(iq->req_list[i].finfo.g.sg);
56 		rte_free(iq->req_list);
57 	}
58 
59 	iq->req_list = NULL;
60 
61 	if (iq->iq_mz) {
62 		otx_ep_dmazone_free(iq->iq_mz);
63 		iq->iq_mz = NULL;
64 	}
65 
66 	rte_free(otx_ep->instr_queue[iq_no]);
67 	otx_ep->instr_queue[iq_no] = NULL;
68 
69 	otx_ep->nb_tx_queues--;
70 
71 	otx_ep_info("IQ[%d] is deleted\n", iq_no);
72 
73 	return 0;
74 }
75 
76 /* IQ initialization */
77 static int
78 otx_ep_init_instr_queue(struct otx_ep_device *otx_ep, int iq_no, int num_descs,
79 		     unsigned int socket_id)
80 {
81 	const struct otx_ep_config *conf;
82 	struct otx_ep_instr_queue *iq;
83 	struct otx_ep_sg_entry *sg;
84 	uint32_t i, q_size;
85 	int ret;
86 
87 	conf = otx_ep->conf;
88 	iq = otx_ep->instr_queue[iq_no];
89 	q_size = conf->iq.instr_type * num_descs;
90 
91 	/* IQ memory creation for Instruction submission to OCTEON 9 */
92 	iq->iq_mz = rte_eth_dma_zone_reserve(otx_ep->eth_dev,
93 					     "instr_queue", iq_no, q_size,
94 					     OTX_EP_PCI_RING_ALIGN,
95 					     socket_id);
96 	if (iq->iq_mz == NULL) {
97 		otx_ep_err("IQ[%d] memzone alloc failed\n", iq_no);
98 		goto iq_init_fail;
99 	}
100 
101 	iq->base_addr_dma = iq->iq_mz->iova;
102 	iq->base_addr = (uint8_t *)iq->iq_mz->addr;
103 
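	/*
	 * Ring indices are wrapped by masking with (nb_desc - 1), e.g. with
	 * nb_desc = 1024 the index is (count & 1023), so the descriptor
	 * count must be a power of two.
	 */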
104 	if (num_descs & (num_descs - 1)) {
105 		otx_ep_err("IQ[%d] descs not in power of 2\n", iq_no);
106 		goto iq_init_fail;
107 	}
108 
109 	iq->nb_desc = num_descs;
110 
111 	/* Create an IQ request list to hold requests that have been
112 	 * posted to OCTEON 9. This list is used to free the IQ
113 	 * data buffer(s) later, once OCTEON 9 has fetched the requests.
114 	 */
115 	iq->req_list = rte_zmalloc_socket("request_list",
116 			(iq->nb_desc * OTX_EP_IQREQ_LIST_SIZE),
117 			RTE_CACHE_LINE_SIZE,
118 			rte_socket_id());
119 	if (iq->req_list == NULL) {
120 		otx_ep_err("IQ[%d] req_list alloc failed\n", iq_no);
121 		goto iq_init_fail;
122 	}
123 
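	/* Pre-allocate a fixed scatter-gather list per descriptor so that
	 * the transmit hot path never allocates memory when it builds a
	 * gather list for a multi-segment mbuf.
	 */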
124 	for (i = 0; i < iq->nb_desc; i++) {
125 		sg = rte_zmalloc_socket("sg_entry", (OTX_EP_MAX_SG_LISTS * OTX_EP_SG_ENTRY_SIZE),
126 			OTX_EP_SG_ALIGN, rte_socket_id());
127 		if (sg == NULL) {
128 			otx_ep_err("IQ[%d] sg_entries alloc failed\n", iq_no);
129 			goto iq_init_fail;
130 		}
131 
132 		iq->req_list[i].finfo.g.num_sg = OTX_EP_MAX_SG_LISTS;
133 		iq->req_list[i].finfo.g.sg = sg;
134 	}
135 
136 	otx_ep_info("IQ[%d]: base: %p basedma: %lx count: %d\n",
137 		     iq_no, iq->base_addr, (unsigned long)iq->base_addr_dma,
138 		     iq->nb_desc);
139 
140 	iq->mbuf_list = rte_zmalloc_socket("mbuf_list",	(iq->nb_desc * sizeof(struct rte_mbuf *)),
141 					   RTE_CACHE_LINE_SIZE, rte_socket_id());
142 	if (!iq->mbuf_list) {
143 		otx_ep_err("IQ[%d] mbuf_list alloc failed\n", iq_no);
144 		goto iq_init_fail;
145 	}
146 
147 	iq->otx_ep_dev = otx_ep;
148 	iq->q_no = iq_no;
149 	iq->fill_cnt = 0;
150 	iq->host_write_index = 0;
151 	iq->otx_read_index = 0;
152 	iq->flush_index = 0;
153 	iq->instr_pending = 0;
154 
155 	otx_ep->io_qmask.iq |= (1ull << iq_no);
156 
157 	/* Set 32B/64B mode for each input queue */
158 	if (conf->iq.instr_type == 64)
159 		otx_ep->io_qmask.iq64B |= (1ull << iq_no);
160 
161 	iq->iqcmd_64B = (conf->iq.instr_type == 64);
162 
163 	/* Set up IQ registers */
164 	ret = otx_ep->fn_list.setup_iq_regs(otx_ep, iq_no);
165 	if (ret)
166 		return ret;
167 
168 	return 0;
169 
170 iq_init_fail:
171 	return -ENOMEM;
172 }
173 
174 int
175 otx_ep_setup_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no, int num_descs,
176 		 unsigned int socket_id)
177 {
178 	struct otx_ep_instr_queue *iq;
179 
180 	iq = (struct otx_ep_instr_queue *)rte_zmalloc("otx_ep_IQ", sizeof(*iq),
181 						RTE_CACHE_LINE_SIZE);
182 	if (iq == NULL)
183 		return -ENOMEM;
184 
185 	otx_ep->instr_queue[iq_no] = iq;
186 
187 	if (otx_ep_init_instr_queue(otx_ep, iq_no, num_descs, socket_id)) {
188 		otx_ep_err("IQ init failed\n");
189 		goto delete_IQ;
190 	}
191 	otx_ep->nb_tx_queues++;
192 
193 	otx_ep_info("IQ[%d] is created.\n", iq_no);
194 
195 	return 0;
196 
197 delete_IQ:
198 	otx_ep_delete_iqs(otx_ep, iq_no);
199 	return -ENOMEM;
200 }
201 
202 static void
203 otx_ep_droq_reset_indices(struct otx_ep_droq *droq)
204 {
205 	droq->read_idx  = 0;
206 	droq->write_idx = 0;
207 	droq->refill_idx = 0;
208 	droq->refill_count = 0;
209 	droq->last_pkt_count = 0;
210 	droq->pkts_pending = 0;
211 }
212 
213 static void
214 otx_ep_droq_destroy_ring_buffers(struct otx_ep_droq *droq)
215 {
216 	uint32_t idx;
217 
218 	for (idx = 0; idx < droq->nb_desc; idx++) {
219 		if (droq->recv_buf_list[idx]) {
220 			rte_pktmbuf_free(droq->recv_buf_list[idx]);
221 			droq->recv_buf_list[idx] = NULL;
222 		}
223 	}
224 
225 	otx_ep_droq_reset_indices(droq);
226 }
227 
228 /* Free OQs resources */
229 int
230 otx_ep_delete_oqs(struct otx_ep_device *otx_ep, uint32_t oq_no)
231 {
232 	struct otx_ep_droq *droq;
233 
234 	droq = otx_ep->droq[oq_no];
235 	if (droq == NULL) {
236 		otx_ep_err("Invalid droq[%d]\n", oq_no);
237 		return -EINVAL;
238 	}
239 
240 	otx_ep_droq_destroy_ring_buffers(droq);
241 	rte_free(droq->recv_buf_list);
242 	droq->recv_buf_list = NULL;
243 
244 	if (droq->desc_ring_mz) {
245 		otx_ep_dmazone_free(droq->desc_ring_mz);
246 		droq->desc_ring_mz = NULL;
247 	}
248 
249 	memset(droq, 0, OTX_EP_DROQ_SIZE);
250 
251 	rte_free(otx_ep->droq[oq_no]);
252 	otx_ep->droq[oq_no] = NULL;
253 
254 	otx_ep->nb_rx_queues--;
255 
256 	otx_ep_info("OQ[%d] is deleted\n", oq_no);
257 	return 0;
258 }
259 
260 static int
261 otx_ep_droq_setup_ring_buffers(struct otx_ep_droq *droq)
262 {
263 	struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
264 	struct otx_ep_droq_info *info;
265 	struct rte_mbuf *buf;
266 	uint32_t idx;
267 
268 	for (idx = 0; idx < droq->nb_desc; idx++) {
269 		buf = rte_pktmbuf_alloc(droq->mpool);
270 		if (buf == NULL) {
271 			otx_ep_err("OQ buffer alloc failed\n");
272 			droq->stats.rx_alloc_failure++;
273 			return -ENOMEM;
274 		}
275 
276 		droq->recv_buf_list[idx] = buf;
277 		info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
278 		memset(info, 0, sizeof(*info));
279 		desc_ring[idx].buffer_ptr = rte_mbuf_data_iova_default(buf);
280 	}
281 
282 	otx_ep_droq_reset_indices(droq);
283 
284 	return 0;
285 }
286 
287 /* OQ initialization */
288 static int
289 otx_ep_init_droq(struct otx_ep_device *otx_ep, uint32_t q_no,
290 	      uint32_t num_descs, uint32_t desc_size,
291 	      struct rte_mempool *mpool, unsigned int socket_id)
292 {
293 	const struct otx_ep_config *conf = otx_ep->conf;
294 	uint32_t c_refill_threshold;
295 	struct otx_ep_droq *droq;
296 	uint32_t desc_ring_size;
297 	int ret;
298 
299 	otx_ep_info("OQ[%d] Init start\n", q_no);
300 
301 	droq = otx_ep->droq[q_no];
302 	droq->otx_ep_dev = otx_ep;
303 	droq->q_no = q_no;
304 	droq->mpool = mpool;
305 
306 	droq->nb_desc      = num_descs;
307 	droq->buffer_size  = desc_size;
308 	c_refill_threshold = RTE_MAX(conf->oq.refill_threshold,
309 				     droq->nb_desc / 2);
310 
311 	/* OQ desc_ring set up */
312 	desc_ring_size = droq->nb_desc * OTX_EP_DROQ_DESC_SIZE;
313 	droq->desc_ring_mz = rte_eth_dma_zone_reserve(otx_ep->eth_dev, "droq",
314 						      q_no, desc_ring_size,
315 						      OTX_EP_PCI_RING_ALIGN,
316 						      socket_id);
317 
318 	if (droq->desc_ring_mz == NULL) {
319 		otx_ep_err("OQ:%d desc_ring allocation failed\n", q_no);
320 		goto init_droq_fail;
321 	}
322 
323 	droq->desc_ring_dma = droq->desc_ring_mz->iova;
324 	droq->desc_ring = (struct otx_ep_droq_desc *)droq->desc_ring_mz->addr;
325 
326 	otx_ep_dbg("OQ[%d]: desc_ring: virt: 0x%p, dma: %lx\n",
327 		    q_no, droq->desc_ring, (unsigned long)droq->desc_ring_dma);
328 	otx_ep_dbg("OQ[%d]: num_desc: %d\n", q_no, droq->nb_desc);
329 
330 	/* OQ buf_list set up */
331 	droq->recv_buf_list = rte_zmalloc_socket("recv_buf_list",
332 				(droq->nb_desc * sizeof(struct rte_mbuf *)),
333 				 RTE_CACHE_LINE_SIZE, socket_id);
334 	if (droq->recv_buf_list == NULL) {
335 		otx_ep_err("OQ recv_buf_list alloc failed\n");
336 		goto init_droq_fail;
337 	}
338 
339 	if (otx_ep_droq_setup_ring_buffers(droq))
340 		goto init_droq_fail;
341 
342 	droq->refill_threshold = c_refill_threshold;
343 
344 	/* Set up OQ registers */
345 	ret = otx_ep->fn_list.setup_oq_regs(otx_ep, q_no);
346 	if (ret)
347 		return ret;
348 
349 	otx_ep->io_qmask.oq |= (1ull << q_no);
350 
351 	return 0;
352 
353 init_droq_fail:
354 	return -ENOMEM;
355 }
356 
357 /* OQ configuration and setup */
358 int
359 otx_ep_setup_oqs(struct otx_ep_device *otx_ep, int oq_no, int num_descs,
360 		 int desc_size, struct rte_mempool *mpool,
361 		 unsigned int socket_id)
362 {
363 	struct otx_ep_droq *droq;
364 
365 	/* Allocate new droq. */
366 	droq = (struct otx_ep_droq *)rte_zmalloc("otx_ep_OQ",
367 				sizeof(*droq), RTE_CACHE_LINE_SIZE);
368 	if (droq == NULL) {
369 		otx_ep_err("Droq[%d] Creation Failed\n", oq_no);
370 		return -ENOMEM;
371 	}
372 	otx_ep->droq[oq_no] = droq;
373 
374 	if (otx_ep_init_droq(otx_ep, oq_no, num_descs, desc_size, mpool,
375 			     socket_id)) {
376 		otx_ep_err("Droq[%d] Initialization failed\n", oq_no);
377 		goto delete_OQ;
378 	}
379 	otx_ep_info("OQ[%d] is created.\n", oq_no);
380 
381 	otx_ep->nb_rx_queues++;
382 
383 	return 0;
384 
385 delete_OQ:
386 	otx_ep_delete_oqs(otx_ep, oq_no);
387 	return -ENOMEM;
388 }
389 
390 static inline void
391 otx_ep_iqreq_delete(struct otx_ep_instr_queue *iq, uint32_t idx)
392 {
393 	struct rte_mbuf *mbuf;
394 	uint32_t reqtype;
395 
396 	mbuf    = iq->req_list[idx].finfo.mbuf;
397 	reqtype = iq->req_list[idx].reqtype;
398 
399 	switch (reqtype) {
400 	case OTX_EP_REQTYPE_NORESP_NET:
401 	case OTX_EP_REQTYPE_NORESP_GATHER:
402 		/* This frees all segments of a chained mbuf */
403 		rte_pktmbuf_free(mbuf);
404 		otx_ep_dbg("IQ buffer freed at idx[%d]\n", idx);
405 		break;
406 
407 	case OTX_EP_REQTYPE_NONE:
408 	default:
409 		otx_ep_info("This iqreq mode is not supported:%d\n", reqtype);
410 	}
411 
412 	/* Reset the request list at this index */
413 	iq->req_list[idx].finfo.mbuf = NULL;
414 	iq->req_list[idx].reqtype = 0;
415 }
416 
417 static inline void
418 otx_ep_iqreq_add(struct otx_ep_instr_queue *iq, struct rte_mbuf *mbuf,
419 		uint32_t reqtype, int index)
420 {
421 	iq->req_list[index].finfo.mbuf = mbuf;
422 	iq->req_list[index].reqtype = reqtype;
423 }
424 
425 static uint32_t
426 otx_vf_update_read_index(struct otx_ep_instr_queue *iq)
427 {
428 	uint32_t val;
429 
430 	/*
431 	 * Batch subtractions from the HW counter to reduce PCIe traffic.
432 	 * This adds an extra local variable, but almost halves the
433 	 * number of PCIe writes.
434 	 */
435 	val = *iq->inst_cnt_ism;
436 	iq->inst_cnt += val - iq->inst_cnt_ism_prev;
437 	iq->inst_cnt_ism_prev = val;
438 
439 	if (val > (uint32_t)(1U << 31)) {
440 		/*
441 		 * Only subtract the packet count from the HW counter
442 		 * when the count is above halfway to saturation.
443 		 */
444 		rte_write32(val, iq->inst_cnt_reg);
445 		rte_mb();
446 
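		/* Ask the hardware to refresh the ISM location and poll until
		 * the post-subtraction value is visible there, so a stale
		 * count is never trusted after the subtraction above.
		 */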
447 		rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
448 		while (__atomic_load_n(iq->inst_cnt_ism, __ATOMIC_RELAXED) >= val) {
449 			rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
450 			rte_mb();
451 		}
452 
453 		iq->inst_cnt_ism_prev = 0;
454 	}
455 	rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
456 
457 	/* Mask the running instruction count with (nb_desc - 1) to get
458 	 * the new ring index; nb_desc is a power of two.
459 	 */
460 	return iq->inst_cnt & (iq->nb_desc - 1);
461 }
462 
463 static void
464 otx_ep_flush_iq(struct otx_ep_instr_queue *iq)
465 {
466 	uint32_t instr_processed = 0;
467 
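	/* Advance flush_index up to the hardware read pointer, releasing the
	 * mbuf of every instruction the device has already fetched.
	 */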
468 	iq->otx_read_index = otx_vf_update_read_index(iq);
469 	while (iq->flush_index != iq->otx_read_index) {
470 		/* Free the IQ data buffer to the pool */
471 		otx_ep_iqreq_delete(iq, iq->flush_index);
472 		iq->flush_index =
473 			otx_ep_incr_index(iq->flush_index, 1, iq->nb_desc);
474 
475 		instr_processed++;
476 	}
477 
478 	iq->stats.instr_processed = instr_processed;
479 	iq->instr_pending -= instr_processed;
480 }
481 
482 static inline void
483 otx_ep_ring_doorbell(struct otx_ep_device *otx_ep __rte_unused,
484 		struct otx_ep_instr_queue *iq)
485 {
486 	rte_wmb();
487 	rte_write64(iq->fill_cnt, iq->doorbell_reg);
488 	iq->fill_cnt = 0;
489 }
490 
491 static inline int
492 post_iqcmd(struct otx_ep_instr_queue *iq, uint8_t *iqcmd)
493 {
494 	uint8_t *iqptr, cmdsize;
495 
496 	/* This ensures that the read index does not wrap around to
497 	 * the same position if the queue gets full before OCTEON 9 can
498 	 * fetch any instruction.
499 	 */
500 	if (iq->instr_pending > (iq->nb_desc - 1))
501 		return OTX_EP_IQ_SEND_FAILED;
502 
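	/* Each IQ slot is 64 bytes wide, so the byte offset of the current
	 * slot is host_write_index << 6.
	 */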
503 	/* Copy cmd into iq */
504 	cmdsize = 64;
505 	iqptr   = iq->base_addr + (iq->host_write_index << 6);
506 
507 	rte_memcpy(iqptr, iqcmd, cmdsize);
508 
509 	/* Increment the host write index */
510 	iq->host_write_index =
511 		otx_ep_incr_index(iq->host_write_index, 1, iq->nb_desc);
512 
513 	iq->fill_cnt++;
514 
515 	/* Flush the command into memory. We need to be sure the data
516 	 * is in memory before indicating that the instruction is
517 	 * pending.
518 	 */
519 	iq->instr_pending++;
520 	/* OTX_EP_IQ_SEND_SUCCESS */
521 	return 0;
522 }
523 
524 
525 static int
526 otx_ep_send_data(struct otx_ep_device *otx_ep, struct otx_ep_instr_queue *iq,
527 		 void *cmd, int dbell)
528 {
529 	uint32_t ret;
530 
531 	/* Submit IQ command */
532 	ret = post_iqcmd(iq, cmd);
533 
534 	if (ret == OTX_EP_IQ_SEND_SUCCESS) {
535 		if (dbell)
536 			otx_ep_ring_doorbell(otx_ep, iq);
537 		iq->stats.instr_posted++;
538 
539 	} else {
540 		iq->stats.instr_dropped++;
541 		if (iq->fill_cnt)
542 			otx_ep_ring_doorbell(otx_ep, iq);
543 	}
544 	return ret;
545 }
546 
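/* The four 16-bit gather sizes share one 64-bit word laid out in
 * big-endian order, so on little-endian hosts the size slots are
 * filled in mirrored order.
 */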
547 static inline void
548 set_sg_size(struct otx_ep_sg_entry *sg_entry, uint16_t size, uint32_t pos)
549 {
550 #if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
551 	sg_entry->u.size[pos] = size;
552 #elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
553 	sg_entry->u.size[(OTX_EP_NUM_SG_PTRS - 1) - pos] = size;
554 #endif
555 }
556 
557 static inline int
558 prepare_xmit_gather_list(struct otx_ep_instr_queue *iq, struct rte_mbuf *m, uint64_t *dptr,
559 			 union otx_ep_instr_ih *ih)
560 {
561 	uint16_t j = 0, frags, num_sg, mask = OTX_EP_NUM_SG_PTRS - 1;
562 	struct otx_ep_buf_free_info *finfo;
563 	uint32_t pkt_len;
564 	int rc = -1;
565 
566 	pkt_len = rte_pktmbuf_pkt_len(m);
567 	frags = m->nb_segs;
568 	num_sg = (frags + mask) / OTX_EP_NUM_SG_PTRS;
569 
570 	if (unlikely(pkt_len > OTX_EP_MAX_PKT_SZ || num_sg > OTX_EP_MAX_SG_LISTS)) {
571 		otx_ep_err("Failed to xmit the pkt, pkt_len exceeds max or pkt has too many segments\n");
572 		goto exit;
573 	}
574 
575 	finfo = &iq->req_list[iq->host_write_index].finfo;
576 	*dptr = rte_mem_virt2iova(finfo->g.sg);
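	/* Mark the command as a gather request: set the gather flag, the
	 * number of buffer pointers and the total length (payload plus the
	 * front data size fsz) in the raw IH word.
	 */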
577 	ih->u64 |= ((1ULL << 62) | ((uint64_t)frags << 48) | (pkt_len + ih->s.fsz));
578 
579 	while (frags--) {
580 		finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
581 		set_sg_size(&finfo->g.sg[(j >> 2)], m->data_len, (j & mask));
582 		j++;
583 		m = m->next;
584 	}
585 
586 	return 0;
587 
588 exit:
589 	return rc;
590 }
591 
592 /* Enqueue requests/packets to OTX_EP IQ queue.
593  * Returns the number of packets enqueued successfully.
594  */
595 uint16_t
596 otx_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
597 {
598 	struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
599 	struct otx_ep_device *otx_ep = iq->otx_ep_dev;
600 	struct otx_ep_instr_64B iqcmd;
601 	int dbell, index, count = 0;
602 	uint32_t iqreq_type;
603 	uint32_t pkt_len, i;
604 	struct rte_mbuf *m;
605 
606 	iqcmd.ih.u64 = 0;
607 	iqcmd.pki_ih3.u64 = 0;
608 	iqcmd.irh.u64 = 0;
609 
610 	/* ih invars */
611 	iqcmd.ih.s.fsz = OTX_EP_FSZ;
612 	iqcmd.ih.s.pkind = otx_ep->pkind; /* The SDK decided PKIND value */
613 
614 	/* pki ih3 invars */
615 	iqcmd.pki_ih3.s.w = 1;
616 	iqcmd.pki_ih3.s.utt = 1;
617 	iqcmd.pki_ih3.s.tagtype = ORDERED_TAG;
618 	/* sl will be sizeof(pki_ih3) */
619 	iqcmd.pki_ih3.s.sl = OTX_EP_FSZ + OTX_CUST_DATA_LEN;
620 
621 	/* irh invars */
622 	iqcmd.irh.s.opcode = OTX_EP_NW_PKT_OP;
623 
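	/* Build one 64B instruction per packet: single-segment mbufs pass
	 * their buffer IOVA directly in DPTR, multi-segment mbufs go through
	 * a gather list prepared in prepare_xmit_gather_list().
	 */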
624 	for (i = 0; i < nb_pkts; i++) {
625 		m = pkts[i];
626 		if (m->nb_segs == 1) {
627 			pkt_len = rte_pktmbuf_data_len(m);
628 			iqcmd.ih.s.tlen = pkt_len + iqcmd.ih.s.fsz;
629 			iqcmd.dptr = rte_mbuf_data_iova(m); /*dptr*/
630 			iqcmd.ih.s.gather = 0;
631 			iqcmd.ih.s.gsz = 0;
632 			iqreq_type = OTX_EP_REQTYPE_NORESP_NET;
633 		} else {
634 			if (!(otx_ep->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
635 				goto xmit_fail;
636 
637 			if (unlikely(prepare_xmit_gather_list(iq, m, &iqcmd.dptr, &iqcmd.ih) < 0))
638 				goto xmit_fail;
639 
640 			pkt_len = rte_pktmbuf_pkt_len(m);
641 			iqreq_type = OTX_EP_REQTYPE_NORESP_GATHER;
642 		}
643 
644 		iqcmd.irh.u64 = rte_bswap64(iqcmd.irh.u64);
645 
646 #ifdef OTX_EP_IO_DEBUG
647 		otx_ep_dbg("After swapping\n");
648 		otx_ep_dbg("Word0 [dptr]: 0x%016lx\n",
649 			   (unsigned long)iqcmd.dptr);
650 		otx_ep_dbg("Word1 [ihtx]: 0x%016lx\n", (unsigned long)iqcmd.ih);
651 		otx_ep_dbg("Word2 [pki_ih3]: 0x%016lx\n",
652 			   (unsigned long)iqcmd.pki_ih3);
653 		otx_ep_dbg("Word3 [rptr]: 0x%016lx\n",
654 			   (unsigned long)iqcmd.rptr);
655 		otx_ep_dbg("Word4 [irh]: 0x%016lx\n", (unsigned long)iqcmd.irh);
656 		otx_ep_dbg("Word5 [exhdr[0]]: 0x%016lx\n",
657 				(unsigned long)iqcmd.exhdr[0]);
658 		rte_pktmbuf_dump(stdout, m, rte_pktmbuf_pkt_len(m));
659 #endif
660 		dbell = (i == (unsigned int)(nb_pkts - 1)) ? 1 : 0;
661 		index = iq->host_write_index;
662 		if (otx_ep_send_data(otx_ep, iq, &iqcmd, dbell))
663 			goto xmit_fail;
664 		otx_ep_iqreq_add(iq, m, iqreq_type, index);
665 		iq->stats.tx_pkts++;
666 		iq->stats.tx_bytes += pkt_len;
667 		count++;
668 	}
669 
670 xmit_fail:
671 	if (iq->instr_pending >= OTX_EP_MAX_INSTR)
672 		otx_ep_flush_iq(iq);
673 
674 	/* Return the number of instructions posted successfully. */
675 	return count;
676 }
677 
678 static uint32_t
679 otx_ep_droq_refill(struct otx_ep_droq *droq)
680 {
681 	struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
682 	struct otx_ep_droq_info *info;
683 	struct rte_mbuf *buf = NULL;
684 	uint32_t desc_refilled = 0;
685 
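	/* Replenish only the slots already handed to the application
	 * (refill_count), walking refill_idx around the ring.
	 */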
686 	while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
687 		buf = rte_pktmbuf_alloc(droq->mpool);
688 		/* If a buffer could not be allocated, no point in
689 		 * continuing
690 		 */
691 		if (unlikely(!buf)) {
692 			droq->stats.rx_alloc_failure++;
693 			break;
694 		}
695 		info = rte_pktmbuf_mtod(buf, struct otx_ep_droq_info *);
696 		info->length = 0;
697 
698 		droq->recv_buf_list[droq->refill_idx] = buf;
699 		desc_ring[droq->refill_idx].buffer_ptr =
700 					rte_mbuf_data_iova_default(buf);
701 		droq->refill_idx = otx_ep_incr_index(droq->refill_idx, 1,
702 				droq->nb_desc);
703 
704 		desc_refilled++;
705 		droq->refill_count--;
706 	}
707 
708 	return desc_refilled;
709 }
710 
711 static struct rte_mbuf *
712 otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, struct otx_ep_droq *droq, int next_fetch)
713 {
714 	volatile struct otx_ep_droq_info *info;
715 	struct rte_mbuf *mbuf_next = NULL;
716 	struct rte_mbuf *mbuf = NULL;
717 	uint64_t total_pkt_len;
718 	uint32_t pkt_len = 0;
719 	int next_idx;
720 
721 	mbuf = droq->recv_buf_list[droq->read_idx];
722 	info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
723 
724 	/* make sure info is available */
725 	rte_rmb();
726 	if (unlikely(!info->length)) {
727 		int retry = OTX_EP_MAX_DELAYED_PKT_RETRIES;
728 		/* otx_ep_dbg("OCTEON DROQ[%d]: read_idx: %d; Data not ready "
729 		 * "yet, Retry; pending=%lu\n", droq->q_no, droq->read_idx,
730 		 * droq->pkts_pending);
731 		 */
732 		droq->stats.pkts_delayed_data++;
733 		while (retry && !info->length) {
734 			retry--;
735 			rte_delay_us_block(50);
736 		}
737 		if (!retry && !info->length) {
738 			otx_ep_err("OCTEON DROQ[%d]: read_idx: %d; Retry failed !!\n",
739 				   droq->q_no, droq->read_idx);
740 			/* May be zero length packet; drop it */
741 			assert(0);
742 		}
743 	}
744 
745 	if (next_fetch) {
746 		next_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
747 		mbuf_next = droq->recv_buf_list[next_idx];
748 		rte_prefetch0(rte_pktmbuf_mtod(mbuf_next, void *));
749 	}
750 
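	/* The packet length arrives in the top 16 bits of the info word with
	 * its bytes swapped; recover the host-order length before use.
	 */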
751 	info->length = rte_bswap16(info->length >> 48);
752 	/* Deduce the actual data size */
753 	total_pkt_len = info->length + OTX_EP_INFO_SIZE;
754 	if (total_pkt_len <= droq->buffer_size) {
755 		mbuf->data_off += OTX_EP_INFO_SIZE;
756 		pkt_len = (uint32_t)info->length;
757 		mbuf->pkt_len  = pkt_len;
758 		mbuf->data_len  = pkt_len;
759 		mbuf->port = otx_ep->port_id;
760 		droq->recv_buf_list[droq->read_idx] = NULL;
761 		droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
762 		droq->refill_count++;
763 	} else {
764 		struct rte_mbuf *first_buf = NULL;
765 		struct rte_mbuf *last_buf = NULL;
766 
767 		/* csr read helps to flush pending dma */
768 		droq->sent_reg_val = rte_read32(droq->pkts_sent_reg);
769 		rte_rmb();
770 
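		/* The packet spans several ring buffers: walk the ring,
		 * chaining the mbufs and accumulating pkt_len in the first
		 * segment until the full length has been consumed.
		 */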
771 		while (pkt_len < total_pkt_len) {
772 			int cpy_len = 0;
773 
774 			cpy_len = ((pkt_len + droq->buffer_size) > total_pkt_len)
775 					? ((uint32_t)total_pkt_len - pkt_len)
776 					: droq->buffer_size;
777 
778 			mbuf = droq->recv_buf_list[droq->read_idx];
779 			droq->recv_buf_list[droq->read_idx] = NULL;
780 
781 			if (likely(mbuf)) {
782 				/* Note the first seg */
783 				if (!pkt_len)
784 					first_buf = mbuf;
785 
786 				mbuf->port = otx_ep->port_id;
787 				if (!pkt_len) {
788 					mbuf->data_off += OTX_EP_INFO_SIZE;
789 					mbuf->pkt_len = cpy_len - OTX_EP_INFO_SIZE;
790 					mbuf->data_len = cpy_len - OTX_EP_INFO_SIZE;
791 				} else {
792 					mbuf->pkt_len = cpy_len;
793 					mbuf->data_len = cpy_len;
794 				}
795 
796 				if (pkt_len) {
797 					first_buf->nb_segs++;
798 					first_buf->pkt_len += mbuf->pkt_len;
799 				}
800 
801 				if (last_buf)
802 					last_buf->next = mbuf;
803 
804 				last_buf = mbuf;
805 			} else {
806 				otx_ep_err("no buf\n");
807 				assert(0);
808 			}
809 
810 			pkt_len += cpy_len;
811 			droq->read_idx = otx_ep_incr_index(droq->read_idx, 1, droq->nb_desc);
812 			droq->refill_count++;
813 		}
814 		mbuf = first_buf;
815 	}
816 
817 	return mbuf;
818 }
819 
820 static inline uint32_t
821 otx_ep_check_droq_pkts(struct otx_ep_droq *droq)
822 {
823 	uint32_t new_pkts;
824 	uint32_t val;
825 
826 	/*
827 	 * Batch subtractions from the HW counter to reduce PCIe traffic.
828 	 * This adds an extra local variable, but almost halves the
829 	 * number of PCIe writes.
830 	 */
831 	val = *droq->pkts_sent_ism;
832 	new_pkts = val - droq->pkts_sent_ism_prev;
833 	droq->pkts_sent_ism_prev = val;
834 
835 	if (val > (uint32_t)(1U << 31)) {
836 		/*
837 		 * Only subtract the packet count from the HW counter
838 		 * when the count is above halfway to saturation.
839 		 */
840 		rte_write32(val, droq->pkts_sent_reg);
841 		rte_mb();
842 
843 		rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
844 		while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {
845 			rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
846 			rte_mb();
847 		}
848 
849 		droq->pkts_sent_ism_prev = 0;
850 	}
851 	rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
852 	droq->pkts_pending += new_pkts;
853 
854 	return new_pkts;
855 }
856 
857 static inline int32_t __rte_hot
858 otx_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)
859 {
860 	if (unlikely(droq->pkts_pending < nb_pkts))
861 		otx_ep_check_droq_pkts(droq);
862 
863 	return RTE_MIN(nb_pkts, droq->pkts_pending);
864 }
865 
866 /* Check for response arrival from OCTEON 9
867  * Returns the number of packets received.
868  */
869 uint16_t
870 otx_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
871 {
872 	struct otx_ep_droq *droq = rx_queue;
873 	struct otx_ep_device *otx_ep;
874 	struct rte_mbuf *oq_pkt;
875 	uint16_t pkts, new_pkts;
876 	uint32_t valid_pkts = 0;
877 	int next_fetch;
878 
879 	otx_ep = droq->otx_ep_dev;
880 	new_pkts = otx_ep_rx_pkts_to_process(droq, nb_pkts);
881 
882 	for (pkts = 0; pkts < new_pkts; pkts++) {
883 		/* Push the received pkt to application */
884 		next_fetch = (pkts == new_pkts - 1) ? 0 : 1;
885 		oq_pkt = otx_ep_droq_read_packet(otx_ep, droq, next_fetch);
886 		if (!oq_pkt) {
887 			RTE_LOG_DP(ERR, OTX_NET_EP,
888 				   "DROQ read pkt failed pending %" PRIu64
889 				    " last_pkt_count %" PRIu64 " new_pkts %d.\n",
890 				   droq->pkts_pending, droq->last_pkt_count,
891 				   new_pkts);
892 			droq->stats.rx_err++;
893 			continue;
894 		} else {
895 			rx_pkts[valid_pkts] = oq_pkt;
896 			valid_pkts++;
897 			/* Stats */
898 			droq->stats.pkts_received++;
899 			droq->stats.bytes_received += oq_pkt->pkt_len;
900 		}
901 	}
902 	droq->pkts_pending -= pkts;
903 
904 	/* Refill DROQ buffers */
905 	if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
906 		int desc_refilled = otx_ep_droq_refill(droq);
907 
908 		/* Flush the droq descriptor data to memory to be sure
909 		 * that when we update the credits the data in memory is
910 		 * accurate.
911 		 */
912 		rte_io_wmb();
913 		rte_write32(desc_refilled, droq->pkts_credit_reg);
914 	} else {
915 		/*
916 		 * SDP output goes into DROP state when output doorbell count
917 		 * goes below drop count. When door bell count is written with
918 		 * a value greater than drop count SDP output should come out
919 		 * of DROP state. Due to a race condition this is not happening.
920 		 * Writing doorbell register with 0 again may make SDP output
921 		 * come out of this state.
922 		 */
923 
924 		rte_write32(0, droq->pkts_credit_reg);
925 	}
926 	return valid_pkts;
927 }
928