/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2022 Intel Corporation
 */

#include "gve_ethdev.h"
#include "base/gve_adminq.h"

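/*
 * Bulk-free up to GVE_TX_MAX_FREE_SZ completed TX mbufs, batching mbufs that
 * belong to the same mempool so they can be returned with rte_mempool_put_bulk().
 */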
static inline void
gve_free_bulk_mbuf(struct rte_mbuf **txep, int num)
{
	struct rte_mbuf *m, *free[GVE_TX_MAX_FREE_SZ];
	int nb_free = 0;
	int i, s;

	if (unlikely(num == 0))
		return;

	/* Find the first mbuf that needs to be freed. */
	for (s = 0; s < num; s++) {
		if (txep[s] != NULL) {
			m = rte_pktmbuf_prefree_seg(txep[s]);
			if (m != NULL)
				break;
		}
	}

	if (s == num)
		return;

	free[0] = m;
	nb_free = 1;
	for (i = s + 1; i < num; i++) {
		if (likely(txep[i] != NULL)) {
			m = rte_pktmbuf_prefree_seg(txep[i]);
			if (likely(m != NULL)) {
				if (likely(m->pool == free[0]->pool)) {
					free[nb_free++] = m;
				} else {
					rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
					free[0] = m;
					nb_free = 1;
				}
			}
			txep[i] = NULL;
		}
	}
	rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
}

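/*
 * Reclaim descriptors already processed by the device, based on the hardware
 * head counter. For QPL queues the FIFO space is returned; otherwise the
 * transmitted mbufs are freed from the software ring.
 */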
static inline void
gve_tx_clean(struct gve_tx_queue *txq)
{
	uint16_t mask = txq->nb_tx_desc - 1;
	uint32_t start = txq->next_to_clean & mask;
	uint32_t ntc, nb_clean, i;
	struct gve_tx_iovec *iov;

	ntc = rte_be_to_cpu_32(rte_read32(txq->qtx_head));
	ntc = ntc & mask;

	if (ntc == start)
		return;

	/* If the ring has wrapped around, clean in two passes. */
	if (ntc < start) {
		nb_clean = txq->nb_tx_desc - start;
		if (nb_clean > GVE_TX_MAX_FREE_SZ)
			nb_clean = GVE_TX_MAX_FREE_SZ;
		if (txq->is_gqi_qpl) {
			for (i = start; i < start + nb_clean; i++) {
				iov = &txq->iov_ring[i];
				txq->fifo_avail += iov->iov_len;
				iov->iov_base = 0;
				iov->iov_len = 0;
			}
		} else {
			gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);
		}
		txq->nb_free += nb_clean;
		start += nb_clean;
		if (start == txq->nb_tx_desc)
			start = 0;
		txq->next_to_clean += nb_clean;
	}

	if (ntc > start) {
		nb_clean = ntc - start;
		if (nb_clean > GVE_TX_MAX_FREE_SZ)
			nb_clean = GVE_TX_MAX_FREE_SZ;
		if (txq->is_gqi_qpl) {
			for (i = start; i < start + nb_clean; i++) {
				iov = &txq->iov_ring[i];
				txq->fifo_avail += iov->iov_len;
				iov->iov_base = 0;
				iov->iov_len = 0;
			}
		} else {
			gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);
		}
		txq->nb_free += nb_clean;
		txq->next_to_clean += nb_clean;
	}
}

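/* Free transmitted mbufs tracked in the software ring of a QPL queue. */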
static inline void
gve_tx_clean_swr_qpl(struct gve_tx_queue *txq)
{
	uint32_t start = txq->sw_ntc;
	uint32_t ntc, nb_clean;

	ntc = txq->sw_tail;

	if (ntc == start)
		return;

	/* If the ring has wrapped around, clean in two passes. */
	if (ntc < start) {
		nb_clean = txq->nb_tx_desc - start;
		if (nb_clean > GVE_TX_MAX_FREE_SZ)
			nb_clean = GVE_TX_MAX_FREE_SZ;
		gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);

		txq->sw_nb_free += nb_clean;
		start += nb_clean;
		if (start == txq->nb_tx_desc)
			start = 0;
		txq->sw_ntc = start;
	}

	if (ntc > start) {
		nb_clean = ntc - start;
		if (nb_clean > GVE_TX_MAX_FREE_SZ)
			nb_clean = GVE_TX_MAX_FREE_SZ;
		gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);
		txq->sw_nb_free += nb_clean;
		start += nb_clean;
		txq->sw_ntc = start;
	}
}

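/*
 * Fill the first (packet) descriptor: descriptor count, packet length, first
 * segment length/address and the checksum/TSO offload flags.
 */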
static inline void
gve_tx_fill_pkt_desc(volatile union gve_tx_desc *desc, struct rte_mbuf *mbuf,
		     uint8_t desc_cnt, uint16_t len, uint64_t addr)
{
	uint64_t csum_l4 = mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK;
	uint8_t l4_csum_offset = 0;
	uint8_t l4_hdr_offset = 0;

	if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
		csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM;

	switch (csum_l4) {
	case RTE_MBUF_F_TX_TCP_CKSUM:
		l4_csum_offset = offsetof(struct rte_tcp_hdr, cksum);
		l4_hdr_offset = mbuf->l2_len + mbuf->l3_len;
		break;
	case RTE_MBUF_F_TX_UDP_CKSUM:
		l4_csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum);
		l4_hdr_offset = mbuf->l2_len + mbuf->l3_len;
		break;
	case RTE_MBUF_F_TX_SCTP_CKSUM:
		l4_csum_offset = offsetof(struct rte_sctp_hdr, cksum);
		l4_hdr_offset = mbuf->l2_len + mbuf->l3_len;
		break;
	}

	if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
		desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
		desc->pkt.l4_csum_offset = l4_csum_offset >> 1;
		desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else if (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
		desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
		desc->pkt.l4_csum_offset = l4_csum_offset >> 1;
		desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else {
		desc->pkt.type_flags = GVE_TXD_STD;
		desc->pkt.l4_csum_offset = 0;
		desc->pkt.l4_hdr_offset = 0;
	}
	desc->pkt.desc_cnt = desc_cnt;
	desc->pkt.len = rte_cpu_to_be_16(mbuf->pkt_len);
	desc->pkt.seg_len = rte_cpu_to_be_16(len);
	desc->pkt.seg_addr = rte_cpu_to_be_64(addr);
}

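/* Fill a segment descriptor; for TSO also set the L3 offset and the MSS. */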
static inline void
gve_tx_fill_seg_desc(volatile union gve_tx_desc *desc, uint64_t ol_flags,
		      union gve_tx_offload tx_offload,
		      uint16_t len, uint64_t addr)
{
	desc->seg.type_flags = GVE_TXD_SEG;
	if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
		if (ol_flags & RTE_MBUF_F_TX_IPV6)
			desc->seg.type_flags |= GVE_TXSF_IPV6;
		desc->seg.l3_offset = tx_offload.l2_len >> 1;
		desc->seg.mss = rte_cpu_to_be_16(tx_offload.tso_segsz);
	}
	desc->seg.seg_len = rte_cpu_to_be_16(len);
	desc->seg.seg_addr = rte_cpu_to_be_64(addr);
}

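/*
 * Check whether the TX FIFO can hold 'len' more bytes without splitting the
 * segment across the FIFO wrap-around point.
 */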
static inline bool
is_fifo_avail(struct gve_tx_queue *txq, uint16_t len)
{
	if (txq->fifo_avail < len)
		return false;
	/* Don't split segment. */
	if (txq->fifo_head + len > txq->fifo_size &&
	    txq->fifo_size - txq->fifo_head + len > txq->fifo_avail)
		return false;
	return true;
}

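/*
 * Reserve 'len' bytes in the TX FIFO for descriptor 'tx_id' and return the
 * offset of the reserved area. The reservation never spans the end of the
 * FIFO, and the next head is re-aligned to a cache line.
 */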
static inline uint64_t
gve_tx_alloc_from_fifo(struct gve_tx_queue *txq, uint16_t tx_id, uint16_t len)
{
	uint32_t head = txq->fifo_head;
	uint32_t size = txq->fifo_size;
	struct gve_tx_iovec *iov;
	uint32_t aligned_head;
	uint32_t iov_len = 0;
	uint64_t fifo_addr;

	iov = &txq->iov_ring[tx_id];

	/* Don't split segment */
	if (head + len > size) {
		iov_len += (size - head);
		head = 0;
	}

	fifo_addr = head;
	iov_len += len;
	iov->iov_base = head;

	/* Re-align to a cacheline for next head */
	head += len;
	aligned_head = RTE_ALIGN(head, RTE_CACHE_LINE_SIZE);
	iov_len += (aligned_head - head);
	iov->iov_len = iov_len;

	if (aligned_head == txq->fifo_size)
		aligned_head = 0;
	txq->fifo_head = aligned_head;
	txq->fifo_avail -= iov_len;

	return fifo_addr;
}

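/*
 * TX burst function for the GQI_QPL queue format: packet data is copied into
 * the pre-registered queue page list FIFO and the descriptors reference
 * offsets within that FIFO instead of the mbuf buffers.
 */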
static inline uint16_t
gve_tx_burst_qpl(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	union gve_tx_offload tx_offload = {0};
	volatile union gve_tx_desc *txr, *txd;
	struct gve_tx_queue *txq = tx_queue;
	struct rte_mbuf **sw_ring = txq->sw_ring;
	uint16_t mask = txq->nb_tx_desc - 1;
	uint16_t tx_id = txq->tx_tail & mask;
	uint64_t ol_flags, addr, fifo_addr;
	uint32_t tx_tail = txq->tx_tail;
	struct rte_mbuf *tx_pkt, *first;
	uint16_t sw_id = txq->sw_tail;
	uint16_t nb_used, i;
	uint64_t bytes = 0;
	uint16_t nb_tx = 0;
	uint32_t hlen;

	txr = txq->tx_desc_ring;

	if (txq->nb_free < txq->free_thresh || txq->fifo_avail == 0)
		gve_tx_clean(txq);

	if (txq->sw_nb_free < txq->free_thresh)
		gve_tx_clean_swr_qpl(txq);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		tx_pkt = *tx_pkts++;
		ol_flags = tx_pkt->ol_flags;

		if (txq->sw_nb_free < tx_pkt->nb_segs) {
			gve_tx_clean_swr_qpl(txq);
			if (txq->sw_nb_free < tx_pkt->nb_segs)
				goto end_of_tx;
		}

		/* Even for a multi-segment packet, the data goes into one QPL buffer. */
		nb_used = 1;
		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
			nb_used++;

		if (txq->nb_free < nb_used)
			goto end_of_tx;

		tx_offload.l2_len = tx_pkt->l2_len;
		tx_offload.l3_len = tx_pkt->l3_len;
		tx_offload.l4_len = tx_pkt->l4_len;
		tx_offload.tso_segsz = tx_pkt->tso_segsz;

		first = tx_pkt;
		txd = &txr[tx_id];
		hlen = ol_flags & RTE_MBUF_F_TX_TCP_SEG ?
			(uint32_t)(tx_offload.l2_len + tx_offload.l3_len + tx_offload.l4_len) :
			tx_pkt->pkt_len;

		sw_ring[sw_id] = tx_pkt;
		if (!is_fifo_avail(txq, hlen)) {
			gve_tx_clean(txq);
			if (!is_fifo_avail(txq, hlen))
				goto end_of_tx;
		}
		addr = (uint64_t)(tx_pkt->buf_addr) + tx_pkt->data_off;
		fifo_addr = gve_tx_alloc_from_fifo(txq, tx_id, hlen);

		/* For TSO, make sure the FIFO can also hold the payload before copying. */
		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
			if (!is_fifo_avail(txq, tx_pkt->pkt_len - hlen)) {
				gve_tx_clean(txq);
				if (!is_fifo_avail(txq, tx_pkt->pkt_len - hlen))
					goto end_of_tx;
			}
		}
		if (tx_pkt->nb_segs == 1 || ol_flags & RTE_MBUF_F_TX_TCP_SEG)
			rte_memcpy((void *)(size_t)(fifo_addr + txq->fifo_base),
				   (void *)(size_t)addr, hlen);
		else
			rte_pktmbuf_read(tx_pkt, 0, hlen,
					 (void *)(size_t)(fifo_addr + txq->fifo_base));
		gve_tx_fill_pkt_desc(txd, tx_pkt, nb_used, hlen, fifo_addr);

		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
			tx_id = (tx_id + 1) & mask;
			txd = &txr[tx_id];
			addr = (uint64_t)(tx_pkt->buf_addr) + tx_pkt->data_off + hlen;
			fifo_addr = gve_tx_alloc_from_fifo(txq, tx_id, tx_pkt->pkt_len - hlen);
			if (tx_pkt->nb_segs == 1)
				rte_memcpy((void *)(size_t)(fifo_addr + txq->fifo_base),
					   (void *)(size_t)addr,
					   tx_pkt->pkt_len - hlen);
			else
				rte_pktmbuf_read(tx_pkt, hlen, tx_pkt->pkt_len - hlen,
						 (void *)(size_t)(fifo_addr + txq->fifo_base));

			gve_tx_fill_seg_desc(txd, ol_flags, tx_offload,
					     tx_pkt->pkt_len - hlen, fifo_addr);
		}

		/* Record the remaining segments in sw_ring so they can be freed later. */
		for (i = 1; i < first->nb_segs; i++) {
			sw_id = (sw_id + 1) & mask;
			tx_pkt = tx_pkt->next;
			sw_ring[sw_id] = tx_pkt;
		}

		sw_id = (sw_id + 1) & mask;
		tx_id = (tx_id + 1) & mask;

		txq->nb_free -= nb_used;
		txq->sw_nb_free -= first->nb_segs;
		tx_tail += nb_used;

		bytes += first->pkt_len;
	}

end_of_tx:
	if (nb_tx) {
		rte_write32(rte_cpu_to_be_32(tx_tail), txq->qtx_tail);
		txq->tx_tail = tx_tail;
		txq->sw_tail = sw_id;

		txq->stats.packets += nb_tx;
		txq->stats.bytes += bytes;
		txq->stats.errors += nb_pkts - nb_tx;
	}

	return nb_tx;
}

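/*
 * TX burst function for the raw-addressing queue format: descriptors carry
 * the IOVA of each mbuf segment directly, so no data copy is needed.
 */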
static inline uint16_t
gve_tx_burst_ra(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	union gve_tx_offload tx_offload = {0};
	volatile union gve_tx_desc *txr, *txd;
	struct gve_tx_queue *txq = tx_queue;
	struct rte_mbuf **sw_ring = txq->sw_ring;
	uint16_t mask = txq->nb_tx_desc - 1;
	uint16_t tx_id = txq->tx_tail & mask;
	uint32_t tx_tail = txq->tx_tail;
	struct rte_mbuf *tx_pkt, *first;
	uint16_t nb_used, hlen, i;
	uint64_t ol_flags, addr;
	uint64_t bytes = 0;
	uint16_t nb_tx = 0;

	txr = txq->tx_desc_ring;

	if (txq->nb_free < txq->free_thresh)
		gve_tx_clean(txq);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		tx_pkt = *tx_pkts++;
		ol_flags = tx_pkt->ol_flags;

		nb_used = tx_pkt->nb_segs;
		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
			nb_used++;

		if (txq->nb_free < nb_used)
			goto end_of_tx;

		tx_offload.l2_len = tx_pkt->l2_len;
		tx_offload.l3_len = tx_pkt->l3_len;
		tx_offload.l4_len = tx_pkt->l4_len;
		tx_offload.tso_segsz = tx_pkt->tso_segsz;

		first = tx_pkt;
		txd = &txr[tx_id];

		hlen = ol_flags & RTE_MBUF_F_TX_TCP_SEG ?
			(uint32_t)(tx_offload.l2_len + tx_offload.l3_len + tx_offload.l4_len) :
			tx_pkt->pkt_len;
		/*
		 * For TSO the driver fills two descriptors for the first mbuf,
		 * so the mbuf is only recorded in the first TX entry of the sw ring.
		 */
		sw_ring[tx_id] = tx_pkt;
		addr = rte_mbuf_data_iova(tx_pkt);
		gve_tx_fill_pkt_desc(txd, tx_pkt, nb_used, hlen, addr);

		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
			tx_id = (tx_id + 1) & mask;
			txd = &txr[tx_id];
			addr = rte_mbuf_data_iova(tx_pkt) + hlen;
			gve_tx_fill_seg_desc(txd, ol_flags, tx_offload,
					     tx_pkt->data_len - hlen, addr);
		}

		for (i = 1; i < first->nb_segs; i++) {
			tx_id = (tx_id + 1) & mask;
			txd = &txr[tx_id];
			tx_pkt = tx_pkt->next;
			sw_ring[tx_id] = tx_pkt;
			addr = rte_mbuf_data_iova(tx_pkt);
			gve_tx_fill_seg_desc(txd, ol_flags, tx_offload,
					     tx_pkt->data_len, addr);
		}
		tx_id = (tx_id + 1) & mask;

		txq->nb_free -= nb_used;
		tx_tail += nb_used;

		bytes += first->pkt_len;
	}

end_of_tx:
	if (nb_tx) {
		rte_write32(rte_cpu_to_be_32(tx_tail), txq->qtx_tail);
		txq->tx_tail = tx_tail;

		txq->stats.packets += nb_tx;
		txq->stats.bytes += bytes;
		txq->stats.errors += nb_pkts - nb_tx;
	}

	return nb_tx;
}

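/* Registered as dev->tx_pkt_burst; dispatch on the queue format in use. */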
uint16_t
gve_tx_burst(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct gve_tx_queue *txq = tx_queue;

	if (txq->is_gqi_qpl)
		return gve_tx_burst_qpl(tx_queue, tx_pkts, nb_pkts);

	return gve_tx_burst_ra(tx_queue, tx_pkts, nb_pkts);
}

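/* Reset the descriptor ring and all software state of a TX queue. */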
static inline void
gve_reset_txq(struct gve_tx_queue *txq)
{
	struct rte_mbuf **sw_ring;
	uint32_t size, i;

	if (txq == NULL) {
		PMD_DRV_LOG(ERR, "Pointer to txq is NULL");
		return;
	}

	size = txq->nb_tx_desc * sizeof(union gve_tx_desc);
	for (i = 0; i < size; i++)
		((volatile char *)txq->tx_desc_ring)[i] = 0;

	sw_ring = txq->sw_ring;
	for (i = 0; i < txq->nb_tx_desc; i++) {
		sw_ring[i] = NULL;
		if (txq->is_gqi_qpl) {
			txq->iov_ring[i].iov_base = 0;
			txq->iov_ring[i].iov_len = 0;
		}
	}

	txq->tx_tail = 0;
	txq->nb_free = txq->nb_tx_desc - 1;
	txq->next_to_clean = 0;

	if (txq->is_gqi_qpl) {
		txq->fifo_size = PAGE_SIZE * txq->hw->tx_pages_per_qpl;
		txq->fifo_avail = txq->fifo_size;
		txq->fifo_head = 0;
		txq->fifo_base = (uint64_t)(txq->qpl->mz->addr);

		txq->sw_tail = 0;
		txq->sw_nb_free = txq->nb_tx_desc - 1;
		txq->sw_ntc = 0;
	}
}

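/* Free every mbuf still referenced by the software ring. */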
static inline void
gve_release_txq_mbufs(struct gve_tx_queue *txq)
{
	uint16_t i;

	for (i = 0; i < txq->nb_tx_desc; i++) {
		if (txq->sw_ring[i]) {
			rte_pktmbuf_free_seg(txq->sw_ring[i]);
			txq->sw_ring[i] = NULL;
		}
	}
}

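/* Release the QPL, rings, DMA zones and the queue structure itself. */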
void
gve_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct gve_tx_queue *q = dev->data->tx_queues[qid];

	if (!q)
		return;

	if (q->is_gqi_qpl) {
		gve_teardown_queue_page_list(q->hw, q->qpl);
		rte_free(q->iov_ring);
		q->qpl = NULL;
	}

	gve_release_txq_mbufs(q);
	rte_free(q->sw_ring);
	rte_memzone_free(q->mz);
	rte_memzone_free(q->qres_mz);
	q->qres = NULL;
	rte_free(q);
}

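/*
 * Allocate and initialize a TX queue: descriptor ring, software ring, queue
 * resources and, for the QPL format, the IOV ring and queue page list.
 */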
int
gve_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id, uint16_t nb_desc,
		   unsigned int socket_id, const struct rte_eth_txconf *conf)
{
	struct gve_priv *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct gve_tx_queue *txq;
	uint16_t free_thresh;
	int err = 0;

	/* Ring size is required to be a power of two. */
	if (!rte_is_power_of_2(nb_desc)) {
		PMD_DRV_LOG(ERR, "Invalid ring size %u. GVE ring size must be a power of 2.",
			    nb_desc);
		return -EINVAL;
	}

	/* Free memory if needed. */
	if (dev->data->tx_queues[queue_id]) {
		gve_tx_queue_release(dev, queue_id);
		dev->data->tx_queues[queue_id] = NULL;
	}

	/* Allocate the TX queue data structure. */
	txq = rte_zmalloc_socket("gve txq", sizeof(struct gve_tx_queue),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (!txq) {
		PMD_DRV_LOG(ERR, "Failed to allocate memory for tx queue structure");
		err = -ENOMEM;
		goto err_txq;
	}

	free_thresh = conf->tx_free_thresh ? conf->tx_free_thresh : GVE_DEFAULT_TX_FREE_THRESH;
	if (free_thresh >= nb_desc - 3) {
		PMD_DRV_LOG(ERR, "tx_free_thresh (%u) must be less than nb_desc (%u) minus 3.",
			    free_thresh, nb_desc);
		err = -EINVAL;
		goto err_txq;
	}

	txq->nb_tx_desc = nb_desc;
	txq->free_thresh = free_thresh;
	txq->queue_id = queue_id;
	txq->port_id = dev->data->port_id;
	txq->ntfy_id = queue_id;
	txq->is_gqi_qpl = hw->queue_format == GVE_GQI_QPL_FORMAT;
	txq->hw = hw;
	txq->ntfy_addr = &hw->db_bar2[rte_be_to_cpu_32(hw->irq_dbs[txq->ntfy_id].id)];

	/* Allocate software ring */
	txq->sw_ring = rte_zmalloc_socket("gve tx sw ring",
					  sizeof(struct rte_mbuf *) * nb_desc,
					  RTE_CACHE_LINE_SIZE, socket_id);
	if (!txq->sw_ring) {
		PMD_DRV_LOG(ERR, "Failed to allocate memory for SW TX ring");
		err = -ENOMEM;
		goto err_txq;
	}

	mz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_id,
				      nb_desc * sizeof(union gve_tx_desc),
				      PAGE_SIZE, socket_id);
	if (mz == NULL) {
		PMD_DRV_LOG(ERR, "Failed to reserve DMA memory for TX");
		err = -ENOMEM;
		goto err_sw_ring;
	}
	txq->tx_desc_ring = (union gve_tx_desc *)mz->addr;
	txq->tx_ring_phys_addr = mz->iova;
	txq->mz = mz;

	/* QPL-specific allocations. */
	if (txq->is_gqi_qpl) {
		txq->iov_ring = rte_zmalloc_socket("gve tx iov ring",
						   sizeof(struct gve_tx_iovec) * nb_desc,
						   RTE_CACHE_LINE_SIZE, socket_id);
		if (!txq->iov_ring) {
			PMD_DRV_LOG(ERR, "Failed to allocate memory for TX IOV ring");
			err = -ENOMEM;
			goto err_tx_ring;
		}

		txq->qpl = gve_setup_queue_page_list(hw, queue_id, false,
						     hw->tx_pages_per_qpl);
		if (!txq->qpl) {
			err = -ENOMEM;
			PMD_DRV_LOG(ERR, "Failed to alloc tx qpl for queue %hu.",
				    queue_id);
			goto err_iov_ring;
		}
	}

	mz = rte_eth_dma_zone_reserve(dev, "txq_res", queue_id, sizeof(struct gve_queue_resources),
				      PAGE_SIZE, socket_id);
	if (mz == NULL) {
		PMD_DRV_LOG(ERR, "Failed to reserve DMA memory for TX resource");
		err = -ENOMEM;
		goto err_qpl;
	}
	txq->qres = (struct gve_queue_resources *)mz->addr;
	txq->qres_mz = mz;

	gve_reset_txq(txq);

	dev->data->tx_queues[queue_id] = txq;

	return 0;
err_qpl:
	if (txq->is_gqi_qpl) {
		gve_teardown_queue_page_list(hw, txq->qpl);
		txq->qpl = NULL;
	}
err_iov_ring:
	if (txq->is_gqi_qpl)
		rte_free(txq->iov_ring);
err_tx_ring:
	rte_memzone_free(txq->mz);
err_sw_ring:
	rte_free(txq->sw_ring);
err_txq:
	rte_free(txq);
	return err;
}

int
gve_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
	struct gve_priv *hw = dev->data->dev_private;
	struct gve_tx_queue *txq;

	if (tx_queue_id >= dev->data->nb_tx_queues)
		return -EINVAL;

	txq = dev->data->tx_queues[tx_queue_id];

	txq->qtx_tail = &hw->db_bar2[rte_be_to_cpu_32(txq->qres->db_index)];
	txq->qtx_head =
		&hw->cnt_array[rte_be_to_cpu_32(txq->qres->counter_index)];

	rte_write32(rte_cpu_to_be_32(GVE_IRQ_MASK), txq->ntfy_addr);

	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;
}

int
gve_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
	struct gve_tx_queue *txq;

	if (tx_queue_id >= dev->data->nb_tx_queues)
		return -EINVAL;

	txq = dev->data->tx_queues[tx_queue_id];
	gve_release_txq_mbufs(txq);
	gve_reset_txq(txq);

	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

	return 0;
}

void
gve_stop_tx_queues(struct rte_eth_dev *dev)
{
	struct gve_priv *hw = dev->data->dev_private;
	uint16_t i;
	int err;

	if (!gve_is_gqi(hw))
		return gve_stop_tx_queues_dqo(dev);

	err = gve_adminq_destroy_tx_queues(hw, dev->data->nb_tx_queues);
	if (err != 0)
		PMD_DRV_LOG(WARNING, "Failed to destroy txqs");

	for (i = 0; i < dev->data->nb_tx_queues; i++)
		if (gve_tx_queue_stop(dev, i) != 0)
			PMD_DRV_LOG(WARNING, "Failed to stop Tx queue %d", i);
}

void
gve_set_tx_function(struct rte_eth_dev *dev)
{
	dev->tx_pkt_burst = gve_tx_burst;
}
739