xref: /dpdk/drivers/net/ntnic/ntnic_ethdev.c (revision 25a2a0dc3de31ca0a6fbc9371cf3dd85dfd74b07)
1 /*
2  * SPDX-License-Identifier: BSD-3-Clause
3  * Copyright(c) 2023 Napatech A/S
4  */
5 
6 #include <stdint.h>
7 
8 #include <rte_eal.h>
9 #include <rte_dev.h>
10 #include <rte_vfio.h>
11 #include <rte_ethdev.h>
12 #include <rte_bus_pci.h>
13 #include <ethdev_pci.h>
14 #include <rte_kvargs.h>
15 
16 #include <sys/queue.h>
17 
18 #include "ntlog.h"
19 #include "ntdrv_4ga.h"
20 #include "ntos_drv.h"
21 #include "ntos_system.h"
22 #include "nthw_fpga_instances.h"
23 #include "ntnic_vfio.h"
24 #include "ntnic_mod_reg.h"
25 #include "nt_util.h"
26 
27 #define HW_MAX_PKT_LEN (10000)
28 #define MAX_MTU (HW_MAX_PKT_LEN - RTE_ETHER_HDR_LEN - RTE_ETHER_CRC_LEN)
29 
30 #define EXCEPTION_PATH_HID 0
31 
32 #define MAX_TOTAL_QUEUES       128
33 
34 #define SG_NB_HW_RX_DESCRIPTORS 1024
35 #define SG_NB_HW_TX_DESCRIPTORS 1024
36 #define SG_HW_RX_PKT_BUFFER_SIZE (1024 << 1)
37 #define SG_HW_TX_PKT_BUFFER_SIZE (1024 << 1)
38 
39 /* Max RSS queues */
40 #define MAX_QUEUES 125
41 
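/*
 * Number of TX virtqueue buffers needed for a payload of _data_size_ bytes:
 * 1 when the payload plus the SG header fits in a single SG_HW_TX_PKT_BUFFER_SIZE
 * buffer, otherwise the ceiling of (payload + header) / buffer size.
 */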
42 #define NUM_VQ_SEGS(_data_size_)                                                                  \
43 	({                                                                                        \
44 		size_t _size = (_data_size_);                                                     \
45 		size_t _segment_count = ((_size + SG_HDR_SIZE) > SG_HW_TX_PKT_BUFFER_SIZE)        \
46 			? (((_size + SG_HDR_SIZE) + SG_HW_TX_PKT_BUFFER_SIZE - 1) /               \
47 			   SG_HW_TX_PKT_BUFFER_SIZE)                                              \
48 			: 1;                                                                      \
49 		_segment_count;                                                                   \
50 	})
51 
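/*
 * Map a relative TX segment index to an absolute descriptor index, wrapping at
 * SG_NB_HW_TX_DESCRIPTORS. VIRTQ_DESCR_IDX relies on a local first_vq_descr_idx
 * variable in the calling scope (see eth_dev_tx_scg()).
 */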
52 #define VIRTQ_DESCR_IDX(_tx_pkt_idx_)                                                             \
53 	(((_tx_pkt_idx_) + first_vq_descr_idx) % SG_NB_HW_TX_DESCRIPTORS)
54 
55 #define VIRTQ_DESCR_IDX_NEXT(_vq_descr_idx_) (((_vq_descr_idx_) + 1) % SG_NB_HW_TX_DESCRIPTORS)
56 
57 #define ONE_G_SIZE  0x40000000
58 #define ONE_G_MASK  (ONE_G_SIZE - 1)
59 
60 #define MAX_RX_PACKETS   128
61 #define MAX_TX_PACKETS   128
62 
63 int kill_pmd;
64 
65 #define ETH_DEV_NTNIC_HELP_ARG "help"
66 #define ETH_DEV_NTHW_RXQUEUES_ARG "rxqs"
67 #define ETH_DEV_NTHW_TXQUEUES_ARG "txqs"
68 
69 static const char *const valid_arguments[] = {
70 	ETH_DEV_NTNIC_HELP_ARG,
71 	ETH_DEV_NTHW_RXQUEUES_ARG,
72 	ETH_DEV_NTHW_TXQUEUES_ARG,
73 	NULL,
74 };
75 
76 
77 static const struct rte_pci_id nthw_pci_id_map[] = {
78 	{ RTE_PCI_DEVICE(NT_HW_PCI_VENDOR_ID, NT_HW_PCI_DEVICE_ID_NT200A02) },
79 	{
80 		.vendor_id = 0,
81 	},	/* sentinel */
82 };
83 
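/* Scatter-gather (virtqueue) ops; resolved via get_sg_ops() in nthw_pci_dev_init(), NULL until then */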
84 static const struct sg_ops_s *sg_ops;
85 
86 static rte_spinlock_t hwlock = RTE_SPINLOCK_INITIALIZER;
87 
88 /*
89  * Store and get adapter info
90  */
91 
92 static struct drv_s *_g_p_drv[NUM_ADAPTER_MAX] = { NULL };
93 
94 static void
95 store_pdrv(struct drv_s *p_drv)
96 {
97 	if (p_drv->adapter_no >= NUM_ADAPTER_MAX) {
98 		NT_LOG(ERR, NTNIC,
99 			"Internal error: adapter number %u out of range. Max number of adapters: %u",
100 			p_drv->adapter_no, NUM_ADAPTER_MAX);
101 		return;
102 	}
103 
104 	if (_g_p_drv[p_drv->adapter_no] != 0) {
105 		NT_LOG(WRN, NTNIC,
106 			"Overwriting adapter structure for PCI " PCIIDENT_PRINT_STR
107 			" with adapter structure for PCI " PCIIDENT_PRINT_STR,
108 			PCIIDENT_TO_DOMAIN(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
109 			PCIIDENT_TO_BUSNR(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
110 			PCIIDENT_TO_DEVNR(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
111 			PCIIDENT_TO_FUNCNR(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
112 			PCIIDENT_TO_DOMAIN(p_drv->ntdrv.pciident),
113 			PCIIDENT_TO_BUSNR(p_drv->ntdrv.pciident),
114 			PCIIDENT_TO_DEVNR(p_drv->ntdrv.pciident),
115 			PCIIDENT_TO_FUNCNR(p_drv->ntdrv.pciident));
116 	}
117 
118 	rte_spinlock_lock(&hwlock);
119 	_g_p_drv[p_drv->adapter_no] = p_drv;
120 	rte_spinlock_unlock(&hwlock);
121 }
122 
123 static struct drv_s *
124 get_pdrv_from_pci(struct rte_pci_addr addr)
125 {
126 	int i;
127 	struct drv_s *p_drv = NULL;
128 	rte_spinlock_lock(&hwlock);
129 
130 	for (i = 0; i < NUM_ADAPTER_MAX; i++) {
131 		if (_g_p_drv[i]) {
132 			if (PCIIDENT_TO_DOMAIN(_g_p_drv[i]->ntdrv.pciident) == addr.domain &&
133 				PCIIDENT_TO_BUSNR(_g_p_drv[i]->ntdrv.pciident) == addr.bus) {
134 				p_drv = _g_p_drv[i];
135 				break;
136 			}
137 		}
138 	}
139 
140 	rte_spinlock_unlock(&hwlock);
141 	return p_drv;
142 }
143 
144 static int
145 eth_link_update(struct rte_eth_dev *eth_dev, int wait_to_complete __rte_unused)
146 {
147 	const struct port_ops *port_ops = get_port_ops();
148 
149 	if (port_ops == NULL) {
150 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
151 		return -1;
152 	}
153 
154 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
155 
156 	const int n_intf_no = internals->n_intf_no;
157 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
158 
159 	if (eth_dev->data->dev_started) {
160 		const bool port_link_status = port_ops->get_link_status(p_adapter_info, n_intf_no);
161 		eth_dev->data->dev_link.link_status =
162 			port_link_status ? RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN;
163 
164 		nt_link_speed_t port_link_speed =
165 			port_ops->get_link_speed(p_adapter_info, n_intf_no);
166 		eth_dev->data->dev_link.link_speed =
167 			nt_link_speed_to_eth_speed_num(port_link_speed);
168 
169 		nt_link_duplex_t nt_link_duplex =
170 			port_ops->get_link_duplex(p_adapter_info, n_intf_no);
171 		eth_dev->data->dev_link.link_duplex = nt_link_duplex_to_eth_duplex(nt_link_duplex);
172 
173 	} else {
174 		eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
175 		eth_dev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
176 		eth_dev->data->dev_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
177 	}
178 
179 	return 0;
180 }
181 
182 static int
183 eth_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *dev_info)
184 {
185 	const struct port_ops *port_ops = get_port_ops();
186 
187 	if (port_ops == NULL) {
188 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
189 		return -1;
190 	}
191 
192 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
193 
194 	const int n_intf_no = internals->n_intf_no;
195 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
196 
197 	dev_info->driver_name = internals->name;
198 	dev_info->max_mac_addrs = NUM_MAC_ADDRS_PER_PORT;
199 	dev_info->max_rx_pktlen = HW_MAX_PKT_LEN;
200 	dev_info->max_mtu = MAX_MTU;
201 
202 	if (internals->p_drv) {
203 		dev_info->max_rx_queues = internals->nb_rx_queues;
204 		dev_info->max_tx_queues = internals->nb_tx_queues;
205 
206 		dev_info->min_rx_bufsize = 64;
207 
208 		const uint32_t nt_port_speed_capa =
209 			port_ops->get_link_speed_capabilities(p_adapter_info, n_intf_no);
210 		dev_info->speed_capa = nt_link_speed_capa_to_eth_speed_capa(nt_port_speed_capa);
211 	}
212 
213 	return 0;
214 }
215 
216 static __rte_always_inline int copy_virtqueue_to_mbuf(struct rte_mbuf *mbuf,
217 	struct rte_mempool *mb_pool,
218 	struct nthw_received_packets *hw_recv,
219 	int max_segs,
220 	uint16_t data_len)
221 {
222 	int src_pkt = 0;
223 	/*
224 	 * 1. virtqueue packets may be segmented
225 	 * 2. the mbuf size may be too small and may need to be segmented
226 	 */
227 	char *data = (char *)hw_recv->addr + SG_HDR_SIZE;
228 	char *dst = (char *)mbuf->buf_addr + RTE_PKTMBUF_HEADROOM;
229 
230 	/* set packet length */
231 	mbuf->pkt_len = data_len - SG_HDR_SIZE;
232 
233 	int remain = mbuf->pkt_len;
234 	/* First cpy_size is without header */
235 	int cpy_size = (data_len > SG_HW_RX_PKT_BUFFER_SIZE)
236 		? SG_HW_RX_PKT_BUFFER_SIZE - SG_HDR_SIZE
237 		: remain;
238 
239 	struct rte_mbuf *m = mbuf;	/* if mbuf segmentation is needed */
240 
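	/*
	 * Copy each virtqueue segment of the packet into the mbuf; when the
	 * current mbuf runs out of tailroom, chain a new mbuf from the same
	 * mempool and continue there.
	 */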
241 	while (++src_pkt <= max_segs) {
242 		/* keep track of space in dst */
243 		int cpto_size = rte_pktmbuf_tailroom(m);
244 
245 		if (cpy_size > cpto_size) {
246 			int new_cpy_size = cpto_size;
247 
248 			rte_memcpy((void *)dst, (void *)data, new_cpy_size);
249 			m->data_len += new_cpy_size;
250 			remain -= new_cpy_size;
251 			cpy_size -= new_cpy_size;
252 
253 			data += new_cpy_size;
254 
255 			/*
256 			 * loop if remaining data from this virtqueue seg
257 			 * cannot fit in one extra mbuf
258 			 */
259 			do {
260 				m->next = rte_pktmbuf_alloc(mb_pool);
261 
262 				if (unlikely(!m->next))
263 					return -1;
264 
265 				m = m->next;
266 
267 				/* Headroom is not needed in chained mbufs */
268 				rte_pktmbuf_prepend(m, rte_pktmbuf_headroom(m));
269 				dst = (char *)m->buf_addr;
270 				m->data_len = 0;
271 				m->pkt_len = 0;
272 
273 				cpto_size = rte_pktmbuf_tailroom(m);
274 
275 				int actual_cpy_size =
276 					(cpy_size > cpto_size) ? cpto_size : cpy_size;
277 
278 				rte_memcpy((void *)dst, (void *)data, actual_cpy_size);
279 				m->pkt_len += actual_cpy_size;
280 				m->data_len += actual_cpy_size;
281 
282 				remain -= actual_cpy_size;
283 				cpy_size -= actual_cpy_size;
284 
285 				data += actual_cpy_size;
286 
287 				mbuf->nb_segs++;
288 
289 			} while (cpy_size && remain);
290 
291 		} else {
292 			/* all data from this virtqueue segment can fit in current mbuf */
293 			rte_memcpy((void *)dst, (void *)data, cpy_size);
294 			m->data_len += cpy_size;
295 
296 			if (mbuf->nb_segs > 1)
297 				m->pkt_len += cpy_size;
298 
299 			remain -= cpy_size;
300 		}
301 
302 		/* packet complete - all data from current virtqueue packet has been copied */
303 		if (remain == 0)
304 			break;
305 
306 		/* increment dst to data end */
307 		dst = rte_pktmbuf_mtod_offset(m, char *, m->data_len);
308 		/* prepare for next virtqueue segment */
309 		data = (char *)hw_recv[src_pkt].addr;	/* following packets are full data */
310 
311 		cpy_size = (remain > SG_HW_RX_PKT_BUFFER_SIZE) ? SG_HW_RX_PKT_BUFFER_SIZE : remain;
312 	}
313 
314 	if (src_pkt > max_segs) {
315 		NT_LOG(ERR, NTNIC,
316 			"Did not receive the correct number of segments for a whole packet");
317 		return -1;
318 	}
319 
320 	return src_pkt;
321 }
322 
323 static uint16_t eth_dev_rx_scg(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
324 {
325 	unsigned int i;
326 	struct rte_mbuf *mbuf;
327 	struct ntnic_rx_queue *rx_q = queue;
328 	uint16_t num_rx = 0;
329 
330 	struct nthw_received_packets hw_recv[MAX_RX_PACKETS];
331 
332 	if (kill_pmd)
333 		return 0;
334 
335 	if (unlikely(nb_pkts == 0))
336 		return 0;
337 
338 	if (nb_pkts > MAX_RX_PACKETS)
339 		nb_pkts = MAX_RX_PACKETS;
340 
341 	uint16_t whole_pkts = 0;
342 	uint16_t hw_recv_pkt_segs = 0;
343 
344 	if (sg_ops != NULL) {
345 		hw_recv_pkt_segs =
346 			sg_ops->nthw_get_rx_packets(rx_q->vq, nb_pkts, hw_recv, &whole_pkts);
347 
348 		if (!hw_recv_pkt_segs)
349 			return 0;
350 	}
351 
352 	nb_pkts = whole_pkts;
353 
354 	int src_pkt = 0;/* from 0 to hw_recv_pkt_segs */
355 
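	/*
	 * Build one mbuf (or mbuf chain) per whole packet; src_pkt walks the
	 * hw_recv[] segment array while i indexes the output mbuf array.
	 */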
356 	for (i = 0; i < nb_pkts; i++) {
357 		bufs[i] = rte_pktmbuf_alloc(rx_q->mb_pool);
358 
359 		if (!bufs[i]) {
360 			NT_LOG(ERR, NTNIC, "ERROR - no more mbuf buffers in mempool");
361 			goto err_exit;
362 		}
363 
364 		mbuf = bufs[i];
365 
366 		struct _pkt_hdr_rx *phdr = (struct _pkt_hdr_rx *)hw_recv[src_pkt].addr;
367 
368 		if (phdr->cap_len < SG_HDR_SIZE) {
369 			NT_LOG(ERR, NTNIC,
370 				"Packet shorter than SG header received - dropping packet");
371 			rte_pktmbuf_free(mbuf);
372 			goto err_exit;
373 		}
374 
375 		{
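			/*
			 * Fast path: the whole packet fits in one virtqueue segment
			 * and its payload fits in the tailroom of a single mbuf.
			 */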
376 			if (phdr->cap_len <= SG_HW_RX_PKT_BUFFER_SIZE &&
377 				(phdr->cap_len - SG_HDR_SIZE) <= rte_pktmbuf_tailroom(mbuf)) {
378 				mbuf->data_len = phdr->cap_len - SG_HDR_SIZE;
379 				rte_memcpy(rte_pktmbuf_mtod(mbuf, char *),
380 					(char *)hw_recv[src_pkt].addr + SG_HDR_SIZE,
381 					mbuf->data_len);
382 
383 				mbuf->pkt_len = mbuf->data_len;
384 				src_pkt++;
385 
386 			} else {
387 				int cpy_segs = copy_virtqueue_to_mbuf(mbuf, rx_q->mb_pool,
388 						&hw_recv[src_pkt],
389 						hw_recv_pkt_segs - src_pkt,
390 						phdr->cap_len);
391 
392 				if (cpy_segs < 0) {
393 					/* Error */
394 					rte_pktmbuf_free(mbuf);
395 					goto err_exit;
396 				}
397 
398 				src_pkt += cpy_segs;
399 			}
400 
401 			num_rx++;
402 
403 			mbuf->ol_flags &= ~(RTE_MBUF_F_RX_FDIR_ID | RTE_MBUF_F_RX_FDIR);
404 			mbuf->port = (uint16_t)-1;
405 		}
406 	}
407 
408 err_exit:
409 
410 	if (sg_ops != NULL)
411 		sg_ops->nthw_release_rx_packets(rx_q->vq, hw_recv_pkt_segs);
412 
413 	return num_rx;
414 }
415 
416 static int copy_mbuf_to_virtqueue(struct nthw_cvirtq_desc *cvq_desc,
417 	uint16_t vq_descr_idx,
418 	struct nthw_memory_descriptor *vq_bufs,
419 	int max_segs,
420 	struct rte_mbuf *mbuf)
421 {
422 	/*
423 	 * 1. mbuf packet may be segmented
424 	 * 2. the virtqueue buffer size may be too small and may need to be segmented
425 	 */
426 
427 	char *data = rte_pktmbuf_mtod(mbuf, char *);
428 	char *dst = (char *)vq_bufs[vq_descr_idx].virt_addr + SG_HDR_SIZE;
429 
430 	int remain = mbuf->pkt_len;
431 	int cpy_size = mbuf->data_len;
432 
433 	struct rte_mbuf *m = mbuf;
434 	int cpto_size = SG_HW_TX_PKT_BUFFER_SIZE - SG_HDR_SIZE;
435 
436 	cvq_desc->b[vq_descr_idx].len = SG_HDR_SIZE;
437 
438 	int cur_seg_num = 0;	/* start from 0 */
439 
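	/*
	 * Walk the mbuf chain and copy each segment into virtqueue buffers,
	 * chaining extra descriptors with VIRTQ_DESC_F_NEXT when a segment does
	 * not fit in the current buffer.
	 */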
440 	while (m) {
441 		/* Can all data in current src segment be in current dest segment */
442 		if (cpy_size > cpto_size) {
443 			int new_cpy_size = cpto_size;
444 
445 			rte_memcpy((void *)dst, (void *)data, new_cpy_size);
446 
447 			cvq_desc->b[vq_descr_idx].len += new_cpy_size;
448 
449 			remain -= new_cpy_size;
450 			cpy_size -= new_cpy_size;
451 
452 			data += new_cpy_size;
453 
454 			/*
455 			 * Loop while remaining data from this mbuf segment cannot fit in
456 			 * one extra virtqueue buffer
457 			 */
458 			do {
459 				vq_add_flags(cvq_desc, vq_descr_idx, VIRTQ_DESC_F_NEXT);
460 
461 				int next_vq_descr_idx = VIRTQ_DESCR_IDX_NEXT(vq_descr_idx);
462 
463 				vq_set_next(cvq_desc, vq_descr_idx, next_vq_descr_idx);
464 
465 				vq_descr_idx = next_vq_descr_idx;
466 
467 				vq_set_flags(cvq_desc, vq_descr_idx, 0);
468 				vq_set_next(cvq_desc, vq_descr_idx, 0);
469 
470 				if (++cur_seg_num > max_segs)
471 					break;
472 
473 				dst = (char *)vq_bufs[vq_descr_idx].virt_addr;
474 				cpto_size = SG_HW_TX_PKT_BUFFER_SIZE;
475 
476 				int actual_cpy_size =
477 					(cpy_size > cpto_size) ? cpto_size : cpy_size;
478 				rte_memcpy((void *)dst, (void *)data, actual_cpy_size);
479 
480 				cvq_desc->b[vq_descr_idx].len = actual_cpy_size;
481 
482 				remain -= actual_cpy_size;
483 				cpy_size -= actual_cpy_size;
484 				cpto_size -= actual_cpy_size;
485 
486 				data += actual_cpy_size;
487 
488 			} while (cpy_size && remain);
489 
490 		} else {
491 			/* All data from this segment can fit in current virtqueue buffer */
492 			rte_memcpy((void *)dst, (void *)data, cpy_size);
493 
494 			cvq_desc->b[vq_descr_idx].len += cpy_size;
495 
496 			remain -= cpy_size;
497 			cpto_size -= cpy_size;
498 		}
499 
500 		/* Packet complete - all segments from current mbuf have been copied */
501 		if (remain == 0)
502 			break;
503 
504 		/* increment dst to data end */
505 		dst = (char *)vq_bufs[vq_descr_idx].virt_addr + cvq_desc->b[vq_descr_idx].len;
506 
507 		m = m->next;
508 
509 		if (!m) {
510 			NT_LOG(ERR, NTNIC, "ERROR: invalid packet size");
511 			break;
512 		}
513 
514 		/* Prepare for next mbuf segment */
515 		data = rte_pktmbuf_mtod(m, char *);
516 		cpy_size = m->data_len;
517 	}
518 
519 	cur_seg_num++;
520 
521 	if (cur_seg_num > max_segs) {
522 		NT_LOG(ERR, NTNIC,
523 			"Did not get the correct number of segments for a whole packet");
524 		return -1;
525 	}
526 
527 	return cur_seg_num;
528 }
529 
530 static uint16_t eth_dev_tx_scg(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
531 {
532 	uint16_t pkt;
533 	uint16_t first_vq_descr_idx = 0;
534 
535 	struct nthw_cvirtq_desc cvq_desc;
536 
537 	struct nthw_memory_descriptor *vq_bufs;
538 
539 	struct ntnic_tx_queue *tx_q = queue;
540 
541 	int nb_segs = 0, i;
542 	int pkts_sent = 0;
543 	uint16_t nb_segs_arr[MAX_TX_PACKETS];
544 
545 	if (kill_pmd)
546 		return 0;
547 
548 	if (nb_pkts > MAX_TX_PACKETS)
549 		nb_pkts = MAX_TX_PACKETS;
550 
551 	/*
552 	 * count all segments needed to contain all packets in vq buffers
553 	 */
554 	for (i = 0; i < nb_pkts; i++) {
555 		/* build the num segments array for segmentation control and release function */
556 		int vq_segs = NUM_VQ_SEGS(bufs[i]->pkt_len);
557 		nb_segs_arr[i] = vq_segs;
558 		nb_segs += vq_segs;
559 	}
560 
561 	if (!nb_segs)
562 		goto exit_out;
563 
564 	if (sg_ops == NULL)
565 		goto exit_out;
566 
567 	int got_nb_segs = sg_ops->nthw_get_tx_packets(tx_q->vq, nb_segs, &first_vq_descr_idx,
568 			&cvq_desc /*&vq_descr,*/, &vq_bufs);
569 
570 	if (!got_nb_segs)
571 		goto exit_out;
572 
573 	/*
574 	 * we may get less vq buffers than we have asked for
575 	 * calculate last whole packet that can fit into what
576 	 * we have got
577 	 */
578 	while (got_nb_segs < nb_segs) {
579 		if (!--nb_pkts)
580 			goto exit_out;
581 
582 		nb_segs -= NUM_VQ_SEGS(bufs[nb_pkts]->pkt_len);
583 
584 		if (nb_segs <= 0)
585 			goto exit_out;
586 	}
587 
588 	/*
589 	 * nb_pkts & nb_segs, got it all, ready to copy
590 	 */
591 	int seg_idx = 0;
592 	int last_seg_idx = seg_idx;
593 
594 	for (pkt = 0; pkt < nb_pkts; ++pkt) {
595 		uint16_t vq_descr_idx = VIRTQ_DESCR_IDX(seg_idx);
596 
597 		vq_set_flags(&cvq_desc, vq_descr_idx, 0);
598 		vq_set_next(&cvq_desc, vq_descr_idx, 0);
599 
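		/*
		 * Single-segment packet that fits in one virtqueue buffer:
		 * copy the data directly after the SG header.
		 */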
600 		if (bufs[pkt]->nb_segs == 1 && nb_segs_arr[pkt] == 1) {
601 			rte_memcpy((void *)((char *)vq_bufs[vq_descr_idx].virt_addr + SG_HDR_SIZE),
602 				rte_pktmbuf_mtod(bufs[pkt], void *), bufs[pkt]->pkt_len);
603 
604 			cvq_desc.b[vq_descr_idx].len = bufs[pkt]->pkt_len + SG_HDR_SIZE;
605 
606 			seg_idx++;
607 
608 		} else {
609 			int cpy_segs = copy_mbuf_to_virtqueue(&cvq_desc, vq_descr_idx, vq_bufs,
610 					nb_segs - last_seg_idx, bufs[pkt]);
611 
612 			if (cpy_segs < 0)
613 				break;
614 
615 			seg_idx += cpy_segs;
616 		}
617 
618 		last_seg_idx = seg_idx;
619 		rte_pktmbuf_free(bufs[pkt]);
620 		pkts_sent++;
621 	}
622 
623 exit_out:
624 
625 	if (sg_ops != NULL) {
626 		if (pkts_sent)
627 			sg_ops->nthw_release_tx_packets(tx_q->vq, pkts_sent, nb_segs_arr);
628 	}
629 
630 	return pkts_sent;
631 }
632 
633 static int allocate_hw_virtio_queues(struct rte_eth_dev *eth_dev, int vf_num, struct hwq_s *hwq,
634 	int num_descr, int buf_size)
635 {
636 	int i, res;
637 	uint32_t size;
638 	uint64_t iova_addr;
639 
640 	NT_LOG(DBG, NTNIC, "***** Configure IOMMU for HW queues on VF %i *****", vf_num);
641 
642 	/* 1MB to hold all combined descr rings, plus room for all packet buffers */
643 	uint64_t tot_alloc_size = 0x100000 + buf_size * num_descr;
644 
645 	void *virt =
646 		rte_malloc_socket("VirtQDescr", tot_alloc_size, nt_util_align_size(tot_alloc_size),
647 			eth_dev->data->numa_node);
648 
649 	if (!virt)
650 		return -1;
651 
652 	uint64_t gp_offset = (uint64_t)virt & ONE_G_MASK;
653 	rte_iova_t hpa = rte_malloc_virt2iova(virt);
654 
655 	NT_LOG(DBG, NTNIC, "Allocated virtio descr rings : virt "
656 		"%p [0x%" PRIX64 "],hpa %" PRIX64 " [0x%" PRIX64 "]",
657 		virt, gp_offset, hpa, hpa & ONE_G_MASK);
658 
659 	/*
660 	 * Same offset on both HPA and IOVA
661 	 * Make sure 1G boundary is never crossed
662 	 */
663 	if (((hpa & ONE_G_MASK) != gp_offset) ||
664 		(((uint64_t)virt + tot_alloc_size) & ~ONE_G_MASK) !=
665 		((uint64_t)virt & ~ONE_G_MASK)) {
666 		NT_LOG(ERR, NTNIC, "*********************************************************");
667 		NT_LOG(ERR, NTNIC, "ERROR, no optimal IOMMU mapping available hpa: %016" PRIX64
668 			"(%016" PRIX64 "), gp_offset: %016" PRIX64 " size: %" PRIu64,
669 			hpa, hpa & ONE_G_MASK, gp_offset, tot_alloc_size);
670 		NT_LOG(ERR, NTNIC, "*********************************************************");
671 
672 		rte_free(virt);
673 
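		/*
		 * Fallback when the optimal 1G mapping is not possible: allocate the
		 * control area and the packet buffers separately and VFIO-map each
		 * region on its own.
		 */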
674 		/* Just allocate 1MB to hold all combined descr rings */
675 		size = 0x100000;
676 		void *virt = rte_malloc_socket("VirtQDescr", size, 4096, eth_dev->data->numa_node);
677 
678 		if (!virt)
679 			return -1;
680 
681 		res = nt_vfio_dma_map(vf_num, virt, &iova_addr, size);
682 
683 		NT_LOG(DBG, NTNIC, "VFIO MMAP res %i, vf_num %i", res, vf_num);
684 
685 		if (res != 0)
686 			return -1;
687 
688 		hwq->vf_num = vf_num;
689 		hwq->virt_queues_ctrl.virt_addr = virt;
690 		hwq->virt_queues_ctrl.phys_addr = (void *)iova_addr;
691 		hwq->virt_queues_ctrl.len = size;
692 
693 		NT_LOG(DBG, NTNIC,
694 			"Allocated for virtio descr rings combined 1MB : %p, IOVA %016" PRIX64 "",
695 			virt, iova_addr);
696 
697 		size = num_descr * sizeof(struct nthw_memory_descriptor);
698 		hwq->pkt_buffers =
699 			rte_zmalloc_socket("rx_pkt_buffers", size, 64, eth_dev->data->numa_node);
700 
701 		if (!hwq->pkt_buffers) {
702 			NT_LOG(ERR, NTNIC,
703 				"Failed to allocate buffer array for hw-queue %p, total size %i, elements %i",
704 				hwq->pkt_buffers, size, num_descr);
705 			rte_free(virt);
706 			return -1;
707 		}
708 
709 		size = buf_size * num_descr;
710 		void *virt_addr =
711 			rte_malloc_socket("pkt_buffer_pkts", size, 4096, eth_dev->data->numa_node);
712 
713 		if (!virt_addr) {
714 			NT_LOG(ERR, NTNIC,
715 				"Failed to allocate packet buffers for hw-queue %p, buf size %i, elements %i",
716 				hwq->pkt_buffers, buf_size, num_descr);
717 			rte_free(hwq->pkt_buffers);
718 			rte_free(virt);
719 			return -1;
720 		}
721 
722 		res = nt_vfio_dma_map(vf_num, virt_addr, &iova_addr, size);
723 
724 		NT_LOG(DBG, NTNIC,
725 			"VFIO MMAP res %i, virt %p, iova %016" PRIX64 ", vf_num %i, num pkt bufs %i, tot size %i",
726 			res, virt_addr, iova_addr, vf_num, num_descr, size);
727 
728 		if (res != 0)
729 			return -1;
730 
731 		for (i = 0; i < num_descr; i++) {
732 			hwq->pkt_buffers[i].virt_addr =
733 				(void *)((char *)virt_addr + ((uint64_t)(i) * buf_size));
734 			hwq->pkt_buffers[i].phys_addr =
735 				(void *)(iova_addr + ((uint64_t)(i) * buf_size));
736 			hwq->pkt_buffers[i].len = buf_size;
737 		}
738 
739 		return 0;
740 	}	/* End of: no optimal IOMMU mapping available */
741 
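	/*
	 * Optimal case: one VFIO DMA mapping covers the whole allocation; the
	 * first 1MB holds the virtqueue control area and the packet buffers
	 * follow at the 1MB offset.
	 */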
742 	res = nt_vfio_dma_map(vf_num, virt, &iova_addr, ONE_G_SIZE);
743 
744 	if (res != 0) {
745 		NT_LOG(ERR, NTNIC, "VFIO MMAP FAILED! res %i, vf_num %i", res, vf_num);
746 		return -1;
747 	}
748 
749 	hwq->vf_num = vf_num;
750 	hwq->virt_queues_ctrl.virt_addr = virt;
751 	hwq->virt_queues_ctrl.phys_addr = (void *)(iova_addr);
752 	hwq->virt_queues_ctrl.len = 0x100000;
753 	iova_addr += 0x100000;
754 
755 	NT_LOG(DBG, NTNIC,
756 		"VFIO MMAP: virt_addr=%p phys_addr=%p size=%" PRIX32 " hpa=%" PRIX64 "",
757 		hwq->virt_queues_ctrl.virt_addr, hwq->virt_queues_ctrl.phys_addr,
758 		hwq->virt_queues_ctrl.len, rte_malloc_virt2iova(hwq->virt_queues_ctrl.virt_addr));
759 
760 	size = num_descr * sizeof(struct nthw_memory_descriptor);
761 	hwq->pkt_buffers =
762 		rte_zmalloc_socket("rx_pkt_buffers", size, 64, eth_dev->data->numa_node);
763 
764 	if (!hwq->pkt_buffers) {
765 		NT_LOG(ERR, NTNIC,
766 			"Failed to allocate buffer array for hw-queue %p, total size %i, elements %i",
767 			hwq->pkt_buffers, size, num_descr);
768 		rte_free(virt);
769 		return -1;
770 	}
771 
772 	void *virt_addr = (void *)((uint64_t)virt + 0x100000);
773 
774 	for (i = 0; i < num_descr; i++) {
775 		hwq->pkt_buffers[i].virt_addr =
776 			(void *)((char *)virt_addr + ((uint64_t)(i) * buf_size));
777 		hwq->pkt_buffers[i].phys_addr = (void *)(iova_addr + ((uint64_t)(i) * buf_size));
778 		hwq->pkt_buffers[i].len = buf_size;
779 	}
780 
781 	return 0;
782 }
783 
784 static void release_hw_virtio_queues(struct hwq_s *hwq)
785 {
786 	if (!hwq || hwq->vf_num == 0)
787 		return;
788 
789 	hwq->vf_num = 0;
790 }
791 
792 static int deallocate_hw_virtio_queues(struct hwq_s *hwq)
793 {
794 	int vf_num = hwq->vf_num;
795 
796 	void *virt = hwq->virt_queues_ctrl.virt_addr;
797 
798 	int res = nt_vfio_dma_unmap(vf_num, hwq->virt_queues_ctrl.virt_addr,
799 			(uint64_t)hwq->virt_queues_ctrl.phys_addr, ONE_G_SIZE);
800 
801 	if (res != 0) {
802 		NT_LOG(ERR, NTNIC, "VFIO UNMAP FAILED! res %i, vf_num %i", res, vf_num);
803 		return -1;
804 	}
805 
806 	release_hw_virtio_queues(hwq);
807 	rte_free(hwq->pkt_buffers);
808 	rte_free(virt);
809 	return 0;
810 }
811 
812 static void eth_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t queue_id)
813 {
814 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
815 	struct ntnic_tx_queue *tx_q = &internals->txq_scg[queue_id];
816 	deallocate_hw_virtio_queues(&tx_q->hwq);
817 }
818 
819 static void eth_rx_queue_release(struct rte_eth_dev *eth_dev, uint16_t queue_id)
820 {
821 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
822 	struct ntnic_rx_queue *rx_q = &internals->rxq_scg[queue_id];
823 	deallocate_hw_virtio_queues(&rx_q->hwq);
824 }
825 
826 static int num_queues_alloced;
827 
828 /* Returns the first queue number of the allocated range, or -1 on failure */
829 static int allocate_queue(int num)
830 {
831 	int next_free = num_queues_alloced;
832 	NT_LOG_DBGX(DBG, NTNIC, "num_queues_alloced=%u, New queues=%u, Max queues=%u",
833 		num_queues_alloced, num, MAX_TOTAL_QUEUES);
834 
835 	if (num_queues_alloced + num > MAX_TOTAL_QUEUES)
836 		return -1;
837 
838 	num_queues_alloced += num;
839 	return next_free;
840 }
841 
842 static int eth_rx_scg_queue_setup(struct rte_eth_dev *eth_dev,
843 	uint16_t rx_queue_id,
844 	uint16_t nb_rx_desc __rte_unused,
845 	unsigned int socket_id __rte_unused,
846 	const struct rte_eth_rxconf *rx_conf __rte_unused,
847 	struct rte_mempool *mb_pool)
848 {
849 	NT_LOG_DBGX(DBG, NTNIC, "Rx queue setup");
850 	struct rte_pktmbuf_pool_private *mbp_priv;
851 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
852 	struct ntnic_rx_queue *rx_q = &internals->rxq_scg[rx_queue_id];
853 	struct drv_s *p_drv = internals->p_drv;
854 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
855 
856 	if (sg_ops == NULL) {
857 		NT_LOG_DBGX(DBG, NTNIC, "SG module is not initialized");
858 		return 0;
859 	}
860 
861 	if (internals->type == PORT_TYPE_OVERRIDE) {
862 		rx_q->mb_pool = mb_pool;
863 		eth_dev->data->rx_queues[rx_queue_id] = rx_q;
864 		mbp_priv = rte_mempool_get_priv(rx_q->mb_pool);
865 		rx_q->buf_size = (uint16_t)(mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
866 		rx_q->enabled = 1;
867 		return 0;
868 	}
869 
870 	NT_LOG(DBG, NTNIC, "(%i) NTNIC RX OVS-SW queue setup: queue id %i, hw queue index %i",
871 		internals->port, rx_queue_id, rx_q->queue.hw_id);
872 
873 	rx_q->mb_pool = mb_pool;
874 
875 	eth_dev->data->rx_queues[rx_queue_id] = rx_q;
876 
877 	mbp_priv = rte_mempool_get_priv(rx_q->mb_pool);
878 	rx_q->buf_size = (uint16_t)(mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
879 	rx_q->enabled = 1;
880 
881 	if (allocate_hw_virtio_queues(eth_dev, EXCEPTION_PATH_HID, &rx_q->hwq,
882 			SG_NB_HW_RX_DESCRIPTORS, SG_HW_RX_PKT_BUFFER_SIZE) < 0)
883 		return -1;
884 
885 	rx_q->nb_hw_rx_descr = SG_NB_HW_RX_DESCRIPTORS;
886 
887 	rx_q->profile = p_drv->ntdrv.adapter_info.fpga_info.profile;
888 
889 	rx_q->vq =
890 		sg_ops->nthw_setup_mngd_rx_virt_queue(p_nt_drv->adapter_info.fpga_info.mp_nthw_dbs,
891 			rx_q->queue.hw_id,	/* index */
892 			rx_q->nb_hw_rx_descr,
893 			EXCEPTION_PATH_HID,	/* host_id */
894 			1,	/* header: NT DVIO header for exception path */
895 			&rx_q->hwq.virt_queues_ctrl,
896 			rx_q->hwq.pkt_buffers,
897 			SPLIT_RING,
898 			-1);
899 
900 	NT_LOG(DBG, NTNIC, "(%i) NTNIC RX OVS-SW queues successfully setup", internals->port);
901 
902 	return 0;
903 }
904 
905 static int eth_tx_scg_queue_setup(struct rte_eth_dev *eth_dev,
906 	uint16_t tx_queue_id,
907 	uint16_t nb_tx_desc __rte_unused,
908 	unsigned int socket_id __rte_unused,
909 	const struct rte_eth_txconf *tx_conf __rte_unused)
910 {
911 	const struct port_ops *port_ops = get_port_ops();
912 
913 	if (port_ops == NULL) {
914 		NT_LOG_DBGX(ERR, NTNIC, "Link management module uninitialized");
915 		return -1;
916 	}
917 
918 	NT_LOG_DBGX(DBG, NTNIC, "Tx queue setup");
919 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
920 	struct drv_s *p_drv = internals->p_drv;
921 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
922 	struct ntnic_tx_queue *tx_q = &internals->txq_scg[tx_queue_id];
923 
924 	if (internals->type == PORT_TYPE_OVERRIDE) {
925 		eth_dev->data->tx_queues[tx_queue_id] = tx_q;
926 		return 0;
927 	}
928 
929 	if (sg_ops == NULL) {
930 		NT_LOG_DBGX(DBG, NTNIC, "SG module is not initialized");
931 		return 0;
932 	}
933 
934 	NT_LOG(DBG, NTNIC, "(%i) NTNIC TX OVS-SW queue setup: queue id %i, hw queue index %i",
935 		tx_q->port, tx_queue_id, tx_q->queue.hw_id);
936 
937 	if (tx_queue_id > internals->nb_tx_queues) {
938 		NT_LOG(ERR, NTNIC, "Error: invalid tx queue id");
939 		return -1;
940 	}
941 
942 	eth_dev->data->tx_queues[tx_queue_id] = tx_q;
943 
944 	/* Calculate target ID for HW - to be used in NTDVIO0 header bypass_port */
945 	if (tx_q->rss_target_id >= 0) {
946 		/* bypass to a multiqueue port - qsl-hsh index */
947 		tx_q->target_id = tx_q->rss_target_id + 0x90;
948 
949 	} else if (internals->vpq[tx_queue_id].hw_id > -1) {
950 		/* virtual port - queue index */
951 		tx_q->target_id = internals->vpq[tx_queue_id].hw_id;
952 
953 	} else {
954 		/* Phy port - phy port identifier */
955 		/* output/bypass to MAC */
956 		tx_q->target_id = (int)(tx_q->port + 0x80);
957 	}
958 
959 	if (allocate_hw_virtio_queues(eth_dev, EXCEPTION_PATH_HID, &tx_q->hwq,
960 			SG_NB_HW_TX_DESCRIPTORS, SG_HW_TX_PKT_BUFFER_SIZE) < 0) {
961 		return -1;
962 	}
963 
964 	tx_q->nb_hw_tx_descr = SG_NB_HW_TX_DESCRIPTORS;
965 
966 	tx_q->profile = p_drv->ntdrv.adapter_info.fpga_info.profile;
967 
968 	uint32_t port, header;
969 	port = tx_q->port;	/* transmit port */
970 	header = 0;	/* header type VirtIO-Net */
971 
972 	tx_q->vq =
973 		sg_ops->nthw_setup_mngd_tx_virt_queue(p_nt_drv->adapter_info.fpga_info.mp_nthw_dbs,
974 			tx_q->queue.hw_id,	/* index */
975 			tx_q->nb_hw_tx_descr,	/* queue size */
976 			EXCEPTION_PATH_HID,	/* host_id always VF4 */
977 			port,
978 			/*
979 			 * in_port - in vswitch mode has
980 			 * to move tx port from OVS excep.
981 			 * away from VM tx port,
982 			 * because of QoS is matched by port id!
983 			 */
984 			tx_q->port + 128,
985 			header,
986 			&tx_q->hwq.virt_queues_ctrl,
987 			tx_q->hwq.pkt_buffers,
988 			SPLIT_RING,
989 			-1,
990 			IN_ORDER);
991 
992 	tx_q->enabled = 1;
993 
994 	NT_LOG(DBG, NTNIC, "(%i) NTNIC TX OVS-SW queues successfully setup", internals->port);
995 
996 	if (internals->type == PORT_TYPE_PHYSICAL) {
997 		struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
998 		NT_LOG(DBG, NTNIC, "Port %i is ready for data. Enable port",
999 			internals->n_intf_no);
1000 		port_ops->set_adm_state(p_adapter_info, internals->n_intf_no, true);
1001 	}
1002 
1003 	return 0;
1004 }
1005 
1006 static int eth_rx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1007 {
1008 	eth_dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
1009 	return 0;
1010 }
1011 
1012 static int eth_rx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1013 {
1014 	eth_dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
1015 	return 0;
1016 }
1017 
1018 static int eth_tx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1019 {
1020 	eth_dev->data->tx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
1021 	return 0;
1022 }
1023 
1024 static int eth_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1025 {
1026 	eth_dev->data->tx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
1027 	return 0;
1028 }
1029 
1030 static int
1031 eth_mac_addr_add(struct rte_eth_dev *eth_dev,
1032 	struct rte_ether_addr *mac_addr,
1033 	uint32_t index,
1034 	uint32_t vmdq __rte_unused)
1035 {
1036 	struct rte_ether_addr *const eth_addrs = eth_dev->data->mac_addrs;
1037 
1038 	assert(index < NUM_MAC_ADDRS_PER_PORT);
1039 
1040 	if (index >= NUM_MAC_ADDRS_PER_PORT) {
1041 		const struct pmd_internals *const internals =
1042 			(struct pmd_internals *)eth_dev->data->dev_private;
1043 		NT_LOG_DBGX(DBG, NTNIC, "Port %i: illegal index %u (>= %u)",
1044 			internals->n_intf_no, index, NUM_MAC_ADDRS_PER_PORT);
1045 		return -1;
1046 	}
1047 
1048 	eth_addrs[index] = *mac_addr;
1049 
1050 	return 0;
1051 }
1052 
1053 static int
1054 eth_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1055 {
1056 	struct rte_ether_addr *const eth_addrs = dev->data->mac_addrs;
1057 
1058 	eth_addrs[0U] = *mac_addr;
1059 
1060 	return 0;
1061 }
1062 
1063 static int
1064 eth_set_mc_addr_list(struct rte_eth_dev *eth_dev,
1065 	struct rte_ether_addr *mc_addr_set,
1066 	uint32_t nb_mc_addr)
1067 {
1068 	struct pmd_internals *const internals = (struct pmd_internals *)eth_dev->data->dev_private;
1069 	struct rte_ether_addr *const mc_addrs = internals->mc_addrs;
1070 	size_t i;
1071 
1072 	if (nb_mc_addr >= NUM_MULTICAST_ADDRS_PER_PORT) {
1073 		NT_LOG_DBGX(DBG, NTNIC,
1074 			"Port %i: too many multicast addresses %u (>= %u)",
1075 			internals->n_intf_no, nb_mc_addr, NUM_MULTICAST_ADDRS_PER_PORT);
1076 		return -1;
1077 	}
1078 
1079 	for (i = 0U; i < NUM_MULTICAST_ADDRS_PER_PORT; i++)
1080 		if (i < nb_mc_addr)
1081 			mc_addrs[i] = mc_addr_set[i];
1082 
1083 		else
1084 			(void)memset(&mc_addrs[i], 0, sizeof(mc_addrs[i]));
1085 
1086 	return 0;
1087 }
1088 
1089 static int
1090 eth_dev_configure(struct rte_eth_dev *eth_dev)
1091 {
1092 	NT_LOG_DBGX(DBG, NTNIC, "Called for eth_dev %p", eth_dev);
1093 
1094 	/* The device is ALWAYS running promiscuous mode. */
1095 	eth_dev->data->promiscuous ^= ~eth_dev->data->promiscuous;
1096 	return 0;
1097 }
1098 
1099 static int
1100 eth_dev_start(struct rte_eth_dev *eth_dev)
1101 {
1102 	const struct port_ops *port_ops = get_port_ops();
1103 
1104 	if (port_ops == NULL) {
1105 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
1106 		return -1;
1107 	}
1108 
1109 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1110 
1111 	const int n_intf_no = internals->n_intf_no;
1112 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
1113 
1114 	NT_LOG_DBGX(DBG, NTNIC, "Port %u", internals->n_intf_no);
1115 
1116 	/* Start queues */
1117 	uint q;
1118 
1119 	for (q = 0; q < internals->nb_rx_queues; q++)
1120 		eth_rx_queue_start(eth_dev, q);
1121 
1122 	for (q = 0; q < internals->nb_tx_queues; q++)
1123 		eth_tx_queue_start(eth_dev, q);
1124 
1125 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE) {
1126 		eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
1127 
1128 	} else {
1129 		/* Enable the port */
1130 		port_ops->set_adm_state(p_adapter_info, internals->n_intf_no, true);
1131 
1132 		/*
1133 		 * wait for link on port
1134 		 * If application starts sending too soon before FPGA port is ready, garbage is
1135 		 * produced
1136 		 */
1137 		int loop = 0;
1138 
1139 		while (port_ops->get_link_status(p_adapter_info, n_intf_no) == RTE_ETH_LINK_DOWN) {
1140 			/* break out after 5 sec */
1141 			if (++loop >= 50) {
1142 				NT_LOG_DBGX(DBG, NTNIC,
1143 					"TIMEOUT: no link on port %i after 5 sec",
1144 					internals->n_intf_no);
1145 				break;
1146 			}
1147 
1148 			nt_os_wait_usec(100 * 1000);
1149 		}
1150 
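		/*
		 * Apply requested loopback mode: bit 0 selects host loopback,
		 * bit 1 selects line loopback.
		 */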
1151 		if (internals->lpbk_mode) {
1152 			if (internals->lpbk_mode & 1 << 0) {
1153 				port_ops->set_loopback_mode(p_adapter_info, n_intf_no,
1154 					NT_LINK_LOOPBACK_HOST);
1155 			}
1156 
1157 			if (internals->lpbk_mode & 1 << 1) {
1158 				port_ops->set_loopback_mode(p_adapter_info, n_intf_no,
1159 					NT_LINK_LOOPBACK_LINE);
1160 			}
1161 		}
1162 	}
1163 
1164 	return 0;
1165 }
1166 
1167 static int
1168 eth_dev_stop(struct rte_eth_dev *eth_dev)
1169 {
1170 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1171 
1172 	NT_LOG_DBGX(DBG, NTNIC, "Port %u", internals->n_intf_no);
1173 
1174 	if (internals->type != PORT_TYPE_VIRTUAL) {
1175 		uint q;
1176 
1177 		for (q = 0; q < internals->nb_rx_queues; q++)
1178 			eth_rx_queue_stop(eth_dev, q);
1179 
1180 		for (q = 0; q < internals->nb_tx_queues; q++)
1181 			eth_tx_queue_stop(eth_dev, q);
1182 	}
1183 
1184 	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
1185 	return 0;
1186 }
1187 
1188 static int
1189 eth_dev_set_link_up(struct rte_eth_dev *eth_dev)
1190 {
1191 	const struct port_ops *port_ops = get_port_ops();
1192 
1193 	if (port_ops == NULL) {
1194 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
1195 		return -1;
1196 	}
1197 
1198 	struct pmd_internals *const internals = (struct pmd_internals *)eth_dev->data->dev_private;
1199 
1200 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
1201 	const int port = internals->n_intf_no;
1202 
1203 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE)
1204 		return 0;
1205 
1206 	assert(port >= 0 && port < NUM_ADAPTER_PORTS_MAX);
1207 	assert(port == internals->n_intf_no);
1208 
1209 	port_ops->set_adm_state(p_adapter_info, port, true);
1210 
1211 	return 0;
1212 }
1213 
1214 static int
1215 eth_dev_set_link_down(struct rte_eth_dev *eth_dev)
1216 {
1217 	const struct port_ops *port_ops = get_port_ops();
1218 
1219 	if (port_ops == NULL) {
1220 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
1221 		return -1;
1222 	}
1223 
1224 	struct pmd_internals *const internals = (struct pmd_internals *)eth_dev->data->dev_private;
1225 
1226 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
1227 	const int port = internals->n_intf_no;
1228 
1229 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE)
1230 		return 0;
1231 
1232 	assert(port >= 0 && port < NUM_ADAPTER_PORTS_MAX);
1233 	assert(port == internals->n_intf_no);
1234 
1235 	port_ops->set_link_status(p_adapter_info, port, false);
1236 
1237 	return 0;
1238 }
1239 
1240 static void
1241 drv_deinit(struct drv_s *p_drv)
1242 {
1243 	const struct adapter_ops *adapter_ops = get_adapter_ops();
1244 
1245 	if (adapter_ops == NULL) {
1246 		NT_LOG(ERR, NTNIC, "Adapter module uninitialized");
1247 		return;
1248 	}
1249 
1250 	if (p_drv == NULL)
1251 		return;
1252 
1253 	ntdrv_4ga_t *p_nt_drv = &p_drv->ntdrv;
1254 
1255 	/* stop adapter */
1256 	adapter_ops->deinit(&p_nt_drv->adapter_info);
1257 
1258 	/* clean memory */
1259 	rte_free(p_drv);
1260 	p_drv = NULL;
1261 }
1262 
1263 static int
1264 eth_dev_close(struct rte_eth_dev *eth_dev)
1265 {
1266 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1267 	struct drv_s *p_drv = internals->p_drv;
1268 
1269 	if (internals->type != PORT_TYPE_VIRTUAL) {
1270 		struct ntnic_rx_queue *rx_q = internals->rxq_scg;
1271 		struct ntnic_tx_queue *tx_q = internals->txq_scg;
1272 
1273 		uint q;
1274 
1275 		if (sg_ops != NULL) {
1276 			for (q = 0; q < internals->nb_rx_queues; q++)
1277 				sg_ops->nthw_release_mngd_rx_virt_queue(rx_q[q].vq);
1278 
1279 			for (q = 0; q < internals->nb_tx_queues; q++)
1280 				sg_ops->nthw_release_mngd_tx_virt_queue(tx_q[q].vq);
1281 		}
1282 	}
1283 
1284 	internals->p_drv = NULL;
1285 
1286 	if (p_drv) {
1287 		/* decrease initialized ethernet devices */
1288 		p_drv->n_eth_dev_init_count--;
1289 
1290 		/*
1291 		 * rte_pci_dev has no private member for p_drv
1292 		 * wait until all rte_eth_dev's are closed - then close adapters via p_drv
1293 		 */
1294 		if (!p_drv->n_eth_dev_init_count)
1295 			drv_deinit(p_drv);
1296 	}
1297 
1298 	return 0;
1299 }
1300 
1301 static int
1302 eth_fw_version_get(struct rte_eth_dev *eth_dev, char *fw_version, size_t fw_size)
1303 {
1304 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1305 
1306 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE)
1307 		return 0;
1308 
1309 	fpga_info_t *fpga_info = &internals->p_drv->ntdrv.adapter_info.fpga_info;
1310 	const int length = snprintf(fw_version, fw_size, "%03d-%04d-%02d-%02d",
1311 			fpga_info->n_fpga_type_id, fpga_info->n_fpga_prod_id,
1312 			fpga_info->n_fpga_ver_id, fpga_info->n_fpga_rev_id);
1313 
1314 	if ((size_t)length < fw_size) {
1315 		/* We have space for the version string */
1316 		return 0;
1317 
1318 	} else {
1319 		/* We do not have space for the version string - return the needed space */
1320 		return length + 1;
1321 	}
1322 }
1323 
1324 static int
1325 promiscuous_enable(struct rte_eth_dev __rte_unused(*dev))
1326 {
1327 	NT_LOG(DBG, NTHW, "The device always runs in promiscuous mode");
1328 	return 0;
1329 }
1330 
1331 static const struct eth_dev_ops nthw_eth_dev_ops = {
1332 	.dev_configure = eth_dev_configure,
1333 	.dev_start = eth_dev_start,
1334 	.dev_stop = eth_dev_stop,
1335 	.dev_set_link_up = eth_dev_set_link_up,
1336 	.dev_set_link_down = eth_dev_set_link_down,
1337 	.dev_close = eth_dev_close,
1338 	.link_update = eth_link_update,
1339 	.dev_infos_get = eth_dev_infos_get,
1340 	.fw_version_get = eth_fw_version_get,
1341 	.rx_queue_setup = eth_rx_scg_queue_setup,
1342 	.rx_queue_start = eth_rx_queue_start,
1343 	.rx_queue_stop = eth_rx_queue_stop,
1344 	.rx_queue_release = eth_rx_queue_release,
1345 	.tx_queue_setup = eth_tx_scg_queue_setup,
1346 	.tx_queue_start = eth_tx_queue_start,
1347 	.tx_queue_stop = eth_tx_queue_stop,
1348 	.tx_queue_release = eth_tx_queue_release,
1349 	.mac_addr_add = eth_mac_addr_add,
1350 	.mac_addr_set = eth_mac_addr_set,
1351 	.set_mc_addr_list = eth_set_mc_addr_list,
1352 	.promiscuous_enable = promiscuous_enable,
1353 };
1354 
1355 static int
1356 nthw_pci_dev_init(struct rte_pci_device *pci_dev)
1357 {
1358 	nt_vfio_init();
1359 	const struct port_ops *port_ops = get_port_ops();
1360 
1361 	if (port_ops == NULL) {
1362 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
1363 		return -1;
1364 	}
1365 
1366 	const struct adapter_ops *adapter_ops = get_adapter_ops();
1367 
1368 	if (adapter_ops == NULL) {
1369 		NT_LOG(ERR, NTNIC, "Adapter module uninitialized");
1370 		return -1;
1371 	}
1372 
1373 	int res;
1374 	struct drv_s *p_drv;
1375 	ntdrv_4ga_t *p_nt_drv;
1376 	hw_info_t *p_hw_info;
1377 	fpga_info_t *fpga_info;
1378 	uint32_t n_port_mask = -1;	/* All ports enabled by default */
1379 	uint32_t nb_rx_queues = 1;
1380 	uint32_t nb_tx_queues = 1;
1381 	struct flow_queue_id_s queue_ids[MAX_QUEUES];
1382 	int n_phy_ports;
1383 	struct port_link_speed pls_mbps[NUM_ADAPTER_PORTS_MAX] = { 0 };
1384 	int num_port_speeds = 0;
1385 	NT_LOG_DBGX(DBG, NTNIC, "Dev %s PF #%i Init : %02x:%02x:%i", pci_dev->name,
1386 		pci_dev->addr.function, pci_dev->addr.bus, pci_dev->addr.devid,
1387 		pci_dev->addr.function);
1388 
1389 	/*
1390 	 * Process options/arguments
1391 	 */
1392 	if (pci_dev->device.devargs && pci_dev->device.devargs->args) {
1393 		int kvargs_count;
1394 		struct rte_kvargs *kvlist =
1395 			rte_kvargs_parse(pci_dev->device.devargs->args, valid_arguments);
1396 
1397 		if (kvlist == NULL)
1398 			return -1;
1399 
1400 		/*
1401 		 * Argument: help
1402 		 * NOTE: this argument/option check should be the first as it will stop
1403 		 * execution after producing its output
1404 		 */
1405 		{
1406 			if (rte_kvargs_get(kvlist, ETH_DEV_NTNIC_HELP_ARG)) {
1407 				size_t i;
1408 
1409 				for (i = 0; i < RTE_DIM(valid_arguments); i++)
1410 					if (valid_arguments[i] == NULL)
1411 						break;
1412 
1413 				exit(0);
1414 			}
1415 		}
1416 
1417 		/*
1418 		 * rxq option/argument
1419 		 * The number of rxq (hostbuffers) allocated in memory.
1420 		 * Default is 32 RX Hostbuffers
1421 		 */
1422 		kvargs_count = rte_kvargs_count(kvlist, ETH_DEV_NTHW_RXQUEUES_ARG);
1423 
1424 		if (kvargs_count != 0) {
1425 			assert(kvargs_count == 1);
1426 			res = rte_kvargs_process(kvlist, ETH_DEV_NTHW_RXQUEUES_ARG, &string_to_u32,
1427 					&nb_rx_queues);
1428 
1429 			if (res < 0) {
1430 				NT_LOG_DBGX(ERR, NTNIC,
1431 					"problem with command line arguments: res=%d",
1432 					res);
1433 				return -1;
1434 			}
1435 
1436 			NT_LOG_DBGX(DBG, NTNIC, "devargs: %s=%u",
1437 				ETH_DEV_NTHW_RXQUEUES_ARG, nb_rx_queues);
1438 		}
1439 
1440 		/*
1441 		 * txq option/argument
1442 		 * The number of txq (hostbuffers) allocated in memory.
1443 		 * Default is 32 TX Hostbuffers
1444 		 */
1445 		kvargs_count = rte_kvargs_count(kvlist, ETH_DEV_NTHW_TXQUEUES_ARG);
1446 
1447 		if (kvargs_count != 0) {
1448 			assert(kvargs_count == 1);
1449 			res = rte_kvargs_process(kvlist, ETH_DEV_NTHW_TXQUEUES_ARG, &string_to_u32,
1450 					&nb_tx_queues);
1451 
1452 			if (res < 0) {
1453 				NT_LOG_DBGX(ERR, NTNIC,
1454 					"problem with command line arguments: res=%d",
1455 					res);
1456 				return -1;
1457 			}
1458 
1459 			NT_LOG_DBGX(DBG, NTNIC, "devargs: %s=%u",
1460 				ETH_DEV_NTHW_TXQUEUES_ARG, nb_tx_queues);
1461 		}
1462 	}
1463 
1464 
1465 	/* alloc */
1466 	p_drv = rte_zmalloc_socket(pci_dev->name, sizeof(struct drv_s), RTE_CACHE_LINE_SIZE,
1467 			pci_dev->device.numa_node);
1468 
1469 	if (!p_drv) {
1470 		NT_LOG_DBGX(ERR, NTNIC, "%s: error %d",
1471 			(pci_dev->name[0] ? pci_dev->name : "NA"), -1);
1472 		return -1;
1473 	}
1474 
1475 	/* Setup VFIO context */
1476 	int vfio = nt_vfio_setup(pci_dev);
1477 
1478 	if (vfio < 0) {
1479 		NT_LOG_DBGX(ERR, NTNIC, "%s: vfio_setup error %d",
1480 			(pci_dev->name[0] ? pci_dev->name : "NA"), -1);
1481 		rte_free(p_drv);
1482 		return -1;
1483 	}
1484 
1485 	/* context */
1486 	p_nt_drv = &p_drv->ntdrv;
1487 	p_hw_info = &p_nt_drv->adapter_info.hw_info;
1488 	fpga_info = &p_nt_drv->adapter_info.fpga_info;
1489 
1490 	p_drv->p_dev = pci_dev;
1491 
1492 	/* Set context for NtDrv */
1493 	p_nt_drv->pciident = BDF_TO_PCIIDENT(pci_dev->addr.domain, pci_dev->addr.bus,
1494 			pci_dev->addr.devid, pci_dev->addr.function);
1495 	p_nt_drv->adapter_info.n_rx_host_buffers = nb_rx_queues;
1496 	p_nt_drv->adapter_info.n_tx_host_buffers = nb_tx_queues;
1497 
1498 	fpga_info->bar0_addr = (void *)pci_dev->mem_resource[0].addr;
1499 	fpga_info->bar0_size = pci_dev->mem_resource[0].len;
1500 	fpga_info->numa_node = pci_dev->device.numa_node;
1501 	fpga_info->pciident = p_nt_drv->pciident;
1502 	fpga_info->adapter_no = p_drv->adapter_no;
1503 
1504 	p_nt_drv->adapter_info.hw_info.pci_class_id = pci_dev->id.class_id;
1505 	p_nt_drv->adapter_info.hw_info.pci_vendor_id = pci_dev->id.vendor_id;
1506 	p_nt_drv->adapter_info.hw_info.pci_device_id = pci_dev->id.device_id;
1507 	p_nt_drv->adapter_info.hw_info.pci_sub_vendor_id = pci_dev->id.subsystem_vendor_id;
1508 	p_nt_drv->adapter_info.hw_info.pci_sub_device_id = pci_dev->id.subsystem_device_id;
1509 
1510 	NT_LOG(DBG, NTNIC, "%s: " PCIIDENT_PRINT_STR " %04X:%04X: %04X:%04X:",
1511 		p_nt_drv->adapter_info.mp_adapter_id_str, PCIIDENT_TO_DOMAIN(p_nt_drv->pciident),
1512 		PCIIDENT_TO_BUSNR(p_nt_drv->pciident), PCIIDENT_TO_DEVNR(p_nt_drv->pciident),
1513 		PCIIDENT_TO_FUNCNR(p_nt_drv->pciident),
1514 		p_nt_drv->adapter_info.hw_info.pci_vendor_id,
1515 		p_nt_drv->adapter_info.hw_info.pci_device_id,
1516 		p_nt_drv->adapter_info.hw_info.pci_sub_vendor_id,
1517 		p_nt_drv->adapter_info.hw_info.pci_sub_device_id);
1518 
1519 	p_nt_drv->b_shutdown = false;
1520 	p_nt_drv->adapter_info.pb_shutdown = &p_nt_drv->b_shutdown;
1521 
1522 	for (int i = 0; i < num_port_speeds; ++i) {
1523 		struct adapter_info_s *p_adapter_info = &p_nt_drv->adapter_info;
1524 		nt_link_speed_t link_speed = convert_link_speed(pls_mbps[i].link_speed);
1525 		port_ops->set_link_speed(p_adapter_info, i, link_speed);
1526 	}
1527 
1528 	/* store context */
1529 	store_pdrv(p_drv);
1530 
1531 	/* initialize nt4ga nthw fpga module instance in drv */
1532 	int err = adapter_ops->init(&p_nt_drv->adapter_info);
1533 
1534 	if (err != 0) {
1535 		NT_LOG(ERR, NTNIC, "%s: Cannot initialize the adapter instance",
1536 			p_nt_drv->adapter_info.mp_adapter_id_str);
1537 		return -1;
1538 	}
1539 
1540 	/* Initialize the queue system */
1541 	if (err == 0) {
1542 		sg_ops = get_sg_ops();
1543 
1544 		if (sg_ops != NULL) {
1545 			err = sg_ops->nthw_virt_queue_init(fpga_info);
1546 
1547 			if (err != 0) {
1548 				NT_LOG(ERR, NTNIC,
1549 					"%s: Cannot initialize scatter-gather queues",
1550 					p_nt_drv->adapter_info.mp_adapter_id_str);
1551 
1552 			} else {
1553 				NT_LOG(DBG, NTNIC, "%s: Initialized scatter-gather queues",
1554 					p_nt_drv->adapter_info.mp_adapter_id_str);
1555 			}
1556 
1557 		} else {
1558 			NT_LOG_DBGX(DBG, NTNIC, "SG module is not initialized");
1559 		}
1560 	}
1561 
1562 	/* Start ctrl, monitor, stat thread only for primary process. */
1563 	if (err == 0) {
1564 		/* mp_adapter_id_str is initialized after nt4ga_adapter_init(p_nt_drv) */
1565 		const char *const p_adapter_id_str = p_nt_drv->adapter_info.mp_adapter_id_str;
1566 		(void)p_adapter_id_str;
1567 		NT_LOG(DBG, NTNIC,
1568 			"%s: %s: AdapterPCI=" PCIIDENT_PRINT_STR " Hw=0x%02X_rev%d PhyPorts=%d",
1569 			(pci_dev->name[0] ? pci_dev->name : "NA"), p_adapter_id_str,
1570 			PCIIDENT_TO_DOMAIN(p_nt_drv->adapter_info.fpga_info.pciident),
1571 			PCIIDENT_TO_BUSNR(p_nt_drv->adapter_info.fpga_info.pciident),
1572 			PCIIDENT_TO_DEVNR(p_nt_drv->adapter_info.fpga_info.pciident),
1573 			PCIIDENT_TO_FUNCNR(p_nt_drv->adapter_info.fpga_info.pciident),
1574 			p_hw_info->hw_platform_id, fpga_info->nthw_hw_info.hw_id,
1575 			fpga_info->n_phy_ports);
1576 
1577 	} else {
1578 		NT_LOG_DBGX(ERR, NTNIC, "%s: error=%d",
1579 			(pci_dev->name[0] ? pci_dev->name : "NA"), err);
1580 		return -1;
1581 	}
1582 
1583 	n_phy_ports = fpga_info->n_phy_ports;
1584 
1585 	for (int n_intf_no = 0; n_intf_no < n_phy_ports; n_intf_no++) {
1586 		const char *const p_port_id_str = p_nt_drv->adapter_info.mp_port_id_str[n_intf_no];
1587 		(void)p_port_id_str;
1588 		struct pmd_internals *internals = NULL;
1589 		struct rte_eth_dev *eth_dev = NULL;
1590 		char name[32];
1591 		int i;
1592 
1593 		if ((1 << n_intf_no) & ~n_port_mask) {
1594 			NT_LOG_DBGX(DBG, NTNIC,
1595 				"%s: interface #%d: skipping due to portmask 0x%02X",
1596 				p_port_id_str, n_intf_no, n_port_mask);
1597 			continue;
1598 		}
1599 
1600 		snprintf(name, sizeof(name), "ntnic%d", n_intf_no);
1601 		NT_LOG_DBGX(DBG, NTNIC, "%s: interface #%d: %s: '%s'", p_port_id_str,
1602 			n_intf_no, (pci_dev->name[0] ? pci_dev->name : "NA"), name);
1603 
1604 		internals = rte_zmalloc_socket(name, sizeof(struct pmd_internals),
1605 				RTE_CACHE_LINE_SIZE, pci_dev->device.numa_node);
1606 
1607 		if (!internals) {
1608 			NT_LOG_DBGX(ERR, NTNIC, "%s: %s: error=%d",
1609 				(pci_dev->name[0] ? pci_dev->name : "NA"), name, -1);
1610 			return -1;
1611 		}
1612 
1613 		internals->pci_dev = pci_dev;
1614 		internals->n_intf_no = n_intf_no;
1615 		internals->type = PORT_TYPE_PHYSICAL;
1616 		internals->nb_rx_queues = nb_rx_queues;
1617 		internals->nb_tx_queues = nb_tx_queues;
1618 
1619 		/* Queue index is not used as dest port in bypass - use 0x80 + port nr */
1620 		for (i = 0; i < MAX_QUEUES; i++)
1621 			internals->vpq[i].hw_id = -1;
1622 
1623 
1624 		/* Setup queue_ids */
1625 		if (nb_rx_queues > 1) {
1626 			NT_LOG(DBG, NTNIC,
1627 				"(%i) NTNIC configured with Rx multi queues. %i queues",
1628 				internals->n_intf_no, nb_rx_queues);
1629 		}
1630 
1631 		if (nb_tx_queues > 1) {
1632 			NT_LOG(DBG, NTNIC,
1633 				"(%i) NTNIC configured with Tx multi queues. %i queues",
1634 				internals->n_intf_no, nb_tx_queues);
1635 		}
1636 
1637 		int max_num_queues = (nb_rx_queues > nb_tx_queues) ? nb_rx_queues : nb_tx_queues;
1638 		int start_queue = allocate_queue(max_num_queues);
1639 
1640 		if (start_queue < 0)
1641 			return -1;
1642 
1643 		for (i = 0; i < (int)max_num_queues; i++) {
1644 			queue_ids[i].id = i;
1645 			queue_ids[i].hw_id = start_queue + i;
1646 
1647 			internals->rxq_scg[i].queue = queue_ids[i];
1648 			/* use same index in Rx and Tx rings */
1649 			internals->txq_scg[i].queue = queue_ids[i];
1650 			internals->rxq_scg[i].enabled = 0;
1651 			internals->txq_scg[i].type = internals->type;
1652 			internals->rxq_scg[i].type = internals->type;
1653 			internals->rxq_scg[i].port = internals->port;
1654 		}
1655 
1656 		/* no tx queues - tx data goes out on phy */
1657 		internals->vpq_nb_vq = 0;
1658 
1659 		for (i = 0; i < (int)nb_tx_queues; i++) {
1660 			internals->txq_scg[i].port = internals->port;
1661 			internals->txq_scg[i].enabled = 0;
1662 		}
1663 
1664 		/* Set MAC address (but only if the MAC address is permitted) */
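		/*
		 * The port MAC is the adapter base MAC value plus the port index,
		 * written out byte by byte from the most significant byte.
		 */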
1665 		if (n_intf_no < fpga_info->nthw_hw_info.vpd_info.mn_mac_addr_count) {
1666 			const uint64_t mac =
1667 				fpga_info->nthw_hw_info.vpd_info.mn_mac_addr_value + n_intf_no;
1668 			internals->eth_addrs[0].addr_bytes[0] = (mac >> 40) & 0xFFu;
1669 			internals->eth_addrs[0].addr_bytes[1] = (mac >> 32) & 0xFFu;
1670 			internals->eth_addrs[0].addr_bytes[2] = (mac >> 24) & 0xFFu;
1671 			internals->eth_addrs[0].addr_bytes[3] = (mac >> 16) & 0xFFu;
1672 			internals->eth_addrs[0].addr_bytes[4] = (mac >> 8) & 0xFFu;
1673 			internals->eth_addrs[0].addr_bytes[5] = (mac >> 0) & 0xFFu;
1674 		}
1675 
1676 		eth_dev = rte_eth_dev_allocate(name);
1677 
1678 		if (!eth_dev) {
1679 			NT_LOG_DBGX(ERR, NTNIC, "%s: %s: error=%d",
1680 				(pci_dev->name[0] ? pci_dev->name : "NA"), name, -1);
1681 			return -1;
1682 		}
1683 
1684 		/* connect structs */
1685 		internals->p_drv = p_drv;
1686 		eth_dev->data->dev_private = internals;
1687 		eth_dev->data->mac_addrs = rte_malloc(NULL,
1688 					NUM_MAC_ADDRS_PER_PORT * sizeof(struct rte_ether_addr), 0);
1689 		rte_memcpy(&eth_dev->data->mac_addrs[0],
1690 					&internals->eth_addrs[0], RTE_ETHER_ADDR_LEN);
1691 
1692 		NT_LOG_DBGX(DBG, NTNIC, "Setting up RX functions for SCG");
1693 		eth_dev->rx_pkt_burst = eth_dev_rx_scg;
1694 		eth_dev->tx_pkt_burst = eth_dev_tx_scg;
1695 		eth_dev->tx_pkt_prepare = NULL;
1696 
1697 		struct rte_eth_link pmd_link;
1698 		pmd_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
1699 		pmd_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
1700 		pmd_link.link_status = RTE_ETH_LINK_DOWN;
1701 		pmd_link.link_autoneg = RTE_ETH_LINK_AUTONEG;
1702 
1703 		eth_dev->device = &pci_dev->device;
1704 		eth_dev->data->dev_link = pmd_link;
1705 		eth_dev->dev_ops = &nthw_eth_dev_ops;
1706 
1707 		eth_dev_pci_specific_init(eth_dev, pci_dev);
1708 		rte_eth_dev_probing_finish(eth_dev);
1709 
1710 		/* increase initialized ethernet devices - PF */
1711 		p_drv->n_eth_dev_init_count++;
1712 	}
1713 
1714 	return 0;
1715 }
1716 
1717 static int
1718 nthw_pci_dev_deinit(struct rte_eth_dev *eth_dev __rte_unused)
1719 {
1720 	NT_LOG_DBGX(DBG, NTNIC, "PCI device deinitialization");
1721 
1722 	int i;
1723 	char name[32];
1724 
1725 	struct pmd_internals *internals = eth_dev->data->dev_private;
1726 	ntdrv_4ga_t *p_ntdrv = &internals->p_drv->ntdrv;
1727 	fpga_info_t *fpga_info = &p_ntdrv->adapter_info.fpga_info;
1728 	const int n_phy_ports = fpga_info->n_phy_ports;
1729 
1730 	/* let running threads end Rx and Tx activity */
1731 	if (sg_ops != NULL) {
1732 		nt_os_wait_usec(1 * 1000 * 1000);
1733 
1734 		while (internals) {
1735 			for (i = internals->nb_tx_queues - 1; i >= 0; i--) {
1736 				sg_ops->nthw_release_mngd_tx_virt_queue(internals->txq_scg[i].vq);
1737 				release_hw_virtio_queues(&internals->txq_scg[i].hwq);
1738 			}
1739 
1740 			for (i = internals->nb_rx_queues - 1; i >= 0; i--) {
1741 				sg_ops->nthw_release_mngd_rx_virt_queue(internals->rxq_scg[i].vq);
1742 				release_hw_virtio_queues(&internals->rxq_scg[i].hwq);
1743 			}
1744 
1745 			internals = internals->next;
1746 		}
1747 	}
1748 
1749 	for (i = 0; i < n_phy_ports; i++) {
1750 		snprintf(name, sizeof(name), "ntnic%d", i);
1751 		eth_dev = rte_eth_dev_allocated(name);
1752 		if (eth_dev == NULL)
1753 			continue; /* port already released */
1754 		rte_eth_dev_release_port(eth_dev);
1755 	}
1756 
1757 	nt_vfio_remove(EXCEPTION_PATH_HID);
1758 	return 0;
1759 }
1760 
1761 static int
1762 nthw_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1763 	struct rte_pci_device *pci_dev)
1764 {
1765 	int ret;
1766 
1767 	NT_LOG_DBGX(DBG, NTNIC, "pcidev: name: '%s'", pci_dev->name);
1768 	NT_LOG_DBGX(DBG, NTNIC, "devargs: name: '%s'", pci_dev->device.name);
1769 
1770 	if (pci_dev->device.devargs) {
1771 		NT_LOG_DBGX(DBG, NTNIC, "devargs: args: '%s'",
1772 			(pci_dev->device.devargs->args ? pci_dev->device.devargs->args : "NULL"));
1773 		NT_LOG_DBGX(DBG, NTNIC, "devargs: data: '%s'",
1774 			(pci_dev->device.devargs->data ? pci_dev->device.devargs->data : "NULL"));
1775 	}
1776 
1777 	const int n_rte_vfio_no_io_mmu_enabled = rte_vfio_noiommu_is_enabled();
1778 	NT_LOG(DBG, NTNIC, "vfio_no_iommu_enabled=%d", n_rte_vfio_no_io_mmu_enabled);
1779 
1780 	if (n_rte_vfio_no_io_mmu_enabled) {
1781 		NT_LOG(ERR, NTNIC, "vfio_no_iommu_enabled=%d: this PMD needs VFIO IOMMU",
1782 			n_rte_vfio_no_io_mmu_enabled);
1783 		return -1;
1784 	}
1785 
1786 	const enum rte_iova_mode n_rte_io_va_mode = rte_eal_iova_mode();
1787 	NT_LOG(DBG, NTNIC, "iova mode=%d", n_rte_io_va_mode);
1788 
1789 	NT_LOG(DBG, NTNIC,
1790 		"busid=" PCI_PRI_FMT
1791 		" pciid=%04x:%04x_%04x:%04x locstr=%s @ numanode=%d: drv=%s drvalias=%s",
1792 		pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid,
1793 		pci_dev->addr.function, pci_dev->id.vendor_id, pci_dev->id.device_id,
1794 		pci_dev->id.subsystem_vendor_id, pci_dev->id.subsystem_device_id,
1795 		pci_dev->name[0] ? pci_dev->name : "NA",
1796 		pci_dev->device.numa_node,
1797 		pci_dev->driver->driver.name ? pci_dev->driver->driver.name : "NA",
1798 		pci_dev->driver->driver.alias ? pci_dev->driver->driver.alias : "NA");
1799 
1800 
1801 	ret = nthw_pci_dev_init(pci_dev);
1802 
1803 	NT_LOG_DBGX(DBG, NTNIC, "leave: ret=%d", ret);
1804 	return ret;
1805 }
1806 
1807 static int
1808 nthw_pci_remove(struct rte_pci_device *pci_dev)
1809 {
1810 	NT_LOG_DBGX(DBG, NTNIC);
1811 
1812 	struct drv_s *p_drv = get_pdrv_from_pci(pci_dev->addr);
1813 	drv_deinit(p_drv);
1814 
1815 	return rte_eth_dev_pci_generic_remove(pci_dev, nthw_pci_dev_deinit);
1816 }
1817 
1818 static struct rte_pci_driver rte_nthw_pmd = {
1819 	.id_table = nthw_pci_id_map,
1820 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1821 	.probe = nthw_pci_probe,
1822 	.remove = nthw_pci_remove,
1823 };
1824 
1825 RTE_PMD_REGISTER_PCI(net_ntnic, rte_nthw_pmd);
1826 RTE_PMD_REGISTER_PCI_TABLE(net_ntnic, nthw_pci_id_map);
1827 RTE_PMD_REGISTER_KMOD_DEP(net_ntnic, "* vfio-pci");
1828 
1829 RTE_LOG_REGISTER_SUFFIX(nt_log_general, general, INFO);
1830 RTE_LOG_REGISTER_SUFFIX(nt_log_nthw, nthw, INFO);
1831 RTE_LOG_REGISTER_SUFFIX(nt_log_filter, filter, INFO);
1832 RTE_LOG_REGISTER_SUFFIX(nt_log_ntnic, ntnic, INFO);
1833