xref: /dpdk/drivers/net/ntnic/ntnic_ethdev.c (revision 21a66096bb44a4468353782c36fc85913520dc6c)
1 /*
2  * SPDX-License-Identifier: BSD-3-Clause
3  * Copyright(c) 2023 Napatech A/S
4  */
5 
6 #include <stdint.h>
7 
8 #include <rte_eal.h>
9 #include <rte_dev.h>
10 #include <rte_vfio.h>
11 #include <rte_ethdev.h>
12 #include <rte_bus_pci.h>
13 #include <ethdev_pci.h>
14 #include <rte_kvargs.h>
15 
16 #include <sys/queue.h>
17 
18 #include "ntlog.h"
19 #include "ntdrv_4ga.h"
20 #include "ntos_drv.h"
21 #include "ntos_system.h"
22 #include "nthw_fpga_instances.h"
23 #include "ntnic_vfio.h"
24 #include "ntnic_mod_reg.h"
25 #include "nt_util.h"
26 
27 const rte_thread_attr_t thread_attr = { .priority = RTE_THREAD_PRIORITY_NORMAL };
28 #define THREAD_CTRL_CREATE(a, b, c, d) rte_thread_create_internal_control(a, b, c, d)
29 #define THREAD_JOIN(a) rte_thread_join(a, NULL)
30 #define THREAD_FUNC static uint32_t
31 #define THREAD_RETURN (0)
32 #define HW_MAX_PKT_LEN (10000)
33 #define MAX_MTU (HW_MAX_PKT_LEN - RTE_ETHER_HDR_LEN - RTE_ETHER_CRC_LEN)
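/*
 * Illustrative arithmetic: with HW_MAX_PKT_LEN = 10000, RTE_ETHER_HDR_LEN = 14
 * and RTE_ETHER_CRC_LEN = 4, MAX_MTU evaluates to 9982 bytes.
 */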
34 
35 #define EXCEPTION_PATH_HID 0
36 
37 #define MAX_TOTAL_QUEUES       128
38 
39 #define SG_NB_HW_RX_DESCRIPTORS 1024
40 #define SG_NB_HW_TX_DESCRIPTORS 1024
41 #define SG_HW_RX_PKT_BUFFER_SIZE (1024 << 1)
42 #define SG_HW_TX_PKT_BUFFER_SIZE (1024 << 1)
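/* (1024 << 1) gives 2048-byte hardware packet buffers for both the Rx and Tx rings. */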
43 
44 /* Max RSS queues */
45 #define MAX_QUEUES 125
46 
47 #define NUM_VQ_SEGS(_data_size_)                                                                  \
48 	({                                                                                        \
49 		size_t _size = (_data_size_);                                                     \
50 		size_t _segment_count = ((_size + SG_HDR_SIZE) > SG_HW_TX_PKT_BUFFER_SIZE)        \
51 			? (((_size + SG_HDR_SIZE) + SG_HW_TX_PKT_BUFFER_SIZE - 1) /               \
52 			   SG_HW_TX_PKT_BUFFER_SIZE)                                              \
53 			: 1;                                                                      \
54 		_segment_count;                                                                   \
55 	})
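/*
 * Illustrative example: with SG_HW_TX_PKT_BUFFER_SIZE = 2048, a packet of
 * pkt_len = 3000 needs ceil((3000 + SG_HDR_SIZE) / 2048) = 2 virtqueue segments
 * (for any SG_HDR_SIZE up to 1096 bytes), while any packet with
 * pkt_len + SG_HDR_SIZE <= 2048 uses exactly 1 segment.
 */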
56 
57 #define VIRTQ_DESCR_IDX(_tx_pkt_idx_)                                                             \
58 	(((_tx_pkt_idx_) + first_vq_descr_idx) % SG_NB_HW_TX_DESCRIPTORS)
59 
60 #define VIRTQ_DESCR_IDX_NEXT(_vq_descr_idx_) (((_vq_descr_idx_) + 1) % SG_NB_HW_TX_DESCRIPTORS)
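/*
 * Both index helpers wrap modulo SG_NB_HW_TX_DESCRIPTORS (1024), so TX
 * descriptor indices roll over at the end of the ring.
 */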
61 
62 #define ONE_G_SIZE  0x40000000
63 #define ONE_G_MASK  (ONE_G_SIZE - 1)
64 
65 #define MAX_RX_PACKETS   128
66 #define MAX_TX_PACKETS   128
67 
68 uint64_t rte_tsc_freq;
69 
70 int kill_pmd;
71 
72 #define ETH_DEV_NTNIC_HELP_ARG "help"
73 #define ETH_DEV_NTHW_RXQUEUES_ARG "rxqs"
74 #define ETH_DEV_NTHW_TXQUEUES_ARG "txqs"
75 
76 static const char *const valid_arguments[] = {
77 	ETH_DEV_NTNIC_HELP_ARG,
78 	ETH_DEV_NTHW_RXQUEUES_ARG,
79 	ETH_DEV_NTHW_TXQUEUES_ARG,
80 	NULL,
81 };
82 
83 
84 static const struct rte_pci_id nthw_pci_id_map[] = {
85 	{ RTE_PCI_DEVICE(NT_HW_PCI_VENDOR_ID, NT_HW_PCI_DEVICE_ID_NT200A02) },
86 	{
87 		.vendor_id = 0,
88 	},	/* sentinel */
89 };
90 
91 static const struct sg_ops_s *sg_ops;
92 
93 rte_spinlock_t hwlock = RTE_SPINLOCK_INITIALIZER;
94 
95 /*
96  * Store and get adapter info
97  */
98 
99 static struct drv_s *_g_p_drv[NUM_ADAPTER_MAX] = { NULL };
100 
101 static void
102 store_pdrv(struct drv_s *p_drv)
103 {
104 	if (p_drv->adapter_no >= NUM_ADAPTER_MAX) {
105 		NT_LOG(ERR, NTNIC,
106 			"Internal error adapter number %u out of range. Max number of adapters: %u",
107 			p_drv->adapter_no, NUM_ADAPTER_MAX);
108 		return;
109 	}
110 
111 	if (_g_p_drv[p_drv->adapter_no] != 0) {
112 		NT_LOG(WRN, NTNIC,
113 			"Overwriting adapter structure for PCI  " PCIIDENT_PRINT_STR
114 			" with adapter structure for PCI  " PCIIDENT_PRINT_STR,
115 			PCIIDENT_TO_DOMAIN(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
116 			PCIIDENT_TO_BUSNR(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
117 			PCIIDENT_TO_DEVNR(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
118 			PCIIDENT_TO_FUNCNR(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
119 			PCIIDENT_TO_DOMAIN(p_drv->ntdrv.pciident),
120 			PCIIDENT_TO_BUSNR(p_drv->ntdrv.pciident),
121 			PCIIDENT_TO_DEVNR(p_drv->ntdrv.pciident),
122 			PCIIDENT_TO_FUNCNR(p_drv->ntdrv.pciident));
123 	}
124 
125 	rte_spinlock_lock(&hwlock);
126 	_g_p_drv[p_drv->adapter_no] = p_drv;
127 	rte_spinlock_unlock(&hwlock);
128 }
129 
130 static void clear_pdrv(struct drv_s *p_drv)
131 {
132 	if (p_drv->adapter_no >= NUM_ADAPTER_MAX)
133 		return;
134 
135 	rte_spinlock_lock(&hwlock);
136 	_g_p_drv[p_drv->adapter_no] = NULL;
137 	rte_spinlock_unlock(&hwlock);
138 }
139 
140 static struct drv_s *
141 get_pdrv_from_pci(struct rte_pci_addr addr)
142 {
143 	int i;
144 	struct drv_s *p_drv = NULL;
145 	rte_spinlock_lock(&hwlock);
146 
147 	for (i = 0; i < NUM_ADAPTER_MAX; i++) {
148 		if (_g_p_drv[i]) {
149 			if (PCIIDENT_TO_DOMAIN(_g_p_drv[i]->ntdrv.pciident) == addr.domain &&
150 				PCIIDENT_TO_BUSNR(_g_p_drv[i]->ntdrv.pciident) == addr.bus) {
151 				p_drv = _g_p_drv[i];
152 				break;
153 			}
154 		}
155 	}
156 
157 	rte_spinlock_unlock(&hwlock);
158 	return p_drv;
159 }
160 
161 static int dpdk_stats_collect(struct pmd_internals *internals, struct rte_eth_stats *stats)
162 {
163 	const struct ntnic_filter_ops *ntnic_filter_ops = get_ntnic_filter_ops();
164 
165 	if (ntnic_filter_ops == NULL) {
166 		NT_LOG_DBGX(ERR, NTNIC, "ntnic_filter_ops uninitialized");
167 		return -1;
168 	}
169 
170 	unsigned int i;
171 	struct drv_s *p_drv = internals->p_drv;
172 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
173 	nt4ga_stat_t *p_nt4ga_stat = &p_nt_drv->adapter_info.nt4ga_stat;
174 	nthw_stat_t *p_nthw_stat = p_nt4ga_stat->mp_nthw_stat;
175 	const int if_index = internals->n_intf_no;
176 	uint64_t rx_total = 0;
177 	uint64_t rx_total_b = 0;
178 	uint64_t tx_total = 0;
179 	uint64_t tx_total_b = 0;
180 	uint64_t tx_err_total = 0;
181 
182 	if (!p_nthw_stat || !p_nt4ga_stat || !stats || if_index < 0 ||
183 		if_index > NUM_ADAPTER_PORTS_MAX) {
184 		NT_LOG_DBGX(WRN, NTNIC, "error exit");
185 		return -1;
186 	}
187 
188 	/*
189 	 * Pull the latest port statistic numbers (Rx/Tx pkts and bytes)
190 	 * Return values are in the "internals->rxq_scg[]" and "internals->txq_scg[]" arrays
191 	 */
192 	ntnic_filter_ops->poll_statistics(internals);
193 
194 	memset(stats, 0, sizeof(*stats));
195 
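	/*
	 * Per-queue counters are only exported for the first
	 * RTE_ETHDEV_QUEUE_STAT_CNTRS queues (a DPDK build-time constant, 16 by
	 * default); the aggregated totals below are computed over that same
	 * capped set of queues.
	 */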
196 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && i < internals->nb_rx_queues; i++) {
197 		stats->q_ipackets[i] = internals->rxq_scg[i].rx_pkts;
198 		stats->q_ibytes[i] = internals->rxq_scg[i].rx_bytes;
199 		rx_total += stats->q_ipackets[i];
200 		rx_total_b += stats->q_ibytes[i];
201 	}
202 
203 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && i < internals->nb_tx_queues; i++) {
204 		stats->q_opackets[i] = internals->txq_scg[i].tx_pkts;
205 		stats->q_obytes[i] = internals->txq_scg[i].tx_bytes;
206 		stats->q_errors[i] = internals->txq_scg[i].err_pkts;
207 		tx_total += stats->q_opackets[i];
208 		tx_total_b += stats->q_obytes[i];
209 		tx_err_total += stats->q_errors[i];
210 	}
211 
212 	stats->imissed = internals->rx_missed;
213 	stats->ipackets = rx_total;
214 	stats->ibytes = rx_total_b;
215 	stats->opackets = tx_total;
216 	stats->obytes = tx_total_b;
217 	stats->oerrors = tx_err_total;
218 
219 	return 0;
220 }
221 
222 static int dpdk_stats_reset(struct pmd_internals *internals, struct ntdrv_4ga_s *p_nt_drv,
223 	int n_intf_no)
224 {
225 	nt4ga_stat_t *p_nt4ga_stat = &p_nt_drv->adapter_info.nt4ga_stat;
226 	nthw_stat_t *p_nthw_stat = p_nt4ga_stat->mp_nthw_stat;
227 	unsigned int i;
228 
229 	if (!p_nthw_stat || !p_nt4ga_stat || n_intf_no < 0 || n_intf_no > NUM_ADAPTER_PORTS_MAX)
230 		return -1;
231 
232 	pthread_mutex_lock(&p_nt_drv->stat_lck);
233 
234 	/* Rx */
235 	for (i = 0; i < internals->nb_rx_queues; i++) {
236 		internals->rxq_scg[i].rx_pkts = 0;
237 		internals->rxq_scg[i].rx_bytes = 0;
238 		internals->rxq_scg[i].err_pkts = 0;
239 	}
240 
241 	internals->rx_missed = 0;
242 
243 	/* Tx */
244 	for (i = 0; i < internals->nb_tx_queues; i++) {
245 		internals->txq_scg[i].tx_pkts = 0;
246 		internals->txq_scg[i].tx_bytes = 0;
247 		internals->txq_scg[i].err_pkts = 0;
248 	}
249 
250 	p_nt4ga_stat->n_totals_reset_timestamp = time(NULL);
251 
252 	pthread_mutex_unlock(&p_nt_drv->stat_lck);
253 
254 	return 0;
255 }
256 
257 static int
258 eth_link_update(struct rte_eth_dev *eth_dev, int wait_to_complete __rte_unused)
259 {
260 	const struct port_ops *port_ops = get_port_ops();
261 
262 	if (port_ops == NULL) {
263 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
264 		return -1;
265 	}
266 
267 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
268 
269 	const int n_intf_no = internals->n_intf_no;
270 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
271 
272 	if (eth_dev->data->dev_started) {
273 		const bool port_link_status = port_ops->get_link_status(p_adapter_info, n_intf_no);
274 		eth_dev->data->dev_link.link_status =
275 			port_link_status ? RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN;
276 
277 		nt_link_speed_t port_link_speed =
278 			port_ops->get_link_speed(p_adapter_info, n_intf_no);
279 		eth_dev->data->dev_link.link_speed =
280 			nt_link_speed_to_eth_speed_num(port_link_speed);
281 
282 		nt_link_duplex_t nt_link_duplex =
283 			port_ops->get_link_duplex(p_adapter_info, n_intf_no);
284 		eth_dev->data->dev_link.link_duplex = nt_link_duplex_to_eth_duplex(nt_link_duplex);
285 
286 	} else {
287 		eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
288 		eth_dev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
289 		eth_dev->data->dev_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
290 	}
291 
292 	return 0;
293 }
294 
295 static int eth_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
296 {
297 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
298 	dpdk_stats_collect(internals, stats);
299 	return 0;
300 }
301 
302 static int eth_stats_reset(struct rte_eth_dev *eth_dev)
303 {
304 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
305 	struct drv_s *p_drv = internals->p_drv;
306 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
307 	const int if_index = internals->n_intf_no;
308 	dpdk_stats_reset(internals, p_nt_drv, if_index);
309 	return 0;
310 }
311 
312 static int
313 eth_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *dev_info)
314 {
315 	const struct port_ops *port_ops = get_port_ops();
316 
317 	if (port_ops == NULL) {
318 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
319 		return -1;
320 	}
321 
322 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
323 
324 	const int n_intf_no = internals->n_intf_no;
325 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
326 
327 	dev_info->driver_name = internals->name;
328 	dev_info->max_mac_addrs = NUM_MAC_ADDRS_PER_PORT;
329 	dev_info->max_rx_pktlen = HW_MAX_PKT_LEN;
330 	dev_info->max_mtu = MAX_MTU;
331 
332 	if (p_adapter_info->fpga_info.profile == FPGA_INFO_PROFILE_INLINE) {
333 		dev_info->flow_type_rss_offloads = NT_ETH_RSS_OFFLOAD_MASK;
334 		dev_info->hash_key_size = MAX_RSS_KEY_LEN;
335 
336 		dev_info->rss_algo_capa = RTE_ETH_HASH_ALGO_CAPA_MASK(DEFAULT) |
337 			RTE_ETH_HASH_ALGO_CAPA_MASK(TOEPLITZ);
338 	}
339 
340 	if (internals->p_drv) {
341 		dev_info->max_rx_queues = internals->nb_rx_queues;
342 		dev_info->max_tx_queues = internals->nb_tx_queues;
343 
344 		dev_info->min_rx_bufsize = 64;
345 
346 		const uint32_t nt_port_speed_capa =
347 			port_ops->get_link_speed_capabilities(p_adapter_info, n_intf_no);
348 		dev_info->speed_capa = nt_link_speed_capa_to_eth_speed_capa(nt_port_speed_capa);
349 	}
350 
351 	return 0;
352 }
353 
354 static __rte_always_inline int copy_virtqueue_to_mbuf(struct rte_mbuf *mbuf,
355 	struct rte_mempool *mb_pool,
356 	struct nthw_received_packets *hw_recv,
357 	int max_segs,
358 	uint16_t data_len)
359 {
360 	int src_pkt = 0;
361 	/*
362 	 * 1. virtqueue packets may be segmented
363 	 * 2. the mbuf size may be too small and may need to be segmented
364 	 */
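	/*
	 * Copy strategy (sketch of what the loop below does): the first virtqueue
	 * buffer starts with an SG header followed by payload, any following
	 * buffers of the same packet carry payload only. Data is copied into the
	 * supplied mbuf until its tailroom is exhausted, after which extra mbufs
	 * are allocated from mb_pool and chained. The return value is the number
	 * of virtqueue segments consumed, or -1 on allocation failure or if the
	 * packet spans more than max_segs segments.
	 */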
365 	char *data = (char *)hw_recv->addr + SG_HDR_SIZE;
366 	char *dst = (char *)mbuf->buf_addr + RTE_PKTMBUF_HEADROOM;
367 
368 	/* set packet length */
369 	mbuf->pkt_len = data_len - SG_HDR_SIZE;
370 
371 	int remain = mbuf->pkt_len;
372 	/* First cpy_size is without header */
373 	int cpy_size = (data_len > SG_HW_RX_PKT_BUFFER_SIZE)
374 		? SG_HW_RX_PKT_BUFFER_SIZE - SG_HDR_SIZE
375 		: remain;
376 
377 	struct rte_mbuf *m = mbuf;	/* if mbuf segmentation is needed */
378 
379 	while (++src_pkt <= max_segs) {
380 		/* keep track of space in dst */
381 		int cpto_size = rte_pktmbuf_tailroom(m);
382 
383 		if (cpy_size > cpto_size) {
384 			int new_cpy_size = cpto_size;
385 
386 			rte_memcpy((void *)dst, (void *)data, new_cpy_size);
387 			m->data_len += new_cpy_size;
388 			remain -= new_cpy_size;
389 			cpy_size -= new_cpy_size;
390 
391 			data += new_cpy_size;
392 
393 			/*
394 			 * loop if remaining data from this virtqueue seg
395 			 * cannot fit in one extra mbuf
396 			 */
397 			do {
398 				m->next = rte_pktmbuf_alloc(mb_pool);
399 
400 				if (unlikely(!m->next))
401 					return -1;
402 
403 				m = m->next;
404 
405 				/* Headroom is not needed in chained mbufs */
406 				rte_pktmbuf_prepend(m, rte_pktmbuf_headroom(m));
407 				dst = (char *)m->buf_addr;
408 				m->data_len = 0;
409 				m->pkt_len = 0;
410 
411 				cpto_size = rte_pktmbuf_tailroom(m);
412 
413 				int actual_cpy_size =
414 					(cpy_size > cpto_size) ? cpto_size : cpy_size;
415 
416 				rte_memcpy((void *)dst, (void *)data, actual_cpy_size);
417 				m->pkt_len += actual_cpy_size;
418 				m->data_len += actual_cpy_size;
419 
420 				remain -= actual_cpy_size;
421 				cpy_size -= actual_cpy_size;
422 
423 				data += actual_cpy_size;
424 
425 				mbuf->nb_segs++;
426 
427 			} while (cpy_size && remain);
428 
429 		} else {
430 			/* all data from this virtqueue segment can fit in current mbuf */
431 			rte_memcpy((void *)dst, (void *)data, cpy_size);
432 			m->data_len += cpy_size;
433 
434 			if (mbuf->nb_segs > 1)
435 				m->pkt_len += cpy_size;
436 
437 			remain -= cpy_size;
438 		}
439 
440 		/* packet complete - all data from current virtqueue packet has been copied */
441 		if (remain == 0)
442 			break;
443 
444 		/* increment dst to data end */
445 		dst = rte_pktmbuf_mtod_offset(m, char *, m->data_len);
446 		/* prepare for next virtqueue segment */
447 		data = (char *)hw_recv[src_pkt].addr;	/* following packets are full data */
448 
449 		cpy_size = (remain > SG_HW_RX_PKT_BUFFER_SIZE) ? SG_HW_RX_PKT_BUFFER_SIZE : remain;
450 	}
451 
452 	if (src_pkt > max_segs) {
453 		NT_LOG(ERR, NTNIC,
454 			"Did not receive correct number of segments for a whole packet");
455 		return -1;
456 	}
457 
458 	return src_pkt;
459 }
460 
461 static uint16_t eth_dev_rx_scg(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
462 {
463 	unsigned int i;
464 	struct rte_mbuf *mbuf;
465 	struct ntnic_rx_queue *rx_q = queue;
466 	uint16_t num_rx = 0;
467 
468 	struct nthw_received_packets hw_recv[MAX_RX_PACKETS];
469 
470 	if (kill_pmd)
471 		return 0;
472 
473 	if (unlikely(nb_pkts == 0))
474 		return 0;
475 
476 	if (nb_pkts > MAX_RX_PACKETS)
477 		nb_pkts = MAX_RX_PACKETS;
478 
479 	uint16_t whole_pkts = 0;
480 	uint16_t hw_recv_pkt_segs = 0;
481 
482 	if (sg_ops != NULL) {
483 		hw_recv_pkt_segs =
484 			sg_ops->nthw_get_rx_packets(rx_q->vq, nb_pkts, hw_recv, &whole_pkts);
485 
486 		if (!hw_recv_pkt_segs)
487 			return 0;
488 	}
489 
490 	nb_pkts = whole_pkts;
491 
492 	int src_pkt = 0;/* from 0 to hw_recv_pkt_segs */
493 
494 	for (i = 0; i < nb_pkts; i++) {
495 		bufs[i] = rte_pktmbuf_alloc(rx_q->mb_pool);
496 
497 		if (!bufs[i]) {
498 			NT_LOG(ERR, NTNIC, "ERROR - no more mbuf buffers in mempool");
499 			goto err_exit;
500 		}
501 
502 		mbuf = bufs[i];
503 
504 		struct _pkt_hdr_rx *phdr = (struct _pkt_hdr_rx *)hw_recv[src_pkt].addr;
505 
506 		if (phdr->cap_len < SG_HDR_SIZE) {
507 			NT_LOG(ERR, NTNIC,
508 				"Packet shorter than SG header received - dropping packet");
509 			rte_pktmbuf_free(mbuf);
510 			goto err_exit;
511 		}
512 
513 		{
514 			if (phdr->cap_len <= SG_HW_RX_PKT_BUFFER_SIZE &&
515 				(phdr->cap_len - SG_HDR_SIZE) <= rte_pktmbuf_tailroom(mbuf)) {
516 				mbuf->data_len = phdr->cap_len - SG_HDR_SIZE;
517 				rte_memcpy(rte_pktmbuf_mtod(mbuf, char *),
518 					(char *)hw_recv[src_pkt].addr + SG_HDR_SIZE,
519 					mbuf->data_len);
520 
521 				mbuf->pkt_len = mbuf->data_len;
522 				src_pkt++;
523 
524 			} else {
525 				int cpy_segs = copy_virtqueue_to_mbuf(mbuf, rx_q->mb_pool,
526 						&hw_recv[src_pkt],
527 						hw_recv_pkt_segs - src_pkt,
528 						phdr->cap_len);
529 
530 				if (cpy_segs < 0) {
531 					/* Error */
532 					rte_pktmbuf_free(mbuf);
533 					goto err_exit;
534 				}
535 
536 				src_pkt += cpy_segs;
537 			}
538 
539 			num_rx++;
540 
541 			mbuf->ol_flags &= ~(RTE_MBUF_F_RX_FDIR_ID | RTE_MBUF_F_RX_FDIR);
542 			mbuf->port = (uint16_t)-1;
543 		}
544 	}
545 
546 err_exit:
547 
548 	if (sg_ops != NULL)
549 		sg_ops->nthw_release_rx_packets(rx_q->vq, hw_recv_pkt_segs);
550 
551 	return num_rx;
552 }
553 
554 static int copy_mbuf_to_virtqueue(struct nthw_cvirtq_desc *cvq_desc,
555 	uint16_t vq_descr_idx,
556 	struct nthw_memory_descriptor *vq_bufs,
557 	int max_segs,
558 	struct rte_mbuf *mbuf)
559 {
560 	/*
561 	 * 1. mbuf packet may be segmented
562 	 * 2. the virtqueue buffer size may be too small and may need to be segmented
563 	 */
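	/*
	 * Copy strategy (sketch of what the loop below does): the mbuf chain is
	 * flattened into one or more virtqueue buffers. The first buffer reserves
	 * SG_HDR_SIZE bytes for the SG header; when a buffer fills up, the current
	 * descriptor is flagged with VIRTQ_DESC_F_NEXT and chained to the next
	 * ring entry. The return value is the number of descriptors used, or -1 if
	 * more than max_segs would be required.
	 */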
564 
565 	char *data = rte_pktmbuf_mtod(mbuf, char *);
566 	char *dst = (char *)vq_bufs[vq_descr_idx].virt_addr + SG_HDR_SIZE;
567 
568 	int remain = mbuf->pkt_len;
569 	int cpy_size = mbuf->data_len;
570 
571 	struct rte_mbuf *m = mbuf;
572 	int cpto_size = SG_HW_TX_PKT_BUFFER_SIZE - SG_HDR_SIZE;
573 
574 	cvq_desc->b[vq_descr_idx].len = SG_HDR_SIZE;
575 
576 	int cur_seg_num = 0;	/* start from 0 */
577 
578 	while (m) {
579 		/* Can all data in the current src segment fit in the current dest segment? */
580 		if (cpy_size > cpto_size) {
581 			int new_cpy_size = cpto_size;
582 
583 			rte_memcpy((void *)dst, (void *)data, new_cpy_size);
584 
585 			cvq_desc->b[vq_descr_idx].len += new_cpy_size;
586 
587 			remain -= new_cpy_size;
588 			cpy_size -= new_cpy_size;
589 
590 			data += new_cpy_size;
591 
592 			/*
593 			 * Loop while the remaining data from this mbuf segment cannot
594 			 * fit in one extra virtqueue buffer
595 			 */
596 			do {
597 				vq_add_flags(cvq_desc, vq_descr_idx, VIRTQ_DESC_F_NEXT);
598 
599 				int next_vq_descr_idx = VIRTQ_DESCR_IDX_NEXT(vq_descr_idx);
600 
601 				vq_set_next(cvq_desc, vq_descr_idx, next_vq_descr_idx);
602 
603 				vq_descr_idx = next_vq_descr_idx;
604 
605 				vq_set_flags(cvq_desc, vq_descr_idx, 0);
606 				vq_set_next(cvq_desc, vq_descr_idx, 0);
607 
608 				if (++cur_seg_num > max_segs)
609 					break;
610 
611 				dst = (char *)vq_bufs[vq_descr_idx].virt_addr;
612 				cpto_size = SG_HW_TX_PKT_BUFFER_SIZE;
613 
614 				int actual_cpy_size =
615 					(cpy_size > cpto_size) ? cpto_size : cpy_size;
616 				rte_memcpy((void *)dst, (void *)data, actual_cpy_size);
617 
618 				cvq_desc->b[vq_descr_idx].len = actual_cpy_size;
619 
620 				remain -= actual_cpy_size;
621 				cpy_size -= actual_cpy_size;
622 				cpto_size -= actual_cpy_size;
623 
624 				data += actual_cpy_size;
625 
626 			} while (cpy_size && remain);
627 
628 		} else {
629 			/* All data from this segment can fit in current virtqueue buffer */
630 			rte_memcpy((void *)dst, (void *)data, cpy_size);
631 
632 			cvq_desc->b[vq_descr_idx].len += cpy_size;
633 
634 			remain -= cpy_size;
635 			cpto_size -= cpy_size;
636 		}
637 
638 		/* Packet complete - all segments from the current mbuf have been copied */
639 		if (remain == 0)
640 			break;
641 
642 		/* increment dst to data end */
643 		dst = (char *)vq_bufs[vq_descr_idx].virt_addr + cvq_desc->b[vq_descr_idx].len;
644 
645 		m = m->next;
646 
647 		if (!m) {
648 			NT_LOG(ERR, NTNIC, "ERROR: invalid packet size");
649 			break;
650 		}
651 
652 		/* Prepare for next mbuf segment */
653 		data = rte_pktmbuf_mtod(m, char *);
654 		cpy_size = m->data_len;
655 	}
656 
657 	cur_seg_num++;
658 
659 	if (cur_seg_num > max_segs) {
660 		NT_LOG(ERR, NTNIC,
661 			"Did not receive correct number of segments for a whole packet");
662 		return -1;
663 	}
664 
665 	return cur_seg_num;
666 }
667 
668 static uint16_t eth_dev_tx_scg(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
669 {
670 	uint16_t pkt;
671 	uint16_t first_vq_descr_idx = 0;
672 
673 	struct nthw_cvirtq_desc cvq_desc;
674 
675 	struct nthw_memory_descriptor *vq_bufs;
676 
677 	struct ntnic_tx_queue *tx_q = queue;
678 
679 	int nb_segs = 0, i;
680 	int pkts_sent = 0;
681 	uint16_t nb_segs_arr[MAX_TX_PACKETS];
682 
683 	if (kill_pmd)
684 		return 0;
685 
686 	if (nb_pkts > MAX_TX_PACKETS)
687 		nb_pkts = MAX_TX_PACKETS;
688 
689 	/*
690 	 * count all segments needed to contain all packets in vq buffers
691 	 */
692 	for (i = 0; i < nb_pkts; i++) {
693 		/* build the num segments array for segmentation control and release function */
694 		int vq_segs = NUM_VQ_SEGS(bufs[i]->pkt_len);
695 		nb_segs_arr[i] = vq_segs;
696 		nb_segs += vq_segs;
697 	}
698 
699 	if (!nb_segs)
700 		goto exit_out;
701 
702 	if (sg_ops == NULL)
703 		goto exit_out;
704 
705 	int got_nb_segs = sg_ops->nthw_get_tx_packets(tx_q->vq, nb_segs, &first_vq_descr_idx,
706 			&cvq_desc /*&vq_descr,*/, &vq_bufs);
707 
708 	if (!got_nb_segs)
709 		goto exit_out;
710 
711 	/*
712 	 * We may get fewer vq buffers than we asked for;
713 	 * calculate the last whole packet that can fit into
714 	 * what we actually got.
715 	 */
716 	while (got_nb_segs < nb_segs) {
717 		if (!--nb_pkts)
718 			goto exit_out;
719 
720 		nb_segs -= NUM_VQ_SEGS(bufs[nb_pkts]->pkt_len);
721 
722 		if (nb_segs <= 0)
723 			goto exit_out;
724 	}
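	/*
	 * Illustrative example: if 10 packets need 16 segments in total but the
	 * queue only granted 13 descriptors, packets are dropped from the tail of
	 * the burst until the remaining segment count fits; the caller sees a
	 * lower return count and may retry the untransmitted packets later.
	 */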
725 
726 	/*
727 	 * nb_pkts & nb_segs, got it all, ready to copy
728 	 */
729 	int seg_idx = 0;
730 	int last_seg_idx = seg_idx;
731 
732 	for (pkt = 0; pkt < nb_pkts; ++pkt) {
733 		uint16_t vq_descr_idx = VIRTQ_DESCR_IDX(seg_idx);
734 
735 		vq_set_flags(&cvq_desc, vq_descr_idx, 0);
736 		vq_set_next(&cvq_desc, vq_descr_idx, 0);
737 
738 		if (bufs[pkt]->nb_segs == 1 && nb_segs_arr[pkt] == 1) {
739 			rte_memcpy((void *)((char *)vq_bufs[vq_descr_idx].virt_addr + SG_HDR_SIZE),
740 				rte_pktmbuf_mtod(bufs[pkt], void *), bufs[pkt]->pkt_len);
741 
742 			cvq_desc.b[vq_descr_idx].len = bufs[pkt]->pkt_len + SG_HDR_SIZE;
743 
744 			seg_idx++;
745 
746 		} else {
747 			int cpy_segs = copy_mbuf_to_virtqueue(&cvq_desc, vq_descr_idx, vq_bufs,
748 					nb_segs - last_seg_idx, bufs[pkt]);
749 
750 			if (cpy_segs < 0)
751 				break;
752 
753 			seg_idx += cpy_segs;
754 		}
755 
756 		last_seg_idx = seg_idx;
757 		rte_pktmbuf_free(bufs[pkt]);
758 		pkts_sent++;
759 	}
760 
761 exit_out:
762 
763 	if (sg_ops != NULL) {
764 		if (pkts_sent)
765 			sg_ops->nthw_release_tx_packets(tx_q->vq, pkts_sent, nb_segs_arr);
766 	}
767 
768 	return pkts_sent;
769 }
770 
771 static int allocate_hw_virtio_queues(struct rte_eth_dev *eth_dev, int vf_num, struct hwq_s *hwq,
772 	int num_descr, int buf_size)
773 {
774 	int i, res;
775 	uint32_t size;
776 	uint64_t iova_addr;
777 
778 	NT_LOG(DBG, NTNIC, "***** Configure IOMMU for HW queues on VF %i *****", vf_num);
779 
780 	/* Allocate 1MB for the combined descr rings plus room for all packet buffers */
781 	uint64_t tot_alloc_size = 0x100000 + buf_size * num_descr;
782 
783 	void *virt =
784 		rte_malloc_socket("VirtQDescr", tot_alloc_size, nt_util_align_size(tot_alloc_size),
785 			eth_dev->data->numa_node);
786 
787 	if (!virt)
788 		return -1;
789 
790 	uint64_t gp_offset = (uint64_t)virt & ONE_G_MASK;
791 	rte_iova_t hpa = rte_malloc_virt2iova(virt);
792 
793 	NT_LOG(DBG, NTNIC, "Allocated virtio descr rings : virt "
794 		"%p [0x%" PRIX64 "],hpa %" PRIX64 " [0x%" PRIX64 "]",
795 		virt, gp_offset, hpa, hpa & ONE_G_MASK);
796 
797 	/*
798 	 * Same offset on both HPA and IOVA
799 	 * Make sure 1G boundary is never crossed
800 	 */
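	/*
	 * Rationale (assumption drawn from the mapping below): the whole region is
	 * later VFIO-mapped as one ONE_G_SIZE (1 GB) window and addresses are used
	 * relative to that window, which only works if the virtual allocation and
	 * its IOVA share the same offset within a 1 GB aligned block and the
	 * allocation never straddles a 1 GB boundary. Otherwise the code falls
	 * back to the smaller, separately mapped layout handled just below.
	 */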
801 	if (((hpa & ONE_G_MASK) != gp_offset) ||
802 		(((uint64_t)virt + tot_alloc_size) & ~ONE_G_MASK) !=
803 		((uint64_t)virt & ~ONE_G_MASK)) {
804 		NT_LOG(ERR, NTNIC, "*********************************************************");
805 		NT_LOG(ERR, NTNIC, "ERROR, no optimal IOMMU mapping available hpa: %016" PRIX64
806 			"(%016" PRIX64 "), gp_offset: %016" PRIX64 " size: %" PRIu64,
807 			hpa, hpa & ONE_G_MASK, gp_offset, tot_alloc_size);
808 		NT_LOG(ERR, NTNIC, "*********************************************************");
809 
810 		rte_free(virt);
811 
812 		/* Just allocate 1MB to hold all combined descr rings */
813 		size = 0x100000;
814 		void *virt = rte_malloc_socket("VirtQDescr", size, 4096, eth_dev->data->numa_node);
815 
816 		if (!virt)
817 			return -1;
818 
819 		res = nt_vfio_dma_map(vf_num, virt, &iova_addr, size);
820 
821 		NT_LOG(DBG, NTNIC, "VFIO MMAP res %i, vf_num %i", res, vf_num);
822 
823 		if (res != 0)
824 			return -1;
825 
826 		hwq->vf_num = vf_num;
827 		hwq->virt_queues_ctrl.virt_addr = virt;
828 		hwq->virt_queues_ctrl.phys_addr = (void *)iova_addr;
829 		hwq->virt_queues_ctrl.len = size;
830 
831 		NT_LOG(DBG, NTNIC,
832 			"Allocated for virtio descr rings combined 1MB : %p, IOVA %016" PRIX64 "",
833 			virt, iova_addr);
834 
835 		size = num_descr * sizeof(struct nthw_memory_descriptor);
836 		hwq->pkt_buffers =
837 			rte_zmalloc_socket("rx_pkt_buffers", size, 64, eth_dev->data->numa_node);
838 
839 		if (!hwq->pkt_buffers) {
840 			NT_LOG(ERR, NTNIC,
841 				"Failed to allocate buffer array for hw-queue %p, total size %i, elements %i",
842 				hwq->pkt_buffers, size, num_descr);
843 			rte_free(virt);
844 			return -1;
845 		}
846 
847 		size = buf_size * num_descr;
848 		void *virt_addr =
849 			rte_malloc_socket("pkt_buffer_pkts", size, 4096, eth_dev->data->numa_node);
850 
851 		if (!virt_addr) {
852 			NT_LOG(ERR, NTNIC,
853 				"Failed to allocate packet buffers for hw-queue %p, buf size %i, elements %i",
854 				hwq->pkt_buffers, buf_size, num_descr);
855 			rte_free(hwq->pkt_buffers);
856 			rte_free(virt);
857 			return -1;
858 		}
859 
860 		res = nt_vfio_dma_map(vf_num, virt_addr, &iova_addr, size);
861 
862 		NT_LOG(DBG, NTNIC,
863 			"VFIO MMAP res %i, virt %p, iova %016" PRIX64 ", vf_num %i, num pkt bufs %i, tot size %i",
864 			res, virt_addr, iova_addr, vf_num, num_descr, size);
865 
866 		if (res != 0)
867 			return -1;
868 
869 		for (i = 0; i < num_descr; i++) {
870 			hwq->pkt_buffers[i].virt_addr =
871 				(void *)((char *)virt_addr + ((uint64_t)(i) * buf_size));
872 			hwq->pkt_buffers[i].phys_addr =
873 				(void *)(iova_addr + ((uint64_t)(i) * buf_size));
874 			hwq->pkt_buffers[i].len = buf_size;
875 		}
876 
877 		return 0;
878 	}	/* End of: no optimal IOMMU mapping available */
879 
880 	res = nt_vfio_dma_map(vf_num, virt, &iova_addr, ONE_G_SIZE);
881 
882 	if (res != 0) {
883 		NT_LOG(ERR, NTNIC, "VFIO MMAP FAILED! res %i, vf_num %i", res, vf_num);
884 		return -1;
885 	}
886 
887 	hwq->vf_num = vf_num;
888 	hwq->virt_queues_ctrl.virt_addr = virt;
889 	hwq->virt_queues_ctrl.phys_addr = (void *)(iova_addr);
890 	hwq->virt_queues_ctrl.len = 0x100000;
891 	iova_addr += 0x100000;
892 
893 	NT_LOG(DBG, NTNIC,
894 		"VFIO MMAP: virt_addr=%p phys_addr=%p size=%" PRIX32 " hpa=%" PRIX64 "",
895 		hwq->virt_queues_ctrl.virt_addr, hwq->virt_queues_ctrl.phys_addr,
896 		hwq->virt_queues_ctrl.len, rte_malloc_virt2iova(hwq->virt_queues_ctrl.virt_addr));
897 
898 	size = num_descr * sizeof(struct nthw_memory_descriptor);
899 	hwq->pkt_buffers =
900 		rte_zmalloc_socket("rx_pkt_buffers", size, 64, eth_dev->data->numa_node);
901 
902 	if (!hwq->pkt_buffers) {
903 		NT_LOG(ERR, NTNIC,
904 			"Failed to allocate buffer array for hw-queue %p, total size %i, elements %i",
905 			hwq->pkt_buffers, size, num_descr);
906 		rte_free(virt);
907 		return -1;
908 	}
909 
910 	void *virt_addr = (void *)((uint64_t)virt + 0x100000);
911 
912 	for (i = 0; i < num_descr; i++) {
913 		hwq->pkt_buffers[i].virt_addr =
914 			(void *)((char *)virt_addr + ((uint64_t)(i) * buf_size));
915 		hwq->pkt_buffers[i].phys_addr = (void *)(iova_addr + ((uint64_t)(i) * buf_size));
916 		hwq->pkt_buffers[i].len = buf_size;
917 	}
918 
919 	return 0;
920 }
921 
922 static void release_hw_virtio_queues(struct hwq_s *hwq)
923 {
924 	if (!hwq || hwq->vf_num == 0)
925 		return;
926 
927 	hwq->vf_num = 0;
928 }
929 
930 static int deallocate_hw_virtio_queues(struct hwq_s *hwq)
931 {
932 	int vf_num = hwq->vf_num;
933 
934 	void *virt = hwq->virt_queues_ctrl.virt_addr;
935 
936 	int res = nt_vfio_dma_unmap(vf_num, hwq->virt_queues_ctrl.virt_addr,
937 			(uint64_t)hwq->virt_queues_ctrl.phys_addr, ONE_G_SIZE);
938 
939 	if (res != 0) {
940 		NT_LOG(ERR, NTNIC, "VFIO UNMAP FAILED! res %i, vf_num %i", res, vf_num);
941 		return -1;
942 	}
943 
944 	release_hw_virtio_queues(hwq);
945 	rte_free(hwq->pkt_buffers);
946 	rte_free(virt);
947 	return 0;
948 }
949 
950 static void eth_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t queue_id)
951 {
952 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
953 	struct ntnic_tx_queue *tx_q = &internals->txq_scg[queue_id];
954 	deallocate_hw_virtio_queues(&tx_q->hwq);
955 }
956 
957 static void eth_rx_queue_release(struct rte_eth_dev *eth_dev, uint16_t queue_id)
958 {
959 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
960 	struct ntnic_rx_queue *rx_q = &internals->rxq_scg[queue_id];
961 	deallocate_hw_virtio_queues(&rx_q->hwq);
962 }
963 
964 static int num_queues_alloced;
965 
966 /* Returns the first queue number of the newly allocated range, or -1 on failure */
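/*
 * Note: this is a simple monotonically increasing allocator; queue numbers are
 * never returned to the pool in this file, so repeated allocations eventually
 * exhaust MAX_TOTAL_QUEUES (128).
 */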
967 static int allocate_queue(int num)
968 {
969 	int next_free = num_queues_alloced;
970 	NT_LOG_DBGX(DBG, NTNIC, "num_queues_alloced=%u, New queues=%u, Max queues=%u",
971 		num_queues_alloced, num, MAX_TOTAL_QUEUES);
972 
973 	if (num_queues_alloced + num > MAX_TOTAL_QUEUES)
974 		return -1;
975 
976 	num_queues_alloced += num;
977 	return next_free;
978 }
979 
980 static int eth_rx_scg_queue_setup(struct rte_eth_dev *eth_dev,
981 	uint16_t rx_queue_id,
982 	uint16_t nb_rx_desc __rte_unused,
983 	unsigned int socket_id __rte_unused,
984 	const struct rte_eth_rxconf *rx_conf __rte_unused,
985 	struct rte_mempool *mb_pool)
986 {
987 	NT_LOG_DBGX(DBG, NTNIC, "Rx queue setup");
988 	struct rte_pktmbuf_pool_private *mbp_priv;
989 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
990 	struct ntnic_rx_queue *rx_q = &internals->rxq_scg[rx_queue_id];
991 	struct drv_s *p_drv = internals->p_drv;
992 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
993 
994 	if (sg_ops == NULL) {
995 		NT_LOG_DBGX(DBG, NTNIC, "SG module is not initialized");
996 		return 0;
997 	}
998 
999 	if (internals->type == PORT_TYPE_OVERRIDE) {
1000 		rx_q->mb_pool = mb_pool;
1001 		eth_dev->data->rx_queues[rx_queue_id] = rx_q;
1002 		mbp_priv = rte_mempool_get_priv(rx_q->mb_pool);
1003 		rx_q->buf_size = (uint16_t)(mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
1004 		rx_q->enabled = 1;
1005 		return 0;
1006 	}
1007 
1008 	NT_LOG(DBG, NTNIC, "(%i) NTNIC RX OVS-SW queue setup: queue id %i, hw queue index %i",
1009 		internals->port, rx_queue_id, rx_q->queue.hw_id);
1010 
1011 	rx_q->mb_pool = mb_pool;
1012 
1013 	eth_dev->data->rx_queues[rx_queue_id] = rx_q;
1014 
1015 	mbp_priv = rte_mempool_get_priv(rx_q->mb_pool);
1016 	rx_q->buf_size = (uint16_t)(mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
1017 	rx_q->enabled = 1;
1018 
1019 	if (allocate_hw_virtio_queues(eth_dev, EXCEPTION_PATH_HID, &rx_q->hwq,
1020 			SG_NB_HW_RX_DESCRIPTORS, SG_HW_RX_PKT_BUFFER_SIZE) < 0)
1021 		return -1;
1022 
1023 	rx_q->nb_hw_rx_descr = SG_NB_HW_RX_DESCRIPTORS;
1024 
1025 	rx_q->profile = p_drv->ntdrv.adapter_info.fpga_info.profile;
1026 
1027 	rx_q->vq =
1028 		sg_ops->nthw_setup_mngd_rx_virt_queue(p_nt_drv->adapter_info.fpga_info.mp_nthw_dbs,
1029 			rx_q->queue.hw_id,	/* index */
1030 			rx_q->nb_hw_rx_descr,
1031 			EXCEPTION_PATH_HID,	/* host_id */
1032 			1,	/* header NT DVIO header for exception path */
1033 			&rx_q->hwq.virt_queues_ctrl,
1034 			rx_q->hwq.pkt_buffers,
1035 			SPLIT_RING,
1036 			-1);
1037 
1038 	NT_LOG(DBG, NTNIC, "(%i) NTNIC RX OVS-SW queues successfully setup", internals->port);
1039 
1040 	return 0;
1041 }
1042 
1043 static int eth_tx_scg_queue_setup(struct rte_eth_dev *eth_dev,
1044 	uint16_t tx_queue_id,
1045 	uint16_t nb_tx_desc __rte_unused,
1046 	unsigned int socket_id __rte_unused,
1047 	const struct rte_eth_txconf *tx_conf __rte_unused)
1048 {
1049 	const struct port_ops *port_ops = get_port_ops();
1050 
1051 	if (port_ops == NULL) {
1052 		NT_LOG_DBGX(ERR, NTNIC, "Link management module uninitialized");
1053 		return -1;
1054 	}
1055 
1056 	NT_LOG_DBGX(DBG, NTNIC, "Tx queue setup");
1057 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1058 	struct drv_s *p_drv = internals->p_drv;
1059 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
1060 	struct ntnic_tx_queue *tx_q = &internals->txq_scg[tx_queue_id];
1061 
1062 	if (internals->type == PORT_TYPE_OVERRIDE) {
1063 		eth_dev->data->tx_queues[tx_queue_id] = tx_q;
1064 		return 0;
1065 	}
1066 
1067 	if (sg_ops == NULL) {
1068 		NT_LOG_DBGX(DBG, NTNIC, "SG module is not initialized");
1069 		return 0;
1070 	}
1071 
1072 	NT_LOG(DBG, NTNIC, "(%i) NTNIC TX OVS-SW queue setup: queue id %i, hw queue index %i",
1073 		tx_q->port, tx_queue_id, tx_q->queue.hw_id);
1074 
1075 	if (tx_queue_id >= internals->nb_tx_queues) {
1076 		NT_LOG(ERR, NTNIC, "Error invalid tx queue id");
1077 		return -1;
1078 	}
1079 
1080 	eth_dev->data->tx_queues[tx_queue_id] = tx_q;
1081 
1082 	/* Calculate target ID for HW  - to be used in NTDVIO0 header bypass_port */
1083 	if (tx_q->rss_target_id >= 0) {
1084 		/* bypass to a multiqueue port - qsl-hsh index */
1085 		tx_q->target_id = tx_q->rss_target_id + 0x90;
1086 
1087 	} else if (internals->vpq[tx_queue_id].hw_id > -1) {
1088 		/* virtual port - queue index */
1089 		tx_q->target_id = internals->vpq[tx_queue_id].hw_id;
1090 
1091 	} else {
1092 		/* Phy port - phy port identifier */
1093 		/* output/bypass to MAC */
1094 		tx_q->target_id = (int)(tx_q->port + 0x80);
1095 	}
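	/*
	 * The bases used above (0x90 for a QSL/HSH bypass index, 0x80 for a
	 * physical MAC port) are taken as-is from the existing code and are
	 * assumed to be fixed target-ID encodings understood by the FPGA.
	 */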
1096 
1097 	if (allocate_hw_virtio_queues(eth_dev, EXCEPTION_PATH_HID, &tx_q->hwq,
1098 			SG_NB_HW_TX_DESCRIPTORS, SG_HW_TX_PKT_BUFFER_SIZE) < 0) {
1099 		return -1;
1100 	}
1101 
1102 	tx_q->nb_hw_tx_descr = SG_NB_HW_TX_DESCRIPTORS;
1103 
1104 	tx_q->profile = p_drv->ntdrv.adapter_info.fpga_info.profile;
1105 
1106 	uint32_t port, header;
1107 	port = tx_q->port;	/* transmit port */
1108 	header = 0;	/* header type VirtIO-Net */
1109 
1110 	tx_q->vq =
1111 		sg_ops->nthw_setup_mngd_tx_virt_queue(p_nt_drv->adapter_info.fpga_info.mp_nthw_dbs,
1112 			tx_q->queue.hw_id,	/* index */
1113 			tx_q->nb_hw_tx_descr,	/* queue size */
1114 			EXCEPTION_PATH_HID,	/* host_id always VF4 */
1115 			port,
1116 			/*
1117 			 * in_port - in vswitch mode the OVS exception
1118 			 * TX port has to be moved away from the VM TX
1119 			 * port, because QoS is matched by port id
1120 			 * (hence the +128 offset below)
1121 			 */
1122 			tx_q->port + 128,
1123 			header,
1124 			&tx_q->hwq.virt_queues_ctrl,
1125 			tx_q->hwq.pkt_buffers,
1126 			SPLIT_RING,
1127 			-1,
1128 			IN_ORDER);
1129 
1130 	tx_q->enabled = 1;
1131 
1132 	NT_LOG(DBG, NTNIC, "(%i) NTNIC TX OVS-SW queues successfully setup", internals->port);
1133 
1134 	if (internals->type == PORT_TYPE_PHYSICAL) {
1135 		struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
1136 		NT_LOG(DBG, NTNIC, "Port %i is ready for data. Enable port",
1137 			internals->n_intf_no);
1138 		port_ops->set_adm_state(p_adapter_info, internals->n_intf_no, true);
1139 	}
1140 
1141 	return 0;
1142 }
1143 
1144 static int eth_rx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1145 {
1146 	eth_dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
1147 	return 0;
1148 }
1149 
1150 static int eth_rx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1151 {
1152 	eth_dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
1153 	return 0;
1154 }
1155 
1156 static int eth_tx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1157 {
1158 	eth_dev->data->tx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
1159 	return 0;
1160 }
1161 
1162 static int eth_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1163 {
1164 	eth_dev->data->tx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
1165 	return 0;
1166 }
1167 
1168 static int
1169 eth_mac_addr_add(struct rte_eth_dev *eth_dev,
1170 	struct rte_ether_addr *mac_addr,
1171 	uint32_t index,
1172 	uint32_t vmdq __rte_unused)
1173 {
1174 	struct rte_ether_addr *const eth_addrs = eth_dev->data->mac_addrs;
1175 
1176 	assert(index < NUM_MAC_ADDRS_PER_PORT);
1177 
1178 	if (index >= NUM_MAC_ADDRS_PER_PORT) {
1179 		const struct pmd_internals *const internals =
1180 			(struct pmd_internals *)eth_dev->data->dev_private;
1181 		NT_LOG_DBGX(DBG, NTNIC, "Port %i: illegal index %u (>= %u)",
1182 			internals->n_intf_no, index, NUM_MAC_ADDRS_PER_PORT);
1183 		return -1;
1184 	}
1185 
1186 	eth_addrs[index] = *mac_addr;
1187 
1188 	return 0;
1189 }
1190 
1191 static int
1192 eth_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1193 {
1194 	struct rte_ether_addr *const eth_addrs = dev->data->mac_addrs;
1195 
1196 	eth_addrs[0U] = *mac_addr;
1197 
1198 	return 0;
1199 }
1200 
1201 static int
1202 eth_set_mc_addr_list(struct rte_eth_dev *eth_dev,
1203 	struct rte_ether_addr *mc_addr_set,
1204 	uint32_t nb_mc_addr)
1205 {
1206 	struct pmd_internals *const internals = (struct pmd_internals *)eth_dev->data->dev_private;
1207 	struct rte_ether_addr *const mc_addrs = internals->mc_addrs;
1208 	size_t i;
1209 
1210 	if (nb_mc_addr >= NUM_MULTICAST_ADDRS_PER_PORT) {
1211 		NT_LOG_DBGX(DBG, NTNIC,
1212 			"Port %i: too many multicast addresses %u (>= %u)",
1213 			internals->n_intf_no, nb_mc_addr, NUM_MULTICAST_ADDRS_PER_PORT);
1214 		return -1;
1215 	}
1216 
1217 	for (i = 0U; i < NUM_MULTICAST_ADDRS_PER_PORT; i++)
1218 		if (i < nb_mc_addr)
1219 			mc_addrs[i] = mc_addr_set[i];
1220 
1221 		else
1222 			(void)memset(&mc_addrs[i], 0, sizeof(mc_addrs[i]));
1223 
1224 	return 0;
1225 }
1226 
1227 static int
1228 eth_dev_configure(struct rte_eth_dev *eth_dev)
1229 {
1230 	NT_LOG_DBGX(DBG, NTNIC, "Called for eth_dev %p", eth_dev);
1231 
1232 	/* The device is ALWAYS running promiscuous mode. */
1233 	eth_dev->data->promiscuous = 1;
1234 	return 0;
1235 }
1236 
1237 static int
1238 eth_dev_start(struct rte_eth_dev *eth_dev)
1239 {
1240 	const struct port_ops *port_ops = get_port_ops();
1241 
1242 	if (port_ops == NULL) {
1243 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
1244 		return -1;
1245 	}
1246 
1247 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1248 
1249 	const int n_intf_no = internals->n_intf_no;
1250 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
1251 
1252 	NT_LOG_DBGX(DBG, NTNIC, "Port %u", internals->n_intf_no);
1253 
1254 	/* Start queues */
1255 	uint q;
1256 
1257 	for (q = 0; q < internals->nb_rx_queues; q++)
1258 		eth_rx_queue_start(eth_dev, q);
1259 
1260 	for (q = 0; q < internals->nb_tx_queues; q++)
1261 		eth_tx_queue_start(eth_dev, q);
1262 
1263 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE) {
1264 		eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
1265 
1266 	} else {
1267 		/* Enable the port */
1268 		port_ops->set_adm_state(p_adapter_info, internals->n_intf_no, true);
1269 
1270 		/*
1271 		 * wait for link on port
1272 		 * If the application starts sending before the FPGA port is ready, garbage is
1273 		 * produced
1274 		 */
1275 		int loop = 0;
1276 
1277 		while (port_ops->get_link_status(p_adapter_info, n_intf_no) == RTE_ETH_LINK_DOWN) {
1278 			/* break out after 5 sec */
1279 			if (++loop >= 50) {
1280 				NT_LOG_DBGX(DBG, NTNIC,
1281 					"TIMEOUT No link on port %i (5sec timeout)",
1282 					internals->n_intf_no);
1283 				break;
1284 			}
1285 
1286 			nt_os_wait_usec(100 * 1000);
1287 		}
1288 
1289 		if (internals->lpbk_mode) {
1290 			if (internals->lpbk_mode & 1 << 0) {
1291 				port_ops->set_loopback_mode(p_adapter_info, n_intf_no,
1292 					NT_LINK_LOOPBACK_HOST);
1293 			}
1294 
1295 			if (internals->lpbk_mode & 1 << 1) {
1296 				port_ops->set_loopback_mode(p_adapter_info, n_intf_no,
1297 					NT_LINK_LOOPBACK_LINE);
1298 			}
1299 		}
1300 	}
1301 
1302 	return 0;
1303 }
1304 
1305 static int
1306 eth_dev_stop(struct rte_eth_dev *eth_dev)
1307 {
1308 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1309 
1310 	NT_LOG_DBGX(DBG, NTNIC, "Port %u", internals->n_intf_no);
1311 
1312 	if (internals->type != PORT_TYPE_VIRTUAL) {
1313 		uint q;
1314 
1315 		for (q = 0; q < internals->nb_rx_queues; q++)
1316 			eth_rx_queue_stop(eth_dev, q);
1317 
1318 		for (q = 0; q < internals->nb_tx_queues; q++)
1319 			eth_tx_queue_stop(eth_dev, q);
1320 	}
1321 
1322 	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
1323 	return 0;
1324 }
1325 
1326 static int
1327 eth_dev_set_link_up(struct rte_eth_dev *eth_dev)
1328 {
1329 	const struct port_ops *port_ops = get_port_ops();
1330 
1331 	if (port_ops == NULL) {
1332 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
1333 		return -1;
1334 	}
1335 
1336 	struct pmd_internals *const internals = (struct pmd_internals *)eth_dev->data->dev_private;
1337 
1338 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
1339 	const int port = internals->n_intf_no;
1340 
1341 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE)
1342 		return 0;
1343 
1344 	assert(port >= 0 && port < NUM_ADAPTER_PORTS_MAX);
1345 	assert(port == internals->n_intf_no);
1346 
1347 	port_ops->set_adm_state(p_adapter_info, port, true);
1348 
1349 	return 0;
1350 }
1351 
1352 static int
1353 eth_dev_set_link_down(struct rte_eth_dev *eth_dev)
1354 {
1355 	const struct port_ops *port_ops = get_port_ops();
1356 
1357 	if (port_ops == NULL) {
1358 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
1359 		return -1;
1360 	}
1361 
1362 	struct pmd_internals *const internals = (struct pmd_internals *)eth_dev->data->dev_private;
1363 
1364 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
1365 	const int port = internals->n_intf_no;
1366 
1367 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE)
1368 		return 0;
1369 
1370 	assert(port >= 0 && port < NUM_ADAPTER_PORTS_MAX);
1371 	assert(port == internals->n_intf_no);
1372 
1373 	port_ops->set_link_status(p_adapter_info, port, false);
1374 
1375 	return 0;
1376 }
1377 
1378 static void
1379 drv_deinit(struct drv_s *p_drv)
1380 {
1381 	const struct profile_inline_ops *profile_inline_ops = get_profile_inline_ops();
1382 
1383 	if (profile_inline_ops == NULL) {
1384 		NT_LOG_DBGX(ERR, NTNIC, "profile_inline module uninitialized");
1385 		return;
1386 	}
1387 
1388 	const struct adapter_ops *adapter_ops = get_adapter_ops();
1389 
1390 	if (adapter_ops == NULL) {
1391 		NT_LOG(ERR, NTNIC, "Adapter module uninitialized");
1392 		return;
1393 	}
1394 
1395 	if (p_drv == NULL)
1396 		return;
1397 
1398 	ntdrv_4ga_t *p_nt_drv = &p_drv->ntdrv;
1399 	fpga_info_t *fpga_info = &p_nt_drv->adapter_info.fpga_info;
1400 
1401 	/*
1402 	 * Mark the global pdrv as cleared. Used by some threads to terminate.
1403 	 * Wait 1 second to give the threads a chance to see the termination.
1404 	 */
1405 	clear_pdrv(p_drv);
1406 	nt_os_wait_usec(1000000);
1407 
1408 	/* stop statistics threads */
1409 	p_drv->ntdrv.b_shutdown = true;
1410 
1411 	if (fpga_info->profile == FPGA_INFO_PROFILE_INLINE) {
1412 		THREAD_JOIN(p_nt_drv->flm_thread);
1413 		profile_inline_ops->flm_free_queues();
1414 	}
1415 
1416 	/* stop adapter */
1417 	adapter_ops->deinit(&p_nt_drv->adapter_info);
1418 
1419 	/* clean memory */
1420 	rte_free(p_drv);
1421 	p_drv = NULL;
1422 }
1423 
1424 static int
1425 eth_dev_close(struct rte_eth_dev *eth_dev)
1426 {
1427 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1428 	struct drv_s *p_drv = internals->p_drv;
1429 
1430 	if (internals->type != PORT_TYPE_VIRTUAL) {
1431 		struct ntnic_rx_queue *rx_q = internals->rxq_scg;
1432 		struct ntnic_tx_queue *tx_q = internals->txq_scg;
1433 
1434 		uint q;
1435 
1436 		if (sg_ops != NULL) {
1437 			for (q = 0; q < internals->nb_rx_queues; q++)
1438 				sg_ops->nthw_release_mngd_rx_virt_queue(rx_q[q].vq);
1439 
1440 			for (q = 0; q < internals->nb_tx_queues; q++)
1441 				sg_ops->nthw_release_mngd_tx_virt_queue(tx_q[q].vq);
1442 		}
1443 	}
1444 
1445 	internals->p_drv = NULL;
1446 
1447 	if (p_drv) {
1448 		/* decrease initialized ethernet devices */
1449 		p_drv->n_eth_dev_init_count--;
1450 
1451 		/*
1452 		 * rte_pci_dev has no private member for p_drv
1453 		 * wait until all rte_eth_dev's are closed - then close adapters via p_drv
1454 		 */
1455 		if (!p_drv->n_eth_dev_init_count)
1456 			drv_deinit(p_drv);
1457 	}
1458 
1459 	return 0;
1460 }
1461 
1462 static int
1463 eth_fw_version_get(struct rte_eth_dev *eth_dev, char *fw_version, size_t fw_size)
1464 {
1465 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1466 
1467 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE)
1468 		return 0;
1469 
1470 	fpga_info_t *fpga_info = &internals->p_drv->ntdrv.adapter_info.fpga_info;
1471 	const int length = snprintf(fw_version, fw_size, "%03d-%04d-%02d-%02d",
1472 			fpga_info->n_fpga_type_id, fpga_info->n_fpga_prod_id,
1473 			fpga_info->n_fpga_ver_id, fpga_info->n_fpga_rev_id);
1474 
1475 	if ((size_t)length < fw_size) {
1476 		/* We have space for the version string */
1477 		return 0;
1478 
1479 	} else {
1480 		/* We do not have space for the version string - return the needed space */
1481 		return length + 1;
1482 	}
1483 }
1484 
1485 static int dev_flow_ops_get(struct rte_eth_dev *dev __rte_unused, const struct rte_flow_ops **ops)
1486 {
1487 	*ops = get_dev_flow_ops();
1488 	return 0;
1489 }
1490 
1491 static int
1492 promiscuous_enable(struct rte_eth_dev __rte_unused(*dev))
1493 {
1494 	NT_LOG(DBG, NTHW, "The device always runs in promiscuous mode");
1495 	return 0;
1496 }
1497 
1498 static int eth_dev_rss_hash_update(struct rte_eth_dev *eth_dev, struct rte_eth_rss_conf *rss_conf)
1499 {
1500 	const struct flow_filter_ops *flow_filter_ops = get_flow_filter_ops();
1501 
1502 	if (flow_filter_ops == NULL) {
1503 		NT_LOG_DBGX(ERR, NTNIC, "flow_filter module uninitialized");
1504 		return -1;
1505 	}
1506 
1507 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1508 
1509 	struct flow_nic_dev *ndev = internals->flw_dev->ndev;
1510 	struct nt_eth_rss_conf tmp_rss_conf = { 0 };
1511 	const int hsh_idx = 0;	/* hsh index 0 means the default recipe in the HSH module */
1512 
1513 	if (rss_conf->rss_key != NULL) {
1514 		if (rss_conf->rss_key_len > MAX_RSS_KEY_LEN) {
1515 			NT_LOG(ERR, NTNIC,
1516 				"ERROR: - RSS hash key length %u exceeds maximum value %u",
1517 				rss_conf->rss_key_len, MAX_RSS_KEY_LEN);
1518 			return -1;
1519 		}
1520 
1521 		rte_memcpy(&tmp_rss_conf.rss_key, rss_conf->rss_key, rss_conf->rss_key_len);
1522 	}
1523 
1524 	tmp_rss_conf.algorithm = rss_conf->algorithm;
1525 
1526 	tmp_rss_conf.rss_hf = rss_conf->rss_hf;
1527 	int res = flow_filter_ops->flow_nic_set_hasher_fields(ndev, hsh_idx, tmp_rss_conf);
1528 
1529 	if (res == 0) {
1530 		flow_filter_ops->hw_mod_hsh_rcp_flush(&ndev->be, hsh_idx, 1);
1531 		rte_memcpy(&ndev->rss_conf, &tmp_rss_conf, sizeof(struct nt_eth_rss_conf));
1532 
1533 	} else {
1534 		NT_LOG(ERR, NTNIC, "ERROR: - RSS hash update failed with error %i", res);
1535 	}
1536 
1537 	return res;
1538 }
1539 
1540 static int rss_hash_conf_get(struct rte_eth_dev *eth_dev, struct rte_eth_rss_conf *rss_conf)
1541 {
1542 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1543 	struct flow_nic_dev *ndev = internals->flw_dev->ndev;
1544 
1545 	rss_conf->algorithm = (enum rte_eth_hash_function)ndev->rss_conf.algorithm;
1546 
1547 	rss_conf->rss_hf = ndev->rss_conf.rss_hf;
1548 
1549 	/*
1550 	 * copy the stored key into rss_key, zero-padding the caller's buffer
1551 	 * and limiting the copy to MAX_RSS_KEY_LEN bytes
1552 	 */
1553 	if (rss_conf->rss_key != NULL) {
1554 		int key_len = RTE_MIN(rss_conf->rss_key_len, MAX_RSS_KEY_LEN);
1555 		memset(rss_conf->rss_key, 0, rss_conf->rss_key_len);
1556 		rte_memcpy(rss_conf->rss_key, &ndev->rss_conf.rss_key, key_len);
1557 		rss_conf->rss_key_len = key_len;
1558 	}
1559 
1560 	return 0;
1561 }
1562 
1563 static const struct eth_dev_ops nthw_eth_dev_ops = {
1564 	.dev_configure = eth_dev_configure,
1565 	.dev_start = eth_dev_start,
1566 	.dev_stop = eth_dev_stop,
1567 	.dev_set_link_up = eth_dev_set_link_up,
1568 	.dev_set_link_down = eth_dev_set_link_down,
1569 	.dev_close = eth_dev_close,
1570 	.link_update = eth_link_update,
1571 	.stats_get = eth_stats_get,
1572 	.stats_reset = eth_stats_reset,
1573 	.dev_infos_get = eth_dev_infos_get,
1574 	.fw_version_get = eth_fw_version_get,
1575 	.rx_queue_setup = eth_rx_scg_queue_setup,
1576 	.rx_queue_start = eth_rx_queue_start,
1577 	.rx_queue_stop = eth_rx_queue_stop,
1578 	.rx_queue_release = eth_rx_queue_release,
1579 	.tx_queue_setup = eth_tx_scg_queue_setup,
1580 	.tx_queue_start = eth_tx_queue_start,
1581 	.tx_queue_stop = eth_tx_queue_stop,
1582 	.tx_queue_release = eth_tx_queue_release,
1583 	.mac_addr_add = eth_mac_addr_add,
1584 	.mac_addr_set = eth_mac_addr_set,
1585 	.set_mc_addr_list = eth_set_mc_addr_list,
1586 	.flow_ops_get = dev_flow_ops_get,
1587 	.promiscuous_enable = promiscuous_enable,
1588 	.rss_hash_update = eth_dev_rss_hash_update,
1589 	.rss_hash_conf_get = rss_hash_conf_get,
1590 };
1591 
1592 /*
1593  * Adapter flm stat thread
1594  */
1595 THREAD_FUNC adapter_flm_update_thread_fn(void *context)
1596 {
1597 	const struct profile_inline_ops *profile_inline_ops = get_profile_inline_ops();
1598 
1599 	if (profile_inline_ops == NULL) {
1600 		NT_LOG(ERR, NTNIC, "%s: profile_inline module uninitialized", __func__);
1601 		return THREAD_RETURN;
1602 	}
1603 
1604 	struct drv_s *p_drv = context;
1605 
1606 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
1607 	struct adapter_info_s *p_adapter_info = &p_nt_drv->adapter_info;
1608 	struct nt4ga_filter_s *p_nt4ga_filter = &p_adapter_info->nt4ga_filter;
1609 	struct flow_nic_dev *p_flow_nic_dev = p_nt4ga_filter->mp_flow_device;
1610 
1611 	NT_LOG(DBG, NTNIC, "%s: %s: waiting for port configuration",
1612 		p_adapter_info->mp_adapter_id_str, __func__);
1613 
1614 	while (p_flow_nic_dev->eth_base == NULL)
1615 		nt_os_wait_usec(1 * 1000 * 1000);
1616 
1617 	struct flow_eth_dev *dev = p_flow_nic_dev->eth_base;
1618 
1619 	NT_LOG(DBG, NTNIC, "%s: %s: begin", p_adapter_info->mp_adapter_id_str, __func__);
1620 
1621 	while (!p_drv->ntdrv.b_shutdown)
1622 		if (profile_inline_ops->flm_update(dev) == 0)
1623 			nt_os_wait_usec(10);
1624 
1625 	NT_LOG(DBG, NTNIC, "%s: %s: end", p_adapter_info->mp_adapter_id_str, __func__);
1626 	return THREAD_RETURN;
1627 }
1628 
1629 static int
1630 nthw_pci_dev_init(struct rte_pci_device *pci_dev)
1631 {
1632 	const struct flow_filter_ops *flow_filter_ops = get_flow_filter_ops();
1633 
1634 	if (flow_filter_ops == NULL) {
1635 		NT_LOG_DBGX(ERR, NTNIC, "flow_filter module uninitialized");
1636 		/* Do not return here, so that traffic can still be processed by SW */
1637 	}
1638 
1639 	const struct profile_inline_ops *profile_inline_ops = get_profile_inline_ops();
1640 
1641 	if (profile_inline_ops == NULL) {
1642 		NT_LOG_DBGX(ERR, NTNIC, "profile_inline module uninitialized");
1643 		/* Do not return here, so that traffic can still be processed by SW */
1644 	}
1645 
1646 	nt_vfio_init();
1647 	const struct port_ops *port_ops = get_port_ops();
1648 
1649 	if (port_ops == NULL) {
1650 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
1651 		return -1;
1652 	}
1653 
1654 	const struct adapter_ops *adapter_ops = get_adapter_ops();
1655 
1656 	if (adapter_ops == NULL) {
1657 		NT_LOG(ERR, NTNIC, "Adapter module uninitialized");
1658 		return -1;
1659 	}
1660 
1661 	int res;
1662 	struct drv_s *p_drv;
1663 	ntdrv_4ga_t *p_nt_drv;
1664 	hw_info_t *p_hw_info;
1665 	fpga_info_t *fpga_info;
1666 	uint32_t n_port_mask = -1;	/* All ports enabled by default */
1667 	uint32_t nb_rx_queues = 1;
1668 	uint32_t nb_tx_queues = 1;
1669 	uint32_t exception_path = 0;
1670 	struct flow_queue_id_s queue_ids[MAX_QUEUES];
1671 	int n_phy_ports;
1672 	struct port_link_speed pls_mbps[NUM_ADAPTER_PORTS_MAX] = { 0 };
1673 	int num_port_speeds = 0;
1674 	enum flow_eth_dev_profile profile = FLOW_ETH_DEV_PROFILE_INLINE;
1675 
1676 	NT_LOG_DBGX(DBG, NTNIC, "Dev %s PF #%i Init : %02x:%02x:%i", pci_dev->name,
1677 		pci_dev->addr.function, pci_dev->addr.bus, pci_dev->addr.devid,
1678 		pci_dev->addr.function);
1679 
1680 	/*
1681 	 * Process options/arguments
1682 	 */
1683 	if (pci_dev->device.devargs && pci_dev->device.devargs->args) {
1684 		int kvargs_count;
1685 		struct rte_kvargs *kvlist =
1686 			rte_kvargs_parse(pci_dev->device.devargs->args, valid_arguments);
1687 
1688 		if (kvlist == NULL)
1689 			return -1;
1690 
1691 		/*
1692 		 * Argument: help
1693 		 * NOTE: this argument/option check should be the first as it will stop
1694 		 * execution after producing its output
1695 		 */
1696 		{
1697 			if (rte_kvargs_get(kvlist, ETH_DEV_NTNIC_HELP_ARG)) {
1698 				size_t i;
1699 
1700 				for (i = 0; i < RTE_DIM(valid_arguments); i++)
1701 					if (valid_arguments[i] == NULL)
1702 						break;
1703 
1704 				exit(0);
1705 			}
1706 		}
1707 
1708 		/*
1709 		 * rxq option/argument
1710 		 * The number of rxq (hostbuffers) allocated in memory.
1711 		 * Default (when the argument is not given) is 1 Rx host buffer
1712 		 */
1713 		kvargs_count = rte_kvargs_count(kvlist, ETH_DEV_NTHW_RXQUEUES_ARG);
1714 
1715 		if (kvargs_count != 0) {
1716 			assert(kvargs_count == 1);
1717 			res = rte_kvargs_process(kvlist, ETH_DEV_NTHW_RXQUEUES_ARG, &string_to_u32,
1718 					&nb_rx_queues);
1719 
1720 			if (res < 0) {
1721 				NT_LOG_DBGX(ERR, NTNIC,
1722 					"problem with command line arguments: res=%d",
1723 					res);
1724 				return -1;
1725 			}
1726 
1727 			NT_LOG_DBGX(DBG, NTNIC, "devargs: %s=%u",
1728 				ETH_DEV_NTHW_RXQUEUES_ARG, nb_rx_queues);
1729 		}
1730 
1731 		/*
1732 		 * txq option/argument
1733 		 * The number of txq (hostbuffers) allocated in memory.
1734 		 * Default (when the argument is not given) is 1 Tx host buffer
1735 		 */
1736 		kvargs_count = rte_kvargs_count(kvlist, ETH_DEV_NTHW_TXQUEUES_ARG);
1737 
1738 		if (kvargs_count != 0) {
1739 			assert(kvargs_count == 1);
1740 			res = rte_kvargs_process(kvlist, ETH_DEV_NTHW_TXQUEUES_ARG, &string_to_u32,
1741 					&nb_tx_queues);
1742 
1743 			if (res < 0) {
1744 				NT_LOG_DBGX(ERR, NTNIC,
1745 					"problem with command line arguments: res=%d",
1746 					res);
1747 				return -1;
1748 			}
1749 
1750 			NT_LOG_DBGX(DBG, NTNIC, "devargs: %s=%u",
1751 				ETH_DEV_NTHW_TXQUEUES_ARG, nb_tx_queues);
1752 		}
1753 	}
1754 
1755 
1756 	/* alloc */
1757 	p_drv = rte_zmalloc_socket(pci_dev->name, sizeof(struct drv_s), RTE_CACHE_LINE_SIZE,
1758 			pci_dev->device.numa_node);
1759 
1760 	if (!p_drv) {
1761 		NT_LOG_DBGX(ERR, NTNIC, "%s: error %d",
1762 			(pci_dev->name[0] ? pci_dev->name : "NA"), -1);
1763 		return -1;
1764 	}
1765 
1766 	/* Setup VFIO context */
1767 	int vfio = nt_vfio_setup(pci_dev);
1768 
1769 	if (vfio < 0) {
1770 		NT_LOG_DBGX(ERR, NTNIC, "%s: vfio_setup error %d",
1771 			(pci_dev->name[0] ? pci_dev->name : "NA"), -1);
1772 		rte_free(p_drv);
1773 		return -1;
1774 	}
1775 
1776 	/* context */
1777 	p_nt_drv = &p_drv->ntdrv;
1778 	p_hw_info = &p_nt_drv->adapter_info.hw_info;
1779 	fpga_info = &p_nt_drv->adapter_info.fpga_info;
1780 
1781 	p_drv->p_dev = pci_dev;
1782 
1783 	/* Set context for NtDrv */
1784 	p_nt_drv->pciident = BDF_TO_PCIIDENT(pci_dev->addr.domain, pci_dev->addr.bus,
1785 			pci_dev->addr.devid, pci_dev->addr.function);
1786 	p_nt_drv->adapter_info.n_rx_host_buffers = nb_rx_queues;
1787 	p_nt_drv->adapter_info.n_tx_host_buffers = nb_tx_queues;
1788 
1789 	fpga_info->bar0_addr = (void *)pci_dev->mem_resource[0].addr;
1790 	fpga_info->bar0_size = pci_dev->mem_resource[0].len;
1791 	fpga_info->numa_node = pci_dev->device.numa_node;
1792 	fpga_info->pciident = p_nt_drv->pciident;
1793 	fpga_info->adapter_no = p_drv->adapter_no;
1794 
1795 	p_nt_drv->adapter_info.hw_info.pci_class_id = pci_dev->id.class_id;
1796 	p_nt_drv->adapter_info.hw_info.pci_vendor_id = pci_dev->id.vendor_id;
1797 	p_nt_drv->adapter_info.hw_info.pci_device_id = pci_dev->id.device_id;
1798 	p_nt_drv->adapter_info.hw_info.pci_sub_vendor_id = pci_dev->id.subsystem_vendor_id;
1799 	p_nt_drv->adapter_info.hw_info.pci_sub_device_id = pci_dev->id.subsystem_device_id;
1800 
1801 	NT_LOG(DBG, NTNIC, "%s: " PCIIDENT_PRINT_STR " %04X:%04X: %04X:%04X:",
1802 		p_nt_drv->adapter_info.mp_adapter_id_str, PCIIDENT_TO_DOMAIN(p_nt_drv->pciident),
1803 		PCIIDENT_TO_BUSNR(p_nt_drv->pciident), PCIIDENT_TO_DEVNR(p_nt_drv->pciident),
1804 		PCIIDENT_TO_FUNCNR(p_nt_drv->pciident),
1805 		p_nt_drv->adapter_info.hw_info.pci_vendor_id,
1806 		p_nt_drv->adapter_info.hw_info.pci_device_id,
1807 		p_nt_drv->adapter_info.hw_info.pci_sub_vendor_id,
1808 		p_nt_drv->adapter_info.hw_info.pci_sub_device_id);
1809 
1810 	p_nt_drv->b_shutdown = false;
1811 	p_nt_drv->adapter_info.pb_shutdown = &p_nt_drv->b_shutdown;
1812 
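	/*
	 * Apply the requested link speed to each port before adapter initialization.
	 * num_port_speeds/pls_mbps are assumed to be filled in by earlier option
	 * handling.
	 */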
1813 	for (int i = 0; i < num_port_speeds; ++i) {
1814 		struct adapter_info_s *p_adapter_info = &p_nt_drv->adapter_info;
1815 		nt_link_speed_t link_speed = convert_link_speed(pls_mbps[i].link_speed);
1816 		port_ops->set_link_speed(p_adapter_info, i, link_speed);
1817 	}
1818 
1819 	/* store context */
1820 	store_pdrv(p_drv);
1821 
1822 	/* initialize nt4ga nthw fpga module instance in drv */
1823 	int err = adapter_ops->init(&p_nt_drv->adapter_info);
1824 
1825 	if (err != 0) {
1826 		NT_LOG(ERR, NTNIC, "%s: Cannot initialize the adapter instance",
1827 			p_nt_drv->adapter_info.mp_adapter_id_str);
1828 		return -1;
1829 	}
1830 
1831 	/* Initialize the queue system */
1832 	if (err == 0) {
1833 		sg_ops = get_sg_ops();
1834 
1835 		if (sg_ops != NULL) {
1836 			err = sg_ops->nthw_virt_queue_init(fpga_info);
1837 
1838 			if (err != 0) {
1839 				NT_LOG(ERR, NTNIC,
1840 					"%s: Cannot initialize scatter-gather queues",
1841 					p_nt_drv->adapter_info.mp_adapter_id_str);
1842 
1843 			} else {
1844 				NT_LOG(DBG, NTNIC, "%s: Initialized scatter-gather queues",
1845 					p_nt_drv->adapter_info.mp_adapter_id_str);
1846 			}
1847 
1848 		} else {
1849 			NT_LOG_DBGX(DBG, NTNIC, "SG module is not initialized");
1850 		}
1851 	}
1852 
	/* Log the adapter identification now that initialization has succeeded. */
1854 	if (err == 0) {
1855 		/* mp_adapter_id_str is initialized after nt4ga_adapter_init(p_nt_drv) */
1856 		const char *const p_adapter_id_str = p_nt_drv->adapter_info.mp_adapter_id_str;
1857 		(void)p_adapter_id_str;
1858 		NT_LOG(DBG, NTNIC,
1859 			"%s: %s: AdapterPCI=" PCIIDENT_PRINT_STR " Hw=0x%02X_rev%d PhyPorts=%d",
1860 			(pci_dev->name[0] ? pci_dev->name : "NA"), p_adapter_id_str,
1861 			PCIIDENT_TO_DOMAIN(p_nt_drv->adapter_info.fpga_info.pciident),
1862 			PCIIDENT_TO_BUSNR(p_nt_drv->adapter_info.fpga_info.pciident),
1863 			PCIIDENT_TO_DEVNR(p_nt_drv->adapter_info.fpga_info.pciident),
1864 			PCIIDENT_TO_FUNCNR(p_nt_drv->adapter_info.fpga_info.pciident),
1865 			p_hw_info->hw_platform_id, fpga_info->nthw_hw_info.hw_id,
1866 			fpga_info->n_phy_ports);
1867 
1868 	} else {
1869 		NT_LOG_DBGX(ERR, NTNIC, "%s: error=%d",
1870 			(pci_dev->name[0] ? pci_dev->name : "NA"), err);
1871 		return -1;
1872 	}
1873 
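	/*
	 * For the inline FPGA profile, the FLM queues are set up and a dedicated
	 * FLM update thread is started to service them.
	 */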
1874 	if (profile_inline_ops != NULL && fpga_info->profile == FPGA_INFO_PROFILE_INLINE) {
1875 		profile_inline_ops->flm_setup_queues();
1876 		res = THREAD_CTRL_CREATE(&p_nt_drv->flm_thread, "ntnic-nt_flm_update_thr",
1877 			adapter_flm_update_thread_fn, (void *)p_drv);
1878 
1879 		if (res) {
1880 			NT_LOG_DBGX(ERR, NTNIC, "%s: error=%d",
1881 				(pci_dev->name[0] ? pci_dev->name : "NA"), res);
1882 			return -1;
1883 		}
1884 	}
1885 
1886 	n_phy_ports = fpga_info->n_phy_ports;
1887 
1888 	for (int n_intf_no = 0; n_intf_no < n_phy_ports; n_intf_no++) {
1889 		const char *const p_port_id_str = p_nt_drv->adapter_info.mp_port_id_str[n_intf_no];
1890 		(void)p_port_id_str;
1891 		struct pmd_internals *internals = NULL;
1892 		struct rte_eth_dev *eth_dev = NULL;
1893 		char name[32];
1894 		int i;
1895 
1896 		if ((1 << n_intf_no) & ~n_port_mask) {
1897 			NT_LOG_DBGX(DBG, NTNIC,
1898 				"%s: interface #%d: skipping due to portmask 0x%02X",
1899 				p_port_id_str, n_intf_no, n_port_mask);
1900 			continue;
1901 		}
1902 
1903 		snprintf(name, sizeof(name), "ntnic%d", n_intf_no);
1904 		NT_LOG_DBGX(DBG, NTNIC, "%s: interface #%d: %s: '%s'", p_port_id_str,
1905 			n_intf_no, (pci_dev->name[0] ? pci_dev->name : "NA"), name);
1906 
1907 		internals = rte_zmalloc_socket(name, sizeof(struct pmd_internals),
1908 				RTE_CACHE_LINE_SIZE, pci_dev->device.numa_node);
1909 
1910 		if (!internals) {
1911 			NT_LOG_DBGX(ERR, NTNIC, "%s: %s: error=%d",
1912 				(pci_dev->name[0] ? pci_dev->name : "NA"), name, -1);
1913 			return -1;
1914 		}
1915 
1916 		internals->pci_dev = pci_dev;
1917 		internals->n_intf_no = n_intf_no;
1918 		internals->type = PORT_TYPE_PHYSICAL;
1919 		internals->nb_rx_queues = nb_rx_queues;
1920 		internals->nb_tx_queues = nb_tx_queues;
1921 
		/* Queue index is not used as destination port in bypass - use 0x80 + port number */
1923 		for (i = 0; i < MAX_QUEUES; i++)
1924 			internals->vpq[i].hw_id = -1;
1925 
1926 
1927 		/* Setup queue_ids */
1928 		if (nb_rx_queues > 1) {
1929 			NT_LOG(DBG, NTNIC,
				"(%i) NTNIC configured with %i Rx queues",
1931 				internals->n_intf_no, nb_rx_queues);
1932 		}
1933 
1934 		if (nb_tx_queues > 1) {
1935 			NT_LOG(DBG, NTNIC,
				"(%i) NTNIC configured with %i Tx queues",
1937 				internals->n_intf_no, nb_tx_queues);
1938 		}
1939 
1940 		int max_num_queues = (nb_rx_queues > nb_tx_queues) ? nb_rx_queues : nb_tx_queues;
1941 		int start_queue = allocate_queue(max_num_queues);
1942 
1943 		if (start_queue < 0)
1944 			return -1;
1945 
		for (i = 0; i < max_num_queues; i++) {
1947 			queue_ids[i].id = i;
1948 			queue_ids[i].hw_id = start_queue + i;
1949 
1950 			internals->rxq_scg[i].queue = queue_ids[i];
1951 			/* use same index in Rx and Tx rings */
1952 			internals->txq_scg[i].queue = queue_ids[i];
1953 			internals->rxq_scg[i].enabled = 0;
1954 			internals->txq_scg[i].type = internals->type;
1955 			internals->rxq_scg[i].type = internals->type;
1956 			internals->rxq_scg[i].port = internals->port;
1957 		}
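		/*
		 * Logical queue i on this port now maps to HW queue (start_queue + i);
		 * the same index is used for both the Rx and Tx rings.
		 */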
1958 
1959 		/* no tx queues - tx data goes out on phy */
1960 		internals->vpq_nb_vq = 0;
1961 
1962 		for (i = 0; i < (int)nb_tx_queues; i++) {
1963 			internals->txq_scg[i].port = internals->port;
1964 			internals->txq_scg[i].enabled = 0;
1965 		}
1966 
1967 		/* Set MAC address (but only if the MAC address is permitted) */
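		/*
		 * The port MAC address is derived from the adapter's VPD base MAC plus
		 * the interface number and is stored most-significant byte first.
		 */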
1968 		if (n_intf_no < fpga_info->nthw_hw_info.vpd_info.mn_mac_addr_count) {
1969 			const uint64_t mac =
1970 				fpga_info->nthw_hw_info.vpd_info.mn_mac_addr_value + n_intf_no;
1971 			internals->eth_addrs[0].addr_bytes[0] = (mac >> 40) & 0xFFu;
1972 			internals->eth_addrs[0].addr_bytes[1] = (mac >> 32) & 0xFFu;
1973 			internals->eth_addrs[0].addr_bytes[2] = (mac >> 24) & 0xFFu;
1974 			internals->eth_addrs[0].addr_bytes[3] = (mac >> 16) & 0xFFu;
1975 			internals->eth_addrs[0].addr_bytes[4] = (mac >> 8) & 0xFFu;
1976 			internals->eth_addrs[0].addr_bytes[5] = (mac >> 0) & 0xFFu;
1977 		}
1978 
1979 		eth_dev = rte_eth_dev_allocate(name);
1980 
1981 		if (!eth_dev) {
1982 			NT_LOG_DBGX(ERR, NTNIC, "%s: %s: error=%d",
1983 				(pci_dev->name[0] ? pci_dev->name : "NA"), name, -1);
1984 			return -1;
1985 		}
1986 
1987 		if (flow_filter_ops != NULL) {
1988 			internals->flw_dev = flow_filter_ops->flow_get_eth_dev(0, n_intf_no,
1989 				eth_dev->data->port_id, nb_rx_queues, queue_ids,
1990 				&internals->txq_scg[0].rss_target_id, profile, exception_path);
1991 
1992 			if (!internals->flw_dev) {
1993 				NT_LOG(ERR, NTNIC,
1994 					"Error creating port. Resource exhaustion in HW");
1995 				return -1;
1996 			}
1997 		}
1998 
1999 		/* connect structs */
2000 		internals->p_drv = p_drv;
2001 		eth_dev->data->dev_private = internals;
		eth_dev->data->mac_addrs = rte_malloc(NULL,
					NUM_MAC_ADDRS_PER_PORT * sizeof(struct rte_ether_addr), 0);

		if (eth_dev->data->mac_addrs == NULL) {
			NT_LOG(ERR, NTNIC, "%s: Failed to allocate MAC address array", name);
			return -1;
		}

		rte_memcpy(&eth_dev->data->mac_addrs[0],
					&internals->eth_addrs[0], RTE_ETHER_ADDR_LEN);
2006 
2007 		NT_LOG_DBGX(DBG, NTNIC, "Setting up RX functions for SCG");
2008 		eth_dev->rx_pkt_burst = eth_dev_rx_scg;
2009 		eth_dev->tx_pkt_burst = eth_dev_tx_scg;
2010 		eth_dev->tx_pkt_prepare = NULL;
2011 
2012 		struct rte_eth_link pmd_link;
2013 		pmd_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2014 		pmd_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
2015 		pmd_link.link_status = RTE_ETH_LINK_DOWN;
2016 		pmd_link.link_autoneg = RTE_ETH_LINK_AUTONEG;
2017 
2018 		eth_dev->device = &pci_dev->device;
2019 		eth_dev->data->dev_link = pmd_link;
2020 		eth_dev->dev_ops = &nthw_eth_dev_ops;
2021 
2022 		eth_dev_pci_specific_init(eth_dev, pci_dev);
2023 		rte_eth_dev_probing_finish(eth_dev);
2024 
2025 		/* increase initialized ethernet devices - PF */
2026 		p_drv->n_eth_dev_init_count++;
2027 	}
2028 
2029 	return 0;
2030 }
2031 
2032 static int
nthw_pci_dev_deinit(struct rte_eth_dev *eth_dev)
2034 {
2035 	NT_LOG_DBGX(DBG, NTNIC, "PCI device deinitialization");
2036 
2037 	int i;
2038 	char name[32];
2039 
2040 	struct pmd_internals *internals = eth_dev->data->dev_private;
2041 	ntdrv_4ga_t *p_ntdrv = &internals->p_drv->ntdrv;
2042 	fpga_info_t *fpga_info = &p_ntdrv->adapter_info.fpga_info;
2043 	const int n_phy_ports = fpga_info->n_phy_ports;
2044 
2045 	/* let running threads end Rx and Tx activity */
2046 	if (sg_ops != NULL) {
2047 		nt_os_wait_usec(1 * 1000 * 1000);
2048 
2049 		while (internals) {
2050 			for (i = internals->nb_tx_queues - 1; i >= 0; i--) {
2051 				sg_ops->nthw_release_mngd_tx_virt_queue(internals->txq_scg[i].vq);
2052 				release_hw_virtio_queues(&internals->txq_scg[i].hwq);
2053 			}
2054 
2055 			for (i = internals->nb_rx_queues - 1; i >= 0; i--) {
2056 				sg_ops->nthw_release_mngd_rx_virt_queue(internals->rxq_scg[i].vq);
2057 				release_hw_virtio_queues(&internals->rxq_scg[i].hwq);
2058 			}
2059 
2060 			internals = internals->next;
2061 		}
2062 	}
2063 
2064 	for (i = 0; i < n_phy_ports; i++) {
		snprintf(name, sizeof(name), "ntnic%d", i);
2066 		eth_dev = rte_eth_dev_allocated(name);
2067 		if (eth_dev == NULL)
2068 			continue; /* port already released */
2069 		rte_eth_dev_release_port(eth_dev);
2070 	}
2071 
2072 	nt_vfio_remove(EXCEPTION_PATH_HID);
2073 	return 0;
2074 }
2075 
2076 static int
2077 nthw_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2078 	struct rte_pci_device *pci_dev)
2079 {
2080 	int ret;
2081 
2082 	NT_LOG_DBGX(DBG, NTNIC, "pcidev: name: '%s'", pci_dev->name);
2083 	NT_LOG_DBGX(DBG, NTNIC, "devargs: name: '%s'", pci_dev->device.name);
2084 
2085 	if (pci_dev->device.devargs) {
2086 		NT_LOG_DBGX(DBG, NTNIC, "devargs: args: '%s'",
2087 			(pci_dev->device.devargs->args ? pci_dev->device.devargs->args : "NULL"));
2088 		NT_LOG_DBGX(DBG, NTNIC, "devargs: data: '%s'",
2089 			(pci_dev->device.devargs->data ? pci_dev->device.devargs->data : "NULL"));
2090 	}
2091 
2092 	const int n_rte_vfio_no_io_mmu_enabled = rte_vfio_noiommu_is_enabled();
2093 	NT_LOG(DBG, NTNIC, "vfio_no_iommu_enabled=%d", n_rte_vfio_no_io_mmu_enabled);
2094 
2095 	if (n_rte_vfio_no_io_mmu_enabled) {
2096 		NT_LOG(ERR, NTNIC, "vfio_no_iommu_enabled=%d: this PMD needs VFIO IOMMU",
2097 			n_rte_vfio_no_io_mmu_enabled);
2098 		return -1;
2099 	}
2100 
2101 	const enum rte_iova_mode n_rte_io_va_mode = rte_eal_iova_mode();
2102 	NT_LOG(DBG, NTNIC, "iova mode=%d", n_rte_io_va_mode);
2103 
2104 	NT_LOG(DBG, NTNIC,
2105 		"busid=" PCI_PRI_FMT
2106 		" pciid=%04x:%04x_%04x:%04x locstr=%s @ numanode=%d: drv=%s drvalias=%s",
2107 		pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid,
2108 		pci_dev->addr.function, pci_dev->id.vendor_id, pci_dev->id.device_id,
2109 		pci_dev->id.subsystem_vendor_id, pci_dev->id.subsystem_device_id,
2110 		pci_dev->name[0] ? pci_dev->name : "NA",
2111 		pci_dev->device.numa_node,
2112 		pci_dev->driver->driver.name ? pci_dev->driver->driver.name : "NA",
2113 		pci_dev->driver->driver.alias ? pci_dev->driver->driver.alias : "NA");
2114 
2115 
2116 	ret = nthw_pci_dev_init(pci_dev);
2117 
2118 	NT_LOG_DBGX(DBG, NTNIC, "leave: ret=%d", ret);
2119 	return ret;
2120 }
2121 
2122 static int
2123 nthw_pci_remove(struct rte_pci_device *pci_dev)
2124 {
2125 	NT_LOG_DBGX(DBG, NTNIC);
2126 
2127 	struct drv_s *p_drv = get_pdrv_from_pci(pci_dev->addr);
2128 	drv_deinit(p_drv);
2129 
2130 	return rte_eth_dev_pci_generic_remove(pci_dev, nthw_pci_dev_deinit);
2131 }
2132 
2133 static struct rte_pci_driver rte_nthw_pmd = {
2134 	.id_table = nthw_pci_id_map,
2135 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
2136 	.probe = nthw_pci_probe,
2137 	.remove = nthw_pci_remove,
2138 };
2139 
2140 RTE_PMD_REGISTER_PCI(net_ntnic, rte_nthw_pmd);
2141 RTE_PMD_REGISTER_PCI_TABLE(net_ntnic, nthw_pci_id_map);
2142 RTE_PMD_REGISTER_KMOD_DEP(net_ntnic, "* vfio-pci");
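
/*
 * Illustrative bring-up (example workflow; exact commands are deployment-specific):
 * bind the adapter to vfio-pci (e.g. with dpdk-devbind.py), then start the
 * application with the devargs handled in nthw_pci_dev_init(), for example:
 *   dpdk-testpmd -a <domain:bus:dev.func>,rxqs=2,txqs=2 -- -i
 * Note that nthw_pci_probe() rejects VFIO no-IOMMU mode, so an IOMMU is required.
 */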
2143 
2144 RTE_LOG_REGISTER_SUFFIX(nt_log_general, general, INFO);
2145 RTE_LOG_REGISTER_SUFFIX(nt_log_nthw, nthw, INFO);
2146 RTE_LOG_REGISTER_SUFFIX(nt_log_filter, filter, INFO);
2147 RTE_LOG_REGISTER_SUFFIX(nt_log_ntnic, ntnic, INFO);
2148