xref: /dpdk/drivers/net/ntnic/ntnic_ethdev.c (revision 6019656d6f6848c83591f24867538311545776eb)
1 /*
2  * SPDX-License-Identifier: BSD-3-Clause
3  * Copyright(c) 2023 Napatech A/S
4  */
5 
6 #include <stdint.h>
7 #include <stdarg.h>
8 
9 #include <signal.h>
10 
11 #include <rte_eal.h>
12 #include <rte_dev.h>
13 #include <rte_vfio.h>
14 #include <rte_ethdev.h>
15 #include <rte_bus_pci.h>
16 #include <ethdev_pci.h>
17 #include <rte_kvargs.h>
18 
19 #include <sys/queue.h>
20 
21 #include "rte_spinlock.h"
22 #include "ntlog.h"
23 #include "ntdrv_4ga.h"
24 #include "ntos_drv.h"
25 #include "ntos_system.h"
26 #include "nthw_fpga_instances.h"
27 #include "ntnic_vfio.h"
28 #include "ntnic_mod_reg.h"
29 #include "nt_util.h"
30 #include "profile_inline/flm_age_queue.h"
31 #include "profile_inline/flm_evt_queue.h"
32 #include "rte_pmd_ntnic.h"
33 
34 const rte_thread_attr_t thread_attr = { .priority = RTE_THREAD_PRIORITY_NORMAL };
35 #define THREAD_CREATE(a, b, c) rte_thread_create(a, &thread_attr, b, c)
36 #define THREAD_CTRL_CREATE(a, b, c, d) rte_thread_create_internal_control(a, b, c, d)
37 #define THREAD_JOIN(a) rte_thread_join(a, NULL)
38 #define THREAD_FUNC static uint32_t
39 #define THREAD_RETURN (0)
40 #define HW_MAX_PKT_LEN (10000)
41 #define MAX_MTU (HW_MAX_PKT_LEN - RTE_ETHER_HDR_LEN - RTE_ETHER_CRC_LEN)
42 #define MIN_MTU_INLINE 512
43 
44 #define EXCEPTION_PATH_HID 0
45 
46 #define MAX_TOTAL_QUEUES       128
47 
48 #define SG_NB_HW_RX_DESCRIPTORS 1024
49 #define SG_NB_HW_TX_DESCRIPTORS 1024
50 #define SG_HW_RX_PKT_BUFFER_SIZE (1024 << 1)
51 #define SG_HW_TX_PKT_BUFFER_SIZE (1024 << 1)
52 
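/*
 * NUM_VQ_SEGS: number of virtqueue TX buffers needed to hold _data_size_ bytes
 * of packet data plus the SG header - one buffer if it fits within
 * SG_HW_TX_PKT_BUFFER_SIZE, otherwise the rounded-up buffer count.
 */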
53 #define NUM_VQ_SEGS(_data_size_)                                                                  \
54 	({                                                                                        \
55 		size_t _size = (_data_size_);                                                     \
56 		size_t _segment_count = ((_size + SG_HDR_SIZE) > SG_HW_TX_PKT_BUFFER_SIZE)        \
57 			? (((_size + SG_HDR_SIZE) + SG_HW_TX_PKT_BUFFER_SIZE - 1) /               \
58 			   SG_HW_TX_PKT_BUFFER_SIZE)                                              \
59 			: 1;                                                                      \
60 		_segment_count;                                                                   \
61 	})
62 
63 #define VIRTQ_DESCR_IDX(_tx_pkt_idx_)                                                             \
64 	(((_tx_pkt_idx_) + first_vq_descr_idx) % SG_NB_HW_TX_DESCRIPTORS)
65 
66 #define VIRTQ_DESCR_IDX_NEXT(_vq_descr_idx_) (((_vq_descr_idx_) + 1) % SG_NB_HW_TX_DESCRIPTORS)
67 
68 #define ONE_G_SIZE  0x40000000
69 #define ONE_G_MASK  (ONE_G_SIZE - 1)
70 
71 #define MAX_RX_PACKETS   128
72 #define MAX_TX_PACKETS   128
73 
74 #define MTUINITVAL 1500
75 
76 uint64_t rte_tsc_freq;
77 
78 static void (*previous_handler)(int sig);
79 static rte_thread_t shutdown_tid;
80 
81 int kill_pmd;
82 
83 #define ETH_DEV_NTNIC_HELP_ARG "help"
84 #define ETH_DEV_NTHW_RXQUEUES_ARG "rxqs"
85 #define ETH_DEV_NTHW_TXQUEUES_ARG "txqs"
86 
87 static const char *const valid_arguments[] = {
88 	ETH_DEV_NTNIC_HELP_ARG,
89 	ETH_DEV_NTHW_RXQUEUES_ARG,
90 	ETH_DEV_NTHW_TXQUEUES_ARG,
91 	NULL,
92 };
93 
94 
95 static const struct rte_pci_id nthw_pci_id_map[] = {
96 	{ RTE_PCI_DEVICE(NT_HW_PCI_VENDOR_ID, NT_HW_PCI_DEVICE_ID_NT200A02) },
97 	{
98 		.vendor_id = 0,
99 	},	/* sentinel */
100 };
101 
102 static const struct sg_ops_s *sg_ops;
103 
104 rte_spinlock_t hwlock = RTE_SPINLOCK_INITIALIZER;
105 
106 /*
107  * Store and get adapter info
108  */
109 
110 static struct drv_s *_g_p_drv[NUM_ADAPTER_MAX] = { NULL };
111 
112 static void
113 store_pdrv(struct drv_s *p_drv)
114 {
115 	if (p_drv->adapter_no >= NUM_ADAPTER_MAX) {
116 		NT_LOG(ERR, NTNIC,
117 			"Internal error adapter number %u out of range. Max number of adapters: %u",
118 			p_drv->adapter_no, NUM_ADAPTER_MAX);
119 		return;
120 	}
121 
122 	if (_g_p_drv[p_drv->adapter_no] != 0) {
123 		NT_LOG(WRN, NTNIC,
124 			"Overwriting adapter structure for PCI " PCIIDENT_PRINT_STR
125 			" with adapter structure for PCI " PCIIDENT_PRINT_STR,
126 			PCIIDENT_TO_DOMAIN(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
127 			PCIIDENT_TO_BUSNR(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
128 			PCIIDENT_TO_DEVNR(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
129 			PCIIDENT_TO_FUNCNR(_g_p_drv[p_drv->adapter_no]->ntdrv.pciident),
130 			PCIIDENT_TO_DOMAIN(p_drv->ntdrv.pciident),
131 			PCIIDENT_TO_BUSNR(p_drv->ntdrv.pciident),
132 			PCIIDENT_TO_DEVNR(p_drv->ntdrv.pciident),
133 			PCIIDENT_TO_FUNCNR(p_drv->ntdrv.pciident));
134 	}
135 
136 	rte_spinlock_lock(&hwlock);
137 	_g_p_drv[p_drv->adapter_no] = p_drv;
138 	rte_spinlock_unlock(&hwlock);
139 }
140 
141 static void clear_pdrv(struct drv_s *p_drv)
142 {
143 	if (p_drv->adapter_no >= NUM_ADAPTER_MAX)
144 		return;
145 
146 	rte_spinlock_lock(&hwlock);
147 	_g_p_drv[p_drv->adapter_no] = NULL;
148 	rte_spinlock_unlock(&hwlock);
149 }
150 
151 static struct drv_s *
152 get_pdrv_from_pci(struct rte_pci_addr addr)
153 {
154 	int i;
155 	struct drv_s *p_drv = NULL;
156 	rte_spinlock_lock(&hwlock);
157 
158 	for (i = 0; i < NUM_ADAPTER_MAX; i++) {
159 		if (_g_p_drv[i]) {
160 			if (PCIIDENT_TO_DOMAIN(_g_p_drv[i]->ntdrv.pciident) == addr.domain &&
161 				PCIIDENT_TO_BUSNR(_g_p_drv[i]->ntdrv.pciident) == addr.bus) {
162 				p_drv = _g_p_drv[i];
163 				break;
164 			}
165 		}
166 	}
167 
168 	rte_spinlock_unlock(&hwlock);
169 	return p_drv;
170 }
171 
172 static int dpdk_stats_collect(struct pmd_internals *internals, struct rte_eth_stats *stats)
173 {
174 	const struct ntnic_filter_ops *ntnic_filter_ops = get_ntnic_filter_ops();
175 
176 	if (ntnic_filter_ops == NULL) {
177 		NT_LOG_DBGX(ERR, NTNIC, "ntnic_filter_ops uninitialized");
178 		return -1;
179 	}
180 
181 	unsigned int i;
182 	struct drv_s *p_drv = internals->p_drv;
183 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
184 	nt4ga_stat_t *p_nt4ga_stat = &p_nt_drv->adapter_info.nt4ga_stat;
185 	nthw_stat_t *p_nthw_stat = p_nt4ga_stat->mp_nthw_stat;
186 	const int if_index = internals->n_intf_no;
187 	uint64_t rx_total = 0;
188 	uint64_t rx_total_b = 0;
189 	uint64_t tx_total = 0;
190 	uint64_t tx_total_b = 0;
191 	uint64_t tx_err_total = 0;
192 
193 	if (!p_nthw_stat || !p_nt4ga_stat || !stats || if_index < 0 ||
194 		if_index > NUM_ADAPTER_PORTS_MAX) {
195 		NT_LOG_DBGX(WRN, NTNIC, "error exit");
196 		return -1;
197 	}
198 
199 	/*
200 	 * Pull the latest port statistic numbers (Rx/Tx pkts and bytes)
201 	 * Return values are in the "internals->rxq_scg[]" and "internals->txq_scg[]" arrays
202 	 */
203 	ntnic_filter_ops->poll_statistics(internals);
204 
205 	memset(stats, 0, sizeof(*stats));
206 
207 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && i < internals->nb_rx_queues; i++) {
208 		stats->q_ipackets[i] = internals->rxq_scg[i].rx_pkts;
209 		stats->q_ibytes[i] = internals->rxq_scg[i].rx_bytes;
210 		rx_total += stats->q_ipackets[i];
211 		rx_total_b += stats->q_ibytes[i];
212 	}
213 
214 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && i < internals->nb_tx_queues; i++) {
215 		stats->q_opackets[i] = internals->txq_scg[i].tx_pkts;
216 		stats->q_obytes[i] = internals->txq_scg[i].tx_bytes;
217 		stats->q_errors[i] = internals->txq_scg[i].err_pkts;
218 		tx_total += stats->q_opackets[i];
219 		tx_total_b += stats->q_obytes[i];
220 		tx_err_total += stats->q_errors[i];
221 	}
222 
223 	stats->imissed = internals->rx_missed;
224 	stats->ipackets = rx_total;
225 	stats->ibytes = rx_total_b;
226 	stats->opackets = tx_total;
227 	stats->obytes = tx_total_b;
228 	stats->oerrors = tx_err_total;
229 
230 	return 0;
231 }
232 
233 static int dpdk_stats_reset(struct pmd_internals *internals, struct ntdrv_4ga_s *p_nt_drv,
234 	int n_intf_no)
235 {
236 	nt4ga_stat_t *p_nt4ga_stat = &p_nt_drv->adapter_info.nt4ga_stat;
237 	nthw_stat_t *p_nthw_stat = p_nt4ga_stat->mp_nthw_stat;
238 	unsigned int i;
239 
240 	if (!p_nthw_stat || !p_nt4ga_stat || n_intf_no < 0 || n_intf_no > NUM_ADAPTER_PORTS_MAX)
241 		return -1;
242 
243 	rte_spinlock_lock(&p_nt_drv->stat_lck);
244 
245 	/* Rx */
246 	for (i = 0; i < internals->nb_rx_queues; i++) {
247 		internals->rxq_scg[i].rx_pkts = 0;
248 		internals->rxq_scg[i].rx_bytes = 0;
249 		internals->rxq_scg[i].err_pkts = 0;
250 	}
251 
252 	internals->rx_missed = 0;
253 
254 	/* Tx */
255 	for (i = 0; i < internals->nb_tx_queues; i++) {
256 		internals->txq_scg[i].tx_pkts = 0;
257 		internals->txq_scg[i].tx_bytes = 0;
258 		internals->txq_scg[i].err_pkts = 0;
259 	}
260 
261 	p_nt4ga_stat->n_totals_reset_timestamp = time(NULL);
262 
263 	rte_spinlock_unlock(&p_nt_drv->stat_lck);
264 
265 	return 0;
266 }
267 
268 static int
269 eth_link_update(struct rte_eth_dev *eth_dev, int wait_to_complete __rte_unused)
270 {
271 	const struct port_ops *port_ops = get_port_ops();
272 
273 	if (port_ops == NULL) {
274 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
275 		return -1;
276 	}
277 
278 	struct pmd_internals *internals = eth_dev->data->dev_private;
279 
280 	const int n_intf_no = internals->n_intf_no;
281 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
282 
283 	if (eth_dev->data->dev_started) {
284 		const bool port_link_status = port_ops->get_link_status(p_adapter_info, n_intf_no);
285 		eth_dev->data->dev_link.link_status =
286 			port_link_status ? RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN;
287 
288 		nt_link_speed_t port_link_speed =
289 			port_ops->get_link_speed(p_adapter_info, n_intf_no);
290 		eth_dev->data->dev_link.link_speed =
291 			nt_link_speed_to_eth_speed_num(port_link_speed);
292 
293 		nt_link_duplex_t nt_link_duplex =
294 			port_ops->get_link_duplex(p_adapter_info, n_intf_no);
295 		eth_dev->data->dev_link.link_duplex = nt_link_duplex_to_eth_duplex(nt_link_duplex);
296 
297 	} else {
298 		eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
299 		eth_dev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
300 		eth_dev->data->dev_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
301 	}
302 
303 	return 0;
304 }
305 
306 static int eth_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
307 {
308 	struct pmd_internals *internals = eth_dev->data->dev_private;
309 	dpdk_stats_collect(internals, stats);
310 	return 0;
311 }
312 
313 static int eth_stats_reset(struct rte_eth_dev *eth_dev)
314 {
315 	struct pmd_internals *internals = eth_dev->data->dev_private;
316 	struct drv_s *p_drv = internals->p_drv;
317 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
318 	const int if_index = internals->n_intf_no;
319 	dpdk_stats_reset(internals, p_nt_drv, if_index);
320 	return 0;
321 }
322 
323 static int
324 eth_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *dev_info)
325 {
326 	const struct port_ops *port_ops = get_port_ops();
327 
328 	if (port_ops == NULL) {
329 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
330 		return -1;
331 	}
332 
333 	struct pmd_internals *internals = eth_dev->data->dev_private;
334 
335 	const int n_intf_no = internals->n_intf_no;
336 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
337 
338 	dev_info->driver_name = internals->name;
339 	dev_info->max_mac_addrs = NUM_MAC_ADDRS_PER_PORT;
340 	dev_info->max_rx_pktlen = HW_MAX_PKT_LEN;
341 	dev_info->max_mtu = MAX_MTU;
342 
343 	if (p_adapter_info->fpga_info.profile == FPGA_INFO_PROFILE_INLINE) {
344 		dev_info->min_mtu = MIN_MTU_INLINE;
345 		dev_info->flow_type_rss_offloads = NT_ETH_RSS_OFFLOAD_MASK;
346 		dev_info->hash_key_size = MAX_RSS_KEY_LEN;
347 
348 		dev_info->rss_algo_capa = RTE_ETH_HASH_ALGO_CAPA_MASK(DEFAULT) |
349 			RTE_ETH_HASH_ALGO_CAPA_MASK(TOEPLITZ);
350 	}
351 
352 	if (internals->p_drv) {
353 		dev_info->max_rx_queues = internals->nb_rx_queues;
354 		dev_info->max_tx_queues = internals->nb_tx_queues;
355 
356 		dev_info->min_rx_bufsize = 64;
357 
358 		const uint32_t nt_port_speed_capa =
359 			port_ops->get_link_speed_capabilities(p_adapter_info, n_intf_no);
360 		dev_info->speed_capa = nt_link_speed_capa_to_eth_speed_capa(nt_port_speed_capa);
361 	}
362 
363 	return 0;
364 }
365 
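/*
 * Copy one received virtqueue packet, which may span several hardware buffers
 * in hw_recv[], into an mbuf chain. Extra mbufs are allocated from mb_pool
 * when the data does not fit in the first one.
 * Returns the number of hardware segments consumed, or -1 on error.
 */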
366 static __rte_always_inline int copy_virtqueue_to_mbuf(struct rte_mbuf *mbuf,
367 	struct rte_mempool *mb_pool,
368 	struct nthw_received_packets *hw_recv,
369 	int max_segs,
370 	uint16_t data_len)
371 {
372 	int src_pkt = 0;
373 	/*
374 	 * 1. virtqueue packets may be segmented
375 	 * 2. the mbuf size may be too small and may need to be segmented
376 	 */
377 	char *data = (char *)hw_recv->addr + SG_HDR_SIZE;
378 	char *dst = (char *)mbuf->buf_addr + RTE_PKTMBUF_HEADROOM;
379 
380 	/* set packet length */
381 	mbuf->pkt_len = data_len - SG_HDR_SIZE;
382 
383 	int remain = mbuf->pkt_len;
384 	/* First cpy_size is without header */
385 	int cpy_size = (data_len > SG_HW_RX_PKT_BUFFER_SIZE)
386 		? SG_HW_RX_PKT_BUFFER_SIZE - SG_HDR_SIZE
387 		: remain;
388 
389 	struct rte_mbuf *m = mbuf;	/* if mbuf segmentation is needed */
390 
391 	while (++src_pkt <= max_segs) {
392 		/* keep track of space in dst */
393 		int cpto_size = rte_pktmbuf_tailroom(m);
394 
395 		if (cpy_size > cpto_size) {
396 			int new_cpy_size = cpto_size;
397 
398 			rte_memcpy((void *)dst, (void *)data, new_cpy_size);
399 			m->data_len += new_cpy_size;
400 			remain -= new_cpy_size;
401 			cpy_size -= new_cpy_size;
402 
403 			data += new_cpy_size;
404 
405 			/*
406 			 * loop if remaining data from this virtqueue seg
407 			 * cannot fit in one extra mbuf
408 			 */
409 			do {
410 				m->next = rte_pktmbuf_alloc(mb_pool);
411 
412 				if (unlikely(!m->next))
413 					return -1;
414 
415 				m = m->next;
416 
417 				/* Headroom is not needed in chained mbufs */
418 				rte_pktmbuf_prepend(m, rte_pktmbuf_headroom(m));
419 				dst = (char *)m->buf_addr;
420 				m->data_len = 0;
421 				m->pkt_len = 0;
422 
423 				cpto_size = rte_pktmbuf_tailroom(m);
424 
425 				int actual_cpy_size =
426 					(cpy_size > cpto_size) ? cpto_size : cpy_size;
427 
428 				rte_memcpy((void *)dst, (void *)data, actual_cpy_size);
429 				m->pkt_len += actual_cpy_size;
430 				m->data_len += actual_cpy_size;
431 
432 				remain -= actual_cpy_size;
433 				cpy_size -= actual_cpy_size;
434 
435 				data += actual_cpy_size;
436 
437 				mbuf->nb_segs++;
438 
439 			} while (cpy_size && remain);
440 
441 		} else {
442 			/* all data from this virtqueue segment can fit in current mbuf */
443 			rte_memcpy((void *)dst, (void *)data, cpy_size);
444 			m->data_len += cpy_size;
445 
446 			if (mbuf->nb_segs > 1)
447 				m->pkt_len += cpy_size;
448 
449 			remain -= cpy_size;
450 		}
451 
452 		/* packet complete - all data from current virtqueue packet has been copied */
453 		if (remain == 0)
454 			break;
455 
456 		/* increment dst to data end */
457 		dst = rte_pktmbuf_mtod_offset(m, char *, m->data_len);
458 		/* prepare for next virtqueue segment */
459 		data = (char *)hw_recv[src_pkt].addr;	/* following packets are full data */
460 
461 		cpy_size = (remain > SG_HW_RX_PKT_BUFFER_SIZE) ? SG_HW_RX_PKT_BUFFER_SIZE : remain;
462 	};
463 
464 	if (src_pkt > max_segs) {
465 		NT_LOG(ERR, NTNIC,
466 			"Did not receive the correct number of segments for a whole packet");
467 		return -1;
468 	}
469 
470 	return src_pkt;
471 }
472 
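/*
 * Rx burst function: fetch whole packets (as hardware segments) from the SG
 * module, copy them into freshly allocated mbufs and release the hardware
 * segments back to the virtqueue.
 */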
473 static uint16_t eth_dev_rx_scg(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
474 {
475 	unsigned int i;
476 	struct rte_mbuf *mbuf;
477 	struct ntnic_rx_queue *rx_q = queue;
478 	uint16_t num_rx = 0;
479 
480 	struct nthw_received_packets hw_recv[MAX_RX_PACKETS];
481 
482 	if (kill_pmd)
483 		return 0;
484 
485 	if (unlikely(nb_pkts == 0))
486 		return 0;
487 
488 	if (nb_pkts > MAX_RX_PACKETS)
489 		nb_pkts = MAX_RX_PACKETS;
490 
491 	uint16_t whole_pkts = 0;
492 	uint16_t hw_recv_pkt_segs = 0;
493 
494 	if (sg_ops != NULL) {
495 		hw_recv_pkt_segs =
496 			sg_ops->nthw_get_rx_packets(rx_q->vq, nb_pkts, hw_recv, &whole_pkts);
497 
498 		if (!hw_recv_pkt_segs)
499 			return 0;
500 	}
501 
502 	nb_pkts = whole_pkts;
503 
504 	int src_pkt = 0;/* from 0 to hw_recv_pkt_segs */
505 
506 	for (i = 0; i < nb_pkts; i++) {
507 		bufs[i] = rte_pktmbuf_alloc(rx_q->mb_pool);
508 
509 		if (!bufs[i]) {
510 			NT_LOG(ERR, NTNIC, "ERROR - no more mbufs available in mempool");
511 			goto err_exit;
512 		}
513 
514 		mbuf = bufs[i];
515 
516 		struct _pkt_hdr_rx *phdr = (struct _pkt_hdr_rx *)hw_recv[src_pkt].addr;
517 
518 		if (phdr->cap_len < SG_HDR_SIZE) {
519 			NT_LOG(ERR, NTNIC,
520 				"Packet shorter than SG header received - dropping packet");
521 			rte_pktmbuf_free(mbuf);
522 			goto err_exit;
523 		}
524 
525 		{
526 			if (phdr->cap_len <= SG_HW_RX_PKT_BUFFER_SIZE &&
527 				(phdr->cap_len - SG_HDR_SIZE) <= rte_pktmbuf_tailroom(mbuf)) {
528 				mbuf->data_len = phdr->cap_len - SG_HDR_SIZE;
529 				rte_memcpy(rte_pktmbuf_mtod(mbuf, char *),
530 					(char *)hw_recv[src_pkt].addr + SG_HDR_SIZE,
531 					mbuf->data_len);
532 
533 				mbuf->pkt_len = mbuf->data_len;
534 				src_pkt++;
535 
536 			} else {
537 				int cpy_segs = copy_virtqueue_to_mbuf(mbuf, rx_q->mb_pool,
538 						&hw_recv[src_pkt],
539 						hw_recv_pkt_segs - src_pkt,
540 						phdr->cap_len);
541 
542 				if (cpy_segs < 0) {
543 					/* Error */
544 					rte_pktmbuf_free(mbuf);
545 					goto err_exit;
546 				}
547 
548 				src_pkt += cpy_segs;
549 			}
550 
551 			num_rx++;
552 
553 			mbuf->ol_flags &= ~(RTE_MBUF_F_RX_FDIR_ID | RTE_MBUF_F_RX_FDIR);
554 			mbuf->port = (uint16_t)-1;
555 		}
556 	}
557 
558 err_exit:
559 
560 	if (sg_ops != NULL)
561 		sg_ops->nthw_release_rx_packets(rx_q->vq, hw_recv_pkt_segs);
562 
563 	return num_rx;
564 }
565 
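/*
 * Copy an mbuf chain into one or more virtqueue TX buffers. The first buffer
 * reserves SG_HDR_SIZE bytes for the SG header; additional descriptors are
 * chained with VIRTQ_DESC_F_NEXT when the packet does not fit in one buffer.
 * Returns the number of descriptors used, or -1 if more than max_segs
 * would be required.
 */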
566 static int copy_mbuf_to_virtqueue(struct nthw_cvirtq_desc *cvq_desc,
567 	uint16_t vq_descr_idx,
568 	struct nthw_memory_descriptor *vq_bufs,
569 	int max_segs,
570 	struct rte_mbuf *mbuf)
571 {
572 	/*
573 	 * 1. mbuf packet may be segmented
574 	 * 2. the virtqueue buffer size may be too small and may need to be segmented
575 	 */
576 
577 	char *data = rte_pktmbuf_mtod(mbuf, char *);
578 	char *dst = (char *)vq_bufs[vq_descr_idx].virt_addr + SG_HDR_SIZE;
579 
580 	int remain = mbuf->pkt_len;
581 	int cpy_size = mbuf->data_len;
582 
583 	struct rte_mbuf *m = mbuf;
584 	int cpto_size = SG_HW_TX_PKT_BUFFER_SIZE - SG_HDR_SIZE;
585 
586 	cvq_desc->b[vq_descr_idx].len = SG_HDR_SIZE;
587 
588 	int cur_seg_num = 0;	/* start from 0 */
589 
590 	while (m) {
591 		/* Check whether all data in the current source segment fits in the current destination buffer */
592 		if (cpy_size > cpto_size) {
593 			int new_cpy_size = cpto_size;
594 
595 			rte_memcpy((void *)dst, (void *)data, new_cpy_size);
596 
597 			cvq_desc->b[vq_descr_idx].len += new_cpy_size;
598 
599 			remain -= new_cpy_size;
600 			cpy_size -= new_cpy_size;
601 
602 			data += new_cpy_size;
603 
604 			/*
605 			 * Loop if the remaining data from this mbuf segment cannot fit in
606 			 * one extra virtqueue buffer
607 			 */
608 			do {
609 				vq_add_flags(cvq_desc, vq_descr_idx, VIRTQ_DESC_F_NEXT);
610 
611 				int next_vq_descr_idx = VIRTQ_DESCR_IDX_NEXT(vq_descr_idx);
612 
613 				vq_set_next(cvq_desc, vq_descr_idx, next_vq_descr_idx);
614 
615 				vq_descr_idx = next_vq_descr_idx;
616 
617 				vq_set_flags(cvq_desc, vq_descr_idx, 0);
618 				vq_set_next(cvq_desc, vq_descr_idx, 0);
619 
620 				if (++cur_seg_num > max_segs)
621 					break;
622 
623 				dst = (char *)vq_bufs[vq_descr_idx].virt_addr;
624 				cpto_size = SG_HW_TX_PKT_BUFFER_SIZE;
625 
626 				int actual_cpy_size =
627 					(cpy_size > cpto_size) ? cpto_size : cpy_size;
628 				rte_memcpy((void *)dst, (void *)data, actual_cpy_size);
629 
630 				cvq_desc->b[vq_descr_idx].len = actual_cpy_size;
631 
632 				remain -= actual_cpy_size;
633 				cpy_size -= actual_cpy_size;
634 				cpto_size -= actual_cpy_size;
635 
636 				data += actual_cpy_size;
637 
638 			} while (cpy_size && remain);
639 
640 		} else {
641 			/* All data from this segment can fit in current virtqueue buffer */
642 			rte_memcpy((void *)dst, (void *)data, cpy_size);
643 
644 			cvq_desc->b[vq_descr_idx].len += cpy_size;
645 
646 			remain -= cpy_size;
647 			cpto_size -= cpy_size;
648 		}
649 
650 		/* Packet complete - all segments from the current mbuf have been copied */
651 		if (remain == 0)
652 			break;
653 
654 		/* increment dst to data end */
655 		dst = (char *)vq_bufs[vq_descr_idx].virt_addr + cvq_desc->b[vq_descr_idx].len;
656 
657 		m = m->next;
658 
659 		if (!m) {
660 			NT_LOG(ERR, NTNIC, "ERROR: invalid packet size");
661 			break;
662 		}
663 
664 		/* Prepare for next mbuf segment */
665 		data = rte_pktmbuf_mtod(m, char *);
666 		cpy_size = m->data_len;
667 	};
668 
669 	cur_seg_num++;
670 
671 	if (cur_seg_num > max_segs) {
672 		NT_LOG(ERR, NTNIC,
673 			"Packet required more virtqueue segments than available");
674 		return -1;
675 	}
676 
677 	return cur_seg_num;
678 }
679 
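/*
 * Tx burst function: compute the number of virtqueue buffers needed for the
 * burst, reserve descriptors from the SG module (trimming the burst if fewer
 * are granted), copy each mbuf into the virtqueue, free the mbuf and finally
 * release the used descriptors to hardware.
 */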
680 static uint16_t eth_dev_tx_scg(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
681 {
682 	uint16_t pkt;
683 	uint16_t first_vq_descr_idx = 0;
684 
685 	struct nthw_cvirtq_desc cvq_desc;
686 
687 	struct nthw_memory_descriptor *vq_bufs;
688 
689 	struct ntnic_tx_queue *tx_q = queue;
690 
691 	int nb_segs = 0, i;
692 	int pkts_sent = 0;
693 	uint16_t nb_segs_arr[MAX_TX_PACKETS];
694 
695 	if (kill_pmd)
696 		return 0;
697 
698 	if (nb_pkts > MAX_TX_PACKETS)
699 		nb_pkts = MAX_TX_PACKETS;
700 
701 	/*
702 	 * count all segments needed to contain all packets in vq buffers
703 	 */
704 	for (i = 0; i < nb_pkts; i++) {
705 		/* build the num segments array for segmentation control and release function */
706 		int vq_segs = NUM_VQ_SEGS(bufs[i]->pkt_len);
707 		nb_segs_arr[i] = vq_segs;
708 		nb_segs += vq_segs;
709 	}
710 
711 	if (!nb_segs)
712 		goto exit_out;
713 
714 	if (sg_ops == NULL)
715 		goto exit_out;
716 
717 	int got_nb_segs = sg_ops->nthw_get_tx_packets(tx_q->vq, nb_segs, &first_vq_descr_idx,
718 			&cvq_desc /*&vq_descr,*/, &vq_bufs);
719 
720 	if (!got_nb_segs)
721 		goto exit_out;
722 
723 	/*
724 	 * we may get fewer vq buffers than we asked for;
725 	 * calculate the last whole packet that can fit into
726 	 * what we have got
727 	 */
728 	while (got_nb_segs < nb_segs) {
729 		if (!--nb_pkts)
730 			goto exit_out;
731 
732 		nb_segs -= NUM_VQ_SEGS(bufs[nb_pkts]->pkt_len);
733 
734 		if (nb_segs <= 0)
735 			goto exit_out;
736 	}
737 
738 	/*
739 	 * nb_pkts & nb_segs, got it all, ready to copy
740 	 */
741 	int seg_idx = 0;
742 	int last_seg_idx = seg_idx;
743 
744 	for (pkt = 0; pkt < nb_pkts; ++pkt) {
745 		uint16_t vq_descr_idx = VIRTQ_DESCR_IDX(seg_idx);
746 
747 		vq_set_flags(&cvq_desc, vq_descr_idx, 0);
748 		vq_set_next(&cvq_desc, vq_descr_idx, 0);
749 
750 		if (bufs[pkt]->nb_segs == 1 && nb_segs_arr[pkt] == 1) {
751 			rte_memcpy((void *)((char *)vq_bufs[vq_descr_idx].virt_addr + SG_HDR_SIZE),
752 				rte_pktmbuf_mtod(bufs[pkt], void *), bufs[pkt]->pkt_len);
753 
754 			cvq_desc.b[vq_descr_idx].len = bufs[pkt]->pkt_len + SG_HDR_SIZE;
755 
756 			seg_idx++;
757 
758 		} else {
759 			int cpy_segs = copy_mbuf_to_virtqueue(&cvq_desc, vq_descr_idx, vq_bufs,
760 					nb_segs - last_seg_idx, bufs[pkt]);
761 
762 			if (cpy_segs < 0)
763 				break;
764 
765 			seg_idx += cpy_segs;
766 		}
767 
768 		last_seg_idx = seg_idx;
769 		rte_pktmbuf_free(bufs[pkt]);
770 		pkts_sent++;
771 	}
772 
773 exit_out:
774 
775 	if (sg_ops != NULL) {
776 		if (pkts_sent)
777 			sg_ops->nthw_release_tx_packets(tx_q->vq, pkts_sent, nb_segs_arr);
778 	}
779 
780 	return pkts_sent;
781 }
782 
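/*
 * Allocate and VFIO-map the memory used by a hardware virtio queue: a 1MB
 * control/descriptor area followed by num_descr packet buffers of buf_size
 * bytes. The preferred layout is a single contiguous allocation that does not
 * cross a 1G boundary; if that cannot be achieved, the control area and the
 * packet buffers are allocated and DMA-mapped separately.
 */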
783 static int allocate_hw_virtio_queues(struct rte_eth_dev *eth_dev, int vf_num, struct hwq_s *hwq,
784 	int num_descr, int buf_size)
785 {
786 	int i, res;
787 	uint32_t size;
788 	uint64_t iova_addr;
789 
790 	NT_LOG(DBG, NTNIC, "***** Configure IOMMU for HW queues on VF %i *****", vf_num);
791 
792 	/* Allocate 1MB for the combined descr rings plus room for all packet buffers */
793 	uint64_t tot_alloc_size = 0x100000 + buf_size * num_descr;
794 
795 	void *virt =
796 		rte_malloc_socket("VirtQDescr", tot_alloc_size, nt_util_align_size(tot_alloc_size),
797 			eth_dev->data->numa_node);
798 
799 	if (!virt)
800 		return -1;
801 
802 	uint64_t gp_offset = (uint64_t)virt & ONE_G_MASK;
803 	rte_iova_t hpa = rte_malloc_virt2iova(virt);
804 
805 	NT_LOG(DBG, NTNIC, "Allocated virtio descr rings : virt "
806 		"%p [0x%" PRIX64 "],hpa %" PRIX64 " [0x%" PRIX64 "]",
807 		virt, gp_offset, hpa, hpa & ONE_G_MASK);
808 
809 	/*
810 	 * Same offset on both HPA and IOVA
811 	 * Make sure 1G boundary is never crossed
812 	 */
813 	if (((hpa & ONE_G_MASK) != gp_offset) ||
814 		(((uint64_t)virt + tot_alloc_size) & ~ONE_G_MASK) !=
815 		((uint64_t)virt & ~ONE_G_MASK)) {
816 		NT_LOG(ERR, NTNIC, "*********************************************************");
817 		NT_LOG(ERR, NTNIC, "ERROR, no optimal IOMMU mapping available hpa: %016" PRIX64
818 			"(%016" PRIX64 "), gp_offset: %016" PRIX64 " size: %" PRIu64,
819 			hpa, hpa & ONE_G_MASK, gp_offset, tot_alloc_size);
820 		NT_LOG(ERR, NTNIC, "*********************************************************");
821 
822 		rte_free(virt);
823 
824 		/* Just allocate 1MB to hold all combined descr rings */
825 		size = 0x100000;
826 		void *virt = rte_malloc_socket("VirtQDescr", size, 4096, eth_dev->data->numa_node);
827 
828 		if (!virt)
829 			return -1;
830 
831 		res = nt_vfio_dma_map(vf_num, virt, &iova_addr, size);
832 
833 		NT_LOG(DBG, NTNIC, "VFIO MMAP res %i, vf_num %i", res, vf_num);
834 
835 		if (res != 0)
836 			return -1;
837 
838 		hwq->vf_num = vf_num;
839 		hwq->virt_queues_ctrl.virt_addr = virt;
840 		hwq->virt_queues_ctrl.phys_addr = (void *)iova_addr;
841 		hwq->virt_queues_ctrl.len = size;
842 
843 		NT_LOG(DBG, NTNIC,
844 			"Allocated combined 1MB for virtio descr rings: %p, IOVA %016" PRIX64 "",
845 			virt, iova_addr);
846 
847 		size = num_descr * sizeof(struct nthw_memory_descriptor);
848 		hwq->pkt_buffers =
849 			rte_zmalloc_socket("rx_pkt_buffers", size, 64, eth_dev->data->numa_node);
850 
851 		if (!hwq->pkt_buffers) {
852 			NT_LOG(ERR, NTNIC,
853 				"Failed to allocate buffer array for hw-queue %p, total size %i, elements %i",
854 				hwq->pkt_buffers, size, num_descr);
855 			rte_free(virt);
856 			return -1;
857 		}
858 
859 		size = buf_size * num_descr;
860 		void *virt_addr =
861 			rte_malloc_socket("pkt_buffer_pkts", size, 4096, eth_dev->data->numa_node);
862 
863 		if (!virt_addr) {
864 			NT_LOG(ERR, NTNIC,
865 				"Failed to allocate packet buffers for hw-queue %p, buf size %i, elements %i",
866 				hwq->pkt_buffers, buf_size, num_descr);
867 			rte_free(hwq->pkt_buffers);
868 			rte_free(virt);
869 			return -1;
870 		}
871 
872 		res = nt_vfio_dma_map(vf_num, virt_addr, &iova_addr, size);
873 
874 		NT_LOG(DBG, NTNIC,
875 			"VFIO MMAP res %i, virt %p, iova %016" PRIX64 ", vf_num %i, num pkt bufs %i, tot size %i",
876 			res, virt_addr, iova_addr, vf_num, num_descr, size);
877 
878 		if (res != 0)
879 			return -1;
880 
881 		for (i = 0; i < num_descr; i++) {
882 			hwq->pkt_buffers[i].virt_addr =
883 				(void *)((char *)virt_addr + ((uint64_t)(i) * buf_size));
884 			hwq->pkt_buffers[i].phys_addr =
885 				(void *)(iova_addr + ((uint64_t)(i) * buf_size));
886 			hwq->pkt_buffers[i].len = buf_size;
887 		}
888 
889 		return 0;
890 	}	/* End of: no optimal IOMMU mapping available */
891 
892 	res = nt_vfio_dma_map(vf_num, virt, &iova_addr, ONE_G_SIZE);
893 
894 	if (res != 0) {
895 		NT_LOG(ERR, NTNIC, "VFIO MMAP FAILED! res %i, vf_num %i", res, vf_num);
896 		return -1;
897 	}
898 
899 	hwq->vf_num = vf_num;
900 	hwq->virt_queues_ctrl.virt_addr = virt;
901 	hwq->virt_queues_ctrl.phys_addr = (void *)(iova_addr);
902 	hwq->virt_queues_ctrl.len = 0x100000;
903 	iova_addr += 0x100000;
904 
905 	NT_LOG(DBG, NTNIC,
906 		"VFIO MMAP: virt_addr=%p phys_addr=%p size=%" PRIX32 " hpa=%" PRIX64 "",
907 		hwq->virt_queues_ctrl.virt_addr, hwq->virt_queues_ctrl.phys_addr,
908 		hwq->virt_queues_ctrl.len, rte_malloc_virt2iova(hwq->virt_queues_ctrl.virt_addr));
909 
910 	size = num_descr * sizeof(struct nthw_memory_descriptor);
911 	hwq->pkt_buffers =
912 		rte_zmalloc_socket("rx_pkt_buffers", size, 64, eth_dev->data->numa_node);
913 
914 	if (!hwq->pkt_buffers) {
915 		NT_LOG(ERR, NTNIC,
916 			"Failed to allocate buffer array for hw-queue %p, total size %i, elements %i",
917 			hwq->pkt_buffers, size, num_descr);
918 		rte_free(virt);
919 		return -1;
920 	}
921 
922 	void *virt_addr = (void *)((uint64_t)virt + 0x100000);
923 
924 	for (i = 0; i < num_descr; i++) {
925 		hwq->pkt_buffers[i].virt_addr =
926 			(void *)((char *)virt_addr + ((uint64_t)(i) * buf_size));
927 		hwq->pkt_buffers[i].phys_addr = (void *)(iova_addr + ((uint64_t)(i) * buf_size));
928 		hwq->pkt_buffers[i].len = buf_size;
929 	}
930 
931 	return 0;
932 }
933 
934 static void release_hw_virtio_queues(struct hwq_s *hwq)
935 {
936 	if (!hwq || hwq->vf_num == 0)
937 		return;
938 
939 	hwq->vf_num = 0;
940 }
941 
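/* Unmap the VFIO DMA mapping of a hardware queue and free its buffers. */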
942 static int deallocate_hw_virtio_queues(struct hwq_s *hwq)
943 {
944 	int vf_num = hwq->vf_num;
945 
946 	void *virt = hwq->virt_queues_ctrl.virt_addr;
947 
948 	int res = nt_vfio_dma_unmap(vf_num, hwq->virt_queues_ctrl.virt_addr,
949 			(uint64_t)hwq->virt_queues_ctrl.phys_addr, ONE_G_SIZE);
950 
951 	if (res != 0) {
952 		NT_LOG(ERR, NTNIC, "VFIO UNMAP FAILED! res %i, vf_num %i", res, vf_num);
953 		return -1;
954 	}
955 
956 	release_hw_virtio_queues(hwq);
957 	rte_free(hwq->pkt_buffers);
958 	rte_free(virt);
959 	return 0;
960 }
961 
962 static void eth_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t queue_id)
963 {
964 	struct pmd_internals *internals = eth_dev->data->dev_private;
965 	struct ntnic_tx_queue *tx_q = &internals->txq_scg[queue_id];
966 	deallocate_hw_virtio_queues(&tx_q->hwq);
967 }
968 
969 static void eth_rx_queue_release(struct rte_eth_dev *eth_dev, uint16_t queue_id)
970 {
971 	struct pmd_internals *internals = eth_dev->data->dev_private;
972 	struct ntnic_rx_queue *rx_q = &internals->rxq_scg[queue_id];
973 	deallocate_hw_virtio_queues(&rx_q->hwq);
974 }
975 
976 static int num_queues_alloced;
977 
978 /* Reserve 'num' queues and return the first queue number of the block, or -1 on failure */
979 static int allocate_queue(int num)
980 {
981 	int next_free = num_queues_alloced;
982 	NT_LOG_DBGX(DBG, NTNIC, "num_queues_alloced=%u, New queues=%u, Max queues=%u",
983 		num_queues_alloced, num, MAX_TOTAL_QUEUES);
984 
985 	if (num_queues_alloced + num > MAX_TOTAL_QUEUES)
986 		return -1;
987 
988 	num_queues_alloced += num;
989 	return next_free;
990 }
991 
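/*
 * Set up an Rx queue. For PORT_TYPE_OVERRIDE only the mempool is attached;
 * otherwise a hardware virtio queue is allocated and a managed RX virt queue
 * is created through the SG module.
 */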
992 static int eth_rx_scg_queue_setup(struct rte_eth_dev *eth_dev,
993 	uint16_t rx_queue_id,
994 	uint16_t nb_rx_desc __rte_unused,
995 	unsigned int socket_id __rte_unused,
996 	const struct rte_eth_rxconf *rx_conf __rte_unused,
997 	struct rte_mempool *mb_pool)
998 {
999 	NT_LOG_DBGX(DBG, NTNIC, "Rx queue setup");
1000 	struct rte_pktmbuf_pool_private *mbp_priv;
1001 	struct pmd_internals *internals = eth_dev->data->dev_private;
1002 	struct ntnic_rx_queue *rx_q = &internals->rxq_scg[rx_queue_id];
1003 	struct drv_s *p_drv = internals->p_drv;
1004 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
1005 
1006 	if (sg_ops == NULL) {
1007 		NT_LOG_DBGX(DBG, NTNIC, "SG module is not initialized");
1008 		return 0;
1009 	}
1010 
1011 	if (internals->type == PORT_TYPE_OVERRIDE) {
1012 		rx_q->mb_pool = mb_pool;
1013 		eth_dev->data->rx_queues[rx_queue_id] = rx_q;
1014 		mbp_priv = rte_mempool_get_priv(rx_q->mb_pool);
1015 		rx_q->buf_size = (uint16_t)(mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
1016 		rx_q->enabled = 1;
1017 		return 0;
1018 	}
1019 
1020 	NT_LOG(DBG, NTNIC, "(%i) NTNIC RX OVS-SW queue setup: queue id %i, hw queue index %i",
1021 		internals->port, rx_queue_id, rx_q->queue.hw_id);
1022 
1023 	rx_q->mb_pool = mb_pool;
1024 
1025 	eth_dev->data->rx_queues[rx_queue_id] = rx_q;
1026 
1027 	mbp_priv = rte_mempool_get_priv(rx_q->mb_pool);
1028 	rx_q->buf_size = (uint16_t)(mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
1029 	rx_q->enabled = 1;
1030 
1031 	if (allocate_hw_virtio_queues(eth_dev, EXCEPTION_PATH_HID, &rx_q->hwq,
1032 			SG_NB_HW_RX_DESCRIPTORS, SG_HW_RX_PKT_BUFFER_SIZE) < 0)
1033 		return -1;
1034 
1035 	rx_q->nb_hw_rx_descr = SG_NB_HW_RX_DESCRIPTORS;
1036 
1037 	rx_q->profile = p_drv->ntdrv.adapter_info.fpga_info.profile;
1038 
1039 	rx_q->vq =
1040 		sg_ops->nthw_setup_mngd_rx_virt_queue(p_nt_drv->adapter_info.fpga_info.mp_nthw_dbs,
1041 			rx_q->queue.hw_id,	/* index */
1042 			rx_q->nb_hw_rx_descr,
1043 			EXCEPTION_PATH_HID,	/* host_id */
1044 			1,	/* header: NT DVIO header for exception path */
1045 			&rx_q->hwq.virt_queues_ctrl,
1046 			rx_q->hwq.pkt_buffers,
1047 			SPLIT_RING,
1048 			-1);
1049 
1050 	NT_LOG(DBG, NTNIC, "(%i) NTNIC RX OVS-SW queues successfully setup", internals->port);
1051 
1052 	return 0;
1053 }
1054 
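/*
 * Set up a Tx queue: select the hardware target (QSL/HSH index, virtual port
 * queue or physical port), allocate a hardware virtio queue and create a
 * managed TX virt queue through the SG module. For physical ports the admin
 * state is enabled once the queue is ready.
 */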
1055 static int eth_tx_scg_queue_setup(struct rte_eth_dev *eth_dev,
1056 	uint16_t tx_queue_id,
1057 	uint16_t nb_tx_desc __rte_unused,
1058 	unsigned int socket_id __rte_unused,
1059 	const struct rte_eth_txconf *tx_conf __rte_unused)
1060 {
1061 	const struct port_ops *port_ops = get_port_ops();
1062 
1063 	if (port_ops == NULL) {
1064 		NT_LOG_DBGX(ERR, NTNIC, "Link management module uninitialized");
1065 		return -1;
1066 	}
1067 
1068 	NT_LOG_DBGX(DBG, NTNIC, "Tx queue setup");
1069 	struct pmd_internals *internals = eth_dev->data->dev_private;
1070 	struct drv_s *p_drv = internals->p_drv;
1071 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
1072 	struct ntnic_tx_queue *tx_q = &internals->txq_scg[tx_queue_id];
1073 
1074 	if (internals->type == PORT_TYPE_OVERRIDE) {
1075 		eth_dev->data->tx_queues[tx_queue_id] = tx_q;
1076 		return 0;
1077 	}
1078 
1079 	if (sg_ops == NULL) {
1080 		NT_LOG_DBGX(DBG, NTNIC, "SG module is not initialized");
1081 		return 0;
1082 	}
1083 
1084 	NT_LOG(DBG, NTNIC, "(%i) NTNIC TX OVS-SW queue setup: queue id %i, hw queue index %i",
1085 		tx_q->port, tx_queue_id, tx_q->queue.hw_id);
1086 
1087 	if (tx_queue_id > internals->nb_tx_queues) {
1088 		NT_LOG(ERR, NTNIC, "Error invalid tx queue id");
1089 		return -1;
1090 	}
1091 
1092 	eth_dev->data->tx_queues[tx_queue_id] = tx_q;
1093 
1094 	/* Calculate target ID for HW - to be used in NTDVIO0 header bypass_port */
1095 	if (tx_q->rss_target_id >= 0) {
1096 		/* bypass to a multiqueue port - qsl-hsh index */
1097 		tx_q->target_id = tx_q->rss_target_id + 0x90;
1098 
1099 	} else if (internals->vpq[tx_queue_id].hw_id > -1) {
1100 		/* virtual port - queue index */
1101 		tx_q->target_id = internals->vpq[tx_queue_id].hw_id;
1102 
1103 	} else {
1104 		/* Phy port - phy port identifier */
1105 		/* output/bypass to MAC */
1106 		tx_q->target_id = (int)(tx_q->port + 0x80);
1107 	}
1108 
1109 	if (allocate_hw_virtio_queues(eth_dev, EXCEPTION_PATH_HID, &tx_q->hwq,
1110 			SG_NB_HW_TX_DESCRIPTORS, SG_HW_TX_PKT_BUFFER_SIZE) < 0) {
1111 		return -1;
1112 	}
1113 
1114 	tx_q->nb_hw_tx_descr = SG_NB_HW_TX_DESCRIPTORS;
1115 
1116 	tx_q->profile = p_drv->ntdrv.adapter_info.fpga_info.profile;
1117 
1118 	uint32_t port, header;
1119 	port = tx_q->port;	/* transmit port */
1120 	header = 0;	/* header type VirtIO-Net */
1121 
1122 	tx_q->vq =
1123 		sg_ops->nthw_setup_mngd_tx_virt_queue(p_nt_drv->adapter_info.fpga_info.mp_nthw_dbs,
1124 			tx_q->queue.hw_id,	/* index */
1125 			tx_q->nb_hw_tx_descr,	/* queue size */
1126 			EXCEPTION_PATH_HID,	/* host_id always VF4 */
1127 			port,
1128 			/*
1129 			 * in_port - in vswitch mode the tx port for the OVS
1130 			 * exception path must be moved away from the VM tx port,
1131 			 * because QoS is matched by port id!
1133 			 */
1134 			tx_q->port + 128,
1135 			header,
1136 			&tx_q->hwq.virt_queues_ctrl,
1137 			tx_q->hwq.pkt_buffers,
1138 			SPLIT_RING,
1139 			-1,
1140 			IN_ORDER);
1141 
1142 	tx_q->enabled = 1;
1143 
1144 	NT_LOG(DBG, NTNIC, "(%i) NTNIC TX OVS-SW queues successfully setup", internals->port);
1145 
1146 	if (internals->type == PORT_TYPE_PHYSICAL) {
1147 		struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
1148 		NT_LOG(DBG, NTNIC, "Port %i is ready for data. Enable port",
1149 			internals->n_intf_no);
1150 		port_ops->set_adm_state(p_adapter_info, internals->n_intf_no, true);
1151 	}
1152 
1153 	return 0;
1154 }
1155 
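/*
 * Set the MTU for a physical port running the inline profile. The value must
 * be within [MIN_MTU_INLINE, MAX_MTU] and the port must be physical;
 * anything else returns -EINVAL.
 */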
1156 static int dev_set_mtu_inline(struct rte_eth_dev *eth_dev, uint16_t mtu)
1157 {
1158 	const struct profile_inline_ops *profile_inline_ops = get_profile_inline_ops();
1159 
1160 	if (profile_inline_ops == NULL) {
1161 		NT_LOG_DBGX(ERR, NTNIC, "profile_inline module uninitialized");
1162 		return -1;
1163 	}
1164 
1165 	struct pmd_internals *internals = (struct pmd_internals *)eth_dev->data->dev_private;
1166 
1167 	struct flow_eth_dev *flw_dev = internals->flw_dev;
1168 	int ret = -1;
1169 
1170 	if (internals->type == PORT_TYPE_PHYSICAL && mtu >= MIN_MTU_INLINE && mtu <= MAX_MTU)
1171 		ret = profile_inline_ops->flow_set_mtu_inline(flw_dev, internals->port, mtu);
1172 
1173 	return ret ? -EINVAL : 0;
1174 }
1175 
1176 static int eth_rx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1177 {
1178 	eth_dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
1179 	return 0;
1180 }
1181 
1182 static int eth_rx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1183 {
1184 	eth_dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
1185 	return 0;
1186 }
1187 
1188 static int eth_tx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1189 {
1190 	eth_dev->data->tx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
1191 	return 0;
1192 }
1193 
1194 static int eth_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1195 {
1196 	eth_dev->data->tx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
1197 	return 0;
1198 }
1199 
1200 static int
1201 eth_mac_addr_add(struct rte_eth_dev *eth_dev,
1202 	struct rte_ether_addr *mac_addr,
1203 	uint32_t index,
1204 	uint32_t vmdq __rte_unused)
1205 {
1206 	struct rte_ether_addr *const eth_addrs = eth_dev->data->mac_addrs;
1207 
1208 	assert(index < NUM_MAC_ADDRS_PER_PORT);
1209 
1210 	if (index >= NUM_MAC_ADDRS_PER_PORT) {
1211 		const struct pmd_internals *const internals =
1212 			eth_dev->data->dev_private;
1213 		NT_LOG_DBGX(DBG, NTNIC, "Port %i: illegal index %u (>= %u)",
1214 			internals->n_intf_no, index, NUM_MAC_ADDRS_PER_PORT);
1215 		return -1;
1216 	}
1217 
1218 	eth_addrs[index] = *mac_addr;
1219 
1220 	return 0;
1221 }
1222 
1223 static int
1224 eth_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1225 {
1226 	struct rte_ether_addr *const eth_addrs = dev->data->mac_addrs;
1227 
1228 	eth_addrs[0U] = *mac_addr;
1229 
1230 	return 0;
1231 }
1232 
1233 static int
1234 eth_set_mc_addr_list(struct rte_eth_dev *eth_dev,
1235 	struct rte_ether_addr *mc_addr_set,
1236 	uint32_t nb_mc_addr)
1237 {
1238 	struct pmd_internals *const internals = eth_dev->data->dev_private;
1239 	struct rte_ether_addr *const mc_addrs = internals->mc_addrs;
1240 	size_t i;
1241 
1242 	if (nb_mc_addr >= NUM_MULTICAST_ADDRS_PER_PORT) {
1243 		NT_LOG_DBGX(DBG, NTNIC,
1244 			"Port %i: too many multicast addresses %u (>= %u)",
1245 			internals->n_intf_no, nb_mc_addr, NUM_MULTICAST_ADDRS_PER_PORT);
1246 		return -1;
1247 	}
1248 
1249 	for (i = 0U; i < NUM_MULTICAST_ADDRS_PER_PORT; i++)
1250 		if (i < nb_mc_addr)
1251 			mc_addrs[i] = mc_addr_set[i];
1252 
1253 		else
1254 			(void)memset(&mc_addrs[i], 0, sizeof(mc_addrs[i]));
1255 
1256 	return 0;
1257 }
1258 
1259 static int
1260 eth_dev_configure(struct rte_eth_dev *eth_dev)
1261 {
1262 	NT_LOG_DBGX(DBG, NTNIC, "Called for eth_dev %p", eth_dev);
1263 
1264 	/* The device is ALWAYS running promiscuous mode. */
1265 	eth_dev->data->promiscuous ^= ~eth_dev->data->promiscuous;
1266 	return 0;
1267 }
1268 
1269 static int
1270 eth_dev_start(struct rte_eth_dev *eth_dev)
1271 {
1272 	const struct port_ops *port_ops = get_port_ops();
1273 
1274 	if (port_ops == NULL) {
1275 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
1276 		return -1;
1277 	}
1278 
1279 	eth_dev->flow_fp_ops = get_dev_fp_flow_ops();
1280 	struct pmd_internals *internals = eth_dev->data->dev_private;
1281 
1282 	const int n_intf_no = internals->n_intf_no;
1283 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
1284 
1285 	NT_LOG_DBGX(DBG, NTNIC, "Port %u", internals->n_intf_no);
1286 
1287 	/* Start queues */
1288 	uint q;
1289 
1290 	for (q = 0; q < internals->nb_rx_queues; q++)
1291 		eth_rx_queue_start(eth_dev, q);
1292 
1293 	for (q = 0; q < internals->nb_tx_queues; q++)
1294 		eth_tx_queue_start(eth_dev, q);
1295 
1296 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE) {
1297 		eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
1298 
1299 	} else {
1300 		/* Enable the port */
1301 		port_ops->set_adm_state(p_adapter_info, internals->n_intf_no, true);
1302 
1303 		/*
1304 		 * wait for link on port
1305 		 * If the application starts sending before the FPGA port is ready,
1306 		 * garbage is produced
1307 		 */
1308 		int loop = 0;
1309 
1310 		while (port_ops->get_link_status(p_adapter_info, n_intf_no) == RTE_ETH_LINK_DOWN) {
1311 			/* break out after 5 sec */
1312 			if (++loop >= 50) {
1313 				NT_LOG_DBGX(DBG, NTNIC,
1314 					"TIMEOUT No link on port %i (5sec timeout)",
1315 					internals->n_intf_no);
1316 				break;
1317 			}
1318 
1319 			nt_os_wait_usec(100 * 1000);
1320 		}
1321 
1322 		if (internals->lpbk_mode) {
1323 			if (internals->lpbk_mode & 1 << 0) {
1324 				port_ops->set_loopback_mode(p_adapter_info, n_intf_no,
1325 					NT_LINK_LOOPBACK_HOST);
1326 			}
1327 
1328 			if (internals->lpbk_mode & 1 << 1) {
1329 				port_ops->set_loopback_mode(p_adapter_info, n_intf_no,
1330 					NT_LINK_LOOPBACK_LINE);
1331 			}
1332 		}
1333 	}
1334 
1335 	return 0;
1336 }
1337 
1338 static int
1339 eth_dev_stop(struct rte_eth_dev *eth_dev)
1340 {
1341 	struct pmd_internals *internals = eth_dev->data->dev_private;
1342 
1343 	NT_LOG_DBGX(DBG, NTNIC, "Port %u", internals->n_intf_no);
1344 
1345 	if (internals->type != PORT_TYPE_VIRTUAL) {
1346 		uint q;
1347 
1348 		for (q = 0; q < internals->nb_rx_queues; q++)
1349 			eth_rx_queue_stop(eth_dev, q);
1350 
1351 		for (q = 0; q < internals->nb_tx_queues; q++)
1352 			eth_tx_queue_stop(eth_dev, q);
1353 	}
1354 
1355 	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
1356 	return 0;
1357 }
1358 
1359 static int
1360 eth_dev_set_link_up(struct rte_eth_dev *eth_dev)
1361 {
1362 	const struct port_ops *port_ops = get_port_ops();
1363 
1364 	if (port_ops == NULL) {
1365 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
1366 		return -1;
1367 	}
1368 
1369 	struct pmd_internals *const internals = eth_dev->data->dev_private;
1370 
1371 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
1372 	const int port = internals->n_intf_no;
1373 
1374 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE)
1375 		return 0;
1376 
1377 	assert(port >= 0 && port < NUM_ADAPTER_PORTS_MAX);
1378 	assert(port == internals->n_intf_no);
1379 
1380 	port_ops->set_adm_state(p_adapter_info, port, true);
1381 
1382 	return 0;
1383 }
1384 
1385 static int
1386 eth_dev_set_link_down(struct rte_eth_dev *eth_dev)
1387 {
1388 	const struct port_ops *port_ops = get_port_ops();
1389 
1390 	if (port_ops == NULL) {
1391 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
1392 		return -1;
1393 	}
1394 
1395 	struct pmd_internals *const internals = eth_dev->data->dev_private;
1396 
1397 	struct adapter_info_s *p_adapter_info = &internals->p_drv->ntdrv.adapter_info;
1398 	const int port = internals->n_intf_no;
1399 
1400 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE)
1401 		return 0;
1402 
1403 	assert(port >= 0 && port < NUM_ADAPTER_PORTS_MAX);
1404 	assert(port == internals->n_intf_no);
1405 
1406 	port_ops->set_link_status(p_adapter_info, port, false);
1407 
1408 	return 0;
1409 }
1410 
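/*
 * Tear down a driver instance: clear the global adapter pointer, stop the
 * statistics, FLM and port event threads, free the FLM event queues (inline
 * profile only) and de-initialize the adapter before freeing its memory.
 */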
1411 static void
1412 drv_deinit(struct drv_s *p_drv)
1413 {
1414 	const struct profile_inline_ops *profile_inline_ops = get_profile_inline_ops();
1415 
1416 	if (profile_inline_ops == NULL) {
1417 		NT_LOG_DBGX(ERR, NTNIC, "profile_inline module uninitialized");
1418 		return;
1419 	}
1420 
1421 	const struct adapter_ops *adapter_ops = get_adapter_ops();
1422 
1423 	if (adapter_ops == NULL) {
1424 		NT_LOG(ERR, NTNIC, "Adapter module uninitialized");
1425 		return;
1426 	}
1427 
1428 	if (p_drv == NULL)
1429 		return;
1430 
1431 	ntdrv_4ga_t *p_nt_drv = &p_drv->ntdrv;
1432 	fpga_info_t *fpga_info = &p_nt_drv->adapter_info.fpga_info;
1433 
1434 	/*
1435 	 * Mark the global pdrv as cleared. Used by some threads to terminate.
1436 	 * Wait 1 second to give the threads a chance to see the termination.
1437 	 */
1438 	clear_pdrv(p_drv);
1439 	nt_os_wait_usec(1000000);
1440 
1441 	/* stop statistics threads */
1442 	p_drv->ntdrv.b_shutdown = true;
1443 	THREAD_JOIN(p_nt_drv->stat_thread);
1444 
1445 	if (fpga_info->profile == FPGA_INFO_PROFILE_INLINE) {
1446 		THREAD_JOIN(p_nt_drv->flm_thread);
1447 		profile_inline_ops->flm_free_queues();
1448 		THREAD_JOIN(p_nt_drv->port_event_thread);
1449 		/* Free all local flm event queues */
1450 		flm_inf_sta_queue_free_all(FLM_INFO_LOCAL);
1451 		/* Free all remote flm event queues */
1452 		flm_inf_sta_queue_free_all(FLM_INFO_REMOTE);
1453 		/* Free all aged flow event queues */
1454 		flm_age_queue_free_all();
1455 	}
1456 
1457 	/* stop adapter */
1458 	adapter_ops->deinit(&p_nt_drv->adapter_info);
1459 
1460 	/* clean memory */
1461 	rte_free(p_drv);
1462 	p_drv = NULL;
1463 }
1464 
1465 static int
1466 eth_dev_close(struct rte_eth_dev *eth_dev)
1467 {
1468 	struct pmd_internals *internals = eth_dev->data->dev_private;
1469 	struct drv_s *p_drv = internals->p_drv;
1470 
1471 	if (internals->type != PORT_TYPE_VIRTUAL) {
1472 		struct ntnic_rx_queue *rx_q = internals->rxq_scg;
1473 		struct ntnic_tx_queue *tx_q = internals->txq_scg;
1474 
1475 		uint q;
1476 
1477 		if (sg_ops != NULL) {
1478 			for (q = 0; q < internals->nb_rx_queues; q++)
1479 				sg_ops->nthw_release_mngd_rx_virt_queue(rx_q[q].vq);
1480 
1481 			for (q = 0; q < internals->nb_tx_queues; q++)
1482 				sg_ops->nthw_release_mngd_tx_virt_queue(tx_q[q].vq);
1483 		}
1484 	}
1485 
1486 	internals->p_drv = NULL;
1487 
1488 	if (p_drv) {
1489 		/* decrease initialized ethernet devices */
1490 		p_drv->n_eth_dev_init_count--;
1491 
1492 		/*
1493 		 * rte_pci_dev has no private member for p_drv
1494 		 * wait until all rte_eth_dev's are closed - then close adapters via p_drv
1495 		 */
1496 		if (!p_drv->n_eth_dev_init_count)
1497 			drv_deinit(p_drv);
1498 	}
1499 
1500 	return 0;
1501 }
1502 
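/*
 * Report the FPGA identification (type-product-version-revision) as the
 * firmware version string; returns the required buffer size if fw_size is
 * too small.
 */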
1503 static int
1504 eth_fw_version_get(struct rte_eth_dev *eth_dev, char *fw_version, size_t fw_size)
1505 {
1506 	struct pmd_internals *internals = eth_dev->data->dev_private;
1507 
1508 	if (internals->type == PORT_TYPE_VIRTUAL || internals->type == PORT_TYPE_OVERRIDE)
1509 		return 0;
1510 
1511 	fpga_info_t *fpga_info = &internals->p_drv->ntdrv.adapter_info.fpga_info;
1512 	const int length = snprintf(fw_version, fw_size, "%03d-%04d-%02d-%02d",
1513 			fpga_info->n_fpga_type_id, fpga_info->n_fpga_prod_id,
1514 			fpga_info->n_fpga_ver_id, fpga_info->n_fpga_rev_id);
1515 
1516 	if ((size_t)length < fw_size) {
1517 		/* We have space for the version string */
1518 		return 0;
1519 
1520 	} else {
1521 		/* We do not have space for the version string - return the needed space */
1522 		return length + 1;
1523 	}
1524 }
1525 
1526 static int dev_flow_ops_get(struct rte_eth_dev *dev __rte_unused, const struct rte_flow_ops **ops)
1527 {
1528 	*ops = get_dev_flow_ops();
1529 	return 0;
1530 }
1531 
1532 static int eth_xstats_get(struct rte_eth_dev *eth_dev, struct rte_eth_xstat *stats, unsigned int n)
1533 {
1534 	struct pmd_internals *internals = eth_dev->data->dev_private;
1535 	struct drv_s *p_drv = internals->p_drv;
1536 	ntdrv_4ga_t *p_nt_drv = &p_drv->ntdrv;
1537 	nt4ga_stat_t *p_nt4ga_stat = &p_nt_drv->adapter_info.nt4ga_stat;
1538 	int if_index = internals->n_intf_no;
1539 	int nb_xstats;
1540 
1541 	const struct ntnic_xstats_ops *ntnic_xstats_ops = get_ntnic_xstats_ops();
1542 
1543 	if (ntnic_xstats_ops == NULL) {
1544 		NT_LOG(INF, NTNIC, "ntnic_xstats module not included");
1545 		return -1;
1546 	}
1547 
1548 	rte_spinlock_lock(&p_nt_drv->stat_lck);
1549 	nb_xstats = ntnic_xstats_ops->nthw_xstats_get(p_nt4ga_stat, stats, n, if_index);
1550 	rte_spinlock_unlock(&p_nt_drv->stat_lck);
1551 	return nb_xstats;
1552 }
1553 
1554 static int eth_xstats_get_by_id(struct rte_eth_dev *eth_dev,
1555 	const uint64_t *ids,
1556 	uint64_t *values,
1557 	unsigned int n)
1558 {
1559 	struct pmd_internals *internals = eth_dev->data->dev_private;
1560 	struct drv_s *p_drv = internals->p_drv;
1561 	ntdrv_4ga_t *p_nt_drv = &p_drv->ntdrv;
1562 	nt4ga_stat_t *p_nt4ga_stat = &p_nt_drv->adapter_info.nt4ga_stat;
1563 	int if_index = internals->n_intf_no;
1564 	int nb_xstats;
1565 
1566 	const struct ntnic_xstats_ops *ntnic_xstats_ops = get_ntnic_xstats_ops();
1567 
1568 	if (ntnic_xstats_ops == NULL) {
1569 		NT_LOG(INF, NTNIC, "ntnic_xstats module not included");
1570 		return -1;
1571 	}
1572 
1573 	rte_spinlock_lock(&p_nt_drv->stat_lck);
1574 	nb_xstats =
1575 		ntnic_xstats_ops->nthw_xstats_get_by_id(p_nt4ga_stat, ids, values, n, if_index);
1576 	rte_spinlock_unlock(&p_nt_drv->stat_lck);
1577 	return nb_xstats;
1578 }
1579 
1580 static int eth_xstats_reset(struct rte_eth_dev *eth_dev)
1581 {
1582 	struct pmd_internals *internals = eth_dev->data->dev_private;
1583 	struct drv_s *p_drv = internals->p_drv;
1584 	ntdrv_4ga_t *p_nt_drv = &p_drv->ntdrv;
1585 	nt4ga_stat_t *p_nt4ga_stat = &p_nt_drv->adapter_info.nt4ga_stat;
1586 	int if_index = internals->n_intf_no;
1587 
1588 	struct ntnic_xstats_ops *ntnic_xstats_ops = get_ntnic_xstats_ops();
1589 
1590 	if (ntnic_xstats_ops == NULL) {
1591 		NT_LOG(INF, NTNIC, "ntnic_xstats module not included");
1592 		return -1;
1593 	}
1594 
1595 	rte_spinlock_lock(&p_nt_drv->stat_lck);
1596 	ntnic_xstats_ops->nthw_xstats_reset(p_nt4ga_stat, if_index);
1597 	rte_spinlock_unlock(&p_nt_drv->stat_lck);
1598 	return dpdk_stats_reset(internals, p_nt_drv, if_index);
1599 }
1600 
1601 static int eth_xstats_get_names(struct rte_eth_dev *eth_dev,
1602 	struct rte_eth_xstat_name *xstats_names, unsigned int size)
1603 {
1604 	struct pmd_internals *internals = eth_dev->data->dev_private;
1605 	struct drv_s *p_drv = internals->p_drv;
1606 	ntdrv_4ga_t *p_nt_drv = &p_drv->ntdrv;
1607 	nt4ga_stat_t *p_nt4ga_stat = &p_nt_drv->adapter_info.nt4ga_stat;
1608 
1609 	const struct ntnic_xstats_ops *ntnic_xstats_ops = get_ntnic_xstats_ops();
1610 
1611 	if (ntnic_xstats_ops == NULL) {
1612 		NT_LOG(INF, NTNIC, "ntnic_xstats module not included");
1613 		return -1;
1614 	}
1615 
1616 	return ntnic_xstats_ops->nthw_xstats_get_names(p_nt4ga_stat, xstats_names, size);
1617 }
1618 
1619 static int eth_xstats_get_names_by_id(struct rte_eth_dev *eth_dev,
1620 	const uint64_t *ids,
1621 	struct rte_eth_xstat_name *xstats_names,
1622 	unsigned int size)
1623 {
1624 	struct pmd_internals *internals = eth_dev->data->dev_private;
1625 	struct drv_s *p_drv = internals->p_drv;
1626 	ntdrv_4ga_t *p_nt_drv = &p_drv->ntdrv;
1627 	nt4ga_stat_t *p_nt4ga_stat = &p_nt_drv->adapter_info.nt4ga_stat;
1628 	const struct ntnic_xstats_ops *ntnic_xstats_ops = get_ntnic_xstats_ops();
1629 
1630 	if (ntnic_xstats_ops == NULL) {
1631 		NT_LOG(INF, NTNIC, "ntnic_xstats module not included");
1632 		return -1;
1633 	}
1634 
1635 	return ntnic_xstats_ops->nthw_xstats_get_names_by_id(p_nt4ga_stat, xstats_names, ids,
1636 			size);
1637 }
1638 
1639 static int
1640 promiscuous_enable(struct rte_eth_dev __rte_unused(*dev))
1641 {
1642 	NT_LOG(DBG, NTHW, "The device always runs in promiscuous mode");
1643 	return 0;
1644 }
1645 
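/*
 * Update the RSS configuration: validate the key length, program the default
 * hasher recipe (index 0) through the flow filter module and, on success,
 * store the new configuration in the NIC device structure.
 */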
1646 static int eth_dev_rss_hash_update(struct rte_eth_dev *eth_dev, struct rte_eth_rss_conf *rss_conf)
1647 {
1648 	const struct flow_filter_ops *flow_filter_ops = get_flow_filter_ops();
1649 
1650 	if (flow_filter_ops == NULL) {
1651 		NT_LOG_DBGX(ERR, NTNIC, "flow_filter module uninitialized");
1652 		return -1;
1653 	}
1654 
1655 	struct pmd_internals *internals = eth_dev->data->dev_private;
1656 
1657 	struct flow_nic_dev *ndev = internals->flw_dev->ndev;
1658 	struct nt_eth_rss_conf tmp_rss_conf = { 0 };
1659 	const int hsh_idx = 0;	/* hsh index 0 means the default recipe in the HSH module */
1660 
1661 	if (rss_conf->rss_key != NULL) {
1662 		if (rss_conf->rss_key_len > MAX_RSS_KEY_LEN) {
1663 			NT_LOG(ERR, NTNIC,
1664 				"ERROR: - RSS hash key length %u exceeds maximum value %u",
1665 				rss_conf->rss_key_len, MAX_RSS_KEY_LEN);
1666 			return -1;
1667 		}
1668 
1669 		rte_memcpy(&tmp_rss_conf.rss_key, rss_conf->rss_key, rss_conf->rss_key_len);
1670 	}
1671 
1672 	tmp_rss_conf.algorithm = rss_conf->algorithm;
1673 
1674 	tmp_rss_conf.rss_hf = rss_conf->rss_hf;
1675 	int res = flow_filter_ops->flow_nic_set_hasher_fields(ndev, hsh_idx, tmp_rss_conf);
1676 
1677 	if (res == 0) {
1678 		flow_filter_ops->hw_mod_hsh_rcp_flush(&ndev->be, hsh_idx, 1);
1679 		rte_memcpy(&ndev->rss_conf, &tmp_rss_conf, sizeof(struct nt_eth_rss_conf));
1680 
1681 	} else {
1682 		NT_LOG(ERR, NTNIC, "ERROR: - RSS hash update failed with error %i", res);
1683 	}
1684 
1685 	return res;
1686 }
1687 
1688 static int rss_hash_conf_get(struct rte_eth_dev *eth_dev, struct rte_eth_rss_conf *rss_conf)
1689 {
1690 	struct pmd_internals *internals = eth_dev->data->dev_private;
1691 	struct flow_nic_dev *ndev = internals->flw_dev->ndev;
1692 
1693 	rss_conf->algorithm = (enum rte_eth_hash_function)ndev->rss_conf.algorithm;
1694 
1695 	rss_conf->rss_hf = ndev->rss_conf.rss_hf;
1696 
1697 	/*
1698 	 * copy the stored key into rss_key (truncated to the smaller of
1699 	 * rss_key_len and MAX_RSS_KEY_LEN) and zero-pad the remainder
1700 	 */
1701 	if (rss_conf->rss_key != NULL) {
1702 		int key_len = RTE_MIN(rss_conf->rss_key_len, MAX_RSS_KEY_LEN);
1703 		memset(rss_conf->rss_key, 0, rss_conf->rss_key_len);
1704 		rte_memcpy(rss_conf->rss_key, &ndev->rss_conf.rss_key, key_len);
1705 		rss_conf->rss_key_len = key_len;
1706 	}
1707 
1708 	return 0;
1709 }
1710 
1711 static struct eth_dev_ops nthw_eth_dev_ops = {
1712 	.dev_configure = eth_dev_configure,
1713 	.dev_start = eth_dev_start,
1714 	.dev_stop = eth_dev_stop,
1715 	.dev_set_link_up = eth_dev_set_link_up,
1716 	.dev_set_link_down = eth_dev_set_link_down,
1717 	.dev_close = eth_dev_close,
1718 	.link_update = eth_link_update,
1719 	.stats_get = eth_stats_get,
1720 	.stats_reset = eth_stats_reset,
1721 	.dev_infos_get = eth_dev_infos_get,
1722 	.fw_version_get = eth_fw_version_get,
1723 	.rx_queue_setup = eth_rx_scg_queue_setup,
1724 	.rx_queue_start = eth_rx_queue_start,
1725 	.rx_queue_stop = eth_rx_queue_stop,
1726 	.rx_queue_release = eth_rx_queue_release,
1727 	.tx_queue_setup = eth_tx_scg_queue_setup,
1728 	.tx_queue_start = eth_tx_queue_start,
1729 	.tx_queue_stop = eth_tx_queue_stop,
1730 	.tx_queue_release = eth_tx_queue_release,
1731 	.mac_addr_add = eth_mac_addr_add,
1732 	.mac_addr_set = eth_mac_addr_set,
1733 	.set_mc_addr_list = eth_set_mc_addr_list,
1734 	.mtr_ops_get = NULL,
1735 	.flow_ops_get = dev_flow_ops_get,
1736 	.xstats_get = eth_xstats_get,
1737 	.xstats_get_names = eth_xstats_get_names,
1738 	.xstats_reset = eth_xstats_reset,
1739 	.xstats_get_by_id = eth_xstats_get_by_id,
1740 	.xstats_get_names_by_id = eth_xstats_get_names_by_id,
1741 	.mtu_set = NULL,
1742 	.promiscuous_enable = promiscuous_enable,
1743 	.rss_hash_update = eth_dev_rss_hash_update,
1744 	.rss_hash_conf_get = rss_hash_conf_get,
1745 };
1746 
1747 /*
1748  * Port event thread
1749  */
1750 THREAD_FUNC port_event_thread_fn(void *context)
1751 {
1752 	struct pmd_internals *internals = context;
1753 	struct drv_s *p_drv = internals->p_drv;
1754 	ntdrv_4ga_t *p_nt_drv = &p_drv->ntdrv;
1755 	struct adapter_info_s *p_adapter_info = &p_nt_drv->adapter_info;
1756 	struct flow_nic_dev *ndev = p_adapter_info->nt4ga_filter.mp_flow_device;
1757 
1758 	nt4ga_stat_t *p_nt4ga_stat = &p_nt_drv->adapter_info.nt4ga_stat;
1759 	struct rte_eth_dev *eth_dev = &rte_eth_devices[internals->port_id];
1760 	uint8_t port_no = internals->port;
1761 
1762 	ntnic_flm_load_t flmdata;
1763 	ntnic_port_load_t portdata;
1764 
1765 	memset(&flmdata, 0, sizeof(flmdata));
1766 	memset(&portdata, 0, sizeof(portdata));
1767 
1768 	while (ndev != NULL && ndev->eth_base == NULL)
1769 		nt_os_wait_usec(1 * 1000 * 1000);
1770 
1771 	while (!p_drv->ntdrv.b_shutdown) {
1772 		/*
1773 		 * FLM load measurement
1774 		 * Only send an event if there has been a change
1775 		 */
1776 		if (p_nt4ga_stat->flm_stat_ver > 22 && p_nt4ga_stat->mp_stat_structs_flm) {
1777 			if (flmdata.lookup != p_nt4ga_stat->mp_stat_structs_flm->load_lps ||
1778 				flmdata.access != p_nt4ga_stat->mp_stat_structs_flm->load_aps) {
1779 				rte_spinlock_lock(&p_nt_drv->stat_lck);
1780 				flmdata.lookup = p_nt4ga_stat->mp_stat_structs_flm->load_lps;
1781 				flmdata.access = p_nt4ga_stat->mp_stat_structs_flm->load_aps;
1782 				flmdata.lookup_maximum =
1783 					p_nt4ga_stat->mp_stat_structs_flm->max_lps;
1784 				flmdata.access_maximum =
1785 					p_nt4ga_stat->mp_stat_structs_flm->max_aps;
1786 				rte_spinlock_unlock(&p_nt_drv->stat_lck);
1787 
1788 				if (eth_dev && eth_dev->data && eth_dev->data->dev_private) {
1789 					rte_eth_dev_callback_process(eth_dev,
1790 						(enum rte_eth_event_type)RTE_NTNIC_FLM_LOAD_EVENT,
1791 						&flmdata);
1792 				}
1793 			}
1794 		}
1795 
1796 		/*
1797 		 * Port load measurement
1798 		 * Only send an event if the load has changed.
1799 		 */
1800 		if (p_nt4ga_stat->mp_port_load) {
1801 			if (portdata.rx_bps != p_nt4ga_stat->mp_port_load[port_no].rx_bps ||
1802 				portdata.tx_bps != p_nt4ga_stat->mp_port_load[port_no].tx_bps) {
1803 				rte_spinlock_lock(&p_nt_drv->stat_lck);
1804 				portdata.rx_bps = p_nt4ga_stat->mp_port_load[port_no].rx_bps;
1805 				portdata.tx_bps = p_nt4ga_stat->mp_port_load[port_no].tx_bps;
1806 				portdata.rx_pps = p_nt4ga_stat->mp_port_load[port_no].rx_pps;
1807 				portdata.tx_pps = p_nt4ga_stat->mp_port_load[port_no].tx_pps;
1808 				portdata.rx_pps_maximum =
1809 					p_nt4ga_stat->mp_port_load[port_no].rx_pps_max;
1810 				portdata.tx_pps_maximum =
1811 					p_nt4ga_stat->mp_port_load[port_no].tx_pps_max;
1812 				portdata.rx_bps_maximum =
1813 					p_nt4ga_stat->mp_port_load[port_no].rx_bps_max;
1814 				portdata.tx_bps_maximum =
1815 					p_nt4ga_stat->mp_port_load[port_no].tx_bps_max;
1816 				rte_spinlock_unlock(&p_nt_drv->stat_lck);
1817 
1818 				if (eth_dev && eth_dev->data && eth_dev->data->dev_private) {
1819 					rte_eth_dev_callback_process(eth_dev,
1820 						(enum rte_eth_event_type)RTE_NTNIC_PORT_LOAD_EVENT,
1821 						&portdata);
1822 				}
1823 			}
1824 		}
1825 
1826 		/* Process events */
1827 		{
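			/*
			 * Poll the FLM info queue and the aged-flow queue up to 5000
			 * times, sleeping 10 us per idle iteration, i.e. roughly 50 ms
			 * between load-measurement passes when no events arrive.
			 */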
1828 			int count = 0;
1829 			bool do_wait = true;
1830 
1831 			while (count < 5000) {
1832 				/* Local FLM statistic events */
1833 				struct flm_info_event_s data;
1834 
1835 				if (flm_inf_queue_get(port_no, FLM_INFO_LOCAL, &data) == 0) {
1836 					if (eth_dev && eth_dev->data &&
1837 						eth_dev->data->dev_private) {
1838 						struct ntnic_flm_statistic_s event_data;
1839 						event_data.bytes = data.bytes;
1840 						event_data.packets = data.packets;
1841 						event_data.cause = data.cause;
1842 						event_data.id = data.id;
1843 						event_data.timestamp = data.timestamp;
1844 						rte_eth_dev_callback_process(eth_dev,
1845 							(enum rte_eth_event_type)
1846 							RTE_NTNIC_FLM_STATS_EVENT,
1847 							&event_data);
1848 						do_wait = false;
1849 					}
1850 				}
1851 
1852 				/* AGED event */
1853 				/* Note: the RTE_FLOW_PORT_FLAG_STRICT_QUEUE flag is not supported,
1854 				 * so the event is always generated
1855 				 */
1856 				int aged_event_count = flm_age_event_get(port_no);
1857 
1858 				if (aged_event_count > 0 && eth_dev && eth_dev->data &&
1859 					eth_dev->data->dev_private) {
1860 					rte_eth_dev_callback_process(eth_dev,
1861 						RTE_ETH_EVENT_FLOW_AGED,
1862 						NULL);
1863 					flm_age_event_clear(port_no);
1864 					do_wait = false;
1865 				}
1866 
1867 				if (do_wait)
1868 					nt_os_wait_usec(10);
1869 
1870 				count++;
1871 				do_wait = true;
1872 			}
1873 		}
1874 	}
1875 
1876 	return THREAD_RETURN;
1877 }
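
/*
 * Example (application side, sketch): consuming the PMD-specific load events
 * emitted above. The event IDs are cast to rte_eth_event_type, so a normal
 * ethdev callback registration works; load_cb() and handle_port_load() are
 * hypothetical application code.
 *
 *   static int load_cb(uint16_t port_id, enum rte_eth_event_type event,
 *                      void *cb_arg, void *ret_param)
 *   {
 *       const ntnic_port_load_t *load = ret_param;
 *       (void)event; (void)cb_arg;
 *       handle_port_load(port_id, load->rx_bps, load->tx_bps);
 *       return 0;
 *   }
 *
 *   rte_eth_dev_callback_register(port_id,
 *       (enum rte_eth_event_type)RTE_NTNIC_PORT_LOAD_EVENT, load_cb, NULL);
 */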
1878 
1879 /*
1880  * Adapter flm stat thread
1881  */
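/*
 * Runs profile_inline_ops->flm_update() on the adapter's flow device in a
 * loop until shutdown, sleeping 10 us between passes when flm_update()
 * returns 0.
 */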
1882 THREAD_FUNC adapter_flm_update_thread_fn(void *context)
1883 {
1884 	const struct profile_inline_ops *profile_inline_ops = get_profile_inline_ops();
1885 
1886 	if (profile_inline_ops == NULL) {
1887 		NT_LOG(ERR, NTNIC, "%s: profile_inline module uninitialized", __func__);
1888 		return THREAD_RETURN;
1889 	}
1890 
1891 	struct drv_s *p_drv = context;
1892 
1893 	struct ntdrv_4ga_s *p_nt_drv = &p_drv->ntdrv;
1894 	struct adapter_info_s *p_adapter_info = &p_nt_drv->adapter_info;
1895 	struct nt4ga_filter_s *p_nt4ga_filter = &p_adapter_info->nt4ga_filter;
1896 	struct flow_nic_dev *p_flow_nic_dev = p_nt4ga_filter->mp_flow_device;
1897 
1898 	NT_LOG(DBG, NTNIC, "%s: %s: waiting for port configuration",
1899 		p_adapter_info->mp_adapter_id_str, __func__);
1900 
1901 	while (p_flow_nic_dev->eth_base == NULL)
1902 		nt_os_wait_usec(1 * 1000 * 1000);
1903 
1904 	struct flow_eth_dev *dev = p_flow_nic_dev->eth_base;
1905 
1906 	NT_LOG(DBG, NTNIC, "%s: %s: begin", p_adapter_info->mp_adapter_id_str, __func__);
1907 
1908 	while (!p_drv->ntdrv.b_shutdown)
1909 		if (profile_inline_ops->flm_update(dev) == 0)
1910 			nt_os_wait_usec(10);
1911 
1912 	NT_LOG(DBG, NTNIC, "%s: %s: end", p_adapter_info->mp_adapter_id_str, __func__);
1913 	return THREAD_RETURN;
1914 }
1915 
1916 /*
1917  * Adapter stat thread
1918  */
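/*
 * Every 10 ms: triggers a statistics DMA, waits for the FPGA to publish a
 * valid timestamp (logging diagnostics if the DMA appears frozen), then
 * collects the counters under stat_lck.
 */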
1919 THREAD_FUNC adapter_stat_thread_fn(void *context)
1920 {
1921 	const struct nt4ga_stat_ops *nt4ga_stat_ops = get_nt4ga_stat_ops();
1922 
1923 	if (nt4ga_stat_ops == NULL) {
1924 		NT_LOG_DBGX(ERR, NTNIC, "Statistics module uninitialized");
1925 		return THREAD_RETURN;
1926 	}
1927 
1928 	struct drv_s *p_drv = context;
1929 
1930 	ntdrv_4ga_t *p_nt_drv = &p_drv->ntdrv;
1931 	nt4ga_stat_t *p_nt4ga_stat = &p_nt_drv->adapter_info.nt4ga_stat;
1932 	nthw_stat_t *p_nthw_stat = p_nt4ga_stat->mp_nthw_stat;
1933 	const char *const p_adapter_id_str = p_nt_drv->adapter_info.mp_adapter_id_str;
1934 	(void)p_adapter_id_str;
1935 
1936 	if (!p_nthw_stat)
1937 		return THREAD_RETURN;
1938 
1939 	NT_LOG_DBGX(DBG, NTNIC, "%s: begin", p_adapter_id_str);
1940 
1941 	assert(p_nthw_stat);
1942 
1943 	while (!p_drv->ntdrv.b_shutdown) {
1944 		nt_os_wait_usec(10 * 1000);
1945 
1946 		nthw_stat_trigger(p_nthw_stat);
1947 
1948 		uint32_t loop = 0;
1949 
1950 		while ((!p_drv->ntdrv.b_shutdown) &&
1951 			(*p_nthw_stat->mp_timestamp == (uint64_t)-1)) {
1952 			nt_os_wait_usec(1 * 100);
1953 
1954 			if (rte_log_get_level(nt_log_ntnic) == RTE_LOG_DEBUG &&
1955 				(++loop & 0x3fff) == 0) {
1956 				if (p_nt4ga_stat->mp_nthw_rpf) {
1957 					NT_LOG(ERR, NTNIC, "Statistics DMA frozen");
1958 
1959 				} else if (p_nt4ga_stat->mp_nthw_rmc) {
1960 					uint32_t sf_ram_of =
1961 						nthw_rmc_get_status_sf_ram_of(p_nt4ga_stat
1962 							->mp_nthw_rmc);
1963 					uint32_t descr_fifo_of =
1964 						nthw_rmc_get_status_descr_fifo_of(p_nt4ga_stat
1965 							->mp_nthw_rmc);
1966 
1967 					uint32_t dbg_merge =
1968 						nthw_rmc_get_dbg_merge(p_nt4ga_stat->mp_nthw_rmc);
1969 					uint32_t mac_if_err =
1970 						nthw_rmc_get_mac_if_err(p_nt4ga_stat->mp_nthw_rmc);
1971 
1972 					NT_LOG(ERR, NTNIC, "Statistics DMA frozen");
1973 					NT_LOG(ERR, NTNIC, "SF RAM Overflow     : %08x",
1974 						sf_ram_of);
1975 					NT_LOG(ERR, NTNIC, "Descr Fifo Overflow : %08x",
1976 						descr_fifo_of);
1977 					NT_LOG(ERR, NTNIC, "DBG Merge           : %08x",
1978 						dbg_merge);
1979 					NT_LOG(ERR, NTNIC, "MAC If Errors       : %08x",
1980 						mac_if_err);
1981 				}
1982 			}
1983 		}
1984 
1985 		/* Check then collect */
1986 		{
1987 			rte_spinlock_lock(&p_nt_drv->stat_lck);
1988 			nt4ga_stat_ops->nt4ga_stat_collect(&p_nt_drv->adapter_info, p_nt4ga_stat);
1989 			rte_spinlock_unlock(&p_nt_drv->stat_lck);
1990 		}
1991 	}
1992 
1993 	NT_LOG_DBGX(DBG, NTNIC, "%s: end", p_adapter_id_str);
1994 	return THREAD_RETURN;
1995 }
1996 
1997 static int
1998 nthw_pci_dev_init(struct rte_pci_device *pci_dev)
1999 {
2000 	const struct flow_filter_ops *flow_filter_ops = get_flow_filter_ops();
2001 
2002 	if (flow_filter_ops == NULL) {
2003 		NT_LOG_DBGX(ERR, NTNIC, "flow_filter module uninitialized");
2004 		/* No return here: traffic can still be processed by SW without this module */
2005 	}
2006 
2007 	const struct profile_inline_ops *profile_inline_ops = get_profile_inline_ops();
2008 
2009 	if (profile_inline_ops == NULL) {
2010 		NT_LOG_DBGX(ERR, NTNIC, "profile_inline module uninitialized");
2011 		/* No return here: traffic can still be processed by SW without this module */
2012 	}
2013 
2014 	nt_vfio_init();
2015 	const struct port_ops *port_ops = get_port_ops();
2016 
2017 	if (port_ops == NULL) {
2018 		NT_LOG(ERR, NTNIC, "Link management module uninitialized");
2019 		return -1;
2020 	}
2021 
2022 	const struct adapter_ops *adapter_ops = get_adapter_ops();
2023 
2024 	if (adapter_ops == NULL) {
2025 		NT_LOG(ERR, NTNIC, "Adapter module uninitialized");
2026 		return -1;
2027 	}
2028 
2029 	int res;
2030 	struct drv_s *p_drv;
2031 	ntdrv_4ga_t *p_nt_drv;
2032 	hw_info_t *p_hw_info;
2033 	fpga_info_t *fpga_info;
2034 	uint32_t n_port_mask = -1;	/* All ports enabled by default */
2035 	uint32_t nb_rx_queues = 1;
2036 	uint32_t nb_tx_queues = 1;
2037 	uint32_t exception_path = 0;
2038 	struct flow_queue_id_s queue_ids[MAX_QUEUES];
2039 	int n_phy_ports;
2040 	struct port_link_speed pls_mbps[NUM_ADAPTER_PORTS_MAX] = { 0 };
2041 	int num_port_speeds = 0;
2042 	enum flow_eth_dev_profile profile = FLOW_ETH_DEV_PROFILE_INLINE;
2043 
2044 	NT_LOG_DBGX(DBG, NTNIC, "Dev %s PF #%i Init : %02x:%02x:%i", pci_dev->name,
2045 		pci_dev->addr.function, pci_dev->addr.bus, pci_dev->addr.devid,
2046 		pci_dev->addr.function);
2047 
2048 	/*
2049 	 * Process options/arguments
2050 	 */
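	/*
	 * Example devargs (hypothetical PCI address) on the EAL command line:
	 *   -a 0000:03:00.0,rxqs=4,txqs=4
	 */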
2051 	if (pci_dev->device.devargs && pci_dev->device.devargs->args) {
2052 		int kvargs_count;
2053 		struct rte_kvargs *kvlist =
2054 			rte_kvargs_parse(pci_dev->device.devargs->args, valid_arguments);
2055 
2056 		if (kvlist == NULL)
2057 			return -1;
2058 
2059 		/*
2060 		 * Argument: help
2061 		 * NOTE: this argument/option check should be the first as it will stop
2062 		 * execution after producing its output
2063 		 */
2064 		{
2065 			if (rte_kvargs_get(kvlist, ETH_DEV_NTNIC_HELP_ARG)) {
2066 				size_t i;
2067 
2068 				for (i = 0; i < RTE_DIM(valid_arguments); i++)
2069 					if (valid_arguments[i] == NULL)
2070 						break;
2071 
2072 				exit(0);
2073 			}
2074 		}
2075 
2076 		/*
2077 		 * rxq option/argument
2078 		 * The number of Rx queues (hostbuffers) allocated in memory.
2079 		 * Default is 1 Rx hostbuffer (nb_rx_queues above)
2080 		 */
2081 		kvargs_count = rte_kvargs_count(kvlist, ETH_DEV_NTHW_RXQUEUES_ARG);
2082 
2083 		if (kvargs_count != 0) {
2084 			assert(kvargs_count == 1);
2085 			res = rte_kvargs_process(kvlist, ETH_DEV_NTHW_RXQUEUES_ARG, &string_to_u32,
2086 					&nb_rx_queues);
2087 
2088 			if (res < 0) {
2089 				NT_LOG_DBGX(ERR, NTNIC,
2090 					"problem with command line arguments: res=%d",
2091 					res);
2092 				return -1;
2093 			}
2094 
2095 			NT_LOG_DBGX(DBG, NTNIC, "devargs: %s=%u",
2096 				ETH_DEV_NTHW_RXQUEUES_ARG, nb_rx_queues);
2097 		}
2098 
2099 		/*
2100 		 * txq option/argument
2101 		 * The number of Tx queues (hostbuffers) allocated in memory.
2102 		 * Default is 1 Tx hostbuffer (nb_tx_queues above)
2103 		 */
2104 		kvargs_count = rte_kvargs_count(kvlist, ETH_DEV_NTHW_TXQUEUES_ARG);
2105 
2106 		if (kvargs_count != 0) {
2107 			assert(kvargs_count == 1);
2108 			res = rte_kvargs_process(kvlist, ETH_DEV_NTHW_TXQUEUES_ARG, &string_to_u32,
2109 					&nb_tx_queues);
2110 
2111 			if (res < 0) {
2112 				NT_LOG_DBGX(ERR, NTNIC,
2113 					"problem with command line arguments: res=%d",
2114 					res);
2115 				return -1;
2116 			}
2117 
2118 			NT_LOG_DBGX(DBG, NTNIC, "devargs: %s=%u",
2119 				ETH_DEV_NTHW_TXQUEUES_ARG, nb_tx_queues);
2120 		}
2121 	}
2122 
2123 
2124 	/* alloc */
2125 	p_drv = rte_zmalloc_socket(pci_dev->name, sizeof(struct drv_s), RTE_CACHE_LINE_SIZE,
2126 			pci_dev->device.numa_node);
2127 
2128 	if (!p_drv) {
2129 		NT_LOG_DBGX(ERR, NTNIC, "%s: error %d",
2130 			(pci_dev->name[0] ? pci_dev->name : "NA"), -1);
2131 		return -1;
2132 	}
2133 
2134 	/* Setup VFIO context */
2135 	int vfio = nt_vfio_setup(pci_dev);
2136 
2137 	if (vfio < 0) {
2138 		NT_LOG_DBGX(ERR, NTNIC, "%s: vfio_setup error %d",
2139 			(pci_dev->name[0] ? pci_dev->name : "NA"), -1);
2140 		rte_free(p_drv);
2141 		return -1;
2142 	}
2143 
2144 	/* context */
2145 	p_nt_drv = &p_drv->ntdrv;
2146 	p_hw_info = &p_nt_drv->adapter_info.hw_info;
2147 	fpga_info = &p_nt_drv->adapter_info.fpga_info;
2148 
2149 	p_drv->p_dev = pci_dev;
2150 
2151 	/* Set context for NtDrv */
2152 	p_nt_drv->pciident = BDF_TO_PCIIDENT(pci_dev->addr.domain, pci_dev->addr.bus,
2153 			pci_dev->addr.devid, pci_dev->addr.function);
2154 	p_nt_drv->adapter_info.n_rx_host_buffers = nb_rx_queues;
2155 	p_nt_drv->adapter_info.n_tx_host_buffers = nb_tx_queues;
2156 
2157 	fpga_info->bar0_addr = (void *)pci_dev->mem_resource[0].addr;
2158 	fpga_info->bar0_size = pci_dev->mem_resource[0].len;
2159 	fpga_info->numa_node = pci_dev->device.numa_node;
2160 	fpga_info->pciident = p_nt_drv->pciident;
2161 	fpga_info->adapter_no = p_drv->adapter_no;
2162 
2163 	p_nt_drv->adapter_info.hw_info.pci_class_id = pci_dev->id.class_id;
2164 	p_nt_drv->adapter_info.hw_info.pci_vendor_id = pci_dev->id.vendor_id;
2165 	p_nt_drv->adapter_info.hw_info.pci_device_id = pci_dev->id.device_id;
2166 	p_nt_drv->adapter_info.hw_info.pci_sub_vendor_id = pci_dev->id.subsystem_vendor_id;
2167 	p_nt_drv->adapter_info.hw_info.pci_sub_device_id = pci_dev->id.subsystem_device_id;
2168 
2169 	NT_LOG(DBG, NTNIC, "%s: " PCIIDENT_PRINT_STR " %04X:%04X: %04X:%04X:",
2170 		p_nt_drv->adapter_info.mp_adapter_id_str, PCIIDENT_TO_DOMAIN(p_nt_drv->pciident),
2171 		PCIIDENT_TO_BUSNR(p_nt_drv->pciident), PCIIDENT_TO_DEVNR(p_nt_drv->pciident),
2172 		PCIIDENT_TO_FUNCNR(p_nt_drv->pciident),
2173 		p_nt_drv->adapter_info.hw_info.pci_vendor_id,
2174 		p_nt_drv->adapter_info.hw_info.pci_device_id,
2175 		p_nt_drv->adapter_info.hw_info.pci_sub_vendor_id,
2176 		p_nt_drv->adapter_info.hw_info.pci_sub_device_id);
2177 
2178 	p_nt_drv->b_shutdown = false;
2179 	p_nt_drv->adapter_info.pb_shutdown = &p_nt_drv->b_shutdown;
2180 
2181 	for (int i = 0; i < num_port_speeds; ++i) {
2182 		struct adapter_info_s *p_adapter_info = &p_nt_drv->adapter_info;
2183 		nt_link_speed_t link_speed = convert_link_speed(pls_mbps[i].link_speed);
2184 		port_ops->set_link_speed(p_adapter_info, i, link_speed);
2185 	}
2186 
2187 	/* store context */
2188 	store_pdrv(p_drv);
2189 
2190 	/* initialize nt4ga nthw fpga module instance in drv */
2191 	int err = adapter_ops->init(&p_nt_drv->adapter_info);
2192 
2193 	if (err != 0) {
2194 		NT_LOG(ERR, NTNIC, "%s: Cannot initialize the adapter instance",
2195 			p_nt_drv->adapter_info.mp_adapter_id_str);
2196 		return -1;
2197 	}
2198 
2199 	const struct meter_ops_s *meter_ops = get_meter_ops();
2200 
2201 	if (meter_ops != NULL)
2202 		nthw_eth_dev_ops.mtr_ops_get = meter_ops->eth_mtr_ops_get;
2203 
2204 	else
2205 		NT_LOG(DBG, NTNIC, "Meter module is not initialized");
2206 
2207 	/* Initialize the queue system */
2208 	if (err == 0) {
2209 		sg_ops = get_sg_ops();
2210 
2211 		if (sg_ops != NULL) {
2212 			err = sg_ops->nthw_virt_queue_init(fpga_info);
2213 
2214 			if (err != 0) {
2215 				NT_LOG(ERR, NTNIC,
2216 					"%s: Cannot initialize scatter-gather queues",
2217 					p_nt_drv->adapter_info.mp_adapter_id_str);
2218 
2219 			} else {
2220 				NT_LOG(DBG, NTNIC, "%s: Initialized scatter-gather queues",
2221 					p_nt_drv->adapter_info.mp_adapter_id_str);
2222 			}
2223 
2224 		} else {
2225 			NT_LOG_DBGX(DBG, NTNIC, "SG module is not initialized");
2226 		}
2227 	}
2228 
2229 	/* Start ctrl, monitor, stat thread only for primary process. */
2230 	if (err == 0) {
2231 		/* mp_adapter_id_str is initialized after nt4ga_adapter_init(p_nt_drv) */
2232 		const char *const p_adapter_id_str = p_nt_drv->adapter_info.mp_adapter_id_str;
2233 		(void)p_adapter_id_str;
2234 		NT_LOG(DBG, NTNIC,
2235 			"%s: %s: AdapterPCI=" PCIIDENT_PRINT_STR " Hw=0x%02X_rev%d PhyPorts=%d",
2236 			(pci_dev->name[0] ? pci_dev->name : "NA"), p_adapter_id_str,
2237 			PCIIDENT_TO_DOMAIN(p_nt_drv->adapter_info.fpga_info.pciident),
2238 			PCIIDENT_TO_BUSNR(p_nt_drv->adapter_info.fpga_info.pciident),
2239 			PCIIDENT_TO_DEVNR(p_nt_drv->adapter_info.fpga_info.pciident),
2240 			PCIIDENT_TO_FUNCNR(p_nt_drv->adapter_info.fpga_info.pciident),
2241 			p_hw_info->hw_platform_id, fpga_info->nthw_hw_info.hw_id,
2242 			fpga_info->n_phy_ports);
2243 
2244 	} else {
2245 		NT_LOG_DBGX(ERR, NTNIC, "%s: error=%d",
2246 			(pci_dev->name[0] ? pci_dev->name : "NA"), err);
2247 		return -1;
2248 	}
2249 
2250 	if (profile_inline_ops != NULL && fpga_info->profile == FPGA_INFO_PROFILE_INLINE) {
2251 		profile_inline_ops->flm_setup_queues();
2252 		res = THREAD_CTRL_CREATE(&p_nt_drv->flm_thread, "ntnic-nt_flm_update_thr",
2253 			adapter_flm_update_thread_fn, (void *)p_drv);
2254 
2255 		if (res) {
2256 			NT_LOG_DBGX(ERR, NTNIC, "%s: error=%d",
2257 				(pci_dev->name[0] ? pci_dev->name : "NA"), res);
2258 			return -1;
2259 		}
2260 	}
2261 
2262 	rte_spinlock_init(&p_nt_drv->stat_lck);
2263 	res = THREAD_CTRL_CREATE(&p_nt_drv->stat_thread, "nt4ga_stat_thr", adapter_stat_thread_fn,
2264 			(void *)p_drv);
2265 
2266 	if (res) {
2267 		NT_LOG(ERR, NTNIC, "%s: error=%d",
2268 			(pci_dev->name[0] ? pci_dev->name : "NA"), res);
2269 		return -1;
2270 	}
2271 
2272 	n_phy_ports = fpga_info->n_phy_ports;
2273 
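	/*
	 * Create one ethdev ("ntnic<N>") per physical port: allocate the
	 * pmd_internals, map HW queues, attach the flow device and register
	 * the port with the ethdev layer.
	 */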
2274 	for (int n_intf_no = 0; n_intf_no < n_phy_ports; n_intf_no++) {
2275 		const char *const p_port_id_str = p_nt_drv->adapter_info.mp_port_id_str[n_intf_no];
2276 		(void)p_port_id_str;
2277 		struct pmd_internals *internals = NULL;
2278 		struct rte_eth_dev *eth_dev = NULL;
2279 		char name[32];
2280 		int i;
2281 
2282 		if ((1 << n_intf_no) & ~n_port_mask) {
2283 			NT_LOG_DBGX(DBG, NTNIC,
2284 				"%s: interface #%d: skipping due to portmask 0x%02X",
2285 				p_port_id_str, n_intf_no, n_port_mask);
2286 			continue;
2287 		}
2288 
2289 		snprintf(name, sizeof(name), "ntnic%d", n_intf_no);
2290 		NT_LOG_DBGX(DBG, NTNIC, "%s: interface #%d: %s: '%s'", p_port_id_str,
2291 			n_intf_no, (pci_dev->name[0] ? pci_dev->name : "NA"), name);
2292 
2293 		internals = rte_zmalloc_socket(name, sizeof(struct pmd_internals),
2294 				RTE_CACHE_LINE_SIZE, pci_dev->device.numa_node);
2295 
2296 		if (!internals) {
2297 			NT_LOG_DBGX(ERR, NTNIC, "%s: %s: error=%d",
2298 				(pci_dev->name[0] ? pci_dev->name : "NA"), name, -1);
2299 			return -1;
2300 		}
2301 
2302 		internals->pci_dev = pci_dev;
2303 		internals->n_intf_no = n_intf_no;
2304 		internals->type = PORT_TYPE_PHYSICAL;
2305 		internals->port = n_intf_no;
2306 		internals->nb_rx_queues = nb_rx_queues;
2307 		internals->nb_tx_queues = nb_tx_queues;
2308 
2309 		/* Queue index is not used as dest port in bypass (0x80 + port nr is used instead); mark all vpq entries unused */
2310 		for (i = 0; i < MAX_QUEUES; i++)
2311 			internals->vpq[i].hw_id = -1;
2312 
2313 
2314 		/* Setup queue_ids */
2315 		if (nb_rx_queues > 1) {
2316 			NT_LOG(DBG, NTNIC,
2317 				"(%i) NTNIC configured with Rx multi queues. %i queues",
2318 				internals->n_intf_no, nb_rx_queues);
2319 		}
2320 
2321 		if (nb_tx_queues > 1) {
2322 			NT_LOG(DBG, NTNIC,
2323 				"(%i) NTNIC configured with Tx multi queues. %i queues",
2324 				internals->n_intf_no, nb_tx_queues);
2325 		}
2326 
2327 		int max_num_queues = (nb_rx_queues > nb_tx_queues) ? nb_rx_queues : nb_tx_queues;
2328 		int start_queue = allocate_queue(max_num_queues);
2329 
2330 		if (start_queue < 0)
2331 			return -1;
2332 
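		/*
		 * Map logical queue i to the contiguous HW queue range
		 * [start_queue, start_queue + max_num_queues); Rx and Tx share
		 * the same mapping.
		 */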
2333 		for (i = 0; i < (int)max_num_queues; i++) {
2334 			queue_ids[i].id = i;
2335 			queue_ids[i].hw_id = start_queue + i;
2336 
2337 			internals->rxq_scg[i].queue = queue_ids[i];
2338 			/* use same index in Rx and Tx rings */
2339 			internals->txq_scg[i].queue = queue_ids[i];
2340 			internals->rxq_scg[i].enabled = 0;
2341 			internals->txq_scg[i].type = internals->type;
2342 			internals->rxq_scg[i].type = internals->type;
2343 			internals->rxq_scg[i].port = internals->port;
2344 		}
2345 
2346 		/* no tx queues - tx data goes out on phy */
2347 		internals->vpq_nb_vq = 0;
2348 
2349 		for (i = 0; i < (int)nb_tx_queues; i++) {
2350 			internals->txq_scg[i].port = internals->port;
2351 			internals->txq_scg[i].enabled = 0;
2352 		}
2353 
2354 		/* Set MAC address (but only if the MAC address is permitted) */
2355 		if (n_intf_no < fpga_info->nthw_hw_info.vpd_info.mn_mac_addr_count) {
2356 			const uint64_t mac =
2357 				fpga_info->nthw_hw_info.vpd_info.mn_mac_addr_value + n_intf_no;
2358 			internals->eth_addrs[0].addr_bytes[0] = (mac >> 40) & 0xFFu;
2359 			internals->eth_addrs[0].addr_bytes[1] = (mac >> 32) & 0xFFu;
2360 			internals->eth_addrs[0].addr_bytes[2] = (mac >> 24) & 0xFFu;
2361 			internals->eth_addrs[0].addr_bytes[3] = (mac >> 16) & 0xFFu;
2362 			internals->eth_addrs[0].addr_bytes[4] = (mac >> 8) & 0xFFu;
2363 			internals->eth_addrs[0].addr_bytes[5] = (mac >> 0) & 0xFFu;
2364 		}
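		/*
		 * The VPD base MAC is incremented per interface, e.g.
		 * (hypothetically) a base of 00:0D:E9:00:00:10 gives interface 1
		 * the address 00:0D:E9:00:00:11.
		 */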
2365 
2366 		eth_dev = rte_eth_dev_allocate(name);
2367 
2368 		if (!eth_dev) {
2369 			NT_LOG_DBGX(ERR, NTNIC, "%s: %s: error=%d",
2370 				(pci_dev->name[0] ? pci_dev->name : "NA"), name, -1);
2371 			return -1;
2372 		}
2373 
2374 		if (flow_filter_ops != NULL) {
2375 			internals->flw_dev = flow_filter_ops->flow_get_eth_dev(0, n_intf_no,
2376 				eth_dev->data->port_id, nb_rx_queues, queue_ids,
2377 				&internals->txq_scg[0].rss_target_id, profile, exception_path);
2378 
2379 			if (!internals->flw_dev) {
2380 				NT_LOG(ERR, NTNIC,
2381 					"Error creating port. Resource exhaustion in HW");
2382 				return -1;
2383 			}
2384 		}
2385 
2386 		/* connect structs */
2387 		internals->p_drv = p_drv;
2388 		eth_dev->data->dev_private = internals;
2389 		eth_dev->data->mac_addrs = rte_malloc(NULL,
2390 					NUM_MAC_ADDRS_PER_PORT * sizeof(struct rte_ether_addr), 0);
2391 		rte_memcpy(&eth_dev->data->mac_addrs[0],
2392 					&internals->eth_addrs[0], RTE_ETHER_ADDR_LEN);
2393 
2394 		NT_LOG_DBGX(DBG, NTNIC, "Setting up RX functions for SCG");
2395 		eth_dev->rx_pkt_burst = eth_dev_rx_scg;
2396 		eth_dev->tx_pkt_burst = eth_dev_tx_scg;
2397 		eth_dev->tx_pkt_prepare = NULL;
2398 
2399 		struct rte_eth_link pmd_link;
2400 		pmd_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2401 		pmd_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
2402 		pmd_link.link_status = RTE_ETH_LINK_DOWN;
2403 		pmd_link.link_autoneg = RTE_ETH_LINK_AUTONEG;
2404 
2405 		eth_dev->device = &pci_dev->device;
2406 		eth_dev->data->dev_link = pmd_link;
2407 		eth_dev->dev_ops = &nthw_eth_dev_ops;
2408 
2409 		eth_dev_pci_specific_init(eth_dev, pci_dev);
2410 		rte_eth_dev_probing_finish(eth_dev);
2411 
2412 		/* increase initialized ethernet devices - PF */
2413 		p_drv->n_eth_dev_init_count++;
2414 
2415 		if (get_flow_filter_ops() != NULL) {
2416 			if (fpga_info->profile == FPGA_INFO_PROFILE_INLINE &&
2417 				internals->flw_dev->ndev->be.tpe.ver >= 2) {
2418 				assert(nthw_eth_dev_ops.mtu_set == dev_set_mtu_inline ||
2419 					nthw_eth_dev_ops.mtu_set == NULL);
2420 				nthw_eth_dev_ops.mtu_set = dev_set_mtu_inline;
2421 				dev_set_mtu_inline(eth_dev, MTUINITVAL);
2422 				NT_LOG_DBGX(DBG, NTNIC, "INLINE MTU supported, tpe version %d",
2423 					internals->flw_dev->ndev->be.tpe.ver);
2424 
2425 			} else {
2426 				NT_LOG(DBG, NTNIC, "INLINE MTU not supported");
2427 			}
2428 		}
2429 
2430 		/* Port event thread */
2431 		if (fpga_info->profile == FPGA_INFO_PROFILE_INLINE) {
2432 			res = THREAD_CTRL_CREATE(&p_nt_drv->port_event_thread, "nt_port_event_thr",
2433 					port_event_thread_fn, (void *)internals);
2434 
2435 			if (res) {
2436 				NT_LOG(ERR, NTNIC, "%s: error=%d",
2437 					(pci_dev->name[0] ? pci_dev->name : "NA"), res);
2438 				return -1;
2439 			}
2440 		}
2441 	}
2442 
2443 	return 0;
2444 }
2445 
2446 static int
2447 nthw_pci_dev_deinit(struct rte_eth_dev *eth_dev)
2448 {
2449 	NT_LOG_DBGX(DBG, NTNIC, "PCI device deinitialization");
2450 
2451 	int i;
2452 	char name[32];
2453 
2454 	struct pmd_internals *internals = eth_dev->data->dev_private;
2455 	ntdrv_4ga_t *p_ntdrv = &internals->p_drv->ntdrv;
2456 	fpga_info_t *fpga_info = &p_ntdrv->adapter_info.fpga_info;
2457 	const int n_phy_ports = fpga_info->n_phy_ports;
2458 
2459 	/* let running threads end Rx and Tx activity */
2460 	if (sg_ops != NULL) {
2461 		nt_os_wait_usec(1 * 1000 * 1000);
2462 
2463 		while (internals) {
2464 			for (i = internals->nb_tx_queues - 1; i >= 0; i--) {
2465 				sg_ops->nthw_release_mngd_tx_virt_queue(internals->txq_scg[i].vq);
2466 				release_hw_virtio_queues(&internals->txq_scg[i].hwq);
2467 			}
2468 
2469 			for (i = internals->nb_rx_queues - 1; i >= 0; i--) {
2470 				sg_ops->nthw_release_mngd_rx_virt_queue(internals->rxq_scg[i].vq);
2471 				release_hw_virtio_queues(&internals->rxq_scg[i].hwq);
2472 			}
2473 
2474 			internals = internals->next;
2475 		}
2476 	}
2477 
2478 	for (i = 0; i < n_phy_ports; i++) {
2479 		snprintf(name, sizeof(name), "ntnic%d", i);
2480 		eth_dev = rte_eth_dev_allocated(name);
2481 		if (eth_dev == NULL)
2482 			continue; /* port already released */
2483 		rte_eth_dev_release_port(eth_dev);
2484 	}
2485 
2486 	nt_vfio_remove(EXCEPTION_PATH_HID);
2487 	return 0;
2488 }
2489 
2490 static void signal_handler_func_int(int sig)
2491 {
2492 	if (sig != SIGINT) {
2493 		signal(sig, previous_handler);
2494 		raise(sig);
2495 		return;
2496 	}
2497 
2498 	kill_pmd = 1;
2499 }
2500 
2501 THREAD_FUNC shutdown_thread(void *arg __rte_unused)
2502 {
2503 	while (!kill_pmd)
2504 		nt_os_wait_usec(100 * 1000);
2505 
2506 	NT_LOG_DBGX(DBG, NTNIC, "Shutting down because of ctrl+C");
2507 
2508 	signal(SIGINT, previous_handler);
2509 	raise(SIGINT);
2510 
2511 	return THREAD_RETURN;
2512 }
2513 
2514 static int init_shutdown(void)
2515 {
2516 	NT_LOG(DBG, NTNIC, "Starting shutdown handler");
2517 	kill_pmd = 0;
2518 	previous_handler = signal(SIGINT, signal_handler_func_int);
2519 	THREAD_CREATE(&shutdown_tid, shutdown_thread, NULL);
2520 
2521 	/*
2522 	 * One-time estimate of the TSC cycles in 1 sec, used to rate-limit stat
2523 	 * polling (e.g. flooding by OVS from multiple virtual port threads) - no
2524 	 * need to be precise
2525 	 */
2525 	uint64_t now_rtc = rte_get_tsc_cycles();
2526 	nt_os_wait_usec(10 * 1000);
2527 	rte_tsc_freq = 100 * (rte_get_tsc_cycles() - now_rtc);
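	/* The 10 ms sample scaled by 100 approximates TSC cycles per second. */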
2528 
2529 	return 0;
2530 }
2531 
2532 static int
2533 nthw_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2534 	struct rte_pci_device *pci_dev)
2535 {
2536 	int ret;
2537 
2538 	NT_LOG_DBGX(DBG, NTNIC, "pcidev: name: '%s'", pci_dev->name);
2539 	NT_LOG_DBGX(DBG, NTNIC, "devargs: name: '%s'", pci_dev->device.name);
2540 
2541 	if (pci_dev->device.devargs) {
2542 		NT_LOG_DBGX(DBG, NTNIC, "devargs: args: '%s'",
2543 			(pci_dev->device.devargs->args ? pci_dev->device.devargs->args : "NULL"));
2544 		NT_LOG_DBGX(DBG, NTNIC, "devargs: data: '%s'",
2545 			(pci_dev->device.devargs->data ? pci_dev->device.devargs->data : "NULL"));
2546 	}
2547 
2548 	const int n_rte_vfio_no_io_mmu_enabled = rte_vfio_noiommu_is_enabled();
2549 	NT_LOG(DBG, NTNIC, "vfio_no_iommu_enabled=%d", n_rte_vfio_no_io_mmu_enabled);
2550 
2551 	if (n_rte_vfio_no_io_mmu_enabled) {
2552 		NT_LOG(ERR, NTNIC, "vfio_no_iommu_enabled=%d: this PMD needs VFIO IOMMU",
2553 			n_rte_vfio_no_io_mmu_enabled);
2554 		return -1;
2555 	}
2556 
2557 	const enum rte_iova_mode n_rte_io_va_mode = rte_eal_iova_mode();
2558 	NT_LOG(DBG, NTNIC, "iova mode=%d", n_rte_io_va_mode);
2559 
2560 	NT_LOG(DBG, NTNIC,
2561 		"busid=" PCI_PRI_FMT
2562 		" pciid=%04x:%04x_%04x:%04x locstr=%s @ numanode=%d: drv=%s drvalias=%s",
2563 		pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid,
2564 		pci_dev->addr.function, pci_dev->id.vendor_id, pci_dev->id.device_id,
2565 		pci_dev->id.subsystem_vendor_id, pci_dev->id.subsystem_device_id,
2566 		pci_dev->name[0] ? pci_dev->name : "NA",
2567 		pci_dev->device.numa_node,
2568 		pci_dev->driver->driver.name ? pci_dev->driver->driver.name : "NA",
2569 		pci_dev->driver->driver.alias ? pci_dev->driver->driver.alias : "NA");
2570 
2571 
2572 	ret = nthw_pci_dev_init(pci_dev);
2573 
2574 	init_shutdown();
2575 
2576 	NT_LOG_DBGX(DBG, NTNIC, "leave: ret=%d", ret);
2577 	return ret;
2578 }
2579 
2580 static int
2581 nthw_pci_remove(struct rte_pci_device *pci_dev)
2582 {
2583 	NT_LOG_DBGX(DBG, NTNIC);
2584 
2585 	struct drv_s *p_drv = get_pdrv_from_pci(pci_dev->addr);
2586 	drv_deinit(p_drv);
2587 
2588 	return rte_eth_dev_pci_generic_remove(pci_dev, nthw_pci_dev_deinit);
2589 }
2590 
2591 static struct rte_pci_driver rte_nthw_pmd = {
2592 	.id_table = nthw_pci_id_map,
2593 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
2594 	.probe = nthw_pci_probe,
2595 	.remove = nthw_pci_remove,
2596 };
2597 
2598 RTE_PMD_REGISTER_PCI(net_ntnic, rte_nthw_pmd);
2599 RTE_PMD_REGISTER_PCI_TABLE(net_ntnic, nthw_pci_id_map);
2600 RTE_PMD_REGISTER_KMOD_DEP(net_ntnic, "* vfio-pci");
2601 
2602 RTE_LOG_REGISTER_SUFFIX(nt_log_general, general, INFO);
2603 RTE_LOG_REGISTER_SUFFIX(nt_log_nthw, nthw, INFO);
2604 RTE_LOG_REGISTER_SUFFIX(nt_log_filter, filter, INFO);
2605 RTE_LOG_REGISTER_SUFFIX(nt_log_ntnic, ntnic, INFO);
2606