xref: /dpdk/drivers/net/mana/mana.c (revision af0785a2447b307965377b62f46a5f39457a85a3)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2022 Microsoft Corporation
3  */
4 
5 #include <unistd.h>
6 #include <dirent.h>
7 #include <fcntl.h>
8 #include <sys/mman.h>
9 
10 #include <ethdev_driver.h>
11 #include <ethdev_pci.h>
12 #include <rte_kvargs.h>
13 #include <rte_eal_paging.h>
14 
15 #include <infiniband/verbs.h>
16 #include <infiniband/manadv.h>
17 
18 #include <assert.h>
19 
20 #include "mana.h"
21 
22 /* Shared memory between primary/secondary processes, per driver */
23 /* Data to track primary/secondary usage */
24 struct mana_shared_data *mana_shared_data;
25 static struct mana_shared_data mana_local_data;
26 
27 /* The memory region for the above data */
28 static const struct rte_memzone *mana_shared_mz;
29 static const char *MZ_MANA_SHARED_DATA = "mana_shared_data";
30 
31 /* Spinlock for mana_shared_data */
32 static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
33 
34 /* Allocate a buffer on the stack and fill it with a printf format string. */
35 #define MANA_MKSTR(name, ...) \
36 	int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
37 	char name[mkstr_size_##name + 1]; \
38 	\
39 	memset(name, 0, mkstr_size_##name + 1); \
40 	snprintf(name, sizeof(name), "" __VA_ARGS__)
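/* Typical usage, e.g. in get_port_mac() below: MANA_MKSTR(path, "%s/device/net", device->ibdev_path); */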
41 
42 int mana_logtype_driver;
43 int mana_logtype_init;
44 
45 /*
46  * Callback from rdma-core to allocate a buffer for a queue.
47  */
48 void *
49 mana_alloc_verbs_buf(size_t size, void *data)
50 {
51 	void *ret;
52 	size_t alignment = rte_mem_page_size();
53 	int socket = (int)(uintptr_t)data;
54 
55 	DRV_LOG(DEBUG, "size=%zu socket=%d", size, socket);
56 
57 	if (alignment == (size_t)-1) {
58 		DRV_LOG(ERR, "Failed to get mem page size");
59 		rte_errno = ENOMEM;
60 		return NULL;
61 	}
62 
63 	ret = rte_zmalloc_socket("mana_verb_buf", size, alignment, socket);
64 	if (!ret && size)
65 		rte_errno = ENOMEM;
66 	return ret;
67 }
68 
69 void
70 mana_free_verbs_buf(void *ptr, void *data __rte_unused)
71 {
72 	rte_free(ptr);
73 }
74 
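/* Ethdev configure callback: require equal, power-of-2 RX/TX queue counts and register the rdma-core buffer allocators. */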
75 static int
76 mana_dev_configure(struct rte_eth_dev *dev)
77 {
78 	struct mana_priv *priv = dev->data->dev_private;
79 	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
80 
81 	if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
82 		dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
83 
84 	if (dev->data->nb_rx_queues != dev->data->nb_tx_queues) {
85 		DRV_LOG(ERR, "Number of RX and TX queues must be equal");
86 		return -EINVAL;
87 	}
88 
89 	if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
90 		DRV_LOG(ERR, "Number of RX/TX queues must be a power of 2");
91 		return -EINVAL;
92 	}
93 
94 	priv->num_queues = dev->data->nb_rx_queues;
95 
96 	manadv_set_context_attr(priv->ib_ctx, MANADV_CTX_ATTR_BUF_ALLOCATORS,
97 				(void *)((uintptr_t)&(struct manadv_ctx_allocators){
98 					.alloc = &mana_alloc_verbs_buf,
99 					.free = &mana_free_verbs_buf,
100 					.data = 0,
101 				}));
102 
103 	return 0;
104 }
105 
106 static void
107 rx_intr_vec_disable(struct mana_priv *priv)
108 {
109 	struct rte_intr_handle *intr_handle = priv->intr_handle;
110 
111 	rte_intr_free_epoll_fd(intr_handle);
112 	rte_intr_vec_list_free(intr_handle);
113 	rte_intr_nb_efd_set(intr_handle, 0);
114 }
115 
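/* Map each RX queue's completion channel fd to an interrupt vector for RX interrupt mode. */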
116 static int
117 rx_intr_vec_enable(struct mana_priv *priv)
118 {
119 	unsigned int i;
120 	unsigned int rxqs_n = priv->dev_data->nb_rx_queues;
121 	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
122 	struct rte_intr_handle *intr_handle = priv->intr_handle;
123 	int ret;
124 
125 	rx_intr_vec_disable(priv);
126 
127 	if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) {
128 		DRV_LOG(ERR, "Failed to allocate memory for interrupt vector");
129 		return -ENOMEM;
130 	}
131 
132 	for (i = 0; i < n; i++) {
133 		struct mana_rxq *rxq = priv->dev_data->rx_queues[i];
134 
135 		ret = rte_intr_vec_list_index_set(intr_handle, i,
136 						  RTE_INTR_VEC_RXTX_OFFSET + i);
137 		if (ret) {
138 			DRV_LOG(ERR, "Failed to set intr vec %u", i);
139 			return ret;
140 		}
141 
142 		ret = rte_intr_efds_index_set(intr_handle, i, rxq->channel->fd);
143 		if (ret) {
144 			DRV_LOG(ERR, "Failed to set FD at intr %u", i);
145 			return ret;
146 		}
147 	}
148 
149 	return rte_intr_nb_efd_set(intr_handle, n);
150 }
151 
152 static void
153 rxq_intr_disable(struct mana_priv *priv)
154 {
155 	int err = rte_errno;
156 
157 	rx_intr_vec_disable(priv);
158 	rte_errno = err;
159 }
160 
161 static int
162 rxq_intr_enable(struct mana_priv *priv)
163 {
164 	const struct rte_eth_intr_conf *const intr_conf =
165 		&priv->dev_data->dev_conf.intr_conf;
166 
167 	if (!intr_conf->rxq)
168 		return 0;
169 
170 	return rx_intr_vec_enable(priv);
171 }
172 
173 static int
174 mana_dev_start(struct rte_eth_dev *dev)
175 {
176 	int ret;
177 	struct mana_priv *priv = dev->data->dev_private;
178 
179 	rte_spinlock_init(&priv->mr_btree_lock);
180 	ret = mana_mr_btree_init(&priv->mr_btree, MANA_MR_BTREE_CACHE_N,
181 				 dev->device->numa_node);
182 	if (ret) {
183 		DRV_LOG(ERR, "Failed to init device MR btree %d", ret);
184 		return ret;
185 	}
186 
187 	ret = mana_start_tx_queues(dev);
188 	if (ret) {
189 		DRV_LOG(ERR, "failed to start tx queues %d", ret);
190 		goto failed_tx;
191 	}
192 
193 	ret = mana_start_rx_queues(dev);
194 	if (ret) {
195 		DRV_LOG(ERR, "failed to start rx queues %d", ret);
196 		goto failed_rx;
197 	}
198 
199 	rte_wmb();
200 
201 	dev->tx_pkt_burst = mana_tx_burst;
202 	dev->rx_pkt_burst = mana_rx_burst;
203 
204 	DRV_LOG(INFO, "TX/RX queues have started");
205 
206 	/* Enable datapath for secondary processes */
207 	mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX);
208 
209 	ret = rxq_intr_enable(priv);
210 	if (ret) {
211 		DRV_LOG(ERR, "Failed to enable RX interrupts");
212 		goto failed_intr;
213 	}
214 
215 	return 0;
216 
217 failed_intr:
218 	mana_stop_rx_queues(dev);
219 
220 failed_rx:
221 	mana_stop_tx_queues(dev);
222 
223 failed_tx:
224 	mana_mr_btree_free(&priv->mr_btree);
225 
226 	return ret;
227 }
228 
229 static int
230 mana_dev_stop(struct rte_eth_dev *dev)
231 {
232 	int ret;
233 	struct mana_priv *priv = dev->data->dev_private;
234 
235 	rxq_intr_disable(priv);
236 
237 	dev->tx_pkt_burst = mana_tx_burst_removed;
238 	dev->rx_pkt_burst = mana_rx_burst_removed;
239 
240 	/* Stop datapath on secondary processes */
241 	mana_mp_req_on_rxtx(dev, MANA_MP_REQ_STOP_RXTX);
242 
243 	rte_wmb();
244 
245 	ret = mana_stop_tx_queues(dev);
246 	if (ret) {
247 		DRV_LOG(ERR, "failed to stop tx queues");
248 		return ret;
249 	}
250 
251 	ret = mana_stop_rx_queues(dev);
252 	if (ret) {
253 		DRV_LOG(ERR, "failed to stop rx queues");
254 		return ret;
255 	}
256 
257 	return 0;
258 }
259 
260 static int mana_intr_uninstall(struct mana_priv *priv);
261 
262 static int
263 mana_dev_close(struct rte_eth_dev *dev)
264 {
265 	struct mana_priv *priv = dev->data->dev_private;
266 	int ret;
267 
268 	mana_remove_all_mr(priv);
269 
270 	ret = mana_intr_uninstall(priv);
271 	if (ret)
272 		return ret;
273 
274 	ret = ibv_close_device(priv->ib_ctx);
275 	if (ret) {
276 		ret = errno;
277 		return ret;
278 	}
279 
280 	return 0;
281 }
282 
283 static int
284 mana_dev_info_get(struct rte_eth_dev *dev,
285 		  struct rte_eth_dev_info *dev_info)
286 {
287 	struct mana_priv *priv = dev->data->dev_private;
288 
289 	dev_info->max_mtu = RTE_ETHER_MTU;
290 
291 	/* RX params */
292 	dev_info->min_rx_bufsize = MIN_RX_BUF_SIZE;
293 	dev_info->max_rx_pktlen = MAX_FRAME_SIZE;
294 
295 	dev_info->max_rx_queues = priv->max_rx_queues;
296 	dev_info->max_tx_queues = priv->max_tx_queues;
297 
298 	dev_info->max_mac_addrs = MANA_MAX_MAC_ADDR;
299 	dev_info->max_hash_mac_addrs = 0;
300 
301 	dev_info->max_vfs = 1;
302 
303 	/* Offload params */
304 	dev_info->rx_offload_capa = MANA_DEV_RX_OFFLOAD_SUPPORT;
305 
306 	dev_info->tx_offload_capa = MANA_DEV_TX_OFFLOAD_SUPPORT;
307 
308 	/* RSS */
309 	dev_info->reta_size = INDIRECTION_TABLE_NUM_ELEMENTS;
310 	dev_info->hash_key_size = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES;
311 	dev_info->flow_type_rss_offloads = MANA_ETH_RSS_SUPPORT;
312 
313 	/* Thresholds */
314 	dev_info->default_rxconf = (struct rte_eth_rxconf){
315 		.rx_thresh = {
316 			.pthresh = 8,
317 			.hthresh = 8,
318 			.wthresh = 0,
319 		},
320 		.rx_free_thresh = 32,
321 		/* If no descriptors available, pkts are dropped by default */
322 		.rx_drop_en = 1,
323 	};
324 
325 	dev_info->default_txconf = (struct rte_eth_txconf){
326 		.tx_thresh = {
327 			.pthresh = 32,
328 			.hthresh = 0,
329 			.wthresh = 0,
330 		},
331 		.tx_rs_thresh = 32,
332 		.tx_free_thresh = 32,
333 	};
334 
335 	/* Buffer limits */
336 	dev_info->rx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
337 	dev_info->rx_desc_lim.nb_max = priv->max_rx_desc;
338 	dev_info->rx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
339 	dev_info->rx_desc_lim.nb_seg_max = priv->max_recv_sge;
340 	dev_info->rx_desc_lim.nb_mtu_seg_max = priv->max_recv_sge;
341 
342 	dev_info->tx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
343 	dev_info->tx_desc_lim.nb_max = priv->max_tx_desc;
344 	dev_info->tx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
345 	dev_info->tx_desc_lim.nb_seg_max = priv->max_send_sge;
346 	dev_info->tx_desc_lim.nb_mtu_seg_max = priv->max_send_sge;
347 
348 	/* Speed */
349 	dev_info->speed_capa = RTE_ETH_LINK_SPEED_100G;
350 
351 	/* RX params */
352 	dev_info->default_rxportconf.burst_size = 1;
353 	dev_info->default_rxportconf.ring_size = MAX_RECEIVE_BUFFERS_PER_QUEUE;
354 	dev_info->default_rxportconf.nb_queues = 1;
355 
356 	/* TX params */
357 	dev_info->default_txportconf.burst_size = 1;
358 	dev_info->default_txportconf.ring_size = MAX_SEND_BUFFERS_PER_QUEUE;
359 	dev_info->default_txportconf.nb_queues = 1;
360 
361 	return 0;
362 }
363 
364 static void
365 mana_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
366 		       struct rte_eth_txq_info *qinfo)
367 {
368 	struct mana_txq *txq = dev->data->tx_queues[queue_id];
369 
370 	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
371 	qinfo->nb_desc = txq->num_desc;
372 }
373 
374 static void
375 mana_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
376 		       struct rte_eth_rxq_info *qinfo)
377 {
378 	struct mana_rxq *rxq = dev->data->rx_queues[queue_id];
379 
380 	qinfo->mp = rxq->mp;
381 	qinfo->nb_desc = rxq->num_desc;
382 	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
383 }
384 
385 static const uint32_t *
386 mana_supported_ptypes(struct rte_eth_dev *dev __rte_unused)
387 {
388 	static const uint32_t ptypes[] = {
389 		RTE_PTYPE_L2_ETHER,
390 		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
391 		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
392 		RTE_PTYPE_L4_FRAG,
393 		RTE_PTYPE_L4_TCP,
394 		RTE_PTYPE_L4_UDP,
395 		RTE_PTYPE_UNKNOWN
396 	};
397 
398 	return ptypes;
399 }
400 
401 static int
402 mana_rss_hash_update(struct rte_eth_dev *dev,
403 		     struct rte_eth_rss_conf *rss_conf)
404 {
405 	struct mana_priv *priv = dev->data->dev_private;
406 
407 	/* Currently can only update RSS hash when device is stopped */
408 	if (dev->data->dev_started) {
409 		DRV_LOG(ERR, "Can't update RSS after device has started");
410 		return -ENODEV;
411 	}
412 
413 	if (rss_conf->rss_hf & ~MANA_ETH_RSS_SUPPORT) {
414 		DRV_LOG(ERR, "Port %u invalid RSS HF 0x%" PRIx64,
415 			dev->data->port_id, rss_conf->rss_hf);
416 		return -EINVAL;
417 	}
418 
419 	if (rss_conf->rss_key && rss_conf->rss_key_len) {
420 		if (rss_conf->rss_key_len != TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) {
421 			DRV_LOG(ERR, "Port %u key len must be %u bytes",
422 				dev->data->port_id,
423 				TOEPLITZ_HASH_KEY_SIZE_IN_BYTES);
424 			return -EINVAL;
425 		}
426 
427 		priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
428 		priv->rss_conf.rss_key =
429 			rte_zmalloc("mana_rss", rss_conf->rss_key_len,
430 				    RTE_CACHE_LINE_SIZE);
431 		if (!priv->rss_conf.rss_key)
432 			return -ENOMEM;
433 		memcpy(priv->rss_conf.rss_key, rss_conf->rss_key,
434 		       rss_conf->rss_key_len);
435 	}
436 	priv->rss_conf.rss_hf = rss_conf->rss_hf;
437 
438 	return 0;
439 }
440 
441 static int
442 mana_rss_hash_conf_get(struct rte_eth_dev *dev,
443 		       struct rte_eth_rss_conf *rss_conf)
444 {
445 	struct mana_priv *priv = dev->data->dev_private;
446 
447 	if (!rss_conf)
448 		return -EINVAL;
449 
450 	if (rss_conf->rss_key &&
451 	    rss_conf->rss_key_len >= priv->rss_conf.rss_key_len) {
452 		memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
453 		       priv->rss_conf.rss_key_len);
454 	}
455 
456 	rss_conf->rss_key_len = priv->rss_conf.rss_key_len;
457 	rss_conf->rss_hf = priv->rss_conf.rss_hf;
458 
459 	return 0;
460 }
461 
462 static int
463 mana_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
464 			uint16_t nb_desc, unsigned int socket_id,
465 			const struct rte_eth_txconf *tx_conf __rte_unused)
466 
467 {
468 	struct mana_priv *priv = dev->data->dev_private;
469 	struct mana_txq *txq;
470 	int ret;
471 
472 	txq = rte_zmalloc_socket("mana_txq", sizeof(*txq), 0, socket_id);
473 	if (!txq) {
474 		DRV_LOG(ERR, "failed to allocate txq");
475 		return -ENOMEM;
476 	}
477 
478 	txq->socket = socket_id;
479 
480 	txq->desc_ring = rte_malloc_socket("mana_tx_desc_ring",
481 					   sizeof(struct mana_txq_desc) *
482 						nb_desc,
483 					   RTE_CACHE_LINE_SIZE, socket_id);
484 	if (!txq->desc_ring) {
485 		DRV_LOG(ERR, "failed to allocate txq desc_ring");
486 		ret = -ENOMEM;
487 		goto fail;
488 	}
489 
490 	ret = mana_mr_btree_init(&txq->mr_btree,
491 				 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
492 	if (ret) {
493 		DRV_LOG(ERR, "Failed to init TXQ MR btree");
494 		goto fail;
495 	}
496 
497 	DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u txq->desc_ring %p",
498 		queue_idx, nb_desc, socket_id, txq->desc_ring);
499 
500 	txq->desc_ring_head = 0;
501 	txq->desc_ring_tail = 0;
502 	txq->priv = priv;
503 	txq->num_desc = nb_desc;
504 	dev->data->tx_queues[queue_idx] = txq;
505 
506 	return 0;
507 
508 fail:
509 	rte_free(txq->desc_ring);
510 	rte_free(txq);
511 	return ret;
512 }
513 
514 static void
515 mana_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
516 {
517 	struct mana_txq *txq = dev->data->tx_queues[qid];
518 
519 	mana_mr_btree_free(&txq->mr_btree);
520 
521 	rte_free(txq->desc_ring);
522 	rte_free(txq);
523 }
524 
525 static int
526 mana_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
527 			uint16_t nb_desc, unsigned int socket_id,
528 			const struct rte_eth_rxconf *rx_conf __rte_unused,
529 			struct rte_mempool *mp)
530 {
531 	struct mana_priv *priv = dev->data->dev_private;
532 	struct mana_rxq *rxq;
533 	int ret;
534 
535 	rxq = rte_zmalloc_socket("mana_rxq", sizeof(*rxq), 0, socket_id);
536 	if (!rxq) {
537 		DRV_LOG(ERR, "failed to allocate rxq");
538 		return -ENOMEM;
539 	}
540 
541 	DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u",
542 		queue_idx, nb_desc, socket_id);
543 
544 	rxq->socket = socket_id;
545 
546 	rxq->desc_ring = rte_zmalloc_socket("mana_rx_mbuf_ring",
547 					    sizeof(struct mana_rxq_desc) *
548 						nb_desc,
549 					    RTE_CACHE_LINE_SIZE, socket_id);
550 
551 	if (!rxq->desc_ring) {
552 		DRV_LOG(ERR, "failed to allocate rxq desc_ring");
553 		ret = -ENOMEM;
554 		goto fail;
555 	}
556 
557 	rxq->desc_ring_head = 0;
558 	rxq->desc_ring_tail = 0;
559 
560 	ret = mana_mr_btree_init(&rxq->mr_btree,
561 				 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
562 	if (ret) {
563 		DRV_LOG(ERR, "Failed to init RXQ MR btree");
564 		goto fail;
565 	}
566 
567 	rxq->priv = priv;
568 	rxq->num_desc = nb_desc;
569 	rxq->mp = mp;
570 	dev->data->rx_queues[queue_idx] = rxq;
571 
572 	return 0;
573 
574 fail:
575 	rte_free(rxq->desc_ring);
576 	rte_free(rxq);
577 	return ret;
578 }
579 
580 static void
581 mana_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
582 {
583 	struct mana_rxq *rxq = dev->data->rx_queues[qid];
584 
585 	mana_mr_btree_free(&rxq->mr_btree);
586 
587 	rte_free(rxq->desc_ring);
588 	rte_free(rxq);
589 }
590 
591 static int
592 mana_dev_link_update(struct rte_eth_dev *dev,
593 		     int wait_to_complete __rte_unused)
594 {
595 	struct rte_eth_link link;
596 
597 	/* MANA has no concept of carrier state, always reporting UP */
598 	link = (struct rte_eth_link) {
599 		.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
600 		.link_autoneg = RTE_ETH_LINK_FIXED,
601 		.link_speed = RTE_ETH_SPEED_NUM_100G,
602 		.link_status = RTE_ETH_LINK_UP,
603 	};
604 
605 	return rte_eth_linkstatus_set(dev, &link);
606 }
607 
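/* Aggregate per-queue packet, byte and error counters into the device-level stats. */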
608 static int
609 mana_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
610 {
611 	unsigned int i;
612 
613 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
614 		struct mana_txq *txq = dev->data->tx_queues[i];
615 
616 		if (!txq)
617 			continue;
618 
619 		stats->opackets += txq->stats.packets;
620 		stats->obytes += txq->stats.bytes;
621 		stats->oerrors += txq->stats.errors;
622 
623 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
624 			stats->q_opackets[i] = txq->stats.packets;
625 			stats->q_obytes[i] = txq->stats.bytes;
626 		}
627 	}
628 
629 	stats->rx_nombuf = 0;
630 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
631 		struct mana_rxq *rxq = dev->data->rx_queues[i];
632 
633 		if (!rxq)
634 			continue;
635 
636 		stats->ipackets += rxq->stats.packets;
637 		stats->ibytes += rxq->stats.bytes;
638 		stats->ierrors += rxq->stats.errors;
639 
640 		/* There is no good way to get stats->imissed, not setting it */
641 
642 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
643 			stats->q_ipackets[i] = rxq->stats.packets;
644 			stats->q_ibytes[i] = rxq->stats.bytes;
645 		}
646 
647 		stats->rx_nombuf += rxq->stats.nombuf;
648 	}
649 
650 	return 0;
651 }
652 
653 static int
654 mana_dev_stats_reset(struct rte_eth_dev *dev)
655 {
656 	unsigned int i;
657 
658 	PMD_INIT_FUNC_TRACE();
659 
660 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
661 		struct mana_txq *txq = dev->data->tx_queues[i];
662 
663 		if (!txq)
664 			continue;
665 
666 		memset(&txq->stats, 0, sizeof(txq->stats));
667 	}
668 
669 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
670 		struct mana_rxq *rxq = dev->data->rx_queues[i];
671 
672 		if (!rxq)
673 			continue;
674 
675 		memset(&rxq->stats, 0, sizeof(rxq->stats));
676 	}
677 
678 	return 0;
679 }
680 
681 static const struct eth_dev_ops mana_dev_ops = {
682 	.dev_configure		= mana_dev_configure,
683 	.dev_start		= mana_dev_start,
684 	.dev_stop		= mana_dev_stop,
685 	.dev_close		= mana_dev_close,
686 	.dev_infos_get		= mana_dev_info_get,
687 	.txq_info_get		= mana_dev_tx_queue_info,
688 	.rxq_info_get		= mana_dev_rx_queue_info,
689 	.dev_supported_ptypes_get = mana_supported_ptypes,
690 	.rss_hash_update	= mana_rss_hash_update,
691 	.rss_hash_conf_get	= mana_rss_hash_conf_get,
692 	.tx_queue_setup		= mana_dev_tx_queue_setup,
693 	.tx_queue_release	= mana_dev_tx_queue_release,
694 	.rx_queue_setup		= mana_dev_rx_queue_setup,
695 	.rx_queue_release	= mana_dev_rx_queue_release,
696 	.rx_queue_intr_enable	= mana_rx_intr_enable,
697 	.rx_queue_intr_disable	= mana_rx_intr_disable,
698 	.link_update		= mana_dev_link_update,
699 	.stats_get		= mana_dev_stats_get,
700 	.stats_reset		= mana_dev_stats_reset,
701 };
702 
703 static const struct eth_dev_ops mana_dev_secondary_ops = {
704 	.stats_get = mana_dev_stats_get,
705 	.stats_reset = mana_dev_stats_reset,
706 	.dev_infos_get = mana_dev_info_get,
707 };
708 
709 uint16_t
710 mana_rx_burst_removed(void *dpdk_rxq __rte_unused,
711 		      struct rte_mbuf **pkts __rte_unused,
712 		      uint16_t pkts_n __rte_unused)
713 {
714 	rte_mb();
715 	return 0;
716 }
717 
718 uint16_t
719 mana_tx_burst_removed(void *dpdk_rxq __rte_unused,
720 		      struct rte_mbuf **pkts __rte_unused,
721 		      uint16_t pkts_n __rte_unused)
722 {
723 	rte_mb();
724 	return 0;
725 }
726 
727 #define ETH_MANA_MAC_ARG "mac"
728 static const char * const mana_init_args[] = {
729 	ETH_MANA_MAC_ARG,
730 	NULL,
731 };
732 
733 /* Support parsing up to 8 MAC addresses from the EAL command line */
734 #define MAX_NUM_ADDRESS 8
735 struct mana_conf {
736 	struct rte_ether_addr mac_array[MAX_NUM_ADDRESS];
737 	unsigned int index;
738 };
739 
740 static int
741 mana_arg_parse_callback(const char *key, const char *val, void *private)
742 {
743 	struct mana_conf *conf = (struct mana_conf *)private;
744 	int ret;
745 
746 	DRV_LOG(INFO, "key=%s value=%s index=%d", key, val, conf->index);
747 
748 	if (conf->index >= MAX_NUM_ADDRESS) {
749 		DRV_LOG(ERR, "Exceeded max number of MAC addresses");
750 		return 1;
751 	}
752 
753 	ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]);
754 	if (ret) {
755 		DRV_LOG(ERR, "Invalid MAC address %s", val);
756 		return ret;
757 	}
758 
759 	conf->index++;
760 
761 	return 0;
762 }
763 
764 static int
765 mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf)
766 {
767 	struct rte_kvargs *kvlist;
768 	unsigned int arg_count;
769 	int ret = 0;
770 
771 	kvlist = rte_kvargs_parse(devargs->drv_str, mana_init_args);
772 	if (!kvlist) {
773 		DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str);
774 		return -EINVAL;
775 	}
776 
777 	arg_count = rte_kvargs_count(kvlist, mana_init_args[0]);
778 	if (arg_count > MAX_NUM_ADDRESS) {
779 		ret = -EINVAL;
780 		goto free_kvlist;
781 	}
782 	ret = rte_kvargs_process(kvlist, mana_init_args[0],
783 				 mana_arg_parse_callback, conf);
784 	if (ret) {
785 		DRV_LOG(ERR, "error parsing args");
786 		goto free_kvlist;
787 	}
788 
789 free_kvlist:
790 	rte_kvargs_free(kvlist);
791 	return ret;
792 }
793 
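/* Look up the netdev backing the given IB port under sysfs and read its MAC address. */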
794 static int
795 get_port_mac(struct ibv_device *device, unsigned int port,
796 	     struct rte_ether_addr *addr)
797 {
798 	FILE *file;
799 	int ret = 0;
800 	DIR *dir;
801 	struct dirent *dent;
802 	unsigned int dev_port;
803 	char mac[20];
804 
805 	MANA_MKSTR(path, "%s/device/net", device->ibdev_path);
806 
807 	dir = opendir(path);
808 	if (!dir)
809 		return -ENOENT;
810 
811 	while ((dent = readdir(dir))) {
812 		char *name = dent->d_name;
813 
814 		MANA_MKSTR(port_path, "%s/%s/dev_port", path, name);
815 
816 		/* Ignore . and .. */
817 		if ((name[0] == '.') &&
818 		    ((name[1] == '\0') ||
819 		     ((name[1] == '.') && (name[2] == '\0'))))
820 			continue;
821 
822 		file = fopen(port_path, "r");
823 		if (!file)
824 			continue;
825 
826 		ret = fscanf(file, "%u", &dev_port);
827 		fclose(file);
828 
829 		if (ret != 1)
830 			continue;
831 
832 		/* Ethernet ports start at 0, IB ports start at 1 */
833 		if (dev_port == port - 1) {
834 			MANA_MKSTR(address_path, "%s/%s/address", path, name);
835 
836 			file = fopen(address_path, "r");
837 			if (!file)
838 				continue;
839 
840 			ret = fscanf(file, "%s", mac);
841 			fclose(file);
842 
843 			if (ret < 0)
844 				break;
845 
846 			ret = rte_ether_unformat_addr(mac, addr);
847 			if (ret)
848 				DRV_LOG(ERR, "unrecognized mac addr %s", mac);
849 			break;
850 		}
851 	}
852 
853 	closedir(dir);
854 	return ret;
855 }
856 
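/* Derive the PCI address of an IB device by parsing PCI_SLOT_NAME from its sysfs uevent file. */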
857 static int
858 mana_ibv_device_to_pci_addr(const struct ibv_device *device,
859 			    struct rte_pci_addr *pci_addr)
860 {
861 	FILE *file;
862 	char *line = NULL;
863 	size_t len = 0;
864 
865 	MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path);
866 
867 	file = fopen(path, "r");
868 	if (!file)
869 		return -errno;
870 
871 	while (getline(&line, &len, file) != -1) {
872 		/* Extract information. */
873 		if (sscanf(line,
874 			   "PCI_SLOT_NAME="
875 			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
876 			   &pci_addr->domain,
877 			   &pci_addr->bus,
878 			   &pci_addr->devid,
879 			   &pci_addr->function) == 4) {
880 			break;
881 		}
882 	}
883 
884 	free(line);
885 	fclose(file);
886 	return 0;
887 }
888 
889 /*
890  * Interrupt handler from IB layer to notify this device is being removed.
891  */
892 static void
893 mana_intr_handler(void *arg)
894 {
895 	struct mana_priv *priv = arg;
896 	struct ibv_context *ctx = priv->ib_ctx;
897 	struct ibv_async_event event;
898 
899 	/* Read and ack all messages from IB device */
900 	while (true) {
901 		if (ibv_get_async_event(ctx, &event))
902 			break;
903 
904 		if (event.event_type == IBV_EVENT_DEVICE_FATAL) {
905 			struct rte_eth_dev *dev;
906 
907 			dev = &rte_eth_devices[priv->port_id];
908 			if (dev->data->dev_conf.intr_conf.rmv)
909 				rte_eth_dev_callback_process(dev,
910 					RTE_ETH_EVENT_INTR_RMV, NULL);
911 		}
912 
913 		ibv_ack_async_event(&event);
914 	}
915 }
916 
917 static int
918 mana_intr_uninstall(struct mana_priv *priv)
919 {
920 	int ret;
921 
922 	ret = rte_intr_callback_unregister(priv->intr_handle,
923 					   mana_intr_handler, priv);
924 	if (ret <= 0) {
925 		DRV_LOG(ERR, "Failed to unregister intr callback ret %d", ret);
926 		return ret;
927 	}
928 
929 	rte_intr_instance_free(priv->intr_handle);
930 
931 	return 0;
932 }
933 
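/* Set O_NONBLOCK on a file descriptor; returns 0 on success or -rte_errno on failure. */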
934 int
935 mana_fd_set_non_blocking(int fd)
936 {
937 	int ret = fcntl(fd, F_GETFL);
938 
939 	if (ret != -1 && !fcntl(fd, F_SETFL, ret | O_NONBLOCK))
940 		return 0;
941 
942 	rte_errno = errno;
943 	return -rte_errno;
944 }
945 
946 static int
947 mana_intr_install(struct rte_eth_dev *eth_dev, struct mana_priv *priv)
948 {
949 	int ret;
950 	struct ibv_context *ctx = priv->ib_ctx;
951 
952 	priv->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
953 	if (!priv->intr_handle) {
954 		DRV_LOG(ERR, "Failed to allocate intr_handle");
955 		rte_errno = ENOMEM;
956 		return -ENOMEM;
957 	}
958 
959 	ret = rte_intr_fd_set(priv->intr_handle, -1);
960 	if (ret)
961 		goto free_intr;
962 
963 	ret = mana_fd_set_non_blocking(ctx->async_fd);
964 	if (ret) {
965 		DRV_LOG(ERR, "Failed to change async_fd to NONBLOCK");
966 		goto free_intr;
967 	}
968 
969 	ret = rte_intr_fd_set(priv->intr_handle, ctx->async_fd);
970 	if (ret)
971 		goto free_intr;
972 
973 	ret = rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT);
974 	if (ret)
975 		goto free_intr;
976 
977 	ret = rte_intr_callback_register(priv->intr_handle,
978 					 mana_intr_handler, priv);
979 	if (ret) {
980 		DRV_LOG(ERR, "Failed to register intr callback");
981 		rte_intr_fd_set(priv->intr_handle, -1);
982 		goto free_intr;
983 	}
984 
985 	eth_dev->intr_handle = priv->intr_handle;
986 	return 0;
987 
988 free_intr:
989 	rte_intr_instance_free(priv->intr_handle);
990 	priv->intr_handle = NULL;
991 
992 	return ret;
993 }
994 
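/* Allocate per-process private data; it holds the per-process doorbell page mapping. */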
995 static int
996 mana_proc_priv_init(struct rte_eth_dev *dev)
997 {
998 	struct mana_process_priv *priv;
999 
1000 	priv = rte_zmalloc_socket("mana_proc_priv",
1001 				  sizeof(struct mana_process_priv),
1002 				  RTE_CACHE_LINE_SIZE,
1003 				  dev->device->numa_node);
1004 	if (!priv)
1005 		return -ENOMEM;
1006 
1007 	dev->process_private = priv;
1008 	return 0;
1009 }
1010 
1011 /*
1012  * Map the doorbell page for the secondary process through IB device handle.
1013  */
1014 static int
1015 mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd)
1016 {
1017 	struct mana_process_priv *priv = eth_dev->process_private;
1018 
1019 	void *addr;
1020 
1021 	addr = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0);
1022 	if (addr == MAP_FAILED) {
1023 		DRV_LOG(ERR, "Failed to map secondary doorbell port %u",
1024 			eth_dev->data->port_id);
1025 		return -ENOMEM;
1026 	}
1027 
1028 	DRV_LOG(INFO, "Secondary doorbell mapped to %p", addr);
1029 
1030 	priv->db_page = addr;
1031 
1032 	return 0;
1033 }
1034 
1035 /* Initialize shared data for the driver (all devices) */
1036 static int
1037 mana_init_shared_data(void)
1038 {
1039 	int ret = 0;
1040 	const struct rte_memzone *secondary_mz;
1041 
1042 	rte_spinlock_lock(&mana_shared_data_lock);
1043 
1044 	/* Skip if shared data is already initialized */
1045 	if (mana_shared_data)
1046 		goto exit;
1047 
1048 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1049 		mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA,
1050 						     sizeof(*mana_shared_data),
1051 						     SOCKET_ID_ANY, 0);
1052 		if (!mana_shared_mz) {
1053 			DRV_LOG(ERR, "Cannot allocate mana shared data");
1054 			ret = -rte_errno;
1055 			goto exit;
1056 		}
1057 
1058 		mana_shared_data = mana_shared_mz->addr;
1059 		memset(mana_shared_data, 0, sizeof(*mana_shared_data));
1060 		rte_spinlock_init(&mana_shared_data->lock);
1061 	} else {
1062 		secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA);
1063 		if (!secondary_mz) {
1064 			DRV_LOG(ERR, "Cannot attach mana shared data");
1065 			ret = -rte_errno;
1066 			goto exit;
1067 		}
1068 
1069 		mana_shared_data = secondary_mz->addr;
1070 		memset(&mana_local_data, 0, sizeof(mana_local_data));
1071 	}
1072 
1073 exit:
1074 	rte_spinlock_unlock(&mana_shared_data_lock);
1075 
1076 	return ret;
1077 }
1078 
1079 /*
1080  * Init the data structures for use in primary and secondary processes.
1081  */
1082 static int
1083 mana_init_once(void)
1084 {
1085 	int ret;
1086 
1087 	ret = mana_init_shared_data();
1088 	if (ret)
1089 		return ret;
1090 
1091 	rte_spinlock_lock(&mana_shared_data->lock);
1092 
1093 	switch (rte_eal_process_type()) {
1094 	case RTE_PROC_PRIMARY:
1095 		if (mana_shared_data->init_done)
1096 			break;
1097 
1098 		ret = mana_mp_init_primary();
1099 		if (ret)
1100 			break;
1101 		DRV_LOG(DEBUG, "MP INIT PRIMARY");
1102 
1103 		mana_shared_data->init_done = 1;
1104 		break;
1105 
1106 	case RTE_PROC_SECONDARY:
1107 
1108 		if (mana_local_data.init_done)
1109 			break;
1110 
1111 		ret = mana_mp_init_secondary();
1112 		if (ret)
1113 			break;
1114 
1115 		DRV_LOG(DEBUG, "MP INIT SECONDARY");
1116 
1117 		mana_local_data.init_done = 1;
1118 		break;
1119 
1120 	default:
1121 		/* Impossible, internal error */
1122 		ret = -EPROTO;
1123 		break;
1124 	}
1125 
1126 	rte_spinlock_unlock(&mana_shared_data->lock);
1127 
1128 	return ret;
1129 }
1130 
1131 /*
1132  * Probe an IB port
1133  * Return value:
1134  * positive value: successfully probed port
1135  * 0: port not matching specified MAC address
1136  * negative value: error code
1137  */
1138 static int
1139 mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr,
1140 		uint8_t port, struct rte_pci_device *pci_dev, struct rte_ether_addr *addr)
1141 {
1142 	struct mana_priv *priv = NULL;
1143 	struct rte_eth_dev *eth_dev = NULL;
1144 	struct ibv_parent_domain_init_attr attr = {0};
1145 	char address[64];
1146 	char name[RTE_ETH_NAME_MAX_LEN];
1147 	int ret;
1148 	struct ibv_context *ctx = NULL;
1149 
1150 	rte_ether_format_addr(address, sizeof(address), addr);
1151 	DRV_LOG(INFO, "device located port %u address %s", port, address);
1152 
1153 	priv = rte_zmalloc_socket(NULL, sizeof(*priv), RTE_CACHE_LINE_SIZE,
1154 				  SOCKET_ID_ANY);
1155 	if (!priv)
1156 		return -ENOMEM;
1157 
1158 	snprintf(name, sizeof(name), "%s_port%d", pci_dev->device.name, port);
1159 
1160 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1161 		int fd;
1162 
1163 		eth_dev = rte_eth_dev_attach_secondary(name);
1164 		if (!eth_dev) {
1165 			DRV_LOG(ERR, "Can't attach to dev %s", name);
1166 			ret =  -ENOMEM;
1167 			goto failed;
1168 		}
1169 
1170 		eth_dev->device = &pci_dev->device;
1171 		eth_dev->dev_ops = &mana_dev_secondary_ops;
1172 		ret = mana_proc_priv_init(eth_dev);
1173 		if (ret)
1174 			goto failed;
1175 		priv->process_priv = eth_dev->process_private;
1176 
1177 		/* Get the IB FD from the primary process */
1178 		fd = mana_mp_req_verbs_cmd_fd(eth_dev);
1179 		if (fd < 0) {
1180 			DRV_LOG(ERR, "Failed to get FD %d", fd);
1181 			ret = -ENODEV;
1182 			goto failed;
1183 		}
1184 
1185 		ret = mana_map_doorbell_secondary(eth_dev, fd);
1186 		if (ret) {
1187 			DRV_LOG(ERR, "Failed secondary map %d", fd);
1188 			goto failed;
1189 		}
1190 
1191 		/* fd is not used after mapping the doorbell */
1192 		close(fd);
1193 
1194 		eth_dev->tx_pkt_burst = mana_tx_burst_removed;
1195 		eth_dev->rx_pkt_burst = mana_rx_burst_removed;
1196 
1197 		rte_spinlock_lock(&mana_shared_data->lock);
1198 		mana_shared_data->secondary_cnt++;
1199 		mana_local_data.secondary_cnt++;
1200 		rte_spinlock_unlock(&mana_shared_data->lock);
1201 
1202 		rte_eth_copy_pci_info(eth_dev, pci_dev);
1203 		rte_eth_dev_probing_finish(eth_dev);
1204 
1205 		return 0;
1206 	}
1207 
1208 	ctx = ibv_open_device(ibdev);
1209 	if (!ctx) {
1210 		DRV_LOG(ERR, "Failed to open IB device %s", ibdev->name);
1211 		ret = -ENODEV;
1212 		goto failed;
1213 	}
1214 
1215 	eth_dev = rte_eth_dev_allocate(name);
1216 	if (!eth_dev) {
1217 		ret = -ENOMEM;
1218 		goto failed;
1219 	}
1220 
1221 	eth_dev->data->mac_addrs =
1222 		rte_calloc("mana_mac", 1,
1223 			   sizeof(struct rte_ether_addr), 0);
1224 	if (!eth_dev->data->mac_addrs) {
1225 		ret = -ENOMEM;
1226 		goto failed;
1227 	}
1228 
1229 	rte_ether_addr_copy(addr, eth_dev->data->mac_addrs);
1230 
1231 	priv->ib_pd = ibv_alloc_pd(ctx);
1232 	if (!priv->ib_pd) {
1233 		DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port);
1234 		ret = -ENOMEM;
1235 		goto failed;
1236 	}
1237 
1238 	/* Create a parent domain with the port number */
1239 	attr.pd = priv->ib_pd;
1240 	attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT;
1241 	attr.pd_context = (void *)(uint64_t)port;
1242 	priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr);
1243 	if (!priv->ib_parent_pd) {
1244 		DRV_LOG(ERR, "ibv_alloc_parent_domain failed port %d", port);
1245 		ret = -ENOMEM;
1246 		goto failed;
1247 	}
1248 
1249 	priv->ib_ctx = ctx;
1250 	priv->port_id = eth_dev->data->port_id;
1251 	priv->dev_port = port;
1252 	eth_dev->data->dev_private = priv;
1253 	priv->dev_data = eth_dev->data;
1254 
1255 	priv->max_rx_queues = dev_attr->orig_attr.max_qp;
1256 	priv->max_tx_queues = dev_attr->orig_attr.max_qp;
1257 
1258 	priv->max_rx_desc =
1259 		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
1260 			dev_attr->orig_attr.max_cqe);
1261 	priv->max_tx_desc =
1262 		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
1263 			dev_attr->orig_attr.max_cqe);
1264 
1265 	priv->max_send_sge = dev_attr->orig_attr.max_sge;
1266 	priv->max_recv_sge = dev_attr->orig_attr.max_sge;
1267 
1268 	priv->max_mr = dev_attr->orig_attr.max_mr;
1269 	priv->max_mr_size = dev_attr->orig_attr.max_mr_size;
1270 
1271 	DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d",
1272 		name, priv->max_rx_queues, priv->max_rx_desc,
1273 		priv->max_send_sge);
1274 
1275 	rte_eth_copy_pci_info(eth_dev, pci_dev);
1276 
1277 	/* Create async interrupt handler */
1278 	ret = mana_intr_install(eth_dev, priv);
1279 	if (ret) {
1280 		DRV_LOG(ERR, "Failed to install intr handler");
1281 		goto failed;
1282 	}
1283 
1284 	rte_spinlock_lock(&mana_shared_data->lock);
1285 	mana_shared_data->primary_cnt++;
1286 	rte_spinlock_unlock(&mana_shared_data->lock);
1287 
1288 	eth_dev->device = &pci_dev->device;
1289 
1290 	DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id);
1291 
1292 	eth_dev->rx_pkt_burst = mana_rx_burst_removed;
1293 	eth_dev->tx_pkt_burst = mana_tx_burst_removed;
1294 	eth_dev->dev_ops = &mana_dev_ops;
1295 
1296 	rte_eth_dev_probing_finish(eth_dev);
1297 
1298 	return 0;
1299 
1300 failed:
1301 	/* Free the resources for the failed port */
1302 	if (priv) {
1303 		if (priv->ib_parent_pd)
1304 			ibv_dealloc_pd(priv->ib_parent_pd);
1305 
1306 		if (priv->ib_pd)
1307 			ibv_dealloc_pd(priv->ib_pd);
1308 	}
1309 
1310 	if (eth_dev)
1311 		rte_eth_dev_release_port(eth_dev);
1312 
1313 	rte_free(priv);
1314 
1315 	if (ctx)
1316 		ibv_close_device(ctx);
1317 
1318 	return ret;
1319 }
1320 
1321 /*
1322  * Goes through the IB device list to look for the IB port matching the
1323  * mac_addr. If found, create a rte_eth_dev for it.
1324  */
1325 static int
1326 mana_pci_probe_mac(struct rte_pci_device *pci_dev,
1327 		   struct rte_ether_addr *mac_addr)
1328 {
1329 	struct ibv_device **ibv_list;
1330 	int ibv_idx;
1331 	struct ibv_context *ctx;
1332 	int num_devices;
1333 	int ret = 0;
1334 	uint8_t port;
1335 
1336 	ibv_list = ibv_get_device_list(&num_devices);
1337 	for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
1338 		struct ibv_device *ibdev = ibv_list[ibv_idx];
1339 		struct rte_pci_addr pci_addr;
1340 		struct ibv_device_attr_ex dev_attr;
1341 
1342 		DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
1343 			ibdev->name, ibdev->dev_name, ibdev->ibdev_path);
1344 
1345 		if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
1346 			continue;
1347 
1348 		/* Ignore if this IB device is not this PCI device */
1349 		if (pci_dev->addr.domain != pci_addr.domain ||
1350 		    pci_dev->addr.bus != pci_addr.bus ||
1351 		    pci_dev->addr.devid != pci_addr.devid ||
1352 		    pci_dev->addr.function != pci_addr.function)
1353 			continue;
1354 
1355 		ctx = ibv_open_device(ibdev);
1356 		if (!ctx) {
1357 			DRV_LOG(ERR, "Failed to open IB device %s",
1358 				ibdev->name);
1359 			continue;
1360 		}
1361 		ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
1362 		ibv_close_device(ctx);
1363 
1364 		for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
1365 		     port++) {
1366 			struct rte_ether_addr addr;
1367 			ret = get_port_mac(ibdev, port, &addr);
1368 			if (ret)
1369 				continue;
1370 
1371 			if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr))
1372 				continue;
1373 
1374 			ret = mana_probe_port(ibdev, &dev_attr, port, pci_dev, &addr);
1375 			if (ret)
1376 				DRV_LOG(ERR, "Probe on IB port %u failed %d", port, ret);
1377 			else
1378 				DRV_LOG(INFO, "Successfully probed on IB port %u", port);
1379 		}
1380 	}
1381 
1382 	ibv_free_device_list(ibv_list);
1383 	return ret;
1384 }
1385 
1386 /*
1387  * Main callback function from PCI bus to probe a device.
1388  */
1389 static int
1390 mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1391 	       struct rte_pci_device *pci_dev)
1392 {
1393 	struct rte_devargs *args = pci_dev->device.devargs;
1394 	struct mana_conf conf = {0};
1395 	unsigned int i;
1396 	int ret;
1397 
1398 	if (args && args->drv_str) {
1399 		ret = mana_parse_args(args, &conf);
1400 		if (ret) {
1401 			DRV_LOG(ERR, "Failed to parse parameters args = %s",
1402 				args->drv_str);
1403 			return ret;
1404 		}
1405 	}
1406 
1407 	ret = mana_init_once();
1408 	if (ret) {
1409 		DRV_LOG(ERR, "Failed to init PMD global data %d", ret);
1410 		return ret;
1411 	}
1412 
1413 	/* If there are no driver parameters, probe on all ports */
1414 	if (!conf.index)
1415 		return mana_pci_probe_mac(pci_dev, NULL);
1416 
1417 	for (i = 0; i < conf.index; i++) {
1418 		ret = mana_pci_probe_mac(pci_dev, &conf.mac_array[i]);
1419 		if (ret)
1420 			return ret;
1421 	}
1422 
1423 	return 0;
1424 }
1425 
1426 static int
1427 mana_dev_uninit(struct rte_eth_dev *dev)
1428 {
1429 	return mana_dev_close(dev);
1430 }
1431 
1432 /*
1433  * Callback from PCI to remove this device.
1434  */
1435 static int
1436 mana_pci_remove(struct rte_pci_device *pci_dev)
1437 {
1438 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1439 		rte_spinlock_lock(&mana_shared_data_lock);
1440 
1441 		rte_spinlock_lock(&mana_shared_data->lock);
1442 
1443 		RTE_VERIFY(mana_shared_data->primary_cnt > 0);
1444 		mana_shared_data->primary_cnt--;
1445 		if (!mana_shared_data->primary_cnt) {
1446 			DRV_LOG(DEBUG, "mp uninit primary");
1447 			mana_mp_uninit_primary();
1448 		}
1449 
1450 		rte_spinlock_unlock(&mana_shared_data->lock);
1451 
1452 		/* Also free the shared memory if this is the last primary process */
1453 		if (!mana_shared_data->primary_cnt) {
1454 			DRV_LOG(DEBUG, "free shared memzone data");
1455 			rte_memzone_free(mana_shared_mz);
1456 		}
1457 
1458 		rte_spinlock_unlock(&mana_shared_data_lock);
1459 	} else {
1460 		rte_spinlock_lock(&mana_shared_data_lock);
1461 
1462 		rte_spinlock_lock(&mana_shared_data->lock);
1463 		RTE_VERIFY(mana_shared_data->secondary_cnt > 0);
1464 		mana_shared_data->secondary_cnt--;
1465 		rte_spinlock_unlock(&mana_shared_data->lock);
1466 
1467 		RTE_VERIFY(mana_local_data.secondary_cnt > 0);
1468 		mana_local_data.secondary_cnt--;
1469 		if (!mana_local_data.secondary_cnt) {
1470 			DRV_LOG(DEBUG, "mp uninit secondary");
1471 			mana_mp_uninit_secondary();
1472 		}
1473 
1474 		rte_spinlock_unlock(&mana_shared_data_lock);
1475 	}
1476 
1477 	return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit);
1478 }
1479 
1480 static const struct rte_pci_id mana_pci_id_map[] = {
1481 	{
1482 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT,
1483 			       PCI_DEVICE_ID_MICROSOFT_MANA)
1484 	},
1485 	{
1486 		.vendor_id = 0
1487 	},
1488 };
1489 
1490 static struct rte_pci_driver mana_pci_driver = {
1491 	.id_table = mana_pci_id_map,
1492 	.probe = mana_pci_probe,
1493 	.remove = mana_pci_remove,
1494 	.drv_flags = RTE_PCI_DRV_INTR_RMV,
1495 };
1496 
1497 RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver);
1498 RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
1499 RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
1500 RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
1501 RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);
1502 RTE_PMD_REGISTER_PARAM_STRING(net_mana, ETH_MANA_MAC_ARG "=<mac_addr>");
1503