xref: /dpdk/drivers/net/mana/mana.c (revision c2c0c8af08ed4e37a10bbabf98a09a105a5650a8)
1517ed6e2SLong Li /* SPDX-License-Identifier: BSD-3-Clause
2517ed6e2SLong Li  * Copyright 2022 Microsoft Corporation
3517ed6e2SLong Li  */
4517ed6e2SLong Li 
5517ed6e2SLong Li #include <unistd.h>
6517ed6e2SLong Li #include <dirent.h>
7517ed6e2SLong Li #include <fcntl.h>
8517ed6e2SLong Li #include <sys/mman.h>
984497839SLong Li #include <sys/ioctl.h>
1084497839SLong Li #include <net/if.h>
11517ed6e2SLong Li 
12517ed6e2SLong Li #include <ethdev_driver.h>
13517ed6e2SLong Li #include <ethdev_pci.h>
14517ed6e2SLong Li #include <rte_kvargs.h>
15517ed6e2SLong Li #include <rte_eal_paging.h>
168fa22e1fSThomas Monjalon #include <rte_pci.h>
17517ed6e2SLong Li 
18517ed6e2SLong Li #include <infiniband/verbs.h>
19517ed6e2SLong Li #include <infiniband/manadv.h>
20517ed6e2SLong Li 
21517ed6e2SLong Li #include <assert.h>
22517ed6e2SLong Li 
23517ed6e2SLong Li #include "mana.h"
24517ed6e2SLong Li 
25517ed6e2SLong Li /* Shared memory between primary/secondary processes, per driver */
26517ed6e2SLong Li /* Data to track primary/secondary usage */
27517ed6e2SLong Li struct mana_shared_data *mana_shared_data;
28517ed6e2SLong Li static struct mana_shared_data mana_local_data;
29517ed6e2SLong Li 
30517ed6e2SLong Li /* The memory region for the above data */
31517ed6e2SLong Li static const struct rte_memzone *mana_shared_mz;
32517ed6e2SLong Li static const char *MZ_MANA_SHARED_DATA = "mana_shared_data";
33517ed6e2SLong Li 
34517ed6e2SLong Li /* Spinlock for mana_shared_data */
35517ed6e2SLong Li static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
36517ed6e2SLong Li 
37517ed6e2SLong Li /* Allocate a buffer on the stack and fill it with a printf format string. */
38517ed6e2SLong Li #define MANA_MKSTR(name, ...) \
39517ed6e2SLong Li 	int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
40517ed6e2SLong Li 	char name[mkstr_size_##name + 1]; \
41517ed6e2SLong Li 	\
42517ed6e2SLong Li 	memset(name, 0, mkstr_size_##name + 1); \
43517ed6e2SLong Li 	snprintf(name, sizeof(name), "" __VA_ARGS__)
44517ed6e2SLong Li 
45517ed6e2SLong Li int mana_logtype_driver;
46517ed6e2SLong Li int mana_logtype_init;
47517ed6e2SLong Li 
480dbfecfeSLong Li /*
490dbfecfeSLong Li  * Callback from rdma-core to allocate a buffer for a queue.
500dbfecfeSLong Li  */
510dbfecfeSLong Li void *
mana_alloc_verbs_buf(size_t size,void * data)520dbfecfeSLong Li mana_alloc_verbs_buf(size_t size, void *data)
530dbfecfeSLong Li {
540dbfecfeSLong Li 	void *ret;
550dbfecfeSLong Li 	size_t alignment = rte_mem_page_size();
560dbfecfeSLong Li 	int socket = (int)(uintptr_t)data;
570dbfecfeSLong Li 
580dbfecfeSLong Li 	DRV_LOG(DEBUG, "size=%zu socket=%d", size, socket);
590dbfecfeSLong Li 
600dbfecfeSLong Li 	if (alignment == (size_t)-1) {
610dbfecfeSLong Li 		DRV_LOG(ERR, "Failed to get mem page size");
620dbfecfeSLong Li 		rte_errno = ENOMEM;
630dbfecfeSLong Li 		return NULL;
640dbfecfeSLong Li 	}
650dbfecfeSLong Li 
660dbfecfeSLong Li 	ret = rte_zmalloc_socket("mana_verb_buf", size, alignment, socket);
670dbfecfeSLong Li 	if (!ret && size)
680dbfecfeSLong Li 		rte_errno = ENOMEM;
690dbfecfeSLong Li 	return ret;
700dbfecfeSLong Li }
710dbfecfeSLong Li 
720dbfecfeSLong Li void
mana_free_verbs_buf(void * ptr,void * data __rte_unused)730dbfecfeSLong Li mana_free_verbs_buf(void *ptr, void *data __rte_unused)
740dbfecfeSLong Li {
750dbfecfeSLong Li 	rte_free(ptr);
760dbfecfeSLong Li }
770dbfecfeSLong Li 
780dbfecfeSLong Li static int
mana_dev_configure(struct rte_eth_dev * dev)790dbfecfeSLong Li mana_dev_configure(struct rte_eth_dev *dev)
800dbfecfeSLong Li {
810dbfecfeSLong Li 	struct mana_priv *priv = dev->data->dev_private;
820dbfecfeSLong Li 	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
830dbfecfeSLong Li 
840dbfecfeSLong Li 	if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
850dbfecfeSLong Li 		dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
860dbfecfeSLong Li 
870dbfecfeSLong Li 	if (dev->data->nb_rx_queues != dev->data->nb_tx_queues) {
880dbfecfeSLong Li 		DRV_LOG(ERR, "Only support equal number of rx/tx queues");
890dbfecfeSLong Li 		return -EINVAL;
900dbfecfeSLong Li 	}
910dbfecfeSLong Li 
920dbfecfeSLong Li 	if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
930dbfecfeSLong Li 		DRV_LOG(ERR, "number of TX/RX queues must be power of 2");
940dbfecfeSLong Li 		return -EINVAL;
950dbfecfeSLong Li 	}
960dbfecfeSLong Li 
97f8a4217dSWei Hu 	priv->vlan_strip = !!(dev_conf->rxmode.offloads &
98f8a4217dSWei Hu 			      RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
99f8a4217dSWei Hu 
1000dbfecfeSLong Li 	priv->num_queues = dev->data->nb_rx_queues;
1010dbfecfeSLong Li 
1020dbfecfeSLong Li 	manadv_set_context_attr(priv->ib_ctx, MANADV_CTX_ATTR_BUF_ALLOCATORS,
1030dbfecfeSLong Li 				(void *)((uintptr_t)&(struct manadv_ctx_allocators){
1040dbfecfeSLong Li 					.alloc = &mana_alloc_verbs_buf,
1050dbfecfeSLong Li 					.free = &mana_free_verbs_buf,
1060dbfecfeSLong Li 					.data = 0,
1070dbfecfeSLong Li 				}));
1080dbfecfeSLong Li 
1090dbfecfeSLong Li 	return 0;
1100dbfecfeSLong Li }
1110dbfecfeSLong Li 
112afd5d170SLong Li static void
rx_intr_vec_disable(struct mana_priv * priv)113afd5d170SLong Li rx_intr_vec_disable(struct mana_priv *priv)
114afd5d170SLong Li {
115afd5d170SLong Li 	struct rte_intr_handle *intr_handle = priv->intr_handle;
116afd5d170SLong Li 
117afd5d170SLong Li 	rte_intr_free_epoll_fd(intr_handle);
118afd5d170SLong Li 	rte_intr_vec_list_free(intr_handle);
119afd5d170SLong Li 	rte_intr_nb_efd_set(intr_handle, 0);
120afd5d170SLong Li }
121afd5d170SLong Li 
122afd5d170SLong Li static int
rx_intr_vec_enable(struct mana_priv * priv)123afd5d170SLong Li rx_intr_vec_enable(struct mana_priv *priv)
124afd5d170SLong Li {
125afd5d170SLong Li 	unsigned int i;
126afd5d170SLong Li 	unsigned int rxqs_n = priv->dev_data->nb_rx_queues;
127afd5d170SLong Li 	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
128afd5d170SLong Li 	struct rte_intr_handle *intr_handle = priv->intr_handle;
129afd5d170SLong Li 	int ret;
130afd5d170SLong Li 
131afd5d170SLong Li 	rx_intr_vec_disable(priv);
132afd5d170SLong Li 
133afd5d170SLong Li 	if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) {
134afd5d170SLong Li 		DRV_LOG(ERR, "Failed to allocate memory for interrupt vector");
135afd5d170SLong Li 		return -ENOMEM;
136afd5d170SLong Li 	}
137afd5d170SLong Li 
138afd5d170SLong Li 	for (i = 0; i < n; i++) {
139afd5d170SLong Li 		struct mana_rxq *rxq = priv->dev_data->rx_queues[i];
140afd5d170SLong Li 
141afd5d170SLong Li 		ret = rte_intr_vec_list_index_set(intr_handle, i,
142afd5d170SLong Li 						  RTE_INTR_VEC_RXTX_OFFSET + i);
143afd5d170SLong Li 		if (ret) {
144afd5d170SLong Li 			DRV_LOG(ERR, "Failed to set intr vec %u", i);
145afd5d170SLong Li 			return ret;
146afd5d170SLong Li 		}
147afd5d170SLong Li 
148afd5d170SLong Li 		ret = rte_intr_efds_index_set(intr_handle, i, rxq->channel->fd);
149afd5d170SLong Li 		if (ret) {
150afd5d170SLong Li 			DRV_LOG(ERR, "Failed to set FD at intr %u", i);
151afd5d170SLong Li 			return ret;
152afd5d170SLong Li 		}
153afd5d170SLong Li 	}
154afd5d170SLong Li 
155afd5d170SLong Li 	return rte_intr_nb_efd_set(intr_handle, n);
156afd5d170SLong Li }
157afd5d170SLong Li 
158afd5d170SLong Li static void
rxq_intr_disable(struct mana_priv * priv)159afd5d170SLong Li rxq_intr_disable(struct mana_priv *priv)
160afd5d170SLong Li {
161afd5d170SLong Li 	int err = rte_errno;
162afd5d170SLong Li 
163afd5d170SLong Li 	rx_intr_vec_disable(priv);
164afd5d170SLong Li 	rte_errno = err;
165afd5d170SLong Li }
166afd5d170SLong Li 
167afd5d170SLong Li static int
rxq_intr_enable(struct mana_priv * priv)168afd5d170SLong Li rxq_intr_enable(struct mana_priv *priv)
169afd5d170SLong Li {
170afd5d170SLong Li 	const struct rte_eth_intr_conf *const intr_conf =
171afd5d170SLong Li 		&priv->dev_data->dev_conf.intr_conf;
172afd5d170SLong Li 
173afd5d170SLong Li 	if (!intr_conf->rxq)
174afd5d170SLong Li 		return 0;
175afd5d170SLong Li 
176afd5d170SLong Li 	return rx_intr_vec_enable(priv);
177afd5d170SLong Li }
178bd15f237SLong Li 
1790dbfecfeSLong Li static int
mana_dev_start(struct rte_eth_dev * dev)18037544700SLong Li mana_dev_start(struct rte_eth_dev *dev)
18137544700SLong Li {
18237544700SLong Li 	int ret;
18337544700SLong Li 	struct mana_priv *priv = dev->data->dev_private;
18437544700SLong Li 
18537544700SLong Li 	rte_spinlock_init(&priv->mr_btree_lock);
18637544700SLong Li 	ret = mana_mr_btree_init(&priv->mr_btree, MANA_MR_BTREE_CACHE_N,
18737544700SLong Li 				 dev->device->numa_node);
18837544700SLong Li 	if (ret) {
18937544700SLong Li 		DRV_LOG(ERR, "Failed to init device MR btree %d", ret);
19037544700SLong Li 		return ret;
19137544700SLong Li 	}
19237544700SLong Li 
19337544700SLong Li 	ret = mana_start_tx_queues(dev);
19437544700SLong Li 	if (ret) {
19537544700SLong Li 		DRV_LOG(ERR, "failed to start tx queues %d", ret);
19637544700SLong Li 		goto failed_tx;
19737544700SLong Li 	}
19837544700SLong Li 
19937544700SLong Li 	ret = mana_start_rx_queues(dev);
20037544700SLong Li 	if (ret) {
20137544700SLong Li 		DRV_LOG(ERR, "failed to start rx queues %d", ret);
20237544700SLong Li 		goto failed_rx;
20337544700SLong Li 	}
20437544700SLong Li 
20537544700SLong Li 	rte_wmb();
20637544700SLong Li 
20737544700SLong Li 	dev->tx_pkt_burst = mana_tx_burst;
20837544700SLong Li 	dev->rx_pkt_burst = mana_rx_burst;
20937544700SLong Li 
21037544700SLong Li 	DRV_LOG(INFO, "TX/RX queues have started");
21137544700SLong Li 
21237544700SLong Li 	/* Enable datapath for secondary processes */
21337544700SLong Li 	mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX);
21437544700SLong Li 
215afd5d170SLong Li 	ret = rxq_intr_enable(priv);
216afd5d170SLong Li 	if (ret) {
217afd5d170SLong Li 		DRV_LOG(ERR, "Failed to enable RX interrupts");
218afd5d170SLong Li 		goto failed_intr;
219afd5d170SLong Li 	}
220afd5d170SLong Li 
22137544700SLong Li 	return 0;
22237544700SLong Li 
223afd5d170SLong Li failed_intr:
224afd5d170SLong Li 	mana_stop_rx_queues(dev);
225afd5d170SLong Li 
22637544700SLong Li failed_rx:
22737544700SLong Li 	mana_stop_tx_queues(dev);
22837544700SLong Li 
22937544700SLong Li failed_tx:
23037544700SLong Li 	mana_mr_btree_free(&priv->mr_btree);
23137544700SLong Li 
23237544700SLong Li 	return ret;
23337544700SLong Li }
23437544700SLong Li 
23537544700SLong Li static int
mana_dev_stop(struct rte_eth_dev * dev)236afd5d170SLong Li mana_dev_stop(struct rte_eth_dev *dev)
23737544700SLong Li {
23837544700SLong Li 	int ret;
239afd5d170SLong Li 	struct mana_priv *priv = dev->data->dev_private;
240afd5d170SLong Li 
241afd5d170SLong Li 	rxq_intr_disable(priv);
24237544700SLong Li 
24337544700SLong Li 	dev->tx_pkt_burst = mana_tx_burst_removed;
24437544700SLong Li 	dev->rx_pkt_burst = mana_rx_burst_removed;
24537544700SLong Li 
24637544700SLong Li 	/* Stop datapath on secondary processes */
24737544700SLong Li 	mana_mp_req_on_rxtx(dev, MANA_MP_REQ_STOP_RXTX);
24837544700SLong Li 
24937544700SLong Li 	rte_wmb();
25037544700SLong Li 
25137544700SLong Li 	ret = mana_stop_tx_queues(dev);
25237544700SLong Li 	if (ret) {
25337544700SLong Li 		DRV_LOG(ERR, "failed to stop tx queues");
25437544700SLong Li 		return ret;
25537544700SLong Li 	}
25637544700SLong Li 
25737544700SLong Li 	ret = mana_stop_rx_queues(dev);
25837544700SLong Li 	if (ret) {
25937544700SLong Li 		DRV_LOG(ERR, "failed to stop tx queues");
26037544700SLong Li 		return ret;
26137544700SLong Li 	}
26237544700SLong Li 
26337544700SLong Li 	return 0;
26437544700SLong Li }
26537544700SLong Li 
266afd5d170SLong Li static int mana_intr_uninstall(struct mana_priv *priv);
267afd5d170SLong Li 
26837544700SLong Li static int
mana_dev_close(struct rte_eth_dev * dev)2690dbfecfeSLong Li mana_dev_close(struct rte_eth_dev *dev)
2700dbfecfeSLong Li {
2710dbfecfeSLong Li 	struct mana_priv *priv = dev->data->dev_private;
2720dbfecfeSLong Li 	int ret;
2730dbfecfeSLong Li 
2740f5db3c6SLong Li 	mana_remove_all_mr(priv);
2750f5db3c6SLong Li 
276bd15f237SLong Li 	ret = mana_intr_uninstall(priv);
277bd15f237SLong Li 	if (ret)
278bd15f237SLong Li 		return ret;
279bd15f237SLong Li 
2800dbfecfeSLong Li 	ret = ibv_close_device(priv->ib_ctx);
2810dbfecfeSLong Li 	if (ret) {
2820dbfecfeSLong Li 		ret = errno;
2830dbfecfeSLong Li 		return ret;
2840dbfecfeSLong Li 	}
2850dbfecfeSLong Li 
2860dbfecfeSLong Li 	return 0;
2870dbfecfeSLong Li }
2880dbfecfeSLong Li 
289d878cb09SLong Li static int
mana_dev_info_get(struct rte_eth_dev * dev,struct rte_eth_dev_info * dev_info)290d878cb09SLong Li mana_dev_info_get(struct rte_eth_dev *dev,
291d878cb09SLong Li 		  struct rte_eth_dev_info *dev_info)
292d878cb09SLong Li {
293d878cb09SLong Li 	struct mana_priv *priv = dev->data->dev_private;
294d878cb09SLong Li 
29584497839SLong Li 	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
29684497839SLong Li 	dev_info->max_mtu = MANA_MAX_MTU;
297d878cb09SLong Li 
298d878cb09SLong Li 	/* RX params */
299d878cb09SLong Li 	dev_info->min_rx_bufsize = MIN_RX_BUF_SIZE;
30084497839SLong Li 	dev_info->max_rx_pktlen = MANA_MAX_MTU + RTE_ETHER_HDR_LEN;
301d878cb09SLong Li 
302b7e79896SLong Li 	dev_info->max_rx_queues = RTE_MIN(priv->max_rx_queues, UINT16_MAX);
303b7e79896SLong Li 	dev_info->max_tx_queues = RTE_MIN(priv->max_tx_queues, UINT16_MAX);
304d878cb09SLong Li 
305d878cb09SLong Li 	dev_info->max_mac_addrs = MANA_MAX_MAC_ADDR;
306d878cb09SLong Li 	dev_info->max_hash_mac_addrs = 0;
307d878cb09SLong Li 
308d878cb09SLong Li 	dev_info->max_vfs = 1;
309d878cb09SLong Li 
310d878cb09SLong Li 	/* Offload params */
311d878cb09SLong Li 	dev_info->rx_offload_capa = MANA_DEV_RX_OFFLOAD_SUPPORT;
312d878cb09SLong Li 
313d878cb09SLong Li 	dev_info->tx_offload_capa = MANA_DEV_TX_OFFLOAD_SUPPORT;
314d878cb09SLong Li 
315d878cb09SLong Li 	/* RSS */
316d878cb09SLong Li 	dev_info->reta_size = INDIRECTION_TABLE_NUM_ELEMENTS;
317d878cb09SLong Li 	dev_info->hash_key_size = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES;
318d878cb09SLong Li 	dev_info->flow_type_rss_offloads = MANA_ETH_RSS_SUPPORT;
319d878cb09SLong Li 
320d878cb09SLong Li 	/* Thresholds */
321d878cb09SLong Li 	dev_info->default_rxconf = (struct rte_eth_rxconf){
322d878cb09SLong Li 		.rx_thresh = {
323d878cb09SLong Li 			.pthresh = 8,
324d878cb09SLong Li 			.hthresh = 8,
325d878cb09SLong Li 			.wthresh = 0,
326d878cb09SLong Li 		},
327d878cb09SLong Li 		.rx_free_thresh = 32,
328d878cb09SLong Li 		/* If no descriptors available, pkts are dropped by default */
329d878cb09SLong Li 		.rx_drop_en = 1,
330d878cb09SLong Li 	};
331d878cb09SLong Li 
332d878cb09SLong Li 	dev_info->default_txconf = (struct rte_eth_txconf){
333d878cb09SLong Li 		.tx_thresh = {
334d878cb09SLong Li 			.pthresh = 32,
335d878cb09SLong Li 			.hthresh = 0,
336d878cb09SLong Li 			.wthresh = 0,
337d878cb09SLong Li 		},
338d878cb09SLong Li 		.tx_rs_thresh = 32,
339d878cb09SLong Li 		.tx_free_thresh = 32,
340d878cb09SLong Li 	};
341d878cb09SLong Li 
342d878cb09SLong Li 	/* Buffer limits */
343d878cb09SLong Li 	dev_info->rx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
344b7e79896SLong Li 	dev_info->rx_desc_lim.nb_max = RTE_MIN(priv->max_rx_desc, UINT16_MAX);
345d878cb09SLong Li 	dev_info->rx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
346b7e79896SLong Li 	dev_info->rx_desc_lim.nb_seg_max =
347b7e79896SLong Li 		RTE_MIN(priv->max_recv_sge, UINT16_MAX);
348b7e79896SLong Li 	dev_info->rx_desc_lim.nb_mtu_seg_max =
349b7e79896SLong Li 		RTE_MIN(priv->max_recv_sge, UINT16_MAX);
350d878cb09SLong Li 
351d878cb09SLong Li 	dev_info->tx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
352b7e79896SLong Li 	dev_info->tx_desc_lim.nb_max = RTE_MIN(priv->max_tx_desc, UINT16_MAX);
353d878cb09SLong Li 	dev_info->tx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
354b7e79896SLong Li 	dev_info->tx_desc_lim.nb_seg_max =
355b7e79896SLong Li 		RTE_MIN(priv->max_send_sge, UINT16_MAX);
356b7e79896SLong Li 	dev_info->tx_desc_lim.nb_mtu_seg_max =
357b7e79896SLong Li 		RTE_MIN(priv->max_send_sge, UINT16_MAX);
358d878cb09SLong Li 
359d878cb09SLong Li 	/* Speed */
360d878cb09SLong Li 	dev_info->speed_capa = RTE_ETH_LINK_SPEED_100G;
361d878cb09SLong Li 
362d878cb09SLong Li 	/* RX params */
363d878cb09SLong Li 	dev_info->default_rxportconf.burst_size = 1;
364d878cb09SLong Li 	dev_info->default_rxportconf.ring_size = MAX_RECEIVE_BUFFERS_PER_QUEUE;
365d878cb09SLong Li 	dev_info->default_rxportconf.nb_queues = 1;
366d878cb09SLong Li 
367d878cb09SLong Li 	/* TX params */
368d878cb09SLong Li 	dev_info->default_txportconf.burst_size = 1;
369d878cb09SLong Li 	dev_info->default_txportconf.ring_size = MAX_SEND_BUFFERS_PER_QUEUE;
370d878cb09SLong Li 	dev_info->default_txportconf.nb_queues = 1;
371d878cb09SLong Li 
372d878cb09SLong Li 	return 0;
373d878cb09SLong Li }
374d878cb09SLong Li 
3750c63c005SLong Li static void
mana_dev_tx_queue_info(struct rte_eth_dev * dev,uint16_t queue_id,struct rte_eth_txq_info * qinfo)376f7dc479aSLong Li mana_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
377f7dc479aSLong Li 		       struct rte_eth_txq_info *qinfo)
378f7dc479aSLong Li {
379f7dc479aSLong Li 	struct mana_txq *txq = dev->data->tx_queues[queue_id];
380f7dc479aSLong Li 
381f7dc479aSLong Li 	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
382f7dc479aSLong Li 	qinfo->nb_desc = txq->num_desc;
383f7dc479aSLong Li }
384f7dc479aSLong Li 
385f7dc479aSLong Li static void
mana_dev_rx_queue_info(struct rte_eth_dev * dev,uint16_t queue_id,struct rte_eth_rxq_info * qinfo)3860c63c005SLong Li mana_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
3870c63c005SLong Li 		       struct rte_eth_rxq_info *qinfo)
3880c63c005SLong Li {
3890c63c005SLong Li 	struct mana_rxq *rxq = dev->data->rx_queues[queue_id];
3900c63c005SLong Li 
3910c63c005SLong Li 	qinfo->mp = rxq->mp;
3920c63c005SLong Li 	qinfo->nb_desc = rxq->num_desc;
3930c63c005SLong Li 	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
3940c63c005SLong Li }
3950c63c005SLong Li 
396d9679c3aSLong Li static const uint32_t *
mana_supported_ptypes(struct rte_eth_dev * dev __rte_unused,size_t * no_of_elements)397ba6a168aSSivaramakrishnan Venkat mana_supported_ptypes(struct rte_eth_dev *dev __rte_unused,
398ba6a168aSSivaramakrishnan Venkat 		      size_t *no_of_elements)
399d9679c3aSLong Li {
400d9679c3aSLong Li 	static const uint32_t ptypes[] = {
401d9679c3aSLong Li 		RTE_PTYPE_L2_ETHER,
402d9679c3aSLong Li 		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
403d9679c3aSLong Li 		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
404d9679c3aSLong Li 		RTE_PTYPE_L4_FRAG,
405d9679c3aSLong Li 		RTE_PTYPE_L4_TCP,
406d9679c3aSLong Li 		RTE_PTYPE_L4_UDP,
407d9679c3aSLong Li 	};
408d9679c3aSLong Li 
409ba6a168aSSivaramakrishnan Venkat 	*no_of_elements = RTE_DIM(ptypes);
410d9679c3aSLong Li 	return ptypes;
411d9679c3aSLong Li }
412d9679c3aSLong Li 
41321958568SLong Li static int
mana_rss_hash_update(struct rte_eth_dev * dev,struct rte_eth_rss_conf * rss_conf)414a382177cSLong Li mana_rss_hash_update(struct rte_eth_dev *dev,
415a382177cSLong Li 		     struct rte_eth_rss_conf *rss_conf)
416a382177cSLong Li {
417a382177cSLong Li 	struct mana_priv *priv = dev->data->dev_private;
418a382177cSLong Li 
419a382177cSLong Li 	/* Currently can only update RSS hash when device is stopped */
420a382177cSLong Li 	if (dev->data->dev_started) {
421a382177cSLong Li 		DRV_LOG(ERR, "Can't update RSS after device has started");
422a382177cSLong Li 		return -ENODEV;
423a382177cSLong Li 	}
424a382177cSLong Li 
425a382177cSLong Li 	if (rss_conf->rss_hf & ~MANA_ETH_RSS_SUPPORT) {
426a382177cSLong Li 		DRV_LOG(ERR, "Port %u invalid RSS HF 0x%" PRIx64,
427a382177cSLong Li 			dev->data->port_id, rss_conf->rss_hf);
428a382177cSLong Li 		return -EINVAL;
429a382177cSLong Li 	}
430a382177cSLong Li 
431a382177cSLong Li 	if (rss_conf->rss_key && rss_conf->rss_key_len) {
432a382177cSLong Li 		if (rss_conf->rss_key_len != TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) {
433a382177cSLong Li 			DRV_LOG(ERR, "Port %u key len must be %u long",
434a382177cSLong Li 				dev->data->port_id,
435a382177cSLong Li 				TOEPLITZ_HASH_KEY_SIZE_IN_BYTES);
436a382177cSLong Li 			return -EINVAL;
437a382177cSLong Li 		}
438a382177cSLong Li 
439a382177cSLong Li 		priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
440a382177cSLong Li 		priv->rss_conf.rss_key =
441a382177cSLong Li 			rte_zmalloc("mana_rss", rss_conf->rss_key_len,
442a382177cSLong Li 				    RTE_CACHE_LINE_SIZE);
443a382177cSLong Li 		if (!priv->rss_conf.rss_key)
444a382177cSLong Li 			return -ENOMEM;
445a382177cSLong Li 		memcpy(priv->rss_conf.rss_key, rss_conf->rss_key,
446a382177cSLong Li 		       rss_conf->rss_key_len);
447a382177cSLong Li 	}
448a382177cSLong Li 	priv->rss_conf.rss_hf = rss_conf->rss_hf;
449a382177cSLong Li 
450a382177cSLong Li 	return 0;
451a382177cSLong Li }
452a382177cSLong Li 
453a382177cSLong Li static int
mana_rss_hash_conf_get(struct rte_eth_dev * dev,struct rte_eth_rss_conf * rss_conf)454a382177cSLong Li mana_rss_hash_conf_get(struct rte_eth_dev *dev,
455a382177cSLong Li 		       struct rte_eth_rss_conf *rss_conf)
456a382177cSLong Li {
457a382177cSLong Li 	struct mana_priv *priv = dev->data->dev_private;
458a382177cSLong Li 
459a382177cSLong Li 	if (!rss_conf)
460a382177cSLong Li 		return -EINVAL;
461a382177cSLong Li 
462a382177cSLong Li 	if (rss_conf->rss_key &&
463a382177cSLong Li 	    rss_conf->rss_key_len >= priv->rss_conf.rss_key_len) {
464a382177cSLong Li 		memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
465a382177cSLong Li 		       priv->rss_conf.rss_key_len);
466a382177cSLong Li 	}
467a382177cSLong Li 
468a382177cSLong Li 	rss_conf->rss_key_len = priv->rss_conf.rss_key_len;
469a382177cSLong Li 	rss_conf->rss_hf = priv->rss_conf.rss_hf;
470a382177cSLong Li 
471a382177cSLong Li 	return 0;
472a382177cSLong Li }
473a382177cSLong Li 
474a382177cSLong Li static int
mana_dev_tx_queue_setup(struct rte_eth_dev * dev,uint16_t queue_idx,uint16_t nb_desc,unsigned int socket_id,const struct rte_eth_txconf * tx_conf __rte_unused)475f7dc479aSLong Li mana_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
476f7dc479aSLong Li 			uint16_t nb_desc, unsigned int socket_id,
477f7dc479aSLong Li 			const struct rte_eth_txconf *tx_conf __rte_unused)
478f7dc479aSLong Li 
479f7dc479aSLong Li {
480f7dc479aSLong Li 	struct mana_priv *priv = dev->data->dev_private;
481f7dc479aSLong Li 	struct mana_txq *txq;
482f7dc479aSLong Li 	int ret;
483f7dc479aSLong Li 
484f7dc479aSLong Li 	txq = rte_zmalloc_socket("mana_txq", sizeof(*txq), 0, socket_id);
485f7dc479aSLong Li 	if (!txq) {
486f7dc479aSLong Li 		DRV_LOG(ERR, "failed to allocate txq");
487f7dc479aSLong Li 		return -ENOMEM;
488f7dc479aSLong Li 	}
489f7dc479aSLong Li 
490f7dc479aSLong Li 	txq->socket = socket_id;
491f7dc479aSLong Li 
492f7dc479aSLong Li 	txq->desc_ring = rte_malloc_socket("mana_tx_desc_ring",
493f7dc479aSLong Li 					   sizeof(struct mana_txq_desc) *
494f7dc479aSLong Li 						nb_desc,
495f7dc479aSLong Li 					   RTE_CACHE_LINE_SIZE, socket_id);
496f7dc479aSLong Li 	if (!txq->desc_ring) {
497f7dc479aSLong Li 		DRV_LOG(ERR, "failed to allocate txq desc_ring");
498f7dc479aSLong Li 		ret = -ENOMEM;
499f7dc479aSLong Li 		goto fail;
500f7dc479aSLong Li 	}
501f7dc479aSLong Li 
50231124619SLong Li 	txq->gdma_comp_buf = rte_malloc_socket("mana_txq_comp",
50331124619SLong Li 			sizeof(*txq->gdma_comp_buf) * nb_desc,
50431124619SLong Li 			RTE_CACHE_LINE_SIZE, socket_id);
50531124619SLong Li 	if (!txq->gdma_comp_buf) {
50631124619SLong Li 		DRV_LOG(ERR, "failed to allocate txq comp");
50731124619SLong Li 		ret = -ENOMEM;
50831124619SLong Li 		goto fail;
50931124619SLong Li 	}
51031124619SLong Li 
5110f5db3c6SLong Li 	ret = mana_mr_btree_init(&txq->mr_btree,
5120f5db3c6SLong Li 				 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
5130f5db3c6SLong Li 	if (ret) {
5140f5db3c6SLong Li 		DRV_LOG(ERR, "Failed to init TXQ MR btree");
5150f5db3c6SLong Li 		goto fail;
5160f5db3c6SLong Li 	}
5170f5db3c6SLong Li 
518f7dc479aSLong Li 	DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u txq->desc_ring %p",
519f7dc479aSLong Li 		queue_idx, nb_desc, socket_id, txq->desc_ring);
520f7dc479aSLong Li 
521f7dc479aSLong Li 	txq->desc_ring_head = 0;
522f7dc479aSLong Li 	txq->desc_ring_tail = 0;
523f7dc479aSLong Li 	txq->priv = priv;
524f7dc479aSLong Li 	txq->num_desc = nb_desc;
525f7dc479aSLong Li 	dev->data->tx_queues[queue_idx] = txq;
526f7dc479aSLong Li 
527f7dc479aSLong Li 	return 0;
528f7dc479aSLong Li 
529f7dc479aSLong Li fail:
53031124619SLong Li 	rte_free(txq->gdma_comp_buf);
531f7dc479aSLong Li 	rte_free(txq->desc_ring);
532f7dc479aSLong Li 	rte_free(txq);
533f7dc479aSLong Li 	return ret;
534f7dc479aSLong Li }
535f7dc479aSLong Li 
536f7dc479aSLong Li static void
mana_dev_tx_queue_release(struct rte_eth_dev * dev,uint16_t qid)537f7dc479aSLong Li mana_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
538f7dc479aSLong Li {
539f7dc479aSLong Li 	struct mana_txq *txq = dev->data->tx_queues[qid];
540f7dc479aSLong Li 
5410f5db3c6SLong Li 	mana_mr_btree_free(&txq->mr_btree);
5420f5db3c6SLong Li 
54331124619SLong Li 	rte_free(txq->gdma_comp_buf);
544f7dc479aSLong Li 	rte_free(txq->desc_ring);
545f7dc479aSLong Li 	rte_free(txq);
546f7dc479aSLong Li }
547f7dc479aSLong Li 
548f7dc479aSLong Li static int
mana_dev_rx_queue_setup(struct rte_eth_dev * dev,uint16_t queue_idx,uint16_t nb_desc,unsigned int socket_id,const struct rte_eth_rxconf * rx_conf __rte_unused,struct rte_mempool * mp)5490c63c005SLong Li mana_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
5500c63c005SLong Li 			uint16_t nb_desc, unsigned int socket_id,
5510c63c005SLong Li 			const struct rte_eth_rxconf *rx_conf __rte_unused,
5520c63c005SLong Li 			struct rte_mempool *mp)
5530c63c005SLong Li {
5540c63c005SLong Li 	struct mana_priv *priv = dev->data->dev_private;
5550c63c005SLong Li 	struct mana_rxq *rxq;
5560c63c005SLong Li 	int ret;
5570c63c005SLong Li 
5580c63c005SLong Li 	rxq = rte_zmalloc_socket("mana_rxq", sizeof(*rxq), 0, socket_id);
5590c63c005SLong Li 	if (!rxq) {
5600c63c005SLong Li 		DRV_LOG(ERR, "failed to allocate rxq");
5610c63c005SLong Li 		return -ENOMEM;
5620c63c005SLong Li 	}
5630c63c005SLong Li 
5640c63c005SLong Li 	DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u",
5650c63c005SLong Li 		queue_idx, nb_desc, socket_id);
5660c63c005SLong Li 
5670c63c005SLong Li 	rxq->socket = socket_id;
5680c63c005SLong Li 
5690c63c005SLong Li 	rxq->desc_ring = rte_zmalloc_socket("mana_rx_mbuf_ring",
5700c63c005SLong Li 					    sizeof(struct mana_rxq_desc) *
5710c63c005SLong Li 						nb_desc,
5720c63c005SLong Li 					    RTE_CACHE_LINE_SIZE, socket_id);
5730c63c005SLong Li 
5740c63c005SLong Li 	if (!rxq->desc_ring) {
5750c63c005SLong Li 		DRV_LOG(ERR, "failed to allocate rxq desc_ring");
5760c63c005SLong Li 		ret = -ENOMEM;
5770c63c005SLong Li 		goto fail;
5780c63c005SLong Li 	}
5790c63c005SLong Li 
5800c63c005SLong Li 	rxq->desc_ring_head = 0;
5810c63c005SLong Li 	rxq->desc_ring_tail = 0;
5820c63c005SLong Li 
58331124619SLong Li 	rxq->gdma_comp_buf = rte_malloc_socket("mana_rxq_comp",
58431124619SLong Li 			sizeof(*rxq->gdma_comp_buf) * nb_desc,
58531124619SLong Li 			RTE_CACHE_LINE_SIZE, socket_id);
58631124619SLong Li 	if (!rxq->gdma_comp_buf) {
58731124619SLong Li 		DRV_LOG(ERR, "failed to allocate rxq comp");
58831124619SLong Li 		ret = -ENOMEM;
58931124619SLong Li 		goto fail;
59031124619SLong Li 	}
59131124619SLong Li 
5920f5db3c6SLong Li 	ret = mana_mr_btree_init(&rxq->mr_btree,
5930f5db3c6SLong Li 				 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
5940f5db3c6SLong Li 	if (ret) {
5950f5db3c6SLong Li 		DRV_LOG(ERR, "Failed to init RXQ MR btree");
5960f5db3c6SLong Li 		goto fail;
5970f5db3c6SLong Li 	}
5980f5db3c6SLong Li 
5990c63c005SLong Li 	rxq->priv = priv;
6000c63c005SLong Li 	rxq->num_desc = nb_desc;
6010c63c005SLong Li 	rxq->mp = mp;
6020c63c005SLong Li 	dev->data->rx_queues[queue_idx] = rxq;
6030c63c005SLong Li 
6040c63c005SLong Li 	return 0;
6050c63c005SLong Li 
6060c63c005SLong Li fail:
60731124619SLong Li 	rte_free(rxq->gdma_comp_buf);
6080c63c005SLong Li 	rte_free(rxq->desc_ring);
6090c63c005SLong Li 	rte_free(rxq);
6100c63c005SLong Li 	return ret;
6110c63c005SLong Li }
6120c63c005SLong Li 
6130c63c005SLong Li static void
mana_dev_rx_queue_release(struct rte_eth_dev * dev,uint16_t qid)6140c63c005SLong Li mana_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
6150c63c005SLong Li {
6160c63c005SLong Li 	struct mana_rxq *rxq = dev->data->rx_queues[qid];
6170c63c005SLong Li 
6180f5db3c6SLong Li 	mana_mr_btree_free(&rxq->mr_btree);
6190f5db3c6SLong Li 
62031124619SLong Li 	rte_free(rxq->gdma_comp_buf);
6210c63c005SLong Li 	rte_free(rxq->desc_ring);
6220c63c005SLong Li 	rte_free(rxq);
6230c63c005SLong Li }
6240c63c005SLong Li 
6250c63c005SLong Li static int
mana_dev_link_update(struct rte_eth_dev * dev,int wait_to_complete __rte_unused)62621958568SLong Li mana_dev_link_update(struct rte_eth_dev *dev,
62721958568SLong Li 		     int wait_to_complete __rte_unused)
62821958568SLong Li {
62921958568SLong Li 	struct rte_eth_link link;
63021958568SLong Li 
63121958568SLong Li 	/* MANA has no concept of carrier state, always reporting UP */
63221958568SLong Li 	link = (struct rte_eth_link) {
63321958568SLong Li 		.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
63421958568SLong Li 		.link_autoneg = RTE_ETH_LINK_SPEED_FIXED,
63521958568SLong Li 		.link_speed = RTE_ETH_SPEED_NUM_100G,
63621958568SLong Li 		.link_status = RTE_ETH_LINK_UP,
63721958568SLong Li 	};
63821958568SLong Li 
63921958568SLong Li 	return rte_eth_linkstatus_set(dev, &link);
64021958568SLong Li }
64121958568SLong Li 
642e350b568SLong Li static int
mana_dev_stats_get(struct rte_eth_dev * dev,struct rte_eth_stats * stats)643e350b568SLong Li mana_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
644e350b568SLong Li {
645e350b568SLong Li 	unsigned int i;
646e350b568SLong Li 
647e350b568SLong Li 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
648e350b568SLong Li 		struct mana_txq *txq = dev->data->tx_queues[i];
649e350b568SLong Li 
650e350b568SLong Li 		if (!txq)
651e350b568SLong Li 			continue;
652e350b568SLong Li 
653eb6f507aSLong Li 		stats->opackets += txq->stats.packets;
654eb6f507aSLong Li 		stats->obytes += txq->stats.bytes;
655eb6f507aSLong Li 		stats->oerrors += txq->stats.errors;
656e350b568SLong Li 
657e350b568SLong Li 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
658e350b568SLong Li 			stats->q_opackets[i] = txq->stats.packets;
659e350b568SLong Li 			stats->q_obytes[i] = txq->stats.bytes;
660e350b568SLong Li 		}
661e350b568SLong Li 	}
662e350b568SLong Li 
663e350b568SLong Li 	stats->rx_nombuf = 0;
664e350b568SLong Li 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
665e350b568SLong Li 		struct mana_rxq *rxq = dev->data->rx_queues[i];
666e350b568SLong Li 
667e350b568SLong Li 		if (!rxq)
668e350b568SLong Li 			continue;
669e350b568SLong Li 
670eb6f507aSLong Li 		stats->ipackets += rxq->stats.packets;
671eb6f507aSLong Li 		stats->ibytes += rxq->stats.bytes;
672eb6f507aSLong Li 		stats->ierrors += rxq->stats.errors;
673e350b568SLong Li 
674e350b568SLong Li 		/* There is no good way to get stats->imissed, not setting it */
675e350b568SLong Li 
676e350b568SLong Li 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
677e350b568SLong Li 			stats->q_ipackets[i] = rxq->stats.packets;
678e350b568SLong Li 			stats->q_ibytes[i] = rxq->stats.bytes;
679e350b568SLong Li 		}
680e350b568SLong Li 
681e350b568SLong Li 		stats->rx_nombuf += rxq->stats.nombuf;
682e350b568SLong Li 	}
683e350b568SLong Li 
684e350b568SLong Li 	return 0;
685e350b568SLong Li }
686e350b568SLong Li 
687e350b568SLong Li static int
mana_dev_stats_reset(struct rte_eth_dev * dev __rte_unused)688e350b568SLong Li mana_dev_stats_reset(struct rte_eth_dev *dev __rte_unused)
689e350b568SLong Li {
690e350b568SLong Li 	unsigned int i;
691e350b568SLong Li 
692e350b568SLong Li 	PMD_INIT_FUNC_TRACE();
693e350b568SLong Li 
694e350b568SLong Li 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
695e350b568SLong Li 		struct mana_txq *txq = dev->data->tx_queues[i];
696e350b568SLong Li 
697e350b568SLong Li 		if (!txq)
698e350b568SLong Li 			continue;
699e350b568SLong Li 
700e350b568SLong Li 		memset(&txq->stats, 0, sizeof(txq->stats));
701e350b568SLong Li 	}
702e350b568SLong Li 
703e350b568SLong Li 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
704e350b568SLong Li 		struct mana_rxq *rxq = dev->data->rx_queues[i];
705e350b568SLong Li 
706e350b568SLong Li 		if (!rxq)
707e350b568SLong Li 			continue;
708e350b568SLong Li 
709e350b568SLong Li 		memset(&rxq->stats, 0, sizeof(rxq->stats));
710e350b568SLong Li 	}
711e350b568SLong Li 
712e350b568SLong Li 	return 0;
713e350b568SLong Li }
714e350b568SLong Li 
71584497839SLong Li static int
mana_get_ifname(const struct mana_priv * priv,char (* ifname)[IF_NAMESIZE])71684497839SLong Li mana_get_ifname(const struct mana_priv *priv, char (*ifname)[IF_NAMESIZE])
71784497839SLong Li {
718*c2c0c8afSMahmoud Maatuq 	int ret = -ENODEV;
71984497839SLong Li 	DIR *dir;
72084497839SLong Li 	struct dirent *dent;
72184497839SLong Li 
72284497839SLong Li 	MANA_MKSTR(dirpath, "%s/device/net", priv->ib_ctx->device->ibdev_path);
72384497839SLong Li 
72484497839SLong Li 	dir = opendir(dirpath);
72584497839SLong Li 	if (dir == NULL)
72684497839SLong Li 		return -ENODEV;
72784497839SLong Li 
72884497839SLong Li 	while ((dent = readdir(dir)) != NULL) {
72984497839SLong Li 		char *name = dent->d_name;
73084497839SLong Li 		FILE *file;
73184497839SLong Li 		struct rte_ether_addr addr;
73284497839SLong Li 		char *mac = NULL;
73384497839SLong Li 
73484497839SLong Li 		if ((name[0] == '.') &&
73584497839SLong Li 		    ((name[1] == '\0') ||
73684497839SLong Li 		     ((name[1] == '.') && (name[2] == '\0'))))
73784497839SLong Li 			continue;
73884497839SLong Li 
73984497839SLong Li 		MANA_MKSTR(path, "%s/%s/address", dirpath, name);
74084497839SLong Li 
74184497839SLong Li 		file = fopen(path, "r");
74284497839SLong Li 		if (!file) {
74384497839SLong Li 			ret = -ENODEV;
74484497839SLong Li 			break;
74584497839SLong Li 		}
74684497839SLong Li 
74784497839SLong Li 		ret = fscanf(file, "%ms", &mac);
74884497839SLong Li 		fclose(file);
74984497839SLong Li 
75084497839SLong Li 		if (ret <= 0) {
75184497839SLong Li 			ret = -EINVAL;
75284497839SLong Li 			break;
75384497839SLong Li 		}
75484497839SLong Li 
75584497839SLong Li 		ret = rte_ether_unformat_addr(mac, &addr);
75684497839SLong Li 		free(mac);
75784497839SLong Li 		if (ret)
75884497839SLong Li 			break;
75984497839SLong Li 
76084497839SLong Li 		if (rte_is_same_ether_addr(&addr, priv->dev_data->mac_addrs)) {
76184497839SLong Li 			strlcpy(*ifname, name, sizeof(*ifname));
76284497839SLong Li 			ret = 0;
76384497839SLong Li 			break;
76484497839SLong Li 		}
76584497839SLong Li 	}
76684497839SLong Li 
76784497839SLong Li 	closedir(dir);
76884497839SLong Li 	return ret;
76984497839SLong Li }
77084497839SLong Li 
77184497839SLong Li static int
mana_ifreq(const struct mana_priv * priv,int req,struct ifreq * ifr)77284497839SLong Li mana_ifreq(const struct mana_priv *priv, int req, struct ifreq *ifr)
77384497839SLong Li {
77484497839SLong Li 	int sock, ret;
77584497839SLong Li 
77684497839SLong Li 	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
77784497839SLong Li 	if (sock == -1)
77884497839SLong Li 		return -errno;
77984497839SLong Li 
78084497839SLong Li 	ret = mana_get_ifname(priv, &ifr->ifr_name);
78184497839SLong Li 	if (ret) {
78284497839SLong Li 		close(sock);
78384497839SLong Li 		return ret;
78484497839SLong Li 	}
78584497839SLong Li 
78684497839SLong Li 	if (ioctl(sock, req, ifr) == -1)
78784497839SLong Li 		ret = -errno;
78884497839SLong Li 
78984497839SLong Li 	close(sock);
79084497839SLong Li 
79184497839SLong Li 	return ret;
79284497839SLong Li }
79384497839SLong Li 
79484497839SLong Li static int
mana_mtu_set(struct rte_eth_dev * dev,uint16_t mtu)79584497839SLong Li mana_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
79684497839SLong Li {
79784497839SLong Li 	struct mana_priv *priv = dev->data->dev_private;
79884497839SLong Li 	struct ifreq request = { .ifr_mtu = mtu, };
79984497839SLong Li 
80084497839SLong Li 	return mana_ifreq(priv, SIOCSIFMTU, &request);
80184497839SLong Li }
80284497839SLong Li 
803517ed6e2SLong Li static const struct eth_dev_ops mana_dev_ops = {
8040dbfecfeSLong Li 	.dev_configure		= mana_dev_configure,
80537544700SLong Li 	.dev_start		= mana_dev_start,
80637544700SLong Li 	.dev_stop		= mana_dev_stop,
8070dbfecfeSLong Li 	.dev_close		= mana_dev_close,
808d878cb09SLong Li 	.dev_infos_get		= mana_dev_info_get,
809f7dc479aSLong Li 	.txq_info_get		= mana_dev_tx_queue_info,
8100c63c005SLong Li 	.rxq_info_get		= mana_dev_rx_queue_info,
811d9679c3aSLong Li 	.dev_supported_ptypes_get = mana_supported_ptypes,
812a382177cSLong Li 	.rss_hash_update	= mana_rss_hash_update,
813a382177cSLong Li 	.rss_hash_conf_get	= mana_rss_hash_conf_get,
814f7dc479aSLong Li 	.tx_queue_setup		= mana_dev_tx_queue_setup,
815f7dc479aSLong Li 	.tx_queue_release	= mana_dev_tx_queue_release,
8160c63c005SLong Li 	.rx_queue_setup		= mana_dev_rx_queue_setup,
8170c63c005SLong Li 	.rx_queue_release	= mana_dev_rx_queue_release,
818afd5d170SLong Li 	.rx_queue_intr_enable	= mana_rx_intr_enable,
819afd5d170SLong Li 	.rx_queue_intr_disable	= mana_rx_intr_disable,
82021958568SLong Li 	.link_update		= mana_dev_link_update,
821e350b568SLong Li 	.stats_get		= mana_dev_stats_get,
822e350b568SLong Li 	.stats_reset		= mana_dev_stats_reset,
82384497839SLong Li 	.mtu_set		= mana_mtu_set,
824517ed6e2SLong Li };
825517ed6e2SLong Li 
826517ed6e2SLong Li static const struct eth_dev_ops mana_dev_secondary_ops = {
827e350b568SLong Li 	.stats_get = mana_dev_stats_get,
828e350b568SLong Li 	.stats_reset = mana_dev_stats_reset,
829d878cb09SLong Li 	.dev_infos_get = mana_dev_info_get,
830517ed6e2SLong Li };
831517ed6e2SLong Li 
832517ed6e2SLong Li uint16_t
mana_rx_burst_removed(void * dpdk_rxq __rte_unused,struct rte_mbuf ** pkts __rte_unused,uint16_t pkts_n __rte_unused)833517ed6e2SLong Li mana_rx_burst_removed(void *dpdk_rxq __rte_unused,
834517ed6e2SLong Li 		      struct rte_mbuf **pkts __rte_unused,
835517ed6e2SLong Li 		      uint16_t pkts_n __rte_unused)
836517ed6e2SLong Li {
837517ed6e2SLong Li 	rte_mb();
838517ed6e2SLong Li 	return 0;
839517ed6e2SLong Li }
840517ed6e2SLong Li 
841517ed6e2SLong Li uint16_t
mana_tx_burst_removed(void * dpdk_rxq __rte_unused,struct rte_mbuf ** pkts __rte_unused,uint16_t pkts_n __rte_unused)842517ed6e2SLong Li mana_tx_burst_removed(void *dpdk_rxq __rte_unused,
843517ed6e2SLong Li 		      struct rte_mbuf **pkts __rte_unused,
844517ed6e2SLong Li 		      uint16_t pkts_n __rte_unused)
845517ed6e2SLong Li {
846517ed6e2SLong Li 	rte_mb();
847517ed6e2SLong Li 	return 0;
848517ed6e2SLong Li }
849517ed6e2SLong Li 
/* Device argument key carrying a MAC address */
#define ETH_MANA_MAC_ARG "mac"

/* NULL-terminated list of the keys handed to rte_kvargs_parse() */
static const char * const mana_init_args[] = {
	ETH_MANA_MAC_ARG,
	NULL,
};
855517ed6e2SLong Li 
856517ed6e2SLong Li /* Support of parsing up to 8 mac address from EAL command line */
857517ed6e2SLong Li #define MAX_NUM_ADDRESS 8
858517ed6e2SLong Li struct mana_conf {
859517ed6e2SLong Li 	struct rte_ether_addr mac_array[MAX_NUM_ADDRESS];
860517ed6e2SLong Li 	unsigned int index;
861517ed6e2SLong Li };
862517ed6e2SLong Li 
863517ed6e2SLong Li static int
mana_arg_parse_callback(const char * key,const char * val,void * private)864517ed6e2SLong Li mana_arg_parse_callback(const char *key, const char *val, void *private)
865517ed6e2SLong Li {
866517ed6e2SLong Li 	struct mana_conf *conf = (struct mana_conf *)private;
867517ed6e2SLong Li 	int ret;
868517ed6e2SLong Li 
869517ed6e2SLong Li 	DRV_LOG(INFO, "key=%s value=%s index=%d", key, val, conf->index);
870517ed6e2SLong Li 
871517ed6e2SLong Li 	if (conf->index >= MAX_NUM_ADDRESS) {
872517ed6e2SLong Li 		DRV_LOG(ERR, "Exceeding max MAC address");
873517ed6e2SLong Li 		return 1;
874517ed6e2SLong Li 	}
875517ed6e2SLong Li 
876517ed6e2SLong Li 	ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]);
877517ed6e2SLong Li 	if (ret) {
878517ed6e2SLong Li 		DRV_LOG(ERR, "Invalid MAC address %s", val);
879517ed6e2SLong Li 		return ret;
880517ed6e2SLong Li 	}
881517ed6e2SLong Li 
882517ed6e2SLong Li 	conf->index++;
883517ed6e2SLong Li 
884517ed6e2SLong Li 	return 0;
885517ed6e2SLong Li }
886517ed6e2SLong Li 
887517ed6e2SLong Li static int
mana_parse_args(struct rte_devargs * devargs,struct mana_conf * conf)888517ed6e2SLong Li mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf)
889517ed6e2SLong Li {
890517ed6e2SLong Li 	struct rte_kvargs *kvlist;
891517ed6e2SLong Li 	unsigned int arg_count;
892517ed6e2SLong Li 	int ret = 0;
893517ed6e2SLong Li 
894517ed6e2SLong Li 	kvlist = rte_kvargs_parse(devargs->drv_str, mana_init_args);
895517ed6e2SLong Li 	if (!kvlist) {
896517ed6e2SLong Li 		DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str);
897517ed6e2SLong Li 		return -EINVAL;
898517ed6e2SLong Li 	}
899517ed6e2SLong Li 
900517ed6e2SLong Li 	arg_count = rte_kvargs_count(kvlist, mana_init_args[0]);
901517ed6e2SLong Li 	if (arg_count > MAX_NUM_ADDRESS) {
902517ed6e2SLong Li 		ret = -EINVAL;
903517ed6e2SLong Li 		goto free_kvlist;
904517ed6e2SLong Li 	}
905517ed6e2SLong Li 	ret = rte_kvargs_process(kvlist, mana_init_args[0],
906517ed6e2SLong Li 				 mana_arg_parse_callback, conf);
907517ed6e2SLong Li 	if (ret) {
908517ed6e2SLong Li 		DRV_LOG(ERR, "error parsing args");
909517ed6e2SLong Li 		goto free_kvlist;
910517ed6e2SLong Li 	}
911517ed6e2SLong Li 
912517ed6e2SLong Li free_kvlist:
913517ed6e2SLong Li 	rte_kvargs_free(kvlist);
914517ed6e2SLong Li 	return ret;
915517ed6e2SLong Li }
916517ed6e2SLong Li 
917517ed6e2SLong Li static int
get_port_mac(struct ibv_device * device,unsigned int port,struct rte_ether_addr * addr)918517ed6e2SLong Li get_port_mac(struct ibv_device *device, unsigned int port,
919517ed6e2SLong Li 	     struct rte_ether_addr *addr)
920517ed6e2SLong Li {
921517ed6e2SLong Li 	FILE *file;
922517ed6e2SLong Li 	int ret = 0;
923517ed6e2SLong Li 	DIR *dir;
924517ed6e2SLong Li 	struct dirent *dent;
925517ed6e2SLong Li 	unsigned int dev_port;
926517ed6e2SLong Li 
927517ed6e2SLong Li 	MANA_MKSTR(path, "%s/device/net", device->ibdev_path);
928517ed6e2SLong Li 
929517ed6e2SLong Li 	dir = opendir(path);
930517ed6e2SLong Li 	if (!dir)
931517ed6e2SLong Li 		return -ENOENT;
932517ed6e2SLong Li 
933517ed6e2SLong Li 	while ((dent = readdir(dir))) {
934517ed6e2SLong Li 		char *name = dent->d_name;
93535d55f0fSLong Li 		char *mac = NULL;
936517ed6e2SLong Li 
937517ed6e2SLong Li 		MANA_MKSTR(port_path, "%s/%s/dev_port", path, name);
938517ed6e2SLong Li 
939517ed6e2SLong Li 		/* Ignore . and .. */
940517ed6e2SLong Li 		if ((name[0] == '.') &&
941517ed6e2SLong Li 		    ((name[1] == '\0') ||
942517ed6e2SLong Li 		     ((name[1] == '.') && (name[2] == '\0'))))
943517ed6e2SLong Li 			continue;
944517ed6e2SLong Li 
945517ed6e2SLong Li 		file = fopen(port_path, "r");
946517ed6e2SLong Li 		if (!file)
947517ed6e2SLong Li 			continue;
948517ed6e2SLong Li 
949517ed6e2SLong Li 		ret = fscanf(file, "%u", &dev_port);
950517ed6e2SLong Li 		fclose(file);
951517ed6e2SLong Li 
952517ed6e2SLong Li 		if (ret != 1)
953517ed6e2SLong Li 			continue;
954517ed6e2SLong Li 
955517ed6e2SLong Li 		/* Ethernet ports start at 0, IB port start at 1 */
956517ed6e2SLong Li 		if (dev_port == port - 1) {
957517ed6e2SLong Li 			MANA_MKSTR(address_path, "%s/%s/address", path, name);
958517ed6e2SLong Li 
959517ed6e2SLong Li 			file = fopen(address_path, "r");
960517ed6e2SLong Li 			if (!file)
961517ed6e2SLong Li 				continue;
962517ed6e2SLong Li 
96335d55f0fSLong Li 			ret = fscanf(file, "%ms", &mac);
964517ed6e2SLong Li 			fclose(file);
965517ed6e2SLong Li 
966517ed6e2SLong Li 			if (ret < 0)
967517ed6e2SLong Li 				break;
968517ed6e2SLong Li 
969517ed6e2SLong Li 			ret = rte_ether_unformat_addr(mac, addr);
970517ed6e2SLong Li 			if (ret)
971517ed6e2SLong Li 				DRV_LOG(ERR, "unrecognized mac addr %s", mac);
97235d55f0fSLong Li 
97335d55f0fSLong Li 			free(mac);
974517ed6e2SLong Li 			break;
975517ed6e2SLong Li 		}
976517ed6e2SLong Li 	}
977517ed6e2SLong Li 
978517ed6e2SLong Li 	closedir(dir);
979517ed6e2SLong Li 	return ret;
980517ed6e2SLong Li }
981517ed6e2SLong Li 
982517ed6e2SLong Li static int
mana_ibv_device_to_pci_addr(const struct ibv_device * device,struct rte_pci_addr * pci_addr)983517ed6e2SLong Li mana_ibv_device_to_pci_addr(const struct ibv_device *device,
984517ed6e2SLong Li 			    struct rte_pci_addr *pci_addr)
985517ed6e2SLong Li {
986517ed6e2SLong Li 	FILE *file;
987517ed6e2SLong Li 	char *line = NULL;
988517ed6e2SLong Li 	size_t len = 0;
989517ed6e2SLong Li 
990517ed6e2SLong Li 	MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path);
991517ed6e2SLong Li 
992517ed6e2SLong Li 	file = fopen(path, "r");
993517ed6e2SLong Li 	if (!file)
994517ed6e2SLong Li 		return -errno;
995517ed6e2SLong Li 
996517ed6e2SLong Li 	while (getline(&line, &len, file) != -1) {
997517ed6e2SLong Li 		/* Extract information. */
998517ed6e2SLong Li 		if (sscanf(line,
999517ed6e2SLong Li 			   "PCI_SLOT_NAME="
1000517ed6e2SLong Li 			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
1001517ed6e2SLong Li 			   &pci_addr->domain,
1002517ed6e2SLong Li 			   &pci_addr->bus,
1003517ed6e2SLong Li 			   &pci_addr->devid,
1004517ed6e2SLong Li 			   &pci_addr->function) == 4) {
1005517ed6e2SLong Li 			break;
1006517ed6e2SLong Li 		}
1007517ed6e2SLong Li 	}
1008517ed6e2SLong Li 
1009517ed6e2SLong Li 	free(line);
1010517ed6e2SLong Li 	fclose(file);
1011517ed6e2SLong Li 	return 0;
1012517ed6e2SLong Li }
1013517ed6e2SLong Li 
1014bd15f237SLong Li /*
1015bd15f237SLong Li  * Interrupt handler from IB layer to notify this device is being removed.
1016bd15f237SLong Li  */
1017bd15f237SLong Li static void
mana_intr_handler(void * arg)1018bd15f237SLong Li mana_intr_handler(void *arg)
1019bd15f237SLong Li {
1020bd15f237SLong Li 	struct mana_priv *priv = arg;
1021bd15f237SLong Li 	struct ibv_context *ctx = priv->ib_ctx;
1022bd15f237SLong Li 	struct ibv_async_event event;
1023bd15f237SLong Li 
1024bd15f237SLong Li 	/* Read and ack all messages from IB device */
1025bd15f237SLong Li 	while (true) {
1026bd15f237SLong Li 		if (ibv_get_async_event(ctx, &event))
1027bd15f237SLong Li 			break;
1028bd15f237SLong Li 
1029bd15f237SLong Li 		if (event.event_type == IBV_EVENT_DEVICE_FATAL) {
1030bd15f237SLong Li 			struct rte_eth_dev *dev;
1031bd15f237SLong Li 
1032bd15f237SLong Li 			dev = &rte_eth_devices[priv->port_id];
1033bd15f237SLong Li 			if (dev->data->dev_conf.intr_conf.rmv)
1034bd15f237SLong Li 				rte_eth_dev_callback_process(dev,
1035bd15f237SLong Li 					RTE_ETH_EVENT_INTR_RMV, NULL);
1036bd15f237SLong Li 		}
1037bd15f237SLong Li 
1038bd15f237SLong Li 		ibv_ack_async_event(&event);
1039bd15f237SLong Li 	}
1040bd15f237SLong Li }
1041bd15f237SLong Li 
1042bd15f237SLong Li static int
mana_intr_uninstall(struct mana_priv * priv)1043bd15f237SLong Li mana_intr_uninstall(struct mana_priv *priv)
1044bd15f237SLong Li {
1045bd15f237SLong Li 	int ret;
1046bd15f237SLong Li 
1047bd15f237SLong Li 	ret = rte_intr_callback_unregister(priv->intr_handle,
1048bd15f237SLong Li 					   mana_intr_handler, priv);
1049bd15f237SLong Li 	if (ret <= 0) {
1050bd15f237SLong Li 		DRV_LOG(ERR, "Failed to unregister intr callback ret %d", ret);
1051bd15f237SLong Li 		return ret;
1052bd15f237SLong Li 	}
1053bd15f237SLong Li 
1054bd15f237SLong Li 	rte_intr_instance_free(priv->intr_handle);
1055bd15f237SLong Li 
1056bd15f237SLong Li 	return 0;
1057bd15f237SLong Li }
1058bd15f237SLong Li 
1059afd5d170SLong Li int
mana_fd_set_non_blocking(int fd)1060afd5d170SLong Li mana_fd_set_non_blocking(int fd)
1061bd15f237SLong Li {
1062afd5d170SLong Li 	int ret = fcntl(fd, F_GETFL);
1063afd5d170SLong Li 
1064afd5d170SLong Li 	if (ret != -1 && !fcntl(fd, F_SETFL, ret | O_NONBLOCK))
1065afd5d170SLong Li 		return 0;
1066afd5d170SLong Li 
1067afd5d170SLong Li 	rte_errno = errno;
1068afd5d170SLong Li 	return -rte_errno;
1069afd5d170SLong Li }
1070afd5d170SLong Li 
1071afd5d170SLong Li static int
mana_intr_install(struct rte_eth_dev * eth_dev,struct mana_priv * priv)1072afd5d170SLong Li mana_intr_install(struct rte_eth_dev *eth_dev, struct mana_priv *priv)
1073afd5d170SLong Li {
1074afd5d170SLong Li 	int ret;
1075bd15f237SLong Li 	struct ibv_context *ctx = priv->ib_ctx;
1076bd15f237SLong Li 
1077bd15f237SLong Li 	priv->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
1078bd15f237SLong Li 	if (!priv->intr_handle) {
1079bd15f237SLong Li 		DRV_LOG(ERR, "Failed to allocate intr_handle");
1080bd15f237SLong Li 		rte_errno = ENOMEM;
1081bd15f237SLong Li 		return -ENOMEM;
1082bd15f237SLong Li 	}
1083bd15f237SLong Li 
1084afd5d170SLong Li 	ret = rte_intr_fd_set(priv->intr_handle, -1);
1085afd5d170SLong Li 	if (ret)
1086afd5d170SLong Li 		goto free_intr;
1087bd15f237SLong Li 
1088afd5d170SLong Li 	ret = mana_fd_set_non_blocking(ctx->async_fd);
1089bd15f237SLong Li 	if (ret) {
1090bd15f237SLong Li 		DRV_LOG(ERR, "Failed to change async_fd to NONBLOCK");
1091bd15f237SLong Li 		goto free_intr;
1092bd15f237SLong Li 	}
1093bd15f237SLong Li 
1094afd5d170SLong Li 	ret = rte_intr_fd_set(priv->intr_handle, ctx->async_fd);
1095afd5d170SLong Li 	if (ret)
1096afd5d170SLong Li 		goto free_intr;
1097afd5d170SLong Li 
1098afd5d170SLong Li 	ret = rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT);
1099afd5d170SLong Li 	if (ret)
1100afd5d170SLong Li 		goto free_intr;
1101bd15f237SLong Li 
1102bd15f237SLong Li 	ret = rte_intr_callback_register(priv->intr_handle,
1103bd15f237SLong Li 					 mana_intr_handler, priv);
1104bd15f237SLong Li 	if (ret) {
1105bd15f237SLong Li 		DRV_LOG(ERR, "Failed to register intr callback");
1106bd15f237SLong Li 		rte_intr_fd_set(priv->intr_handle, -1);
1107afd5d170SLong Li 		goto free_intr;
1108bd15f237SLong Li 	}
1109bd15f237SLong Li 
1110afd5d170SLong Li 	eth_dev->intr_handle = priv->intr_handle;
1111bd15f237SLong Li 	return 0;
1112bd15f237SLong Li 
1113bd15f237SLong Li free_intr:
1114bd15f237SLong Li 	rte_intr_instance_free(priv->intr_handle);
1115bd15f237SLong Li 	priv->intr_handle = NULL;
1116bd15f237SLong Li 
1117bd15f237SLong Li 	return ret;
1118bd15f237SLong Li }
1119bd15f237SLong Li 
1120517ed6e2SLong Li static int
mana_proc_priv_init(struct rte_eth_dev * dev)1121517ed6e2SLong Li mana_proc_priv_init(struct rte_eth_dev *dev)
1122517ed6e2SLong Li {
1123517ed6e2SLong Li 	struct mana_process_priv *priv;
1124517ed6e2SLong Li 
1125517ed6e2SLong Li 	priv = rte_zmalloc_socket("mana_proc_priv",
1126517ed6e2SLong Li 				  sizeof(struct mana_process_priv),
1127517ed6e2SLong Li 				  RTE_CACHE_LINE_SIZE,
1128517ed6e2SLong Li 				  dev->device->numa_node);
1129517ed6e2SLong Li 	if (!priv)
1130517ed6e2SLong Li 		return -ENOMEM;
1131517ed6e2SLong Li 
1132517ed6e2SLong Li 	dev->process_private = priv;
1133517ed6e2SLong Li 	return 0;
1134517ed6e2SLong Li }
1135517ed6e2SLong Li 
1136517ed6e2SLong Li /*
1137517ed6e2SLong Li  * Map the doorbell page for the secondary process through IB device handle.
1138517ed6e2SLong Li  */
1139517ed6e2SLong Li static int
mana_map_doorbell_secondary(struct rte_eth_dev * eth_dev,int fd)1140517ed6e2SLong Li mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd)
1141517ed6e2SLong Li {
1142517ed6e2SLong Li 	struct mana_process_priv *priv = eth_dev->process_private;
1143517ed6e2SLong Li 
1144517ed6e2SLong Li 	void *addr;
1145517ed6e2SLong Li 
1146517ed6e2SLong Li 	addr = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0);
1147517ed6e2SLong Li 	if (addr == MAP_FAILED) {
1148517ed6e2SLong Li 		DRV_LOG(ERR, "Failed to map secondary doorbell port %u",
1149517ed6e2SLong Li 			eth_dev->data->port_id);
1150517ed6e2SLong Li 		return -ENOMEM;
1151517ed6e2SLong Li 	}
1152517ed6e2SLong Li 
1153517ed6e2SLong Li 	DRV_LOG(INFO, "Secondary doorbell mapped to %p", addr);
1154517ed6e2SLong Li 
1155517ed6e2SLong Li 	priv->db_page = addr;
1156517ed6e2SLong Li 
1157517ed6e2SLong Li 	return 0;
1158517ed6e2SLong Li }
1159517ed6e2SLong Li 
1160517ed6e2SLong Li /* Initialize shared data for the driver (all devices) */
1161517ed6e2SLong Li static int
mana_init_shared_data(void)1162517ed6e2SLong Li mana_init_shared_data(void)
1163517ed6e2SLong Li {
1164517ed6e2SLong Li 	int ret =  0;
1165517ed6e2SLong Li 	const struct rte_memzone *secondary_mz;
1166517ed6e2SLong Li 
1167517ed6e2SLong Li 	rte_spinlock_lock(&mana_shared_data_lock);
1168517ed6e2SLong Li 
1169517ed6e2SLong Li 	/* Skip if shared data is already initialized */
1170517ed6e2SLong Li 	if (mana_shared_data)
1171517ed6e2SLong Li 		goto exit;
1172517ed6e2SLong Li 
1173517ed6e2SLong Li 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1174517ed6e2SLong Li 		mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA,
1175517ed6e2SLong Li 						     sizeof(*mana_shared_data),
1176517ed6e2SLong Li 						     SOCKET_ID_ANY, 0);
1177517ed6e2SLong Li 		if (!mana_shared_mz) {
1178517ed6e2SLong Li 			DRV_LOG(ERR, "Cannot allocate mana shared data");
1179517ed6e2SLong Li 			ret = -rte_errno;
1180517ed6e2SLong Li 			goto exit;
1181517ed6e2SLong Li 		}
1182517ed6e2SLong Li 
1183517ed6e2SLong Li 		mana_shared_data = mana_shared_mz->addr;
1184517ed6e2SLong Li 		memset(mana_shared_data, 0, sizeof(*mana_shared_data));
1185517ed6e2SLong Li 		rte_spinlock_init(&mana_shared_data->lock);
1186517ed6e2SLong Li 	} else {
1187517ed6e2SLong Li 		secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA);
1188517ed6e2SLong Li 		if (!secondary_mz) {
1189517ed6e2SLong Li 			DRV_LOG(ERR, "Cannot attach mana shared data");
1190517ed6e2SLong Li 			ret = -rte_errno;
1191517ed6e2SLong Li 			goto exit;
1192517ed6e2SLong Li 		}
1193517ed6e2SLong Li 
1194517ed6e2SLong Li 		mana_shared_data = secondary_mz->addr;
1195517ed6e2SLong Li 		memset(&mana_local_data, 0, sizeof(mana_local_data));
1196517ed6e2SLong Li 	}
1197517ed6e2SLong Li 
1198517ed6e2SLong Li exit:
1199517ed6e2SLong Li 	rte_spinlock_unlock(&mana_shared_data_lock);
1200517ed6e2SLong Li 
1201517ed6e2SLong Li 	return ret;
1202517ed6e2SLong Li }
1203517ed6e2SLong Li 
1204517ed6e2SLong Li /*
1205517ed6e2SLong Li  * Init the data structures for use in primary and secondary processes.
1206517ed6e2SLong Li  */
1207517ed6e2SLong Li static int
mana_init_once(void)1208517ed6e2SLong Li mana_init_once(void)
1209517ed6e2SLong Li {
1210517ed6e2SLong Li 	int ret;
1211517ed6e2SLong Li 
1212517ed6e2SLong Li 	ret = mana_init_shared_data();
1213517ed6e2SLong Li 	if (ret)
1214517ed6e2SLong Li 		return ret;
1215517ed6e2SLong Li 
1216517ed6e2SLong Li 	rte_spinlock_lock(&mana_shared_data->lock);
1217517ed6e2SLong Li 
1218517ed6e2SLong Li 	switch (rte_eal_process_type()) {
1219517ed6e2SLong Li 	case RTE_PROC_PRIMARY:
1220517ed6e2SLong Li 		if (mana_shared_data->init_done)
1221517ed6e2SLong Li 			break;
1222517ed6e2SLong Li 
1223517ed6e2SLong Li 		ret = mana_mp_init_primary();
1224517ed6e2SLong Li 		if (ret)
1225517ed6e2SLong Li 			break;
1226517ed6e2SLong Li 		DRV_LOG(ERR, "MP INIT PRIMARY");
1227517ed6e2SLong Li 
1228517ed6e2SLong Li 		mana_shared_data->init_done = 1;
1229517ed6e2SLong Li 		break;
1230517ed6e2SLong Li 
1231517ed6e2SLong Li 	case RTE_PROC_SECONDARY:
1232517ed6e2SLong Li 
1233517ed6e2SLong Li 		if (mana_local_data.init_done)
1234517ed6e2SLong Li 			break;
1235517ed6e2SLong Li 
1236517ed6e2SLong Li 		ret = mana_mp_init_secondary();
1237517ed6e2SLong Li 		if (ret)
1238517ed6e2SLong Li 			break;
1239517ed6e2SLong Li 
1240517ed6e2SLong Li 		DRV_LOG(ERR, "MP INIT SECONDARY");
1241517ed6e2SLong Li 
1242517ed6e2SLong Li 		mana_local_data.init_done = 1;
1243517ed6e2SLong Li 		break;
1244517ed6e2SLong Li 
1245517ed6e2SLong Li 	default:
1246517ed6e2SLong Li 		/* Impossible, internal error */
1247517ed6e2SLong Li 		ret = -EPROTO;
1248517ed6e2SLong Li 		break;
1249517ed6e2SLong Li 	}
1250517ed6e2SLong Li 
1251517ed6e2SLong Li 	rte_spinlock_unlock(&mana_shared_data->lock);
1252517ed6e2SLong Li 
1253517ed6e2SLong Li 	return ret;
1254517ed6e2SLong Li }
1255517ed6e2SLong Li 
1256517ed6e2SLong Li /*
1257517ed6e2SLong Li  * Probe an IB port
1258517ed6e2SLong Li  * Return value:
1259517ed6e2SLong Li  * positive value: successfully probed port
1260517ed6e2SLong Li  * 0: port not matching specified MAC address
1261517ed6e2SLong Li  * negative value: error code
1262517ed6e2SLong Li  */
1263517ed6e2SLong Li static int
mana_probe_port(struct ibv_device * ibdev,struct ibv_device_attr_ex * dev_attr,uint8_t port,struct rte_pci_device * pci_dev,struct rte_ether_addr * addr)1264517ed6e2SLong Li mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr,
1265517ed6e2SLong Li 		uint8_t port, struct rte_pci_device *pci_dev, struct rte_ether_addr *addr)
1266517ed6e2SLong Li {
1267517ed6e2SLong Li 	struct mana_priv *priv = NULL;
1268517ed6e2SLong Li 	struct rte_eth_dev *eth_dev = NULL;
1269517ed6e2SLong Li 	struct ibv_parent_domain_init_attr attr = {0};
1270517ed6e2SLong Li 	char address[64];
1271517ed6e2SLong Li 	char name[RTE_ETH_NAME_MAX_LEN];
1272517ed6e2SLong Li 	int ret;
1273517ed6e2SLong Li 	struct ibv_context *ctx = NULL;
1274517ed6e2SLong Li 
1275517ed6e2SLong Li 	rte_ether_format_addr(address, sizeof(address), addr);
1276517ed6e2SLong Li 	DRV_LOG(INFO, "device located port %u address %s", port, address);
1277517ed6e2SLong Li 
1278517ed6e2SLong Li 	priv = rte_zmalloc_socket(NULL, sizeof(*priv), RTE_CACHE_LINE_SIZE,
1279517ed6e2SLong Li 				  SOCKET_ID_ANY);
1280517ed6e2SLong Li 	if (!priv)
1281517ed6e2SLong Li 		return -ENOMEM;
1282517ed6e2SLong Li 
1283517ed6e2SLong Li 	snprintf(name, sizeof(name), "%s_port%d", pci_dev->device.name, port);
1284517ed6e2SLong Li 
1285517ed6e2SLong Li 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1286517ed6e2SLong Li 		int fd;
1287517ed6e2SLong Li 
1288517ed6e2SLong Li 		eth_dev = rte_eth_dev_attach_secondary(name);
1289517ed6e2SLong Li 		if (!eth_dev) {
1290517ed6e2SLong Li 			DRV_LOG(ERR, "Can't attach to dev %s", name);
1291517ed6e2SLong Li 			ret =  -ENOMEM;
1292517ed6e2SLong Li 			goto failed;
1293517ed6e2SLong Li 		}
1294517ed6e2SLong Li 
1295517ed6e2SLong Li 		eth_dev->device = &pci_dev->device;
1296517ed6e2SLong Li 		eth_dev->dev_ops = &mana_dev_secondary_ops;
1297517ed6e2SLong Li 		ret = mana_proc_priv_init(eth_dev);
1298517ed6e2SLong Li 		if (ret)
1299517ed6e2SLong Li 			goto failed;
1300517ed6e2SLong Li 		priv->process_priv = eth_dev->process_private;
1301517ed6e2SLong Li 
1302517ed6e2SLong Li 		/* Get the IB FD from the primary process */
1303517ed6e2SLong Li 		fd = mana_mp_req_verbs_cmd_fd(eth_dev);
1304517ed6e2SLong Li 		if (fd < 0) {
1305517ed6e2SLong Li 			DRV_LOG(ERR, "Failed to get FD %d", fd);
1306517ed6e2SLong Li 			ret = -ENODEV;
1307517ed6e2SLong Li 			goto failed;
1308517ed6e2SLong Li 		}
1309517ed6e2SLong Li 
1310517ed6e2SLong Li 		ret = mana_map_doorbell_secondary(eth_dev, fd);
1311517ed6e2SLong Li 		if (ret) {
1312517ed6e2SLong Li 			DRV_LOG(ERR, "Failed secondary map %d", fd);
1313517ed6e2SLong Li 			goto failed;
1314517ed6e2SLong Li 		}
1315517ed6e2SLong Li 
1316517ed6e2SLong Li 		/* fd is no not used after mapping doorbell */
1317517ed6e2SLong Li 		close(fd);
1318517ed6e2SLong Li 
1319095939b6SLong Li 		eth_dev->tx_pkt_burst = mana_tx_burst;
1320095939b6SLong Li 		eth_dev->rx_pkt_burst = mana_rx_burst;
1321517ed6e2SLong Li 
1322517ed6e2SLong Li 		rte_spinlock_lock(&mana_shared_data->lock);
1323517ed6e2SLong Li 		mana_shared_data->secondary_cnt++;
1324517ed6e2SLong Li 		mana_local_data.secondary_cnt++;
1325517ed6e2SLong Li 		rte_spinlock_unlock(&mana_shared_data->lock);
1326517ed6e2SLong Li 
1327517ed6e2SLong Li 		rte_eth_copy_pci_info(eth_dev, pci_dev);
1328517ed6e2SLong Li 		rte_eth_dev_probing_finish(eth_dev);
1329517ed6e2SLong Li 
1330517ed6e2SLong Li 		return 0;
1331517ed6e2SLong Li 	}
1332517ed6e2SLong Li 
1333517ed6e2SLong Li 	ctx = ibv_open_device(ibdev);
1334517ed6e2SLong Li 	if (!ctx) {
1335517ed6e2SLong Li 		DRV_LOG(ERR, "Failed to open IB device %s", ibdev->name);
1336517ed6e2SLong Li 		ret = -ENODEV;
1337517ed6e2SLong Li 		goto failed;
1338517ed6e2SLong Li 	}
1339517ed6e2SLong Li 
1340517ed6e2SLong Li 	eth_dev = rte_eth_dev_allocate(name);
1341517ed6e2SLong Li 	if (!eth_dev) {
1342517ed6e2SLong Li 		ret = -ENOMEM;
1343517ed6e2SLong Li 		goto failed;
1344517ed6e2SLong Li 	}
1345517ed6e2SLong Li 
1346517ed6e2SLong Li 	eth_dev->data->mac_addrs =
1347517ed6e2SLong Li 		rte_calloc("mana_mac", 1,
1348517ed6e2SLong Li 			   sizeof(struct rte_ether_addr), 0);
1349517ed6e2SLong Li 	if (!eth_dev->data->mac_addrs) {
1350517ed6e2SLong Li 		ret = -ENOMEM;
1351517ed6e2SLong Li 		goto failed;
1352517ed6e2SLong Li 	}
1353517ed6e2SLong Li 
1354517ed6e2SLong Li 	rte_ether_addr_copy(addr, eth_dev->data->mac_addrs);
1355517ed6e2SLong Li 
1356517ed6e2SLong Li 	priv->ib_pd = ibv_alloc_pd(ctx);
1357517ed6e2SLong Li 	if (!priv->ib_pd) {
1358517ed6e2SLong Li 		DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port);
1359517ed6e2SLong Li 		ret = -ENOMEM;
1360517ed6e2SLong Li 		goto failed;
1361517ed6e2SLong Li 	}
1362517ed6e2SLong Li 
1363517ed6e2SLong Li 	/* Create a parent domain with the port number */
1364517ed6e2SLong Li 	attr.pd = priv->ib_pd;
1365517ed6e2SLong Li 	attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT;
136674decf3bSWei Hu 	attr.pd_context = (void *)(uintptr_t)port;
1367517ed6e2SLong Li 	priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr);
1368517ed6e2SLong Li 	if (!priv->ib_parent_pd) {
1369517ed6e2SLong Li 		DRV_LOG(ERR, "ibv_alloc_parent_domain failed port %d", port);
1370517ed6e2SLong Li 		ret = -ENOMEM;
1371517ed6e2SLong Li 		goto failed;
1372517ed6e2SLong Li 	}
1373517ed6e2SLong Li 
1374517ed6e2SLong Li 	priv->ib_ctx = ctx;
1375517ed6e2SLong Li 	priv->port_id = eth_dev->data->port_id;
1376517ed6e2SLong Li 	priv->dev_port = port;
1377517ed6e2SLong Li 	eth_dev->data->dev_private = priv;
1378517ed6e2SLong Li 	priv->dev_data = eth_dev->data;
1379517ed6e2SLong Li 
1380517ed6e2SLong Li 	priv->max_rx_queues = dev_attr->orig_attr.max_qp;
1381517ed6e2SLong Li 	priv->max_tx_queues = dev_attr->orig_attr.max_qp;
1382517ed6e2SLong Li 
1383517ed6e2SLong Li 	priv->max_rx_desc =
1384517ed6e2SLong Li 		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
1385517ed6e2SLong Li 			dev_attr->orig_attr.max_cqe);
1386517ed6e2SLong Li 	priv->max_tx_desc =
1387517ed6e2SLong Li 		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
1388517ed6e2SLong Li 			dev_attr->orig_attr.max_cqe);
1389517ed6e2SLong Li 
1390517ed6e2SLong Li 	priv->max_send_sge = dev_attr->orig_attr.max_sge;
1391517ed6e2SLong Li 	priv->max_recv_sge = dev_attr->orig_attr.max_sge;
1392517ed6e2SLong Li 
1393517ed6e2SLong Li 	priv->max_mr = dev_attr->orig_attr.max_mr;
1394517ed6e2SLong Li 	priv->max_mr_size = dev_attr->orig_attr.max_mr_size;
1395517ed6e2SLong Li 
1396b7e79896SLong Li 	DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d mr %" PRIu64,
1397517ed6e2SLong Li 		name, priv->max_rx_queues, priv->max_rx_desc,
1398b7e79896SLong Li 		priv->max_send_sge, priv->max_mr_size);
1399517ed6e2SLong Li 
1400517ed6e2SLong Li 	rte_eth_copy_pci_info(eth_dev, pci_dev);
1401517ed6e2SLong Li 
1402bd15f237SLong Li 	/* Create async interrupt handler */
1403afd5d170SLong Li 	ret = mana_intr_install(eth_dev, priv);
1404bd15f237SLong Li 	if (ret) {
1405bd15f237SLong Li 		DRV_LOG(ERR, "Failed to install intr handler");
1406bd15f237SLong Li 		goto failed;
1407bd15f237SLong Li 	}
1408bd15f237SLong Li 
1409517ed6e2SLong Li 	rte_spinlock_lock(&mana_shared_data->lock);
1410517ed6e2SLong Li 	mana_shared_data->primary_cnt++;
1411517ed6e2SLong Li 	rte_spinlock_unlock(&mana_shared_data->lock);
1412517ed6e2SLong Li 
1413517ed6e2SLong Li 	eth_dev->device = &pci_dev->device;
1414517ed6e2SLong Li 
1415517ed6e2SLong Li 	DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id);
1416517ed6e2SLong Li 
1417517ed6e2SLong Li 	eth_dev->rx_pkt_burst = mana_rx_burst_removed;
1418517ed6e2SLong Li 	eth_dev->tx_pkt_burst = mana_tx_burst_removed;
1419517ed6e2SLong Li 	eth_dev->dev_ops = &mana_dev_ops;
1420517ed6e2SLong Li 
1421517ed6e2SLong Li 	rte_eth_dev_probing_finish(eth_dev);
1422517ed6e2SLong Li 
1423517ed6e2SLong Li 	return 0;
1424517ed6e2SLong Li 
1425517ed6e2SLong Li failed:
1426517ed6e2SLong Li 	/* Free the resource for the port failed */
1427517ed6e2SLong Li 	if (priv) {
1428517ed6e2SLong Li 		if (priv->ib_parent_pd)
1429517ed6e2SLong Li 			ibv_dealloc_pd(priv->ib_parent_pd);
1430517ed6e2SLong Li 
1431517ed6e2SLong Li 		if (priv->ib_pd)
1432517ed6e2SLong Li 			ibv_dealloc_pd(priv->ib_pd);
1433517ed6e2SLong Li 	}
1434517ed6e2SLong Li 
1435517ed6e2SLong Li 	if (eth_dev)
1436517ed6e2SLong Li 		rte_eth_dev_release_port(eth_dev);
1437517ed6e2SLong Li 
1438517ed6e2SLong Li 	rte_free(priv);
1439517ed6e2SLong Li 
1440517ed6e2SLong Li 	if (ctx)
1441517ed6e2SLong Li 		ibv_close_device(ctx);
1442517ed6e2SLong Li 
1443517ed6e2SLong Li 	return ret;
1444517ed6e2SLong Li }
1445517ed6e2SLong Li 
1446517ed6e2SLong Li /*
1447517ed6e2SLong Li  * Goes through the IB device list to look for the IB port matching the
1448517ed6e2SLong Li  * mac_addr. If found, create a rte_eth_dev for it.
14490595702eSLong Li  * Return value: number of successfully probed devices
1450517ed6e2SLong Li  */
1451517ed6e2SLong Li static int
mana_pci_probe_mac(struct rte_pci_device * pci_dev,struct rte_ether_addr * mac_addr)1452517ed6e2SLong Li mana_pci_probe_mac(struct rte_pci_device *pci_dev,
1453517ed6e2SLong Li 		   struct rte_ether_addr *mac_addr)
1454517ed6e2SLong Li {
1455517ed6e2SLong Li 	struct ibv_device **ibv_list;
1456517ed6e2SLong Li 	int ibv_idx;
1457517ed6e2SLong Li 	struct ibv_context *ctx;
1458517ed6e2SLong Li 	int num_devices;
14590595702eSLong Li 	int ret;
1460517ed6e2SLong Li 	uint8_t port;
14610595702eSLong Li 	int count = 0;
1462517ed6e2SLong Li 
1463517ed6e2SLong Li 	ibv_list = ibv_get_device_list(&num_devices);
1464517ed6e2SLong Li 	for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
1465517ed6e2SLong Li 		struct ibv_device *ibdev = ibv_list[ibv_idx];
1466517ed6e2SLong Li 		struct rte_pci_addr pci_addr;
1467517ed6e2SLong Li 		struct ibv_device_attr_ex dev_attr;
1468517ed6e2SLong Li 
1469517ed6e2SLong Li 		DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
1470517ed6e2SLong Li 			ibdev->name, ibdev->dev_name, ibdev->ibdev_path);
1471517ed6e2SLong Li 
1472517ed6e2SLong Li 		if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
1473517ed6e2SLong Li 			continue;
1474517ed6e2SLong Li 
1475517ed6e2SLong Li 		/* Ignore if this IB device is not this PCI device */
14768fa22e1fSThomas Monjalon 		if (rte_pci_addr_cmp(&pci_dev->addr, &pci_addr) != 0)
1477517ed6e2SLong Li 			continue;
1478517ed6e2SLong Li 
1479517ed6e2SLong Li 		ctx = ibv_open_device(ibdev);
1480517ed6e2SLong Li 		if (!ctx) {
1481517ed6e2SLong Li 			DRV_LOG(ERR, "Failed to open IB device %s",
1482517ed6e2SLong Li 				ibdev->name);
1483517ed6e2SLong Li 			continue;
1484517ed6e2SLong Li 		}
1485517ed6e2SLong Li 		ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
1486517ed6e2SLong Li 		ibv_close_device(ctx);
1487517ed6e2SLong Li 
14880595702eSLong Li 		if (ret) {
14890595702eSLong Li 			DRV_LOG(ERR, "Failed to query IB device %s",
14900595702eSLong Li 				ibdev->name);
14910595702eSLong Li 			continue;
14920595702eSLong Li 		}
14930595702eSLong Li 
1494517ed6e2SLong Li 		for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
1495517ed6e2SLong Li 		     port++) {
1496517ed6e2SLong Li 			struct rte_ether_addr addr;
1497517ed6e2SLong Li 			ret = get_port_mac(ibdev, port, &addr);
1498517ed6e2SLong Li 			if (ret)
1499517ed6e2SLong Li 				continue;
1500517ed6e2SLong Li 
1501517ed6e2SLong Li 			if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr))
1502517ed6e2SLong Li 				continue;
1503517ed6e2SLong Li 
1504517ed6e2SLong Li 			ret = mana_probe_port(ibdev, &dev_attr, port, pci_dev, &addr);
15050595702eSLong Li 			if (ret) {
1506517ed6e2SLong Li 				DRV_LOG(ERR, "Probe on IB port %u failed %d", port, ret);
15070595702eSLong Li 			} else {
15080595702eSLong Li 				count++;
1509517ed6e2SLong Li 				DRV_LOG(INFO, "Successfully probed on IB port %u", port);
1510517ed6e2SLong Li 			}
1511517ed6e2SLong Li 		}
15120595702eSLong Li 	}
1513517ed6e2SLong Li 
1514517ed6e2SLong Li 	ibv_free_device_list(ibv_list);
15150595702eSLong Li 	return count;
1516517ed6e2SLong Li }
1517517ed6e2SLong Li 
1518517ed6e2SLong Li /*
1519517ed6e2SLong Li  * Main callback function from PCI bus to probe a device.
1520517ed6e2SLong Li  */
1521517ed6e2SLong Li static int
mana_pci_probe(struct rte_pci_driver * pci_drv __rte_unused,struct rte_pci_device * pci_dev)1522517ed6e2SLong Li mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1523517ed6e2SLong Li 	       struct rte_pci_device *pci_dev)
1524517ed6e2SLong Li {
1525517ed6e2SLong Li 	struct rte_devargs *args = pci_dev->device.devargs;
1526517ed6e2SLong Li 	struct mana_conf conf = {0};
1527517ed6e2SLong Li 	unsigned int i;
1528517ed6e2SLong Li 	int ret;
15290595702eSLong Li 	int count = 0;
1530517ed6e2SLong Li 
1531517ed6e2SLong Li 	if (args && args->drv_str) {
1532517ed6e2SLong Li 		ret = mana_parse_args(args, &conf);
1533517ed6e2SLong Li 		if (ret) {
1534517ed6e2SLong Li 			DRV_LOG(ERR, "Failed to parse parameters args = %s",
1535517ed6e2SLong Li 				args->drv_str);
1536517ed6e2SLong Li 			return ret;
1537517ed6e2SLong Li 		}
1538517ed6e2SLong Li 	}
1539517ed6e2SLong Li 
1540517ed6e2SLong Li 	ret = mana_init_once();
1541517ed6e2SLong Li 	if (ret) {
1542517ed6e2SLong Li 		DRV_LOG(ERR, "Failed to init PMD global data %d", ret);
1543517ed6e2SLong Li 		return ret;
1544517ed6e2SLong Li 	}
1545517ed6e2SLong Li 
1546517ed6e2SLong Li 	/* If there are no driver parameters, probe on all ports */
15470595702eSLong Li 	if (conf.index) {
15480595702eSLong Li 		for (i = 0; i < conf.index; i++)
15490595702eSLong Li 			count += mana_pci_probe_mac(pci_dev,
15500595702eSLong Li 						    &conf.mac_array[i]);
15510595702eSLong Li 	} else {
15520595702eSLong Li 		count = mana_pci_probe_mac(pci_dev, NULL);
1553517ed6e2SLong Li 	}
1554517ed6e2SLong Li 
15550595702eSLong Li 	if (!count) {
15560595702eSLong Li 		rte_memzone_free(mana_shared_mz);
15570595702eSLong Li 		mana_shared_mz = NULL;
15580595702eSLong Li 		ret = -ENODEV;
15590595702eSLong Li 	}
15600595702eSLong Li 
15610595702eSLong Li 	return ret;
1562517ed6e2SLong Li }
1563517ed6e2SLong Li 
/*
 * Per-port uninit hook passed to rte_eth_dev_pci_generic_remove():
 * closes the port and hands back the result of mana_dev_close().
 */
static int
mana_dev_uninit(struct rte_eth_dev *dev)
{
	int rc = mana_dev_close(dev);

	return rc;
}
1569517ed6e2SLong Li 
1570517ed6e2SLong Li /*
1571517ed6e2SLong Li  * Callback from PCI to remove this device.
1572517ed6e2SLong Li  */
1573517ed6e2SLong Li static int
mana_pci_remove(struct rte_pci_device * pci_dev)1574517ed6e2SLong Li mana_pci_remove(struct rte_pci_device *pci_dev)
1575517ed6e2SLong Li {
1576517ed6e2SLong Li 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1577517ed6e2SLong Li 		rte_spinlock_lock(&mana_shared_data_lock);
1578517ed6e2SLong Li 
1579517ed6e2SLong Li 		rte_spinlock_lock(&mana_shared_data->lock);
1580517ed6e2SLong Li 
1581517ed6e2SLong Li 		RTE_VERIFY(mana_shared_data->primary_cnt > 0);
1582517ed6e2SLong Li 		mana_shared_data->primary_cnt--;
1583517ed6e2SLong Li 		if (!mana_shared_data->primary_cnt) {
1584517ed6e2SLong Li 			DRV_LOG(DEBUG, "mp uninit primary");
1585517ed6e2SLong Li 			mana_mp_uninit_primary();
1586517ed6e2SLong Li 		}
1587517ed6e2SLong Li 
1588517ed6e2SLong Li 		rte_spinlock_unlock(&mana_shared_data->lock);
1589517ed6e2SLong Li 
1590517ed6e2SLong Li 		/* Also free the shared memory if this is the last */
1591517ed6e2SLong Li 		if (!mana_shared_data->primary_cnt) {
1592517ed6e2SLong Li 			DRV_LOG(DEBUG, "free shared memezone data");
1593517ed6e2SLong Li 			rte_memzone_free(mana_shared_mz);
15940595702eSLong Li 			mana_shared_mz = NULL;
1595517ed6e2SLong Li 		}
1596517ed6e2SLong Li 
1597517ed6e2SLong Li 		rte_spinlock_unlock(&mana_shared_data_lock);
1598517ed6e2SLong Li 	} else {
1599517ed6e2SLong Li 		rte_spinlock_lock(&mana_shared_data_lock);
1600517ed6e2SLong Li 
1601517ed6e2SLong Li 		rte_spinlock_lock(&mana_shared_data->lock);
1602517ed6e2SLong Li 		RTE_VERIFY(mana_shared_data->secondary_cnt > 0);
1603517ed6e2SLong Li 		mana_shared_data->secondary_cnt--;
1604517ed6e2SLong Li 		rte_spinlock_unlock(&mana_shared_data->lock);
1605517ed6e2SLong Li 
1606517ed6e2SLong Li 		RTE_VERIFY(mana_local_data.secondary_cnt > 0);
1607517ed6e2SLong Li 		mana_local_data.secondary_cnt--;
1608517ed6e2SLong Li 		if (!mana_local_data.secondary_cnt) {
1609517ed6e2SLong Li 			DRV_LOG(DEBUG, "mp uninit secondary");
1610517ed6e2SLong Li 			mana_mp_uninit_secondary();
1611517ed6e2SLong Li 		}
1612517ed6e2SLong Li 
1613517ed6e2SLong Li 		rte_spinlock_unlock(&mana_shared_data_lock);
1614517ed6e2SLong Li 	}
1615517ed6e2SLong Li 
1616517ed6e2SLong Li 	return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit);
1617517ed6e2SLong Li }
1618517ed6e2SLong Li 
1619517ed6e2SLong Li static const struct rte_pci_id mana_pci_id_map[] = {
1620517ed6e2SLong Li 	{
1621517ed6e2SLong Li 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT,
1622517ed6e2SLong Li 			       PCI_DEVICE_ID_MICROSOFT_MANA)
1623517ed6e2SLong Li 	},
1624517ed6e2SLong Li 	{
1625517ed6e2SLong Li 		.vendor_id = 0
1626517ed6e2SLong Li 	},
1627517ed6e2SLong Li };
1628517ed6e2SLong Li 
1629517ed6e2SLong Li static struct rte_pci_driver mana_pci_driver = {
1630517ed6e2SLong Li 	.id_table = mana_pci_id_map,
1631517ed6e2SLong Li 	.probe = mana_pci_probe,
1632517ed6e2SLong Li 	.remove = mana_pci_remove,
1633517ed6e2SLong Li 	.drv_flags = RTE_PCI_DRV_INTR_RMV,
1634517ed6e2SLong Li };
1635517ed6e2SLong Li 
1636517ed6e2SLong Li RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver);
1637517ed6e2SLong Li RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
1638517ed6e2SLong Li RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
1639517ed6e2SLong Li RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
1640517ed6e2SLong Li RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);
1641517ed6e2SLong Li RTE_PMD_REGISTER_PARAM_STRING(net_mana, ETH_MANA_MAC_ARG "=<mac_addr>");
1642