xref: /dpdk/drivers/net/mana/mana.c (revision 0c63c005f64eea378bd3a1302abd6cd14d398d24)
1517ed6e2SLong Li /* SPDX-License-Identifier: BSD-3-Clause
2517ed6e2SLong Li  * Copyright 2022 Microsoft Corporation
3517ed6e2SLong Li  */
4517ed6e2SLong Li 
5517ed6e2SLong Li #include <unistd.h>
6517ed6e2SLong Li #include <dirent.h>
7517ed6e2SLong Li #include <fcntl.h>
8517ed6e2SLong Li #include <sys/mman.h>
9517ed6e2SLong Li 
10517ed6e2SLong Li #include <ethdev_driver.h>
11517ed6e2SLong Li #include <ethdev_pci.h>
12517ed6e2SLong Li #include <rte_kvargs.h>
13517ed6e2SLong Li #include <rte_eal_paging.h>
14517ed6e2SLong Li 
15517ed6e2SLong Li #include <infiniband/verbs.h>
16517ed6e2SLong Li #include <infiniband/manadv.h>
17517ed6e2SLong Li 
18517ed6e2SLong Li #include <assert.h>
19517ed6e2SLong Li 
20517ed6e2SLong Li #include "mana.h"
21517ed6e2SLong Li 
/*
 * Driver-wide state, per driver rather than per device.
 *
 * mana_shared_data points at the contents of the memzone named by
 * MZ_MANA_SHARED_DATA (reserved by the primary process, looked up by
 * secondaries). mana_local_data is this process's own copy; secondary
 * processes use its init_done flag to track their one-time setup.
 */
struct mana_shared_data *mana_shared_data;
static struct mana_shared_data mana_local_data;

/* The memory region for the above data (set only in the primary process) */
static const struct rte_memzone *mana_shared_mz;
static const char *MZ_MANA_SHARED_DATA = "mana_shared_data";

/* Spinlock guarding the initialization of the mana_shared_data pointer */
static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
33517ed6e2SLong Li 
/*
 * Allocate a buffer on the stack and fill it with a printf format string.
 *
 * Expands to two declarations (an int holding the formatted length and a
 * variable-length char array called `name`) followed by statements, so it
 * cannot be wrapped in do { } while (0) and must appear where declarations
 * are allowed. The format arguments are evaluated twice: once to measure
 * the length and once to write the string.
 */
#define MANA_MKSTR(name, ...) \
	int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
	char name[mkstr_size_##name + 1]; \
	\
	memset(name, 0, mkstr_size_##name + 1); \
	snprintf(name, sizeof(name), "" __VA_ARGS__)
41517ed6e2SLong Li 
/*
 * Log type identifiers for the driver.
 * NOTE(review): presumably registered with the EAL log subsystem and used
 * by the DRV_LOG()/init logging macros; the registration is not visible in
 * this part of the file - confirm.
 */
int mana_logtype_driver;
int mana_logtype_init;
44517ed6e2SLong Li 
450dbfecfeSLong Li /*
460dbfecfeSLong Li  * Callback from rdma-core to allocate a buffer for a queue.
470dbfecfeSLong Li  */
480dbfecfeSLong Li void *
490dbfecfeSLong Li mana_alloc_verbs_buf(size_t size, void *data)
500dbfecfeSLong Li {
510dbfecfeSLong Li 	void *ret;
520dbfecfeSLong Li 	size_t alignment = rte_mem_page_size();
530dbfecfeSLong Li 	int socket = (int)(uintptr_t)data;
540dbfecfeSLong Li 
550dbfecfeSLong Li 	DRV_LOG(DEBUG, "size=%zu socket=%d", size, socket);
560dbfecfeSLong Li 
570dbfecfeSLong Li 	if (alignment == (size_t)-1) {
580dbfecfeSLong Li 		DRV_LOG(ERR, "Failed to get mem page size");
590dbfecfeSLong Li 		rte_errno = ENOMEM;
600dbfecfeSLong Li 		return NULL;
610dbfecfeSLong Li 	}
620dbfecfeSLong Li 
630dbfecfeSLong Li 	ret = rte_zmalloc_socket("mana_verb_buf", size, alignment, socket);
640dbfecfeSLong Li 	if (!ret && size)
650dbfecfeSLong Li 		rte_errno = ENOMEM;
660dbfecfeSLong Li 	return ret;
670dbfecfeSLong Li }
680dbfecfeSLong Li 
/*
 * Counterpart of mana_alloc_verbs_buf(): release a buffer with rte_free().
 * The opaque cookie is not used.
 */
void
mana_free_verbs_buf(void *ptr, void *data __rte_unused)
{
	rte_free(ptr);
}
740dbfecfeSLong Li 
750dbfecfeSLong Li static int
760dbfecfeSLong Li mana_dev_configure(struct rte_eth_dev *dev)
770dbfecfeSLong Li {
780dbfecfeSLong Li 	struct mana_priv *priv = dev->data->dev_private;
790dbfecfeSLong Li 	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
800dbfecfeSLong Li 
810dbfecfeSLong Li 	if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
820dbfecfeSLong Li 		dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
830dbfecfeSLong Li 
840dbfecfeSLong Li 	if (dev->data->nb_rx_queues != dev->data->nb_tx_queues) {
850dbfecfeSLong Li 		DRV_LOG(ERR, "Only support equal number of rx/tx queues");
860dbfecfeSLong Li 		return -EINVAL;
870dbfecfeSLong Li 	}
880dbfecfeSLong Li 
890dbfecfeSLong Li 	if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
900dbfecfeSLong Li 		DRV_LOG(ERR, "number of TX/RX queues must be power of 2");
910dbfecfeSLong Li 		return -EINVAL;
920dbfecfeSLong Li 	}
930dbfecfeSLong Li 
940dbfecfeSLong Li 	priv->num_queues = dev->data->nb_rx_queues;
950dbfecfeSLong Li 
960dbfecfeSLong Li 	manadv_set_context_attr(priv->ib_ctx, MANADV_CTX_ATTR_BUF_ALLOCATORS,
970dbfecfeSLong Li 				(void *)((uintptr_t)&(struct manadv_ctx_allocators){
980dbfecfeSLong Li 					.alloc = &mana_alloc_verbs_buf,
990dbfecfeSLong Li 					.free = &mana_free_verbs_buf,
1000dbfecfeSLong Li 					.data = 0,
1010dbfecfeSLong Li 				}));
1020dbfecfeSLong Li 
1030dbfecfeSLong Li 	return 0;
1040dbfecfeSLong Li }
1050dbfecfeSLong Li 
106bd15f237SLong Li static int mana_intr_uninstall(struct mana_priv *priv);
107bd15f237SLong Li 
1080dbfecfeSLong Li static int
1090dbfecfeSLong Li mana_dev_close(struct rte_eth_dev *dev)
1100dbfecfeSLong Li {
1110dbfecfeSLong Li 	struct mana_priv *priv = dev->data->dev_private;
1120dbfecfeSLong Li 	int ret;
1130dbfecfeSLong Li 
114bd15f237SLong Li 	ret = mana_intr_uninstall(priv);
115bd15f237SLong Li 	if (ret)
116bd15f237SLong Li 		return ret;
117bd15f237SLong Li 
1180dbfecfeSLong Li 	ret = ibv_close_device(priv->ib_ctx);
1190dbfecfeSLong Li 	if (ret) {
1200dbfecfeSLong Li 		ret = errno;
1210dbfecfeSLong Li 		return ret;
1220dbfecfeSLong Li 	}
1230dbfecfeSLong Li 
1240dbfecfeSLong Li 	return 0;
1250dbfecfeSLong Li }
1260dbfecfeSLong Li 
127d878cb09SLong Li static int
128d878cb09SLong Li mana_dev_info_get(struct rte_eth_dev *dev,
129d878cb09SLong Li 		  struct rte_eth_dev_info *dev_info)
130d878cb09SLong Li {
131d878cb09SLong Li 	struct mana_priv *priv = dev->data->dev_private;
132d878cb09SLong Li 
133d878cb09SLong Li 	dev_info->max_mtu = RTE_ETHER_MTU;
134d878cb09SLong Li 
135d878cb09SLong Li 	/* RX params */
136d878cb09SLong Li 	dev_info->min_rx_bufsize = MIN_RX_BUF_SIZE;
137d878cb09SLong Li 	dev_info->max_rx_pktlen = MAX_FRAME_SIZE;
138d878cb09SLong Li 
139d878cb09SLong Li 	dev_info->max_rx_queues = priv->max_rx_queues;
140d878cb09SLong Li 	dev_info->max_tx_queues = priv->max_tx_queues;
141d878cb09SLong Li 
142d878cb09SLong Li 	dev_info->max_mac_addrs = MANA_MAX_MAC_ADDR;
143d878cb09SLong Li 	dev_info->max_hash_mac_addrs = 0;
144d878cb09SLong Li 
145d878cb09SLong Li 	dev_info->max_vfs = 1;
146d878cb09SLong Li 
147d878cb09SLong Li 	/* Offload params */
148d878cb09SLong Li 	dev_info->rx_offload_capa = MANA_DEV_RX_OFFLOAD_SUPPORT;
149d878cb09SLong Li 
150d878cb09SLong Li 	dev_info->tx_offload_capa = MANA_DEV_TX_OFFLOAD_SUPPORT;
151d878cb09SLong Li 
152d878cb09SLong Li 	/* RSS */
153d878cb09SLong Li 	dev_info->reta_size = INDIRECTION_TABLE_NUM_ELEMENTS;
154d878cb09SLong Li 	dev_info->hash_key_size = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES;
155d878cb09SLong Li 	dev_info->flow_type_rss_offloads = MANA_ETH_RSS_SUPPORT;
156d878cb09SLong Li 
157d878cb09SLong Li 	/* Thresholds */
158d878cb09SLong Li 	dev_info->default_rxconf = (struct rte_eth_rxconf){
159d878cb09SLong Li 		.rx_thresh = {
160d878cb09SLong Li 			.pthresh = 8,
161d878cb09SLong Li 			.hthresh = 8,
162d878cb09SLong Li 			.wthresh = 0,
163d878cb09SLong Li 		},
164d878cb09SLong Li 		.rx_free_thresh = 32,
165d878cb09SLong Li 		/* If no descriptors available, pkts are dropped by default */
166d878cb09SLong Li 		.rx_drop_en = 1,
167d878cb09SLong Li 	};
168d878cb09SLong Li 
169d878cb09SLong Li 	dev_info->default_txconf = (struct rte_eth_txconf){
170d878cb09SLong Li 		.tx_thresh = {
171d878cb09SLong Li 			.pthresh = 32,
172d878cb09SLong Li 			.hthresh = 0,
173d878cb09SLong Li 			.wthresh = 0,
174d878cb09SLong Li 		},
175d878cb09SLong Li 		.tx_rs_thresh = 32,
176d878cb09SLong Li 		.tx_free_thresh = 32,
177d878cb09SLong Li 	};
178d878cb09SLong Li 
179d878cb09SLong Li 	/* Buffer limits */
180d878cb09SLong Li 	dev_info->rx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
181d878cb09SLong Li 	dev_info->rx_desc_lim.nb_max = priv->max_rx_desc;
182d878cb09SLong Li 	dev_info->rx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
183d878cb09SLong Li 	dev_info->rx_desc_lim.nb_seg_max = priv->max_recv_sge;
184d878cb09SLong Li 	dev_info->rx_desc_lim.nb_mtu_seg_max = priv->max_recv_sge;
185d878cb09SLong Li 
186d878cb09SLong Li 	dev_info->tx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
187d878cb09SLong Li 	dev_info->tx_desc_lim.nb_max = priv->max_tx_desc;
188d878cb09SLong Li 	dev_info->tx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
189d878cb09SLong Li 	dev_info->tx_desc_lim.nb_seg_max = priv->max_send_sge;
190d878cb09SLong Li 	dev_info->rx_desc_lim.nb_mtu_seg_max = priv->max_recv_sge;
191d878cb09SLong Li 
192d878cb09SLong Li 	/* Speed */
193d878cb09SLong Li 	dev_info->speed_capa = RTE_ETH_LINK_SPEED_100G;
194d878cb09SLong Li 
195d878cb09SLong Li 	/* RX params */
196d878cb09SLong Li 	dev_info->default_rxportconf.burst_size = 1;
197d878cb09SLong Li 	dev_info->default_rxportconf.ring_size = MAX_RECEIVE_BUFFERS_PER_QUEUE;
198d878cb09SLong Li 	dev_info->default_rxportconf.nb_queues = 1;
199d878cb09SLong Li 
200d878cb09SLong Li 	/* TX params */
201d878cb09SLong Li 	dev_info->default_txportconf.burst_size = 1;
202d878cb09SLong Li 	dev_info->default_txportconf.ring_size = MAX_SEND_BUFFERS_PER_QUEUE;
203d878cb09SLong Li 	dev_info->default_txportconf.nb_queues = 1;
204d878cb09SLong Li 
205d878cb09SLong Li 	return 0;
206d878cb09SLong Li }
207d878cb09SLong Li 
208*0c63c005SLong Li static void
209*0c63c005SLong Li mana_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
210*0c63c005SLong Li 		       struct rte_eth_rxq_info *qinfo)
211*0c63c005SLong Li {
212*0c63c005SLong Li 	struct mana_rxq *rxq = dev->data->rx_queues[queue_id];
213*0c63c005SLong Li 
214*0c63c005SLong Li 	qinfo->mp = rxq->mp;
215*0c63c005SLong Li 	qinfo->nb_desc = rxq->num_desc;
216*0c63c005SLong Li 	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
217*0c63c005SLong Li }
218*0c63c005SLong Li 
/*
 * dev_supported_ptypes_get callback: return the static list of packet
 * types this driver reports. The list is terminated by RTE_PTYPE_UNKNOWN.
 */
static const uint32_t *
mana_supported_ptypes(struct rte_eth_dev *dev __rte_unused)
{
	static const uint32_t ptypes[] = {
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	return ptypes;
}
234d9679c3aSLong Li 
23521958568SLong Li static int
236a382177cSLong Li mana_rss_hash_update(struct rte_eth_dev *dev,
237a382177cSLong Li 		     struct rte_eth_rss_conf *rss_conf)
238a382177cSLong Li {
239a382177cSLong Li 	struct mana_priv *priv = dev->data->dev_private;
240a382177cSLong Li 
241a382177cSLong Li 	/* Currently can only update RSS hash when device is stopped */
242a382177cSLong Li 	if (dev->data->dev_started) {
243a382177cSLong Li 		DRV_LOG(ERR, "Can't update RSS after device has started");
244a382177cSLong Li 		return -ENODEV;
245a382177cSLong Li 	}
246a382177cSLong Li 
247a382177cSLong Li 	if (rss_conf->rss_hf & ~MANA_ETH_RSS_SUPPORT) {
248a382177cSLong Li 		DRV_LOG(ERR, "Port %u invalid RSS HF 0x%" PRIx64,
249a382177cSLong Li 			dev->data->port_id, rss_conf->rss_hf);
250a382177cSLong Li 		return -EINVAL;
251a382177cSLong Li 	}
252a382177cSLong Li 
253a382177cSLong Li 	if (rss_conf->rss_key && rss_conf->rss_key_len) {
254a382177cSLong Li 		if (rss_conf->rss_key_len != TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) {
255a382177cSLong Li 			DRV_LOG(ERR, "Port %u key len must be %u long",
256a382177cSLong Li 				dev->data->port_id,
257a382177cSLong Li 				TOEPLITZ_HASH_KEY_SIZE_IN_BYTES);
258a382177cSLong Li 			return -EINVAL;
259a382177cSLong Li 		}
260a382177cSLong Li 
261a382177cSLong Li 		priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
262a382177cSLong Li 		priv->rss_conf.rss_key =
263a382177cSLong Li 			rte_zmalloc("mana_rss", rss_conf->rss_key_len,
264a382177cSLong Li 				    RTE_CACHE_LINE_SIZE);
265a382177cSLong Li 		if (!priv->rss_conf.rss_key)
266a382177cSLong Li 			return -ENOMEM;
267a382177cSLong Li 		memcpy(priv->rss_conf.rss_key, rss_conf->rss_key,
268a382177cSLong Li 		       rss_conf->rss_key_len);
269a382177cSLong Li 	}
270a382177cSLong Li 	priv->rss_conf.rss_hf = rss_conf->rss_hf;
271a382177cSLong Li 
272a382177cSLong Li 	return 0;
273a382177cSLong Li }
274a382177cSLong Li 
275a382177cSLong Li static int
276a382177cSLong Li mana_rss_hash_conf_get(struct rte_eth_dev *dev,
277a382177cSLong Li 		       struct rte_eth_rss_conf *rss_conf)
278a382177cSLong Li {
279a382177cSLong Li 	struct mana_priv *priv = dev->data->dev_private;
280a382177cSLong Li 
281a382177cSLong Li 	if (!rss_conf)
282a382177cSLong Li 		return -EINVAL;
283a382177cSLong Li 
284a382177cSLong Li 	if (rss_conf->rss_key &&
285a382177cSLong Li 	    rss_conf->rss_key_len >= priv->rss_conf.rss_key_len) {
286a382177cSLong Li 		memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
287a382177cSLong Li 		       priv->rss_conf.rss_key_len);
288a382177cSLong Li 	}
289a382177cSLong Li 
290a382177cSLong Li 	rss_conf->rss_key_len = priv->rss_conf.rss_key_len;
291a382177cSLong Li 	rss_conf->rss_hf = priv->rss_conf.rss_hf;
292a382177cSLong Li 
293a382177cSLong Li 	return 0;
294a382177cSLong Li }
295a382177cSLong Li 
296a382177cSLong Li static int
297*0c63c005SLong Li mana_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
298*0c63c005SLong Li 			uint16_t nb_desc, unsigned int socket_id,
299*0c63c005SLong Li 			const struct rte_eth_rxconf *rx_conf __rte_unused,
300*0c63c005SLong Li 			struct rte_mempool *mp)
301*0c63c005SLong Li {
302*0c63c005SLong Li 	struct mana_priv *priv = dev->data->dev_private;
303*0c63c005SLong Li 	struct mana_rxq *rxq;
304*0c63c005SLong Li 	int ret;
305*0c63c005SLong Li 
306*0c63c005SLong Li 	rxq = rte_zmalloc_socket("mana_rxq", sizeof(*rxq), 0, socket_id);
307*0c63c005SLong Li 	if (!rxq) {
308*0c63c005SLong Li 		DRV_LOG(ERR, "failed to allocate rxq");
309*0c63c005SLong Li 		return -ENOMEM;
310*0c63c005SLong Li 	}
311*0c63c005SLong Li 
312*0c63c005SLong Li 	DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u",
313*0c63c005SLong Li 		queue_idx, nb_desc, socket_id);
314*0c63c005SLong Li 
315*0c63c005SLong Li 	rxq->socket = socket_id;
316*0c63c005SLong Li 
317*0c63c005SLong Li 	rxq->desc_ring = rte_zmalloc_socket("mana_rx_mbuf_ring",
318*0c63c005SLong Li 					    sizeof(struct mana_rxq_desc) *
319*0c63c005SLong Li 						nb_desc,
320*0c63c005SLong Li 					    RTE_CACHE_LINE_SIZE, socket_id);
321*0c63c005SLong Li 
322*0c63c005SLong Li 	if (!rxq->desc_ring) {
323*0c63c005SLong Li 		DRV_LOG(ERR, "failed to allocate rxq desc_ring");
324*0c63c005SLong Li 		ret = -ENOMEM;
325*0c63c005SLong Li 		goto fail;
326*0c63c005SLong Li 	}
327*0c63c005SLong Li 
328*0c63c005SLong Li 	rxq->desc_ring_head = 0;
329*0c63c005SLong Li 	rxq->desc_ring_tail = 0;
330*0c63c005SLong Li 
331*0c63c005SLong Li 	rxq->priv = priv;
332*0c63c005SLong Li 	rxq->num_desc = nb_desc;
333*0c63c005SLong Li 	rxq->mp = mp;
334*0c63c005SLong Li 	dev->data->rx_queues[queue_idx] = rxq;
335*0c63c005SLong Li 
336*0c63c005SLong Li 	return 0;
337*0c63c005SLong Li 
338*0c63c005SLong Li fail:
339*0c63c005SLong Li 	rte_free(rxq->desc_ring);
340*0c63c005SLong Li 	rte_free(rxq);
341*0c63c005SLong Li 	return ret;
342*0c63c005SLong Li }
343*0c63c005SLong Li 
/*
 * rx_queue_release callback: free the descriptor ring and the queue
 * structure of RX queue qid.
 * The queue pointer is dereferenced without a NULL check.
 * NOTE(review): presumably the ethdev layer only calls this for queues
 * that were set up - confirm.
 */
static void
mana_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct mana_rxq *rxq = dev->data->rx_queues[qid];

	rte_free(rxq->desc_ring);
	rte_free(rxq);
}
352*0c63c005SLong Li 
353*0c63c005SLong Li static int
35421958568SLong Li mana_dev_link_update(struct rte_eth_dev *dev,
35521958568SLong Li 		     int wait_to_complete __rte_unused)
35621958568SLong Li {
35721958568SLong Li 	struct rte_eth_link link;
35821958568SLong Li 
35921958568SLong Li 	/* MANA has no concept of carrier state, always reporting UP */
36021958568SLong Li 	link = (struct rte_eth_link) {
36121958568SLong Li 		.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
36221958568SLong Li 		.link_autoneg = RTE_ETH_LINK_SPEED_FIXED,
36321958568SLong Li 		.link_speed = RTE_ETH_SPEED_NUM_100G,
36421958568SLong Li 		.link_status = RTE_ETH_LINK_UP,
36521958568SLong Li 	};
36621958568SLong Li 
36721958568SLong Li 	return rte_eth_linkstatus_set(dev, &link);
36821958568SLong Li }
36921958568SLong Li 
/* ethdev operations table wiring up the callbacks defined above */
static const struct eth_dev_ops mana_dev_ops = {
	.dev_configure		= mana_dev_configure,
	.dev_close		= mana_dev_close,
	.dev_infos_get		= mana_dev_info_get,
	.rxq_info_get		= mana_dev_rx_queue_info,
	.dev_supported_ptypes_get = mana_supported_ptypes,
	.rss_hash_update	= mana_rss_hash_update,
	.rss_hash_conf_get	= mana_rss_hash_conf_get,
	.rx_queue_setup		= mana_dev_rx_queue_setup,
	.rx_queue_release	= mana_dev_rx_queue_release,
	.link_update		= mana_dev_link_update,
};
382517ed6e2SLong Li 
/*
 * Reduced operations table exposing only the device info query.
 * NOTE(review): by its name intended for secondary processes; where it is
 * installed is not visible in this part of the file - confirm.
 */
static const struct eth_dev_ops mana_dev_secondary_ops = {
	.dev_infos_get = mana_dev_info_get,
};
386517ed6e2SLong Li 
/*
 * RX burst stub: ignores all arguments, issues rte_mb() and reports that
 * zero packets were received.
 * NOTE(review): by its name meant to be installed once the device has been
 * removed - confirm against the code that assigns the burst functions.
 */
uint16_t
mana_rx_burst_removed(void *dpdk_rxq __rte_unused,
		      struct rte_mbuf **pkts __rte_unused,
		      uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}
395517ed6e2SLong Li 
/*
 * TX burst stub: ignores all arguments, issues rte_mb() and reports that
 * zero packets were sent. The first parameter is named dpdk_rxq even
 * though this is the TX variant; it is unused.
 * NOTE(review): by its name meant to be installed once the device has been
 * removed - confirm against the code that assigns the burst functions.
 */
uint16_t
mana_tx_burst_removed(void *dpdk_rxq __rte_unused,
		      struct rte_mbuf **pkts __rte_unused,
		      uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}
404517ed6e2SLong Li 
/* Device argument key carrying a MAC address; may be given several times */
#define ETH_MANA_MAC_ARG "mac"
/* NULL-terminated list of device argument keys accepted by the driver */
static const char * const mana_init_args[] = {
	ETH_MANA_MAC_ARG,
	NULL,
};

/* Support of parsing up to 8 mac address from EAL command line */
#define MAX_NUM_ADDRESS 8
/* MAC addresses collected from the device arguments */
struct mana_conf {
	/* Parsed addresses; only the first `index` entries are valid */
	struct rte_ether_addr mac_array[MAX_NUM_ADDRESS];
	/* Number of addresses stored so far (next free slot) */
	unsigned int index;
};
417517ed6e2SLong Li 
418517ed6e2SLong Li static int
419517ed6e2SLong Li mana_arg_parse_callback(const char *key, const char *val, void *private)
420517ed6e2SLong Li {
421517ed6e2SLong Li 	struct mana_conf *conf = (struct mana_conf *)private;
422517ed6e2SLong Li 	int ret;
423517ed6e2SLong Li 
424517ed6e2SLong Li 	DRV_LOG(INFO, "key=%s value=%s index=%d", key, val, conf->index);
425517ed6e2SLong Li 
426517ed6e2SLong Li 	if (conf->index >= MAX_NUM_ADDRESS) {
427517ed6e2SLong Li 		DRV_LOG(ERR, "Exceeding max MAC address");
428517ed6e2SLong Li 		return 1;
429517ed6e2SLong Li 	}
430517ed6e2SLong Li 
431517ed6e2SLong Li 	ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]);
432517ed6e2SLong Li 	if (ret) {
433517ed6e2SLong Li 		DRV_LOG(ERR, "Invalid MAC address %s", val);
434517ed6e2SLong Li 		return ret;
435517ed6e2SLong Li 	}
436517ed6e2SLong Li 
437517ed6e2SLong Li 	conf->index++;
438517ed6e2SLong Li 
439517ed6e2SLong Li 	return 0;
440517ed6e2SLong Li }
441517ed6e2SLong Li 
442517ed6e2SLong Li static int
443517ed6e2SLong Li mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf)
444517ed6e2SLong Li {
445517ed6e2SLong Li 	struct rte_kvargs *kvlist;
446517ed6e2SLong Li 	unsigned int arg_count;
447517ed6e2SLong Li 	int ret = 0;
448517ed6e2SLong Li 
449517ed6e2SLong Li 	kvlist = rte_kvargs_parse(devargs->drv_str, mana_init_args);
450517ed6e2SLong Li 	if (!kvlist) {
451517ed6e2SLong Li 		DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str);
452517ed6e2SLong Li 		return -EINVAL;
453517ed6e2SLong Li 	}
454517ed6e2SLong Li 
455517ed6e2SLong Li 	arg_count = rte_kvargs_count(kvlist, mana_init_args[0]);
456517ed6e2SLong Li 	if (arg_count > MAX_NUM_ADDRESS) {
457517ed6e2SLong Li 		ret = -EINVAL;
458517ed6e2SLong Li 		goto free_kvlist;
459517ed6e2SLong Li 	}
460517ed6e2SLong Li 	ret = rte_kvargs_process(kvlist, mana_init_args[0],
461517ed6e2SLong Li 				 mana_arg_parse_callback, conf);
462517ed6e2SLong Li 	if (ret) {
463517ed6e2SLong Li 		DRV_LOG(ERR, "error parsing args");
464517ed6e2SLong Li 		goto free_kvlist;
465517ed6e2SLong Li 	}
466517ed6e2SLong Li 
467517ed6e2SLong Li free_kvlist:
468517ed6e2SLong Li 	rte_kvargs_free(kvlist);
469517ed6e2SLong Li 	return ret;
470517ed6e2SLong Li }
471517ed6e2SLong Li 
472517ed6e2SLong Li static int
473517ed6e2SLong Li get_port_mac(struct ibv_device *device, unsigned int port,
474517ed6e2SLong Li 	     struct rte_ether_addr *addr)
475517ed6e2SLong Li {
476517ed6e2SLong Li 	FILE *file;
477517ed6e2SLong Li 	int ret = 0;
478517ed6e2SLong Li 	DIR *dir;
479517ed6e2SLong Li 	struct dirent *dent;
480517ed6e2SLong Li 	unsigned int dev_port;
481517ed6e2SLong Li 	char mac[20];
482517ed6e2SLong Li 
483517ed6e2SLong Li 	MANA_MKSTR(path, "%s/device/net", device->ibdev_path);
484517ed6e2SLong Li 
485517ed6e2SLong Li 	dir = opendir(path);
486517ed6e2SLong Li 	if (!dir)
487517ed6e2SLong Li 		return -ENOENT;
488517ed6e2SLong Li 
489517ed6e2SLong Li 	while ((dent = readdir(dir))) {
490517ed6e2SLong Li 		char *name = dent->d_name;
491517ed6e2SLong Li 
492517ed6e2SLong Li 		MANA_MKSTR(port_path, "%s/%s/dev_port", path, name);
493517ed6e2SLong Li 
494517ed6e2SLong Li 		/* Ignore . and .. */
495517ed6e2SLong Li 		if ((name[0] == '.') &&
496517ed6e2SLong Li 		    ((name[1] == '\0') ||
497517ed6e2SLong Li 		     ((name[1] == '.') && (name[2] == '\0'))))
498517ed6e2SLong Li 			continue;
499517ed6e2SLong Li 
500517ed6e2SLong Li 		file = fopen(port_path, "r");
501517ed6e2SLong Li 		if (!file)
502517ed6e2SLong Li 			continue;
503517ed6e2SLong Li 
504517ed6e2SLong Li 		ret = fscanf(file, "%u", &dev_port);
505517ed6e2SLong Li 		fclose(file);
506517ed6e2SLong Li 
507517ed6e2SLong Li 		if (ret != 1)
508517ed6e2SLong Li 			continue;
509517ed6e2SLong Li 
510517ed6e2SLong Li 		/* Ethernet ports start at 0, IB port start at 1 */
511517ed6e2SLong Li 		if (dev_port == port - 1) {
512517ed6e2SLong Li 			MANA_MKSTR(address_path, "%s/%s/address", path, name);
513517ed6e2SLong Li 
514517ed6e2SLong Li 			file = fopen(address_path, "r");
515517ed6e2SLong Li 			if (!file)
516517ed6e2SLong Li 				continue;
517517ed6e2SLong Li 
518517ed6e2SLong Li 			ret = fscanf(file, "%s", mac);
519517ed6e2SLong Li 			fclose(file);
520517ed6e2SLong Li 
521517ed6e2SLong Li 			if (ret < 0)
522517ed6e2SLong Li 				break;
523517ed6e2SLong Li 
524517ed6e2SLong Li 			ret = rte_ether_unformat_addr(mac, addr);
525517ed6e2SLong Li 			if (ret)
526517ed6e2SLong Li 				DRV_LOG(ERR, "unrecognized mac addr %s", mac);
527517ed6e2SLong Li 			break;
528517ed6e2SLong Li 		}
529517ed6e2SLong Li 	}
530517ed6e2SLong Li 
531517ed6e2SLong Li 	closedir(dir);
532517ed6e2SLong Li 	return ret;
533517ed6e2SLong Li }
534517ed6e2SLong Li 
535517ed6e2SLong Li static int
536517ed6e2SLong Li mana_ibv_device_to_pci_addr(const struct ibv_device *device,
537517ed6e2SLong Li 			    struct rte_pci_addr *pci_addr)
538517ed6e2SLong Li {
539517ed6e2SLong Li 	FILE *file;
540517ed6e2SLong Li 	char *line = NULL;
541517ed6e2SLong Li 	size_t len = 0;
542517ed6e2SLong Li 
543517ed6e2SLong Li 	MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path);
544517ed6e2SLong Li 
545517ed6e2SLong Li 	file = fopen(path, "r");
546517ed6e2SLong Li 	if (!file)
547517ed6e2SLong Li 		return -errno;
548517ed6e2SLong Li 
549517ed6e2SLong Li 	while (getline(&line, &len, file) != -1) {
550517ed6e2SLong Li 		/* Extract information. */
551517ed6e2SLong Li 		if (sscanf(line,
552517ed6e2SLong Li 			   "PCI_SLOT_NAME="
553517ed6e2SLong Li 			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
554517ed6e2SLong Li 			   &pci_addr->domain,
555517ed6e2SLong Li 			   &pci_addr->bus,
556517ed6e2SLong Li 			   &pci_addr->devid,
557517ed6e2SLong Li 			   &pci_addr->function) == 4) {
558517ed6e2SLong Li 			break;
559517ed6e2SLong Li 		}
560517ed6e2SLong Li 	}
561517ed6e2SLong Li 
562517ed6e2SLong Li 	free(line);
563517ed6e2SLong Li 	fclose(file);
564517ed6e2SLong Li 	return 0;
565517ed6e2SLong Li }
566517ed6e2SLong Li 
567bd15f237SLong Li /*
568bd15f237SLong Li  * Interrupt handler from IB layer to notify this device is being removed.
569bd15f237SLong Li  */
570bd15f237SLong Li static void
571bd15f237SLong Li mana_intr_handler(void *arg)
572bd15f237SLong Li {
573bd15f237SLong Li 	struct mana_priv *priv = arg;
574bd15f237SLong Li 	struct ibv_context *ctx = priv->ib_ctx;
575bd15f237SLong Li 	struct ibv_async_event event;
576bd15f237SLong Li 
577bd15f237SLong Li 	/* Read and ack all messages from IB device */
578bd15f237SLong Li 	while (true) {
579bd15f237SLong Li 		if (ibv_get_async_event(ctx, &event))
580bd15f237SLong Li 			break;
581bd15f237SLong Li 
582bd15f237SLong Li 		if (event.event_type == IBV_EVENT_DEVICE_FATAL) {
583bd15f237SLong Li 			struct rte_eth_dev *dev;
584bd15f237SLong Li 
585bd15f237SLong Li 			dev = &rte_eth_devices[priv->port_id];
586bd15f237SLong Li 			if (dev->data->dev_conf.intr_conf.rmv)
587bd15f237SLong Li 				rte_eth_dev_callback_process(dev,
588bd15f237SLong Li 					RTE_ETH_EVENT_INTR_RMV, NULL);
589bd15f237SLong Li 		}
590bd15f237SLong Li 
591bd15f237SLong Li 		ibv_ack_async_event(&event);
592bd15f237SLong Li 	}
593bd15f237SLong Li }
594bd15f237SLong Li 
595bd15f237SLong Li static int
596bd15f237SLong Li mana_intr_uninstall(struct mana_priv *priv)
597bd15f237SLong Li {
598bd15f237SLong Li 	int ret;
599bd15f237SLong Li 
600bd15f237SLong Li 	ret = rte_intr_callback_unregister(priv->intr_handle,
601bd15f237SLong Li 					   mana_intr_handler, priv);
602bd15f237SLong Li 	if (ret <= 0) {
603bd15f237SLong Li 		DRV_LOG(ERR, "Failed to unregister intr callback ret %d", ret);
604bd15f237SLong Li 		return ret;
605bd15f237SLong Li 	}
606bd15f237SLong Li 
607bd15f237SLong Li 	rte_intr_instance_free(priv->intr_handle);
608bd15f237SLong Li 
609bd15f237SLong Li 	return 0;
610bd15f237SLong Li }
611bd15f237SLong Li 
612bd15f237SLong Li static int
613bd15f237SLong Li mana_intr_install(struct mana_priv *priv)
614bd15f237SLong Li {
615bd15f237SLong Li 	int ret, flags;
616bd15f237SLong Li 	struct ibv_context *ctx = priv->ib_ctx;
617bd15f237SLong Li 
618bd15f237SLong Li 	priv->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
619bd15f237SLong Li 	if (!priv->intr_handle) {
620bd15f237SLong Li 		DRV_LOG(ERR, "Failed to allocate intr_handle");
621bd15f237SLong Li 		rte_errno = ENOMEM;
622bd15f237SLong Li 		return -ENOMEM;
623bd15f237SLong Li 	}
624bd15f237SLong Li 
625bd15f237SLong Li 	rte_intr_fd_set(priv->intr_handle, -1);
626bd15f237SLong Li 
627bd15f237SLong Li 	flags = fcntl(ctx->async_fd, F_GETFL);
628bd15f237SLong Li 	ret = fcntl(ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
629bd15f237SLong Li 	if (ret) {
630bd15f237SLong Li 		DRV_LOG(ERR, "Failed to change async_fd to NONBLOCK");
631bd15f237SLong Li 		goto free_intr;
632bd15f237SLong Li 	}
633bd15f237SLong Li 
634bd15f237SLong Li 	rte_intr_fd_set(priv->intr_handle, ctx->async_fd);
635bd15f237SLong Li 	rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT);
636bd15f237SLong Li 
637bd15f237SLong Li 	ret = rte_intr_callback_register(priv->intr_handle,
638bd15f237SLong Li 					 mana_intr_handler, priv);
639bd15f237SLong Li 	if (ret) {
640bd15f237SLong Li 		DRV_LOG(ERR, "Failed to register intr callback");
641bd15f237SLong Li 		rte_intr_fd_set(priv->intr_handle, -1);
642bd15f237SLong Li 		goto restore_fd;
643bd15f237SLong Li 	}
644bd15f237SLong Li 
645bd15f237SLong Li 	return 0;
646bd15f237SLong Li 
647bd15f237SLong Li restore_fd:
648bd15f237SLong Li 	fcntl(ctx->async_fd, F_SETFL, flags);
649bd15f237SLong Li 
650bd15f237SLong Li free_intr:
651bd15f237SLong Li 	rte_intr_instance_free(priv->intr_handle);
652bd15f237SLong Li 	priv->intr_handle = NULL;
653bd15f237SLong Li 
654bd15f237SLong Li 	return ret;
655bd15f237SLong Li }
656bd15f237SLong Li 
657517ed6e2SLong Li static int
658517ed6e2SLong Li mana_proc_priv_init(struct rte_eth_dev *dev)
659517ed6e2SLong Li {
660517ed6e2SLong Li 	struct mana_process_priv *priv;
661517ed6e2SLong Li 
662517ed6e2SLong Li 	priv = rte_zmalloc_socket("mana_proc_priv",
663517ed6e2SLong Li 				  sizeof(struct mana_process_priv),
664517ed6e2SLong Li 				  RTE_CACHE_LINE_SIZE,
665517ed6e2SLong Li 				  dev->device->numa_node);
666517ed6e2SLong Li 	if (!priv)
667517ed6e2SLong Li 		return -ENOMEM;
668517ed6e2SLong Li 
669517ed6e2SLong Li 	dev->process_private = priv;
670517ed6e2SLong Li 	return 0;
671517ed6e2SLong Li }
672517ed6e2SLong Li 
673517ed6e2SLong Li /*
674517ed6e2SLong Li  * Map the doorbell page for the secondary process through IB device handle.
675517ed6e2SLong Li  */
676517ed6e2SLong Li static int
677517ed6e2SLong Li mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd)
678517ed6e2SLong Li {
679517ed6e2SLong Li 	struct mana_process_priv *priv = eth_dev->process_private;
680517ed6e2SLong Li 
681517ed6e2SLong Li 	void *addr;
682517ed6e2SLong Li 
683517ed6e2SLong Li 	addr = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0);
684517ed6e2SLong Li 	if (addr == MAP_FAILED) {
685517ed6e2SLong Li 		DRV_LOG(ERR, "Failed to map secondary doorbell port %u",
686517ed6e2SLong Li 			eth_dev->data->port_id);
687517ed6e2SLong Li 		return -ENOMEM;
688517ed6e2SLong Li 	}
689517ed6e2SLong Li 
690517ed6e2SLong Li 	DRV_LOG(INFO, "Secondary doorbell mapped to %p", addr);
691517ed6e2SLong Li 
692517ed6e2SLong Li 	priv->db_page = addr;
693517ed6e2SLong Li 
694517ed6e2SLong Li 	return 0;
695517ed6e2SLong Li }
696517ed6e2SLong Li 
697517ed6e2SLong Li /* Initialize shared data for the driver (all devices) */
698517ed6e2SLong Li static int
699517ed6e2SLong Li mana_init_shared_data(void)
700517ed6e2SLong Li {
701517ed6e2SLong Li 	int ret =  0;
702517ed6e2SLong Li 	const struct rte_memzone *secondary_mz;
703517ed6e2SLong Li 
704517ed6e2SLong Li 	rte_spinlock_lock(&mana_shared_data_lock);
705517ed6e2SLong Li 
706517ed6e2SLong Li 	/* Skip if shared data is already initialized */
707517ed6e2SLong Li 	if (mana_shared_data)
708517ed6e2SLong Li 		goto exit;
709517ed6e2SLong Li 
710517ed6e2SLong Li 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
711517ed6e2SLong Li 		mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA,
712517ed6e2SLong Li 						     sizeof(*mana_shared_data),
713517ed6e2SLong Li 						     SOCKET_ID_ANY, 0);
714517ed6e2SLong Li 		if (!mana_shared_mz) {
715517ed6e2SLong Li 			DRV_LOG(ERR, "Cannot allocate mana shared data");
716517ed6e2SLong Li 			ret = -rte_errno;
717517ed6e2SLong Li 			goto exit;
718517ed6e2SLong Li 		}
719517ed6e2SLong Li 
720517ed6e2SLong Li 		mana_shared_data = mana_shared_mz->addr;
721517ed6e2SLong Li 		memset(mana_shared_data, 0, sizeof(*mana_shared_data));
722517ed6e2SLong Li 		rte_spinlock_init(&mana_shared_data->lock);
723517ed6e2SLong Li 	} else {
724517ed6e2SLong Li 		secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA);
725517ed6e2SLong Li 		if (!secondary_mz) {
726517ed6e2SLong Li 			DRV_LOG(ERR, "Cannot attach mana shared data");
727517ed6e2SLong Li 			ret = -rte_errno;
728517ed6e2SLong Li 			goto exit;
729517ed6e2SLong Li 		}
730517ed6e2SLong Li 
731517ed6e2SLong Li 		mana_shared_data = secondary_mz->addr;
732517ed6e2SLong Li 		memset(&mana_local_data, 0, sizeof(mana_local_data));
733517ed6e2SLong Li 	}
734517ed6e2SLong Li 
735517ed6e2SLong Li exit:
736517ed6e2SLong Li 	rte_spinlock_unlock(&mana_shared_data_lock);
737517ed6e2SLong Li 
738517ed6e2SLong Li 	return ret;
739517ed6e2SLong Li }
740517ed6e2SLong Li 
741517ed6e2SLong Li /*
742517ed6e2SLong Li  * Init the data structures for use in primary and secondary processes.
743517ed6e2SLong Li  */
744517ed6e2SLong Li static int
745517ed6e2SLong Li mana_init_once(void)
746517ed6e2SLong Li {
747517ed6e2SLong Li 	int ret;
748517ed6e2SLong Li 
749517ed6e2SLong Li 	ret = mana_init_shared_data();
750517ed6e2SLong Li 	if (ret)
751517ed6e2SLong Li 		return ret;
752517ed6e2SLong Li 
753517ed6e2SLong Li 	rte_spinlock_lock(&mana_shared_data->lock);
754517ed6e2SLong Li 
755517ed6e2SLong Li 	switch (rte_eal_process_type()) {
756517ed6e2SLong Li 	case RTE_PROC_PRIMARY:
757517ed6e2SLong Li 		if (mana_shared_data->init_done)
758517ed6e2SLong Li 			break;
759517ed6e2SLong Li 
760517ed6e2SLong Li 		ret = mana_mp_init_primary();
761517ed6e2SLong Li 		if (ret)
762517ed6e2SLong Li 			break;
763517ed6e2SLong Li 		DRV_LOG(ERR, "MP INIT PRIMARY");
764517ed6e2SLong Li 
765517ed6e2SLong Li 		mana_shared_data->init_done = 1;
766517ed6e2SLong Li 		break;
767517ed6e2SLong Li 
768517ed6e2SLong Li 	case RTE_PROC_SECONDARY:
769517ed6e2SLong Li 
770517ed6e2SLong Li 		if (mana_local_data.init_done)
771517ed6e2SLong Li 			break;
772517ed6e2SLong Li 
773517ed6e2SLong Li 		ret = mana_mp_init_secondary();
774517ed6e2SLong Li 		if (ret)
775517ed6e2SLong Li 			break;
776517ed6e2SLong Li 
777517ed6e2SLong Li 		DRV_LOG(ERR, "MP INIT SECONDARY");
778517ed6e2SLong Li 
779517ed6e2SLong Li 		mana_local_data.init_done = 1;
780517ed6e2SLong Li 		break;
781517ed6e2SLong Li 
782517ed6e2SLong Li 	default:
783517ed6e2SLong Li 		/* Impossible, internal error */
784517ed6e2SLong Li 		ret = -EPROTO;
785517ed6e2SLong Li 		break;
786517ed6e2SLong Li 	}
787517ed6e2SLong Li 
788517ed6e2SLong Li 	rte_spinlock_unlock(&mana_shared_data->lock);
789517ed6e2SLong Li 
790517ed6e2SLong Li 	return ret;
791517ed6e2SLong Li }
792517ed6e2SLong Li 
793517ed6e2SLong Li /*
794517ed6e2SLong Li  * Probe an IB port
795517ed6e2SLong Li  * Return value:
796517ed6e2SLong Li  * positive value: successfully probed port
797517ed6e2SLong Li  * 0: port not matching specified MAC address
798517ed6e2SLong Li  * negative value: error code
799517ed6e2SLong Li  */
800517ed6e2SLong Li static int
801517ed6e2SLong Li mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr,
802517ed6e2SLong Li 		uint8_t port, struct rte_pci_device *pci_dev, struct rte_ether_addr *addr)
803517ed6e2SLong Li {
804517ed6e2SLong Li 	struct mana_priv *priv = NULL;
805517ed6e2SLong Li 	struct rte_eth_dev *eth_dev = NULL;
806517ed6e2SLong Li 	struct ibv_parent_domain_init_attr attr = {0};
807517ed6e2SLong Li 	char address[64];
808517ed6e2SLong Li 	char name[RTE_ETH_NAME_MAX_LEN];
809517ed6e2SLong Li 	int ret;
810517ed6e2SLong Li 	struct ibv_context *ctx = NULL;
811517ed6e2SLong Li 
812517ed6e2SLong Li 	rte_ether_format_addr(address, sizeof(address), addr);
813517ed6e2SLong Li 	DRV_LOG(INFO, "device located port %u address %s", port, address);
814517ed6e2SLong Li 
815517ed6e2SLong Li 	priv = rte_zmalloc_socket(NULL, sizeof(*priv), RTE_CACHE_LINE_SIZE,
816517ed6e2SLong Li 				  SOCKET_ID_ANY);
817517ed6e2SLong Li 	if (!priv)
818517ed6e2SLong Li 		return -ENOMEM;
819517ed6e2SLong Li 
820517ed6e2SLong Li 	snprintf(name, sizeof(name), "%s_port%d", pci_dev->device.name, port);
821517ed6e2SLong Li 
822517ed6e2SLong Li 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
823517ed6e2SLong Li 		int fd;
824517ed6e2SLong Li 
825517ed6e2SLong Li 		eth_dev = rte_eth_dev_attach_secondary(name);
826517ed6e2SLong Li 		if (!eth_dev) {
827517ed6e2SLong Li 			DRV_LOG(ERR, "Can't attach to dev %s", name);
828517ed6e2SLong Li 			ret =  -ENOMEM;
829517ed6e2SLong Li 			goto failed;
830517ed6e2SLong Li 		}
831517ed6e2SLong Li 
832517ed6e2SLong Li 		eth_dev->device = &pci_dev->device;
833517ed6e2SLong Li 		eth_dev->dev_ops = &mana_dev_secondary_ops;
834517ed6e2SLong Li 		ret = mana_proc_priv_init(eth_dev);
835517ed6e2SLong Li 		if (ret)
836517ed6e2SLong Li 			goto failed;
837517ed6e2SLong Li 		priv->process_priv = eth_dev->process_private;
838517ed6e2SLong Li 
839517ed6e2SLong Li 		/* Get the IB FD from the primary process */
840517ed6e2SLong Li 		fd = mana_mp_req_verbs_cmd_fd(eth_dev);
841517ed6e2SLong Li 		if (fd < 0) {
842517ed6e2SLong Li 			DRV_LOG(ERR, "Failed to get FD %d", fd);
843517ed6e2SLong Li 			ret = -ENODEV;
844517ed6e2SLong Li 			goto failed;
845517ed6e2SLong Li 		}
846517ed6e2SLong Li 
847517ed6e2SLong Li 		ret = mana_map_doorbell_secondary(eth_dev, fd);
848517ed6e2SLong Li 		if (ret) {
849517ed6e2SLong Li 			DRV_LOG(ERR, "Failed secondary map %d", fd);
850517ed6e2SLong Li 			goto failed;
851517ed6e2SLong Li 		}
852517ed6e2SLong Li 
853517ed6e2SLong Li 		/* fd is no not used after mapping doorbell */
854517ed6e2SLong Li 		close(fd);
855517ed6e2SLong Li 
856517ed6e2SLong Li 		eth_dev->tx_pkt_burst = mana_tx_burst_removed;
857517ed6e2SLong Li 		eth_dev->rx_pkt_burst = mana_rx_burst_removed;
858517ed6e2SLong Li 
859517ed6e2SLong Li 		rte_spinlock_lock(&mana_shared_data->lock);
860517ed6e2SLong Li 		mana_shared_data->secondary_cnt++;
861517ed6e2SLong Li 		mana_local_data.secondary_cnt++;
862517ed6e2SLong Li 		rte_spinlock_unlock(&mana_shared_data->lock);
863517ed6e2SLong Li 
864517ed6e2SLong Li 		rte_eth_copy_pci_info(eth_dev, pci_dev);
865517ed6e2SLong Li 		rte_eth_dev_probing_finish(eth_dev);
866517ed6e2SLong Li 
867517ed6e2SLong Li 		return 0;
868517ed6e2SLong Li 	}
869517ed6e2SLong Li 
870517ed6e2SLong Li 	ctx = ibv_open_device(ibdev);
871517ed6e2SLong Li 	if (!ctx) {
872517ed6e2SLong Li 		DRV_LOG(ERR, "Failed to open IB device %s", ibdev->name);
873517ed6e2SLong Li 		ret = -ENODEV;
874517ed6e2SLong Li 		goto failed;
875517ed6e2SLong Li 	}
876517ed6e2SLong Li 
877517ed6e2SLong Li 	eth_dev = rte_eth_dev_allocate(name);
878517ed6e2SLong Li 	if (!eth_dev) {
879517ed6e2SLong Li 		ret = -ENOMEM;
880517ed6e2SLong Li 		goto failed;
881517ed6e2SLong Li 	}
882517ed6e2SLong Li 
883517ed6e2SLong Li 	eth_dev->data->mac_addrs =
884517ed6e2SLong Li 		rte_calloc("mana_mac", 1,
885517ed6e2SLong Li 			   sizeof(struct rte_ether_addr), 0);
886517ed6e2SLong Li 	if (!eth_dev->data->mac_addrs) {
887517ed6e2SLong Li 		ret = -ENOMEM;
888517ed6e2SLong Li 		goto failed;
889517ed6e2SLong Li 	}
890517ed6e2SLong Li 
891517ed6e2SLong Li 	rte_ether_addr_copy(addr, eth_dev->data->mac_addrs);
892517ed6e2SLong Li 
893517ed6e2SLong Li 	priv->ib_pd = ibv_alloc_pd(ctx);
894517ed6e2SLong Li 	if (!priv->ib_pd) {
895517ed6e2SLong Li 		DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port);
896517ed6e2SLong Li 		ret = -ENOMEM;
897517ed6e2SLong Li 		goto failed;
898517ed6e2SLong Li 	}
899517ed6e2SLong Li 
900517ed6e2SLong Li 	/* Create a parent domain with the port number */
901517ed6e2SLong Li 	attr.pd = priv->ib_pd;
902517ed6e2SLong Li 	attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT;
903517ed6e2SLong Li 	attr.pd_context = (void *)(uint64_t)port;
904517ed6e2SLong Li 	priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr);
905517ed6e2SLong Li 	if (!priv->ib_parent_pd) {
906517ed6e2SLong Li 		DRV_LOG(ERR, "ibv_alloc_parent_domain failed port %d", port);
907517ed6e2SLong Li 		ret = -ENOMEM;
908517ed6e2SLong Li 		goto failed;
909517ed6e2SLong Li 	}
910517ed6e2SLong Li 
911517ed6e2SLong Li 	priv->ib_ctx = ctx;
912517ed6e2SLong Li 	priv->port_id = eth_dev->data->port_id;
913517ed6e2SLong Li 	priv->dev_port = port;
914517ed6e2SLong Li 	eth_dev->data->dev_private = priv;
915517ed6e2SLong Li 	priv->dev_data = eth_dev->data;
916517ed6e2SLong Li 
917517ed6e2SLong Li 	priv->max_rx_queues = dev_attr->orig_attr.max_qp;
918517ed6e2SLong Li 	priv->max_tx_queues = dev_attr->orig_attr.max_qp;
919517ed6e2SLong Li 
920517ed6e2SLong Li 	priv->max_rx_desc =
921517ed6e2SLong Li 		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
922517ed6e2SLong Li 			dev_attr->orig_attr.max_cqe);
923517ed6e2SLong Li 	priv->max_tx_desc =
924517ed6e2SLong Li 		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
925517ed6e2SLong Li 			dev_attr->orig_attr.max_cqe);
926517ed6e2SLong Li 
927517ed6e2SLong Li 	priv->max_send_sge = dev_attr->orig_attr.max_sge;
928517ed6e2SLong Li 	priv->max_recv_sge = dev_attr->orig_attr.max_sge;
929517ed6e2SLong Li 
930517ed6e2SLong Li 	priv->max_mr = dev_attr->orig_attr.max_mr;
931517ed6e2SLong Li 	priv->max_mr_size = dev_attr->orig_attr.max_mr_size;
932517ed6e2SLong Li 
933517ed6e2SLong Li 	DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d",
934517ed6e2SLong Li 		name, priv->max_rx_queues, priv->max_rx_desc,
935517ed6e2SLong Li 		priv->max_send_sge);
936517ed6e2SLong Li 
937517ed6e2SLong Li 	rte_eth_copy_pci_info(eth_dev, pci_dev);
938517ed6e2SLong Li 
939bd15f237SLong Li 	/* Create async interrupt handler */
940bd15f237SLong Li 	ret = mana_intr_install(priv);
941bd15f237SLong Li 	if (ret) {
942bd15f237SLong Li 		DRV_LOG(ERR, "Failed to install intr handler");
943bd15f237SLong Li 		goto failed;
944bd15f237SLong Li 	}
945bd15f237SLong Li 
946517ed6e2SLong Li 	rte_spinlock_lock(&mana_shared_data->lock);
947517ed6e2SLong Li 	mana_shared_data->primary_cnt++;
948517ed6e2SLong Li 	rte_spinlock_unlock(&mana_shared_data->lock);
949517ed6e2SLong Li 
950517ed6e2SLong Li 	eth_dev->device = &pci_dev->device;
951517ed6e2SLong Li 
952517ed6e2SLong Li 	DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id);
953517ed6e2SLong Li 
954517ed6e2SLong Li 	eth_dev->rx_pkt_burst = mana_rx_burst_removed;
955517ed6e2SLong Li 	eth_dev->tx_pkt_burst = mana_tx_burst_removed;
956517ed6e2SLong Li 	eth_dev->dev_ops = &mana_dev_ops;
957517ed6e2SLong Li 
958517ed6e2SLong Li 	rte_eth_dev_probing_finish(eth_dev);
959517ed6e2SLong Li 
960517ed6e2SLong Li 	return 0;
961517ed6e2SLong Li 
962517ed6e2SLong Li failed:
963517ed6e2SLong Li 	/* Free the resource for the port failed */
964517ed6e2SLong Li 	if (priv) {
965517ed6e2SLong Li 		if (priv->ib_parent_pd)
966517ed6e2SLong Li 			ibv_dealloc_pd(priv->ib_parent_pd);
967517ed6e2SLong Li 
968517ed6e2SLong Li 		if (priv->ib_pd)
969517ed6e2SLong Li 			ibv_dealloc_pd(priv->ib_pd);
970517ed6e2SLong Li 	}
971517ed6e2SLong Li 
972517ed6e2SLong Li 	if (eth_dev)
973517ed6e2SLong Li 		rte_eth_dev_release_port(eth_dev);
974517ed6e2SLong Li 
975517ed6e2SLong Li 	rte_free(priv);
976517ed6e2SLong Li 
977517ed6e2SLong Li 	if (ctx)
978517ed6e2SLong Li 		ibv_close_device(ctx);
979517ed6e2SLong Li 
980517ed6e2SLong Li 	return ret;
981517ed6e2SLong Li }
982517ed6e2SLong Li 
983517ed6e2SLong Li /*
984517ed6e2SLong Li  * Goes through the IB device list to look for the IB port matching the
985517ed6e2SLong Li  * mac_addr. If found, create a rte_eth_dev for it.
986517ed6e2SLong Li  */
987517ed6e2SLong Li static int
988517ed6e2SLong Li mana_pci_probe_mac(struct rte_pci_device *pci_dev,
989517ed6e2SLong Li 		   struct rte_ether_addr *mac_addr)
990517ed6e2SLong Li {
991517ed6e2SLong Li 	struct ibv_device **ibv_list;
992517ed6e2SLong Li 	int ibv_idx;
993517ed6e2SLong Li 	struct ibv_context *ctx;
994517ed6e2SLong Li 	int num_devices;
995517ed6e2SLong Li 	int ret = 0;
996517ed6e2SLong Li 	uint8_t port;
997517ed6e2SLong Li 
998517ed6e2SLong Li 	ibv_list = ibv_get_device_list(&num_devices);
999517ed6e2SLong Li 	for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
1000517ed6e2SLong Li 		struct ibv_device *ibdev = ibv_list[ibv_idx];
1001517ed6e2SLong Li 		struct rte_pci_addr pci_addr;
1002517ed6e2SLong Li 		struct ibv_device_attr_ex dev_attr;
1003517ed6e2SLong Li 
1004517ed6e2SLong Li 		DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
1005517ed6e2SLong Li 			ibdev->name, ibdev->dev_name, ibdev->ibdev_path);
1006517ed6e2SLong Li 
1007517ed6e2SLong Li 		if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
1008517ed6e2SLong Li 			continue;
1009517ed6e2SLong Li 
1010517ed6e2SLong Li 		/* Ignore if this IB device is not this PCI device */
1011517ed6e2SLong Li 		if (pci_dev->addr.domain != pci_addr.domain ||
1012517ed6e2SLong Li 		    pci_dev->addr.bus != pci_addr.bus ||
1013517ed6e2SLong Li 		    pci_dev->addr.devid != pci_addr.devid ||
1014517ed6e2SLong Li 		    pci_dev->addr.function != pci_addr.function)
1015517ed6e2SLong Li 			continue;
1016517ed6e2SLong Li 
1017517ed6e2SLong Li 		ctx = ibv_open_device(ibdev);
1018517ed6e2SLong Li 		if (!ctx) {
1019517ed6e2SLong Li 			DRV_LOG(ERR, "Failed to open IB device %s",
1020517ed6e2SLong Li 				ibdev->name);
1021517ed6e2SLong Li 			continue;
1022517ed6e2SLong Li 		}
1023517ed6e2SLong Li 		ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
1024517ed6e2SLong Li 		ibv_close_device(ctx);
1025517ed6e2SLong Li 
1026517ed6e2SLong Li 		for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
1027517ed6e2SLong Li 		     port++) {
1028517ed6e2SLong Li 			struct rte_ether_addr addr;
1029517ed6e2SLong Li 			ret = get_port_mac(ibdev, port, &addr);
1030517ed6e2SLong Li 			if (ret)
1031517ed6e2SLong Li 				continue;
1032517ed6e2SLong Li 
1033517ed6e2SLong Li 			if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr))
1034517ed6e2SLong Li 				continue;
1035517ed6e2SLong Li 
1036517ed6e2SLong Li 			ret = mana_probe_port(ibdev, &dev_attr, port, pci_dev, &addr);
1037517ed6e2SLong Li 			if (ret)
1038517ed6e2SLong Li 				DRV_LOG(ERR, "Probe on IB port %u failed %d", port, ret);
1039517ed6e2SLong Li 			else
1040517ed6e2SLong Li 				DRV_LOG(INFO, "Successfully probed on IB port %u", port);
1041517ed6e2SLong Li 		}
1042517ed6e2SLong Li 	}
1043517ed6e2SLong Li 
1044517ed6e2SLong Li 	ibv_free_device_list(ibv_list);
1045517ed6e2SLong Li 	return ret;
1046517ed6e2SLong Li }
1047517ed6e2SLong Li 
1048517ed6e2SLong Li /*
1049517ed6e2SLong Li  * Main callback function from PCI bus to probe a device.
1050517ed6e2SLong Li  */
1051517ed6e2SLong Li static int
1052517ed6e2SLong Li mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1053517ed6e2SLong Li 	       struct rte_pci_device *pci_dev)
1054517ed6e2SLong Li {
1055517ed6e2SLong Li 	struct rte_devargs *args = pci_dev->device.devargs;
1056517ed6e2SLong Li 	struct mana_conf conf = {0};
1057517ed6e2SLong Li 	unsigned int i;
1058517ed6e2SLong Li 	int ret;
1059517ed6e2SLong Li 
1060517ed6e2SLong Li 	if (args && args->drv_str) {
1061517ed6e2SLong Li 		ret = mana_parse_args(args, &conf);
1062517ed6e2SLong Li 		if (ret) {
1063517ed6e2SLong Li 			DRV_LOG(ERR, "Failed to parse parameters args = %s",
1064517ed6e2SLong Li 				args->drv_str);
1065517ed6e2SLong Li 			return ret;
1066517ed6e2SLong Li 		}
1067517ed6e2SLong Li 	}
1068517ed6e2SLong Li 
1069517ed6e2SLong Li 	ret = mana_init_once();
1070517ed6e2SLong Li 	if (ret) {
1071517ed6e2SLong Li 		DRV_LOG(ERR, "Failed to init PMD global data %d", ret);
1072517ed6e2SLong Li 		return ret;
1073517ed6e2SLong Li 	}
1074517ed6e2SLong Li 
1075517ed6e2SLong Li 	/* If there are no driver parameters, probe on all ports */
1076517ed6e2SLong Li 	if (!conf.index)
1077517ed6e2SLong Li 		return mana_pci_probe_mac(pci_dev, NULL);
1078517ed6e2SLong Li 
1079517ed6e2SLong Li 	for (i = 0; i < conf.index; i++) {
1080517ed6e2SLong Li 		ret = mana_pci_probe_mac(pci_dev, &conf.mac_array[i]);
1081517ed6e2SLong Li 		if (ret)
1082517ed6e2SLong Li 			return ret;
1083517ed6e2SLong Li 	}
1084517ed6e2SLong Li 
1085517ed6e2SLong Li 	return 0;
1086517ed6e2SLong Li }
1087517ed6e2SLong Li 
/*
 * Per-port uninit hook passed to rte_eth_dev_pci_generic_remove():
 * closes the device and reports the result of mana_dev_close().
 */
static int
mana_dev_uninit(struct rte_eth_dev *dev)
{
	int rc = mana_dev_close(dev);

	return rc;
}
1093517ed6e2SLong Li 
1094517ed6e2SLong Li /*
1095517ed6e2SLong Li  * Callback from PCI to remove this device.
1096517ed6e2SLong Li  */
1097517ed6e2SLong Li static int
1098517ed6e2SLong Li mana_pci_remove(struct rte_pci_device *pci_dev)
1099517ed6e2SLong Li {
1100517ed6e2SLong Li 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1101517ed6e2SLong Li 		rte_spinlock_lock(&mana_shared_data_lock);
1102517ed6e2SLong Li 
1103517ed6e2SLong Li 		rte_spinlock_lock(&mana_shared_data->lock);
1104517ed6e2SLong Li 
1105517ed6e2SLong Li 		RTE_VERIFY(mana_shared_data->primary_cnt > 0);
1106517ed6e2SLong Li 		mana_shared_data->primary_cnt--;
1107517ed6e2SLong Li 		if (!mana_shared_data->primary_cnt) {
1108517ed6e2SLong Li 			DRV_LOG(DEBUG, "mp uninit primary");
1109517ed6e2SLong Li 			mana_mp_uninit_primary();
1110517ed6e2SLong Li 		}
1111517ed6e2SLong Li 
1112517ed6e2SLong Li 		rte_spinlock_unlock(&mana_shared_data->lock);
1113517ed6e2SLong Li 
1114517ed6e2SLong Li 		/* Also free the shared memory if this is the last */
1115517ed6e2SLong Li 		if (!mana_shared_data->primary_cnt) {
1116517ed6e2SLong Li 			DRV_LOG(DEBUG, "free shared memezone data");
1117517ed6e2SLong Li 			rte_memzone_free(mana_shared_mz);
1118517ed6e2SLong Li 		}
1119517ed6e2SLong Li 
1120517ed6e2SLong Li 		rte_spinlock_unlock(&mana_shared_data_lock);
1121517ed6e2SLong Li 	} else {
1122517ed6e2SLong Li 		rte_spinlock_lock(&mana_shared_data_lock);
1123517ed6e2SLong Li 
1124517ed6e2SLong Li 		rte_spinlock_lock(&mana_shared_data->lock);
1125517ed6e2SLong Li 		RTE_VERIFY(mana_shared_data->secondary_cnt > 0);
1126517ed6e2SLong Li 		mana_shared_data->secondary_cnt--;
1127517ed6e2SLong Li 		rte_spinlock_unlock(&mana_shared_data->lock);
1128517ed6e2SLong Li 
1129517ed6e2SLong Li 		RTE_VERIFY(mana_local_data.secondary_cnt > 0);
1130517ed6e2SLong Li 		mana_local_data.secondary_cnt--;
1131517ed6e2SLong Li 		if (!mana_local_data.secondary_cnt) {
1132517ed6e2SLong Li 			DRV_LOG(DEBUG, "mp uninit secondary");
1133517ed6e2SLong Li 			mana_mp_uninit_secondary();
1134517ed6e2SLong Li 		}
1135517ed6e2SLong Li 
1136517ed6e2SLong Li 		rte_spinlock_unlock(&mana_shared_data_lock);
1137517ed6e2SLong Li 	}
1138517ed6e2SLong Li 
1139517ed6e2SLong Li 	return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit);
1140517ed6e2SLong Li }
1141517ed6e2SLong Li 
1142517ed6e2SLong Li static const struct rte_pci_id mana_pci_id_map[] = {
1143517ed6e2SLong Li 	{
1144517ed6e2SLong Li 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT,
1145517ed6e2SLong Li 			       PCI_DEVICE_ID_MICROSOFT_MANA)
1146517ed6e2SLong Li 	},
1147517ed6e2SLong Li 	{
1148517ed6e2SLong Li 		.vendor_id = 0
1149517ed6e2SLong Li 	},
1150517ed6e2SLong Li };
1151517ed6e2SLong Li 
1152517ed6e2SLong Li static struct rte_pci_driver mana_pci_driver = {
1153517ed6e2SLong Li 	.id_table = mana_pci_id_map,
1154517ed6e2SLong Li 	.probe = mana_pci_probe,
1155517ed6e2SLong Li 	.remove = mana_pci_remove,
1156517ed6e2SLong Li 	.drv_flags = RTE_PCI_DRV_INTR_RMV,
1157517ed6e2SLong Li };
1158517ed6e2SLong Li 
/* Register the PCI driver and its ID table under the name net_mana */
RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver);
RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
/* Kernel module dependency string for this driver */
RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
/* Log types with suffixes "init" and "driver", registered at NOTICE */
RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);
/* Driver parameter string: a MAC address argument */
RTE_PMD_REGISTER_PARAM_STRING(net_mana, ETH_MANA_MAC_ARG "=<mac_addr>");
1165