xref: /dpdk/drivers/net/mlx4/mlx4.c (revision f665790a5dbad7b645ff46f31d65e977324e7bfc)
182092c87SOlivier Matz /* SPDX-License-Identifier: BSD-3-Clause
23b47f9acSAdrien Mazarguil  * Copyright 2012 6WIND S.A.
35feecc57SShahaf Shuler  * Copyright 2012 Mellanox Technologies, Ltd
498a1f377SBruce Richardson  */
598a1f377SBruce Richardson 
637491c7fSAdrien Mazarguil /**
737491c7fSAdrien Mazarguil  * @file
837491c7fSAdrien Mazarguil  * mlx4 driver initialization.
937491c7fSAdrien Mazarguil  */
1037491c7fSAdrien Mazarguil 
1137491c7fSAdrien Mazarguil #include <errno.h>
1237491c7fSAdrien Mazarguil #include <inttypes.h>
1398a1f377SBruce Richardson #include <stddef.h>
1437491c7fSAdrien Mazarguil #include <stdint.h>
1598a1f377SBruce Richardson #include <stdio.h>
1698a1f377SBruce Richardson #include <stdlib.h>
1798a1f377SBruce Richardson #include <string.h>
180203d33aSYongseok Koh #include <sys/mman.h>
1927cea116SAdrien Mazarguil #include <unistd.h>
20a6e7cd81STonghao Zhang #ifdef RTE_IBVERBS_LINK_DLOPEN
21a6e7cd81STonghao Zhang #include <dlfcn.h>
22a6e7cd81STonghao Zhang #endif
2398a1f377SBruce Richardson 
2437491c7fSAdrien Mazarguil /* Verbs headers do not support -pedantic. */
2537491c7fSAdrien Mazarguil #ifdef PEDANTIC
2637491c7fSAdrien Mazarguil #pragma GCC diagnostic ignored "-Wpedantic"
2737491c7fSAdrien Mazarguil #endif
2837491c7fSAdrien Mazarguil #include <infiniband/verbs.h>
2937491c7fSAdrien Mazarguil #ifdef PEDANTIC
3037491c7fSAdrien Mazarguil #pragma GCC diagnostic error "-Wpedantic"
3137491c7fSAdrien Mazarguil #endif
3237491c7fSAdrien Mazarguil 
3337491c7fSAdrien Mazarguil #include <rte_common.h>
341acb7f54SDavid Marchand #include <dev_driver.h>
3537491c7fSAdrien Mazarguil #include <rte_errno.h>
36df96fd0dSBruce Richardson #include <ethdev_driver.h>
37df96fd0dSBruce Richardson #include <ethdev_pci.h>
3837491c7fSAdrien Mazarguil #include <rte_ether.h>
39fee75e14SAdrien Mazarguil #include <rte_flow.h>
409f05a4b8SMoti Haimovsky #include <rte_interrupts.h>
4137491c7fSAdrien Mazarguil #include <rte_kvargs.h>
4237491c7fSAdrien Mazarguil #include <rte_malloc.h>
4337491c7fSAdrien Mazarguil #include <rte_mbuf.h>
4498a1f377SBruce Richardson 
4598a1f377SBruce Richardson #include "mlx4.h"
464eba244bSAdrien Mazarguil #include "mlx4_glue.h"
4746d5736aSVasily Philipov #include "mlx4_flow.h"
489797bfccSYongseok Koh #include "mlx4_mr.h"
493d555728SAdrien Mazarguil #include "mlx4_rxtx.h"
5076df01ffSAdrien Mazarguil #include "mlx4_utils.h"
5198a1f377SBruce Richardson 
52a5e5af7fSThomas Monjalon #ifdef MLX4_GLUE
53a5e5af7fSThomas Monjalon const struct mlx4_glue *mlx4_glue;
54a5e5af7fSThomas Monjalon #endif
55a5e5af7fSThomas Monjalon 
560203d33aSYongseok Koh static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data";
579797bfccSYongseok Koh 
580203d33aSYongseok Koh /* Shared memory between primary and secondary processes. */
590203d33aSYongseok Koh struct mlx4_shared_data *mlx4_shared_data;
600203d33aSYongseok Koh 
610203d33aSYongseok Koh /* Spinlock for mlx4_shared_data allocation. */
620203d33aSYongseok Koh static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
630203d33aSYongseok Koh 
640203d33aSYongseok Koh /* Process local data for secondary processes. */
650203d33aSYongseok Koh static struct mlx4_local_data mlx4_local_data;
669797bfccSYongseok Koh 
/** Configuration structure for device arguments. */
struct mlx4_conf {
	struct {
		uint32_t present; /**< Bit-field for existing ports. */
		uint32_t enabled; /**< Bit-field for user-enabled ports. */
	} ports;
	int mr_ext_memseg_en;
	/**< Whether memseg should be extended for MR creation. */
};
76001a520eSGaetan Rivet 
77001a520eSGaetan Rivet /* Available parameters list. */
78001a520eSGaetan Rivet const char *pmd_mlx4_init_params[] = {
79001a520eSGaetan Rivet 	MLX4_PMD_PORT_KVARG,
80f4efc0ebSYongseok Koh 	MLX4_MR_EXT_MEMSEG_EN_KVARG,
81001a520eSGaetan Rivet 	NULL,
82001a520eSGaetan Rivet };
83001a520eSGaetan Rivet 
8462024eb8SIvan Ilchenko static int mlx4_dev_stop(struct rte_eth_dev *dev);
8584a68486SAdrien Mazarguil 
860203d33aSYongseok Koh /**
870203d33aSYongseok Koh  * Initialize shared data between primary and secondary process.
880203d33aSYongseok Koh  *
890203d33aSYongseok Koh  * A memzone is reserved by primary process and secondary processes attach to
900203d33aSYongseok Koh  * the memzone.
910203d33aSYongseok Koh  *
920203d33aSYongseok Koh  * @return
930203d33aSYongseok Koh  *   0 on success, a negative errno value otherwise and rte_errno is set.
940203d33aSYongseok Koh  */
950203d33aSYongseok Koh static int
960203d33aSYongseok Koh mlx4_init_shared_data(void)
970203d33aSYongseok Koh {
980203d33aSYongseok Koh 	const struct rte_memzone *mz;
990203d33aSYongseok Koh 	int ret = 0;
1000203d33aSYongseok Koh 
1010203d33aSYongseok Koh 	rte_spinlock_lock(&mlx4_shared_data_lock);
1020203d33aSYongseok Koh 	if (mlx4_shared_data == NULL) {
1030203d33aSYongseok Koh 		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1040203d33aSYongseok Koh 			/* Allocate shared memory. */
1050203d33aSYongseok Koh 			mz = rte_memzone_reserve(MZ_MLX4_PMD_SHARED_DATA,
1060203d33aSYongseok Koh 						 sizeof(*mlx4_shared_data),
1070203d33aSYongseok Koh 						 SOCKET_ID_ANY, 0);
1080203d33aSYongseok Koh 			if (mz == NULL) {
109*f665790aSDavid Marchand 				ERROR("Cannot allocate mlx4 shared data");
1100203d33aSYongseok Koh 				ret = -rte_errno;
1110203d33aSYongseok Koh 				goto error;
1120203d33aSYongseok Koh 			}
1130203d33aSYongseok Koh 			mlx4_shared_data = mz->addr;
1140203d33aSYongseok Koh 			memset(mlx4_shared_data, 0, sizeof(*mlx4_shared_data));
1150203d33aSYongseok Koh 			rte_spinlock_init(&mlx4_shared_data->lock);
1160203d33aSYongseok Koh 		} else {
1170203d33aSYongseok Koh 			/* Lookup allocated shared memory. */
1180203d33aSYongseok Koh 			mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
1190203d33aSYongseok Koh 			if (mz == NULL) {
120*f665790aSDavid Marchand 				ERROR("Cannot attach mlx4 shared data");
1210203d33aSYongseok Koh 				ret = -rte_errno;
1220203d33aSYongseok Koh 				goto error;
1230203d33aSYongseok Koh 			}
1240203d33aSYongseok Koh 			mlx4_shared_data = mz->addr;
1250203d33aSYongseok Koh 			memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
1260203d33aSYongseok Koh 		}
1270203d33aSYongseok Koh 	}
1280203d33aSYongseok Koh error:
1290203d33aSYongseok Koh 	rte_spinlock_unlock(&mlx4_shared_data_lock);
1300203d33aSYongseok Koh 	return ret;
1310203d33aSYongseok Koh }
1320203d33aSYongseok Koh 
1338e493764SYongseok Koh #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
1348e493764SYongseok Koh /**
1358e493764SYongseok Koh  * Verbs callback to allocate a memory. This function should allocate the space
1368e493764SYongseok Koh  * according to the size provided residing inside a huge page.
1378e493764SYongseok Koh  * Please note that all allocation must respect the alignment from libmlx4
1388e493764SYongseok Koh  * (i.e. currently sysconf(_SC_PAGESIZE)).
1398e493764SYongseok Koh  *
1408e493764SYongseok Koh  * @param[in] size
1418e493764SYongseok Koh  *   The size in bytes of the memory to allocate.
1428e493764SYongseok Koh  * @param[in] data
1438e493764SYongseok Koh  *   A pointer to the callback data.
1448e493764SYongseok Koh  *
1458e493764SYongseok Koh  * @return
1468e493764SYongseok Koh  *   Allocated buffer, NULL otherwise and rte_errno is set.
1478e493764SYongseok Koh  */
1488e493764SYongseok Koh static void *
1498e493764SYongseok Koh mlx4_alloc_verbs_buf(size_t size, void *data)
1508e493764SYongseok Koh {
1518e493764SYongseok Koh 	struct mlx4_priv *priv = data;
1528e493764SYongseok Koh 	void *ret;
1538e493764SYongseok Koh 	size_t alignment = sysconf(_SC_PAGESIZE);
1548e493764SYongseok Koh 	unsigned int socket = SOCKET_ID_ANY;
1558e493764SYongseok Koh 
1568e493764SYongseok Koh 	if (priv->verbs_alloc_ctx.type == MLX4_VERBS_ALLOC_TYPE_TX_QUEUE) {
1578e493764SYongseok Koh 		const struct txq *txq = priv->verbs_alloc_ctx.obj;
1588e493764SYongseok Koh 
1598e493764SYongseok Koh 		socket = txq->socket;
1608e493764SYongseok Koh 	} else if (priv->verbs_alloc_ctx.type ==
1618e493764SYongseok Koh 		   MLX4_VERBS_ALLOC_TYPE_RX_QUEUE) {
1628e493764SYongseok Koh 		const struct rxq *rxq = priv->verbs_alloc_ctx.obj;
1638e493764SYongseok Koh 
1648e493764SYongseok Koh 		socket = rxq->socket;
1658e493764SYongseok Koh 	}
1668e08df22SAlexander Kozyrev 	MLX4_ASSERT(data != NULL);
1678e493764SYongseok Koh 	ret = rte_malloc_socket(__func__, size, alignment, socket);
1688e493764SYongseok Koh 	if (!ret && size)
1698e493764SYongseok Koh 		rte_errno = ENOMEM;
1708e493764SYongseok Koh 	return ret;
1718e493764SYongseok Koh }
1728e493764SYongseok Koh 
1738e493764SYongseok Koh /**
1748e493764SYongseok Koh  * Verbs callback to free a memory.
1758e493764SYongseok Koh  *
1768e493764SYongseok Koh  * @param[in] ptr
1778e493764SYongseok Koh  *   A pointer to the memory to free.
1788e493764SYongseok Koh  * @param[in] data
1798e493764SYongseok Koh  *   A pointer to the callback data.
1808e493764SYongseok Koh  */
1818e493764SYongseok Koh static void
1828e493764SYongseok Koh mlx4_free_verbs_buf(void *ptr, void *data __rte_unused)
1838e493764SYongseok Koh {
1848e08df22SAlexander Kozyrev 	MLX4_ASSERT(data != NULL);
1858e493764SYongseok Koh 	rte_free(ptr);
1868e493764SYongseok Koh }
1878e493764SYongseok Koh #endif
1888e493764SYongseok Koh 
18998a1f377SBruce Richardson /**
19097d37d2cSYongseok Koh  * Initialize process private data structure.
19197d37d2cSYongseok Koh  *
19297d37d2cSYongseok Koh  * @param dev
19397d37d2cSYongseok Koh  *   Pointer to Ethernet device structure.
19497d37d2cSYongseok Koh  *
19597d37d2cSYongseok Koh  * @return
19697d37d2cSYongseok Koh  *   0 on success, a negative errno value otherwise and rte_errno is set.
19797d37d2cSYongseok Koh  */
198ed879addSSuanming Mou int
19997d37d2cSYongseok Koh mlx4_proc_priv_init(struct rte_eth_dev *dev)
20097d37d2cSYongseok Koh {
20197d37d2cSYongseok Koh 	struct mlx4_proc_priv *ppriv;
20297d37d2cSYongseok Koh 	size_t ppriv_size;
20397d37d2cSYongseok Koh 
2046f14d4d7SYunjian Wang 	mlx4_proc_priv_uninit(dev);
20597d37d2cSYongseok Koh 	/*
20697d37d2cSYongseok Koh 	 * UAR register table follows the process private structure. BlueFlame
20797d37d2cSYongseok Koh 	 * registers for Tx queues are stored in the table.
20897d37d2cSYongseok Koh 	 */
20997d37d2cSYongseok Koh 	ppriv_size = sizeof(struct mlx4_proc_priv) +
21097d37d2cSYongseok Koh 		     dev->data->nb_tx_queues * sizeof(void *);
211ed879addSSuanming Mou 	ppriv = rte_zmalloc_socket("mlx4_proc_priv", ppriv_size,
21297d37d2cSYongseok Koh 				   RTE_CACHE_LINE_SIZE, dev->device->numa_node);
21397d37d2cSYongseok Koh 	if (!ppriv) {
21497d37d2cSYongseok Koh 		rte_errno = ENOMEM;
21597d37d2cSYongseok Koh 		return -rte_errno;
21697d37d2cSYongseok Koh 	}
217ed879addSSuanming Mou 	ppriv->uar_table_sz = dev->data->nb_tx_queues;
21897d37d2cSYongseok Koh 	dev->process_private = ppriv;
21997d37d2cSYongseok Koh 	return 0;
22097d37d2cSYongseok Koh }
22197d37d2cSYongseok Koh 
22297d37d2cSYongseok Koh /**
22397d37d2cSYongseok Koh  * Un-initialize process private data structure.
22497d37d2cSYongseok Koh  *
22597d37d2cSYongseok Koh  * @param dev
22697d37d2cSYongseok Koh  *   Pointer to Ethernet device structure.
22797d37d2cSYongseok Koh  */
228ed879addSSuanming Mou void
22997d37d2cSYongseok Koh mlx4_proc_priv_uninit(struct rte_eth_dev *dev)
23097d37d2cSYongseok Koh {
23197d37d2cSYongseok Koh 	if (!dev->process_private)
23297d37d2cSYongseok Koh 		return;
23397d37d2cSYongseok Koh 	rte_free(dev->process_private);
23497d37d2cSYongseok Koh 	dev->process_private = NULL;
23597d37d2cSYongseok Koh }
23697d37d2cSYongseok Koh 
23797d37d2cSYongseok Koh /**
2383cf06ceaSAdrien Mazarguil  * DPDK callback for Ethernet device configuration.
23998a1f377SBruce Richardson  *
24098a1f377SBruce Richardson  * @param dev
24198a1f377SBruce Richardson  *   Pointer to Ethernet device structure.
24298a1f377SBruce Richardson  *
24398a1f377SBruce Richardson  * @return
2449d14b273SAdrien Mazarguil  *   0 on success, negative errno value otherwise and rte_errno is set.
24598a1f377SBruce Richardson  */
24698a1f377SBruce Richardson static int
2473cf06ceaSAdrien Mazarguil mlx4_dev_configure(struct rte_eth_dev *dev)
24898a1f377SBruce Richardson {
249dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = dev->data->dev_private;
250fee75e14SAdrien Mazarguil 	struct rte_flow_error error;
251bdcad2f4SAdrien Mazarguil 	int ret;
252bdcad2f4SAdrien Mazarguil 
253bdcad2f4SAdrien Mazarguil 	/* Prepare internal flow rules. */
254fee75e14SAdrien Mazarguil 	ret = mlx4_flow_sync(priv, &error);
255fee75e14SAdrien Mazarguil 	if (ret) {
256fee75e14SAdrien Mazarguil 		ERROR("cannot set up internal flow rules (code %d, \"%s\"),"
257fee75e14SAdrien Mazarguil 		      " flow error type %d, cause %p, message: %s",
258fee75e14SAdrien Mazarguil 		      -ret, strerror(-ret), error.type, error.cause,
259fee75e14SAdrien Mazarguil 		      error.message ? error.message : "(unspecified)");
260fc1b5ec5SMoti Haimovsky 		goto exit;
261fee75e14SAdrien Mazarguil 	}
262fc1b5ec5SMoti Haimovsky 	ret = mlx4_intr_install(priv);
26397d37d2cSYongseok Koh 	if (ret) {
264fc1b5ec5SMoti Haimovsky 		ERROR("%p: interrupt handler installation failed",
265fc1b5ec5SMoti Haimovsky 		      (void *)dev);
26697d37d2cSYongseok Koh 		goto exit;
26797d37d2cSYongseok Koh 	}
26897d37d2cSYongseok Koh 	ret = mlx4_proc_priv_init(dev);
26997d37d2cSYongseok Koh 	if (ret) {
27097d37d2cSYongseok Koh 		ERROR("%p: process private data allocation failed",
27197d37d2cSYongseok Koh 		      (void *)dev);
27297d37d2cSYongseok Koh 		goto exit;
27397d37d2cSYongseok Koh 	}
274fc1b5ec5SMoti Haimovsky exit:
275bdcad2f4SAdrien Mazarguil 	return ret;
27698a1f377SBruce Richardson }
27798a1f377SBruce Richardson 
27898a1f377SBruce Richardson /**
27998a1f377SBruce Richardson  * DPDK callback to start the device.
28098a1f377SBruce Richardson  *
2815697a414SAdrien Mazarguil  * Simulate device start by initializing common RSS resources and attaching
2825697a414SAdrien Mazarguil  * all configured flows.
28398a1f377SBruce Richardson  *
28498a1f377SBruce Richardson  * @param dev
28598a1f377SBruce Richardson  *   Pointer to Ethernet device structure.
28698a1f377SBruce Richardson  *
28798a1f377SBruce Richardson  * @return
2889d14b273SAdrien Mazarguil  *   0 on success, negative errno value otherwise and rte_errno is set.
28998a1f377SBruce Richardson  */
29098a1f377SBruce Richardson static int
29198a1f377SBruce Richardson mlx4_dev_start(struct rte_eth_dev *dev)
29298a1f377SBruce Richardson {
293dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = dev->data->dev_private;
294fee75e14SAdrien Mazarguil 	struct rte_flow_error error;
2957cc3ea89SJie Hai 	uint16_t i;
29646d5736aSVasily Philipov 	int ret;
29798a1f377SBruce Richardson 
298e4dff4d8SAdrien Mazarguil 	if (priv->started)
29998a1f377SBruce Richardson 		return 0;
30098a1f377SBruce Richardson 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
30198a1f377SBruce Richardson 	priv->started = 1;
3025697a414SAdrien Mazarguil 	ret = mlx4_rss_init(priv);
3035697a414SAdrien Mazarguil 	if (ret) {
3045697a414SAdrien Mazarguil 		ERROR("%p: cannot initialize RSS resources: %s",
3055697a414SAdrien Mazarguil 		      (void *)dev, strerror(-ret));
3065697a414SAdrien Mazarguil 		goto err;
3075697a414SAdrien Mazarguil 	}
308e99fdaa7SAlexander Kozyrev #ifdef RTE_LIBRTE_MLX4_DEBUG
3099797bfccSYongseok Koh 	mlx4_mr_dump_dev(dev);
3109797bfccSYongseok Koh #endif
311fc1b5ec5SMoti Haimovsky 	ret = mlx4_rxq_intr_enable(priv);
3126dd7b705SGaetan Rivet 	if (ret) {
313a6e8b01cSAdrien Mazarguil 		ERROR("%p: interrupt handler installation failed",
3140a2ae703SAdrien Mazarguil 		     (void *)dev);
3150a2ae703SAdrien Mazarguil 		goto err;
3169f05a4b8SMoti Haimovsky 	}
317fee75e14SAdrien Mazarguil 	ret = mlx4_flow_sync(priv, &error);
31846d5736aSVasily Philipov 	if (ret) {
319fee75e14SAdrien Mazarguil 		ERROR("%p: cannot attach flow rules (code %d, \"%s\"),"
320fee75e14SAdrien Mazarguil 		      " flow error type %d, cause %p, message: %s",
321fee75e14SAdrien Mazarguil 		      (void *)dev,
322fee75e14SAdrien Mazarguil 		      -ret, strerror(-ret), error.type, error.cause,
323fee75e14SAdrien Mazarguil 		      error.message ? error.message : "(unspecified)");
32446d5736aSVasily Philipov 		goto err;
32546d5736aSVasily Philipov 	}
32667e6cce6SAdrien Mazarguil 	rte_wmb();
32767e6cce6SAdrien Mazarguil 	dev->tx_pkt_burst = mlx4_tx_burst;
32867e6cce6SAdrien Mazarguil 	dev->rx_pkt_burst = mlx4_rx_burst;
3290203d33aSYongseok Koh 	/* Enable datapath on secondary process. */
3300203d33aSYongseok Koh 	mlx4_mp_req_start_rxtx(dev);
3317cc3ea89SJie Hai 
3327cc3ea89SJie Hai 	for (i = 0; i < dev->data->nb_rx_queues; i++)
3337cc3ea89SJie Hai 		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
3347cc3ea89SJie Hai 	for (i = 0; i < dev->data->nb_tx_queues; i++)
3357cc3ea89SJie Hai 		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
3367cc3ea89SJie Hai 
33746d5736aSVasily Philipov 	return 0;
33846d5736aSVasily Philipov err:
33984a68486SAdrien Mazarguil 	mlx4_dev_stop(dev);
3409d14b273SAdrien Mazarguil 	return ret;
34198a1f377SBruce Richardson }
34298a1f377SBruce Richardson 
34398a1f377SBruce Richardson /**
34498a1f377SBruce Richardson  * DPDK callback to stop the device.
34598a1f377SBruce Richardson  *
34698a1f377SBruce Richardson  * Simulate device stop by detaching all configured flows.
34798a1f377SBruce Richardson  *
34898a1f377SBruce Richardson  * @param dev
34998a1f377SBruce Richardson  *   Pointer to Ethernet device structure.
35098a1f377SBruce Richardson  */
35162024eb8SIvan Ilchenko static int
35298a1f377SBruce Richardson mlx4_dev_stop(struct rte_eth_dev *dev)
35398a1f377SBruce Richardson {
354dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = dev->data->dev_private;
3557cc3ea89SJie Hai 	uint16_t i;
35698a1f377SBruce Richardson 
357e4dff4d8SAdrien Mazarguil 	if (!priv->started)
35862024eb8SIvan Ilchenko 		return 0;
35998a1f377SBruce Richardson 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
36098a1f377SBruce Richardson 	priv->started = 0;
361a41f593fSFerruh Yigit 	dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
362a41f593fSFerruh Yigit 	dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
36367e6cce6SAdrien Mazarguil 	rte_wmb();
3640203d33aSYongseok Koh 	/* Disable datapath on secondary process. */
3650203d33aSYongseok Koh 	mlx4_mp_req_stop_rxtx(dev);
366fee75e14SAdrien Mazarguil 	mlx4_flow_sync(priv, NULL);
367fc1b5ec5SMoti Haimovsky 	mlx4_rxq_intr_disable(priv);
3685697a414SAdrien Mazarguil 	mlx4_rss_deinit(priv);
36962024eb8SIvan Ilchenko 
3707cc3ea89SJie Hai 	for (i = 0; i < dev->data->nb_rx_queues; i++)
3717cc3ea89SJie Hai 		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
3727cc3ea89SJie Hai 	for (i = 0; i < dev->data->nb_tx_queues; i++)
3737cc3ea89SJie Hai 		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
3747cc3ea89SJie Hai 
37562024eb8SIvan Ilchenko 	return 0;
37698a1f377SBruce Richardson }
37798a1f377SBruce Richardson 
37898a1f377SBruce Richardson /**
37998a1f377SBruce Richardson  * DPDK callback to close the device.
38098a1f377SBruce Richardson  *
38198a1f377SBruce Richardson  * Destroy all queues and objects, free memory.
38298a1f377SBruce Richardson  *
38398a1f377SBruce Richardson  * @param dev
38498a1f377SBruce Richardson  *   Pointer to Ethernet device structure.
38598a1f377SBruce Richardson  */
386b142387bSThomas Monjalon static int
38798a1f377SBruce Richardson mlx4_dev_close(struct rte_eth_dev *dev)
38898a1f377SBruce Richardson {
389dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = dev->data->dev_private;
39098a1f377SBruce Richardson 	unsigned int i;
39198a1f377SBruce Richardson 
3928e1630e0SMichael Baum 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3938e1630e0SMichael Baum 		rte_eth_dev_release_port(dev);
39430410493SThomas Monjalon 		return 0;
3958e1630e0SMichael Baum 	}
39698a1f377SBruce Richardson 	DEBUG("%p: closing device \"%s\"",
39798a1f377SBruce Richardson 	      (void *)dev,
39898a1f377SBruce Richardson 	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
399a41f593fSFerruh Yigit 	dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
400a41f593fSFerruh Yigit 	dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
40167e6cce6SAdrien Mazarguil 	rte_wmb();
4020203d33aSYongseok Koh 	/* Disable datapath on secondary process. */
4030203d33aSYongseok Koh 	mlx4_mp_req_stop_rxtx(dev);
40467e6cce6SAdrien Mazarguil 	mlx4_flow_clean(priv);
40584a68486SAdrien Mazarguil 	mlx4_rss_deinit(priv);
406be65fdcbSAdrien Mazarguil 	for (i = 0; i != dev->data->nb_rx_queues; ++i)
4077483341aSXueming Li 		mlx4_rx_queue_release(dev, i);
408be65fdcbSAdrien Mazarguil 	for (i = 0; i != dev->data->nb_tx_queues; ++i)
4097483341aSXueming Li 		mlx4_tx_queue_release(dev, i);
41097d37d2cSYongseok Koh 	mlx4_proc_priv_uninit(dev);
4119797bfccSYongseok Koh 	mlx4_mr_release(dev);
41298a1f377SBruce Richardson 	if (priv->pd != NULL) {
4138e08df22SAlexander Kozyrev 		MLX4_ASSERT(priv->ctx != NULL);
4144eba244bSAdrien Mazarguil 		claim_zero(mlx4_glue->dealloc_pd(priv->pd));
4154eba244bSAdrien Mazarguil 		claim_zero(mlx4_glue->close_device(priv->ctx));
41698a1f377SBruce Richardson 	} else
4178e08df22SAlexander Kozyrev 		MLX4_ASSERT(priv->ctx == NULL);
418b62579d4SAdrien Mazarguil 	mlx4_intr_uninstall(priv);
41998a1f377SBruce Richardson 	memset(priv, 0, sizeof(*priv));
420c0722108SThomas Monjalon 	/* mac_addrs must not be freed because part of dev_private */
421c0722108SThomas Monjalon 	dev->data->mac_addrs = NULL;
422b142387bSThomas Monjalon 	return 0;
42398a1f377SBruce Richardson }
42498a1f377SBruce Richardson 
42598a1f377SBruce Richardson static const struct eth_dev_ops mlx4_dev_ops = {
42698a1f377SBruce Richardson 	.dev_configure = mlx4_dev_configure,
42798a1f377SBruce Richardson 	.dev_start = mlx4_dev_start,
42898a1f377SBruce Richardson 	.dev_stop = mlx4_dev_stop,
42961cbdd41SAdrien Mazarguil 	.dev_set_link_down = mlx4_dev_set_link_down,
43061cbdd41SAdrien Mazarguil 	.dev_set_link_up = mlx4_dev_set_link_up,
43198a1f377SBruce Richardson 	.dev_close = mlx4_dev_close,
43298a1f377SBruce Richardson 	.link_update = mlx4_link_update,
433eacaac7bSAdrien Mazarguil 	.promiscuous_enable = mlx4_promiscuous_enable,
434eacaac7bSAdrien Mazarguil 	.promiscuous_disable = mlx4_promiscuous_disable,
435eacaac7bSAdrien Mazarguil 	.allmulticast_enable = mlx4_allmulticast_enable,
436eacaac7bSAdrien Mazarguil 	.allmulticast_disable = mlx4_allmulticast_disable,
4371437784bSAdrien Mazarguil 	.mac_addr_remove = mlx4_mac_addr_remove,
4381437784bSAdrien Mazarguil 	.mac_addr_add = mlx4_mac_addr_add,
4391437784bSAdrien Mazarguil 	.mac_addr_set = mlx4_mac_addr_set,
440138a740cSAdrien Mazarguil 	.set_mc_addr_list = mlx4_set_mc_addr_list,
44198a1f377SBruce Richardson 	.stats_get = mlx4_stats_get,
44298a1f377SBruce Richardson 	.stats_reset = mlx4_stats_reset,
443714bf46eSThomas Monjalon 	.fw_version_get = mlx4_fw_version_get,
44498a1f377SBruce Richardson 	.dev_infos_get = mlx4_dev_infos_get,
445aee4a03fSMoti Haimovsky 	.dev_supported_ptypes_get = mlx4_dev_supported_ptypes_get,
44630695adbSAdrien Mazarguil 	.vlan_filter_set = mlx4_vlan_filter_set,
44798a1f377SBruce Richardson 	.rx_queue_setup = mlx4_rx_queue_setup,
44898a1f377SBruce Richardson 	.tx_queue_setup = mlx4_tx_queue_setup,
44998a1f377SBruce Richardson 	.rx_queue_release = mlx4_rx_queue_release,
45098a1f377SBruce Richardson 	.tx_queue_release = mlx4_tx_queue_release,
45161cbdd41SAdrien Mazarguil 	.flow_ctrl_get = mlx4_flow_ctrl_get,
45261cbdd41SAdrien Mazarguil 	.flow_ctrl_set = mlx4_flow_ctrl_set,
45361cbdd41SAdrien Mazarguil 	.mtu_set = mlx4_mtu_set,
454fb7ad441SThomas Monjalon 	.flow_ops_get = mlx4_flow_ops_get,
4559f05a4b8SMoti Haimovsky 	.rx_queue_intr_enable = mlx4_rx_intr_enable,
4569f05a4b8SMoti Haimovsky 	.rx_queue_intr_disable = mlx4_rx_intr_disable,
457cdf4ec6eSMatan Azrad 	.is_removed = mlx4_is_removed,
45898a1f377SBruce Richardson };
45998a1f377SBruce Richardson 
4600203d33aSYongseok Koh /* Available operations from secondary process. */
4610203d33aSYongseok Koh static const struct eth_dev_ops mlx4_dev_sec_ops = {
4620203d33aSYongseok Koh 	.stats_get = mlx4_stats_get,
4630203d33aSYongseok Koh 	.stats_reset = mlx4_stats_reset,
4640203d33aSYongseok Koh 	.fw_version_get = mlx4_fw_version_get,
4650203d33aSYongseok Koh 	.dev_infos_get = mlx4_dev_infos_get,
4660203d33aSYongseok Koh };
4670203d33aSYongseok Koh 
46898a1f377SBruce Richardson /**
46998a1f377SBruce Richardson  * Get PCI information from struct ibv_device.
47098a1f377SBruce Richardson  *
47198a1f377SBruce Richardson  * @param device
47298a1f377SBruce Richardson  *   Pointer to Ethernet device structure.
47398a1f377SBruce Richardson  * @param[out] pci_addr
47498a1f377SBruce Richardson  *   PCI bus address output buffer.
47598a1f377SBruce Richardson  *
47698a1f377SBruce Richardson  * @return
4779d14b273SAdrien Mazarguil  *   0 on success, negative errno value otherwise and rte_errno is set.
47898a1f377SBruce Richardson  */
47998a1f377SBruce Richardson static int
48098a1f377SBruce Richardson mlx4_ibv_device_to_pci_addr(const struct ibv_device *device,
48198a1f377SBruce Richardson 			    struct rte_pci_addr *pci_addr)
48298a1f377SBruce Richardson {
48398a1f377SBruce Richardson 	FILE *file;
48498a1f377SBruce Richardson 	char line[32];
48598a1f377SBruce Richardson 	MKSTR(path, "%s/device/uevent", device->ibdev_path);
48698a1f377SBruce Richardson 
48798a1f377SBruce Richardson 	file = fopen(path, "rb");
4889d14b273SAdrien Mazarguil 	if (file == NULL) {
4899d14b273SAdrien Mazarguil 		rte_errno = errno;
4909d14b273SAdrien Mazarguil 		return -rte_errno;
4919d14b273SAdrien Mazarguil 	}
49298a1f377SBruce Richardson 	while (fgets(line, sizeof(line), file) == line) {
49398a1f377SBruce Richardson 		size_t len = strlen(line);
49498a1f377SBruce Richardson 		int ret;
49598a1f377SBruce Richardson 
49698a1f377SBruce Richardson 		/* Truncate long lines. */
49798a1f377SBruce Richardson 		if (len == (sizeof(line) - 1))
49898a1f377SBruce Richardson 			while (line[(len - 1)] != '\n') {
49998a1f377SBruce Richardson 				ret = fgetc(file);
50098a1f377SBruce Richardson 				if (ret == EOF)
50198a1f377SBruce Richardson 					break;
50298a1f377SBruce Richardson 				line[(len - 1)] = ret;
50398a1f377SBruce Richardson 			}
50498a1f377SBruce Richardson 		/* Extract information. */
50598a1f377SBruce Richardson 		if (sscanf(line,
50698a1f377SBruce Richardson 			   "PCI_SLOT_NAME="
507463ced95SStephen Hemminger 			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
50898a1f377SBruce Richardson 			   &pci_addr->domain,
50998a1f377SBruce Richardson 			   &pci_addr->bus,
51098a1f377SBruce Richardson 			   &pci_addr->devid,
51198a1f377SBruce Richardson 			   &pci_addr->function) == 4) {
51298a1f377SBruce Richardson 			break;
51398a1f377SBruce Richardson 		}
51498a1f377SBruce Richardson 	}
51598a1f377SBruce Richardson 	fclose(file);
51698a1f377SBruce Richardson 	return 0;
51798a1f377SBruce Richardson }
51898a1f377SBruce Richardson 
51998a1f377SBruce Richardson /**
520001a520eSGaetan Rivet  * Verify and store value for device argument.
521001a520eSGaetan Rivet  *
522001a520eSGaetan Rivet  * @param[in] key
523001a520eSGaetan Rivet  *   Key argument to verify.
524001a520eSGaetan Rivet  * @param[in] val
525001a520eSGaetan Rivet  *   Value associated with key.
52682642799SAdrien Mazarguil  * @param[in, out] conf
52782642799SAdrien Mazarguil  *   Shared configuration data.
528001a520eSGaetan Rivet  *
529001a520eSGaetan Rivet  * @return
5309d14b273SAdrien Mazarguil  *   0 on success, negative errno value otherwise and rte_errno is set.
531001a520eSGaetan Rivet  */
532001a520eSGaetan Rivet static int
53382642799SAdrien Mazarguil mlx4_arg_parse(const char *key, const char *val, struct mlx4_conf *conf)
534001a520eSGaetan Rivet {
535001a520eSGaetan Rivet 	unsigned long tmp;
536001a520eSGaetan Rivet 
537001a520eSGaetan Rivet 	errno = 0;
538001a520eSGaetan Rivet 	tmp = strtoul(val, NULL, 0);
539001a520eSGaetan Rivet 	if (errno) {
5409d14b273SAdrien Mazarguil 		rte_errno = errno;
541001a520eSGaetan Rivet 		WARN("%s: \"%s\" is not a valid integer", key, val);
5429d14b273SAdrien Mazarguil 		return -rte_errno;
543001a520eSGaetan Rivet 	}
544001a520eSGaetan Rivet 	if (strcmp(MLX4_PMD_PORT_KVARG, key) == 0) {
545a43fba2cSOphir Munk 		uint32_t ports = rte_log2_u32(conf->ports.present + 1);
54682642799SAdrien Mazarguil 
54782642799SAdrien Mazarguil 		if (tmp >= ports) {
54882642799SAdrien Mazarguil 			ERROR("port index %lu outside range [0,%" PRIu32 ")",
54982642799SAdrien Mazarguil 			      tmp, ports);
550001a520eSGaetan Rivet 			return -EINVAL;
551001a520eSGaetan Rivet 		}
55282642799SAdrien Mazarguil 		if (!(conf->ports.present & (1 << tmp))) {
5539d14b273SAdrien Mazarguil 			rte_errno = EINVAL;
55482642799SAdrien Mazarguil 			ERROR("invalid port index %lu", tmp);
5559d14b273SAdrien Mazarguil 			return -rte_errno;
55682642799SAdrien Mazarguil 		}
55782642799SAdrien Mazarguil 		conf->ports.enabled |= 1 << tmp;
558f4efc0ebSYongseok Koh 	} else if (strcmp(MLX4_MR_EXT_MEMSEG_EN_KVARG, key) == 0) {
559f4efc0ebSYongseok Koh 		conf->mr_ext_memseg_en = !!tmp;
560001a520eSGaetan Rivet 	} else {
5619d14b273SAdrien Mazarguil 		rte_errno = EINVAL;
562001a520eSGaetan Rivet 		WARN("%s: unknown parameter", key);
5639d14b273SAdrien Mazarguil 		return -rte_errno;
564001a520eSGaetan Rivet 	}
565001a520eSGaetan Rivet 	return 0;
566001a520eSGaetan Rivet }
567001a520eSGaetan Rivet 
568001a520eSGaetan Rivet /**
569001a520eSGaetan Rivet  * Parse device parameters.
570001a520eSGaetan Rivet  *
571001a520eSGaetan Rivet  * @param devargs
572001a520eSGaetan Rivet  *   Device arguments structure.
573001a520eSGaetan Rivet  *
574001a520eSGaetan Rivet  * @return
5759d14b273SAdrien Mazarguil  *   0 on success, negative errno value otherwise and rte_errno is set.
576001a520eSGaetan Rivet  */
577001a520eSGaetan Rivet static int
578001a520eSGaetan Rivet mlx4_args(struct rte_devargs *devargs, struct mlx4_conf *conf)
579001a520eSGaetan Rivet {
580001a520eSGaetan Rivet 	struct rte_kvargs *kvlist;
581001a520eSGaetan Rivet 	unsigned int arg_count;
582001a520eSGaetan Rivet 	int ret = 0;
583001a520eSGaetan Rivet 	int i;
584001a520eSGaetan Rivet 
585001a520eSGaetan Rivet 	if (devargs == NULL)
586001a520eSGaetan Rivet 		return 0;
587001a520eSGaetan Rivet 	kvlist = rte_kvargs_parse(devargs->args, pmd_mlx4_init_params);
588001a520eSGaetan Rivet 	if (kvlist == NULL) {
5899d14b273SAdrien Mazarguil 		rte_errno = EINVAL;
590001a520eSGaetan Rivet 		ERROR("failed to parse kvargs");
5919d14b273SAdrien Mazarguil 		return -rte_errno;
592001a520eSGaetan Rivet 	}
593001a520eSGaetan Rivet 	/* Process parameters. */
594001a520eSGaetan Rivet 	for (i = 0; pmd_mlx4_init_params[i]; ++i) {
595f4efc0ebSYongseok Koh 		arg_count = rte_kvargs_count(kvlist, pmd_mlx4_init_params[i]);
596001a520eSGaetan Rivet 		while (arg_count-- > 0) {
59782642799SAdrien Mazarguil 			ret = rte_kvargs_process(kvlist,
598f4efc0ebSYongseok Koh 						 pmd_mlx4_init_params[i],
59982642799SAdrien Mazarguil 						 (int (*)(const char *,
60082642799SAdrien Mazarguil 							  const char *,
60182642799SAdrien Mazarguil 							  void *))
60282642799SAdrien Mazarguil 						 mlx4_arg_parse,
60382642799SAdrien Mazarguil 						 conf);
604001a520eSGaetan Rivet 			if (ret != 0)
605001a520eSGaetan Rivet 				goto free_kvlist;
606001a520eSGaetan Rivet 		}
607001a520eSGaetan Rivet 	}
608001a520eSGaetan Rivet free_kvlist:
609001a520eSGaetan Rivet 	rte_kvargs_free(kvlist);
610001a520eSGaetan Rivet 	return ret;
611001a520eSGaetan Rivet }
612001a520eSGaetan Rivet 
613828a4ce3SAdrien Mazarguil /**
614828a4ce3SAdrien Mazarguil  * Interpret RSS capabilities reported by device.
615828a4ce3SAdrien Mazarguil  *
616828a4ce3SAdrien Mazarguil  * This function returns the set of usable Verbs RSS hash fields, kernel
617828a4ce3SAdrien Mazarguil  * quirks taken into account.
618828a4ce3SAdrien Mazarguil  *
619828a4ce3SAdrien Mazarguil  * @param ctx
620828a4ce3SAdrien Mazarguil  *   Verbs context.
621828a4ce3SAdrien Mazarguil  * @param pd
622828a4ce3SAdrien Mazarguil  *   Verbs protection domain.
623828a4ce3SAdrien Mazarguil  * @param device_attr_ex
624828a4ce3SAdrien Mazarguil  *   Extended device attributes to interpret.
625828a4ce3SAdrien Mazarguil  *
626828a4ce3SAdrien Mazarguil  * @return
627828a4ce3SAdrien Mazarguil  *   Usable RSS hash fields mask in Verbs format.
628828a4ce3SAdrien Mazarguil  */
629828a4ce3SAdrien Mazarguil static uint64_t
630828a4ce3SAdrien Mazarguil mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
631828a4ce3SAdrien Mazarguil 		struct ibv_device_attr_ex *device_attr_ex)
632828a4ce3SAdrien Mazarguil {
633828a4ce3SAdrien Mazarguil 	uint64_t hw_rss_sup = device_attr_ex->rss_caps.rx_hash_fields_mask;
634828a4ce3SAdrien Mazarguil 	struct ibv_cq *cq = NULL;
635828a4ce3SAdrien Mazarguil 	struct ibv_wq *wq = NULL;
636828a4ce3SAdrien Mazarguil 	struct ibv_rwq_ind_table *ind = NULL;
637828a4ce3SAdrien Mazarguil 	struct ibv_qp *qp = NULL;
638828a4ce3SAdrien Mazarguil 
639828a4ce3SAdrien Mazarguil 	if (!hw_rss_sup) {
640828a4ce3SAdrien Mazarguil 		WARN("no RSS capabilities reported; disabling support for UDP"
641828a4ce3SAdrien Mazarguil 		     " RSS and inner VXLAN RSS");
642828a4ce3SAdrien Mazarguil 		return IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 |
643828a4ce3SAdrien Mazarguil 			IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6 |
644828a4ce3SAdrien Mazarguil 			IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP;
645828a4ce3SAdrien Mazarguil 	}
646828a4ce3SAdrien Mazarguil 	if (!(hw_rss_sup & IBV_RX_HASH_INNER))
647828a4ce3SAdrien Mazarguil 		return hw_rss_sup;
648828a4ce3SAdrien Mazarguil 	/*
649828a4ce3SAdrien Mazarguil 	 * Although reported as supported, missing code in some Linux
650828a4ce3SAdrien Mazarguil 	 * versions (v4.15, v4.16) prevents the creation of hash QPs with
651828a4ce3SAdrien Mazarguil 	 * inner capability.
652828a4ce3SAdrien Mazarguil 	 *
653828a4ce3SAdrien Mazarguil 	 * There is no choice but to attempt to instantiate a temporary RSS
654828a4ce3SAdrien Mazarguil 	 * context in order to confirm its support.
655828a4ce3SAdrien Mazarguil 	 */
656828a4ce3SAdrien Mazarguil 	cq = mlx4_glue->create_cq(ctx, 1, NULL, NULL, 0);
657828a4ce3SAdrien Mazarguil 	wq = cq ? mlx4_glue->create_wq
658828a4ce3SAdrien Mazarguil 		(ctx,
659828a4ce3SAdrien Mazarguil 		 &(struct ibv_wq_init_attr){
660828a4ce3SAdrien Mazarguil 			.wq_type = IBV_WQT_RQ,
661828a4ce3SAdrien Mazarguil 			.max_wr = 1,
662828a4ce3SAdrien Mazarguil 			.max_sge = 1,
663828a4ce3SAdrien Mazarguil 			.pd = pd,
664828a4ce3SAdrien Mazarguil 			.cq = cq,
665828a4ce3SAdrien Mazarguil 		 }) : NULL;
666828a4ce3SAdrien Mazarguil 	ind = wq ? mlx4_glue->create_rwq_ind_table
667828a4ce3SAdrien Mazarguil 		(ctx,
668828a4ce3SAdrien Mazarguil 		 &(struct ibv_rwq_ind_table_init_attr){
669828a4ce3SAdrien Mazarguil 			.log_ind_tbl_size = 0,
670828a4ce3SAdrien Mazarguil 			.ind_tbl = &wq,
671828a4ce3SAdrien Mazarguil 			.comp_mask = 0,
672828a4ce3SAdrien Mazarguil 		 }) : NULL;
673828a4ce3SAdrien Mazarguil 	qp = ind ? mlx4_glue->create_qp_ex
674828a4ce3SAdrien Mazarguil 		(ctx,
675828a4ce3SAdrien Mazarguil 		 &(struct ibv_qp_init_attr_ex){
676828a4ce3SAdrien Mazarguil 			.comp_mask =
677828a4ce3SAdrien Mazarguil 				(IBV_QP_INIT_ATTR_PD |
678828a4ce3SAdrien Mazarguil 				 IBV_QP_INIT_ATTR_RX_HASH |
679828a4ce3SAdrien Mazarguil 				 IBV_QP_INIT_ATTR_IND_TABLE),
680828a4ce3SAdrien Mazarguil 			.qp_type = IBV_QPT_RAW_PACKET,
681828a4ce3SAdrien Mazarguil 			.pd = pd,
682828a4ce3SAdrien Mazarguil 			.rwq_ind_tbl = ind,
683828a4ce3SAdrien Mazarguil 			.rx_hash_conf = {
684828a4ce3SAdrien Mazarguil 				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
685828a4ce3SAdrien Mazarguil 				.rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE,
686828a4ce3SAdrien Mazarguil 				.rx_hash_key = mlx4_rss_hash_key_default,
687828a4ce3SAdrien Mazarguil 				.rx_hash_fields_mask = hw_rss_sup,
688828a4ce3SAdrien Mazarguil 			},
689828a4ce3SAdrien Mazarguil 		 }) : NULL;
690828a4ce3SAdrien Mazarguil 	if (!qp) {
691828a4ce3SAdrien Mazarguil 		WARN("disabling unusable inner RSS capability due to kernel"
692828a4ce3SAdrien Mazarguil 		     " quirk");
693828a4ce3SAdrien Mazarguil 		hw_rss_sup &= ~IBV_RX_HASH_INNER;
694828a4ce3SAdrien Mazarguil 	} else {
695828a4ce3SAdrien Mazarguil 		claim_zero(mlx4_glue->destroy_qp(qp));
696828a4ce3SAdrien Mazarguil 	}
697828a4ce3SAdrien Mazarguil 	if (ind)
698828a4ce3SAdrien Mazarguil 		claim_zero(mlx4_glue->destroy_rwq_ind_table(ind));
699828a4ce3SAdrien Mazarguil 	if (wq)
700828a4ce3SAdrien Mazarguil 		claim_zero(mlx4_glue->destroy_wq(wq));
701828a4ce3SAdrien Mazarguil 	if (cq)
702828a4ce3SAdrien Mazarguil 		claim_zero(mlx4_glue->destroy_cq(cq));
703828a4ce3SAdrien Mazarguil 	return hw_rss_sup;
704828a4ce3SAdrien Mazarguil }
705828a4ce3SAdrien Mazarguil 
706fdf91e0fSJan Blunck static struct rte_pci_driver mlx4_driver;
70798a1f377SBruce Richardson 
7080203d33aSYongseok Koh /**
7090203d33aSYongseok Koh  * PMD global initialization.
7100203d33aSYongseok Koh  *
7110203d33aSYongseok Koh  * Independent from individual device, this function initializes global
7120203d33aSYongseok Koh  * per-PMD data structures distinguishing primary and secondary processes.
7130203d33aSYongseok Koh  * Hence, each initialization is called once per a process.
7140203d33aSYongseok Koh  *
7150203d33aSYongseok Koh  * @return
7160203d33aSYongseok Koh  *   0 on success, a negative errno value otherwise and rte_errno is set.
7170203d33aSYongseok Koh  */
7180203d33aSYongseok Koh static int
7190203d33aSYongseok Koh mlx4_init_once(void)
7200203d33aSYongseok Koh {
7210203d33aSYongseok Koh 	struct mlx4_shared_data *sd;
7220203d33aSYongseok Koh 	struct mlx4_local_data *ld = &mlx4_local_data;
723edf73dd3SAnatoly Burakov 	int ret = 0;
7240203d33aSYongseok Koh 
7250203d33aSYongseok Koh 	if (mlx4_init_shared_data())
7260203d33aSYongseok Koh 		return -rte_errno;
7270203d33aSYongseok Koh 	sd = mlx4_shared_data;
7288e08df22SAlexander Kozyrev 	MLX4_ASSERT(sd);
7290203d33aSYongseok Koh 	rte_spinlock_lock(&sd->lock);
7300203d33aSYongseok Koh 	switch (rte_eal_process_type()) {
7310203d33aSYongseok Koh 	case RTE_PROC_PRIMARY:
7320203d33aSYongseok Koh 		if (sd->init_done)
7330203d33aSYongseok Koh 			break;
7340203d33aSYongseok Koh 		LIST_INIT(&sd->mem_event_cb_list);
7350203d33aSYongseok Koh 		rte_rwlock_init(&sd->mem_event_rwlock);
7360203d33aSYongseok Koh 		rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
7370203d33aSYongseok Koh 						mlx4_mr_mem_event_cb, NULL);
738edf73dd3SAnatoly Burakov 		ret = mlx4_mp_init_primary();
739edf73dd3SAnatoly Burakov 		if (ret)
740edf73dd3SAnatoly Burakov 			goto out;
741028669bcSAnatoly Burakov 		sd->init_done = 1;
7420203d33aSYongseok Koh 		break;
7430203d33aSYongseok Koh 	case RTE_PROC_SECONDARY:
7440203d33aSYongseok Koh 		if (ld->init_done)
7450203d33aSYongseok Koh 			break;
746edf73dd3SAnatoly Burakov 		ret = mlx4_mp_init_secondary();
747edf73dd3SAnatoly Burakov 		if (ret)
748edf73dd3SAnatoly Burakov 			goto out;
7490203d33aSYongseok Koh 		++sd->secondary_cnt;
750028669bcSAnatoly Burakov 		ld->init_done = 1;
7510203d33aSYongseok Koh 		break;
7520203d33aSYongseok Koh 	default:
7530203d33aSYongseok Koh 		break;
7540203d33aSYongseok Koh 	}
755edf73dd3SAnatoly Burakov out:
7560203d33aSYongseok Koh 	rte_spinlock_unlock(&sd->lock);
757edf73dd3SAnatoly Burakov 	return ret;
7580203d33aSYongseok Koh }
7590203d33aSYongseok Koh 
76098a1f377SBruce Richardson /**
76198a1f377SBruce Richardson  * DPDK callback to register a PCI device.
76298a1f377SBruce Richardson  *
76398a1f377SBruce Richardson  * This function creates an Ethernet device for each port of a given
76498a1f377SBruce Richardson  * PCI device.
76598a1f377SBruce Richardson  *
76698a1f377SBruce Richardson  * @param[in] pci_drv
76798a1f377SBruce Richardson  *   PCI driver structure (mlx4_driver).
76898a1f377SBruce Richardson  * @param[in] pci_dev
76998a1f377SBruce Richardson  *   PCI device information.
77098a1f377SBruce Richardson  *
77198a1f377SBruce Richardson  * @return
7729d14b273SAdrien Mazarguil  *   0 on success, negative errno value otherwise and rte_errno is set.
77398a1f377SBruce Richardson  */
77498a1f377SBruce Richardson static int
775af424af8SShreyansh Jain mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
77698a1f377SBruce Richardson {
77798a1f377SBruce Richardson 	struct ibv_device **list;
77898a1f377SBruce Richardson 	struct ibv_device *ibv_dev;
77998a1f377SBruce Richardson 	int err = 0;
78098a1f377SBruce Richardson 	struct ibv_context *attr_ctx = NULL;
78198a1f377SBruce Richardson 	struct ibv_device_attr device_attr;
782024e87beSAdrien Mazarguil 	struct ibv_device_attr_ex device_attr_ex;
783bcf58b64SMichael Baum 	struct rte_eth_dev *prev_dev = NULL;
784001a520eSGaetan Rivet 	struct mlx4_conf conf = {
78582642799SAdrien Mazarguil 		.ports.present = 0,
786f4efc0ebSYongseok Koh 		.mr_ext_memseg_en = 1,
787001a520eSGaetan Rivet 	};
78898a1f377SBruce Richardson 	unsigned int vf;
78998a1f377SBruce Richardson 	int i;
790164cad78SStephen Hemminger 	char ifname[IF_NAMESIZE];
79198a1f377SBruce Richardson 
79298a1f377SBruce Richardson 	(void)pci_drv;
7930203d33aSYongseok Koh 	err = mlx4_init_once();
7940203d33aSYongseok Koh 	if (err) {
7950203d33aSYongseok Koh 		ERROR("unable to init PMD global data: %s",
7960203d33aSYongseok Koh 		      strerror(rte_errno));
7970203d33aSYongseok Koh 		return -rte_errno;
7980203d33aSYongseok Koh 	}
7998e08df22SAlexander Kozyrev 	MLX4_ASSERT(pci_drv == &mlx4_driver);
8004eba244bSAdrien Mazarguil 	list = mlx4_glue->get_device_list(&i);
80198a1f377SBruce Richardson 	if (list == NULL) {
8029d14b273SAdrien Mazarguil 		rte_errno = errno;
8038e08df22SAlexander Kozyrev 		MLX4_ASSERT(rte_errno);
8049d14b273SAdrien Mazarguil 		if (rte_errno == ENOSYS)
8059e09761bSGaetan Rivet 			ERROR("cannot list devices, is ib_uverbs loaded?");
8069d14b273SAdrien Mazarguil 		return -rte_errno;
80798a1f377SBruce Richardson 	}
8088e08df22SAlexander Kozyrev 	MLX4_ASSERT(i >= 0);
80998a1f377SBruce Richardson 	/*
81098a1f377SBruce Richardson 	 * For each listed device, check related sysfs entry against
81198a1f377SBruce Richardson 	 * the provided PCI ID.
81298a1f377SBruce Richardson 	 */
81398a1f377SBruce Richardson 	while (i != 0) {
81498a1f377SBruce Richardson 		struct rte_pci_addr pci_addr;
81598a1f377SBruce Richardson 
81698a1f377SBruce Richardson 		--i;
81798a1f377SBruce Richardson 		DEBUG("checking device \"%s\"", list[i]->name);
81898a1f377SBruce Richardson 		if (mlx4_ibv_device_to_pci_addr(list[i], &pci_addr))
81998a1f377SBruce Richardson 			continue;
8208fa22e1fSThomas Monjalon 		if (rte_pci_addr_cmp(&pci_dev->addr, &pci_addr) != 0)
82198a1f377SBruce Richardson 			continue;
82298a1f377SBruce Richardson 		vf = (pci_dev->id.device_id ==
82398a1f377SBruce Richardson 		      PCI_DEVICE_ID_MELLANOX_CONNECTX3VF);
82498a1f377SBruce Richardson 		INFO("PCI information matches, using device \"%s\" (VF: %s)",
82598a1f377SBruce Richardson 		     list[i]->name, (vf ? "true" : "false"));
8264eba244bSAdrien Mazarguil 		attr_ctx = mlx4_glue->open_device(list[i]);
82798a1f377SBruce Richardson 		err = errno;
82898a1f377SBruce Richardson 		break;
82998a1f377SBruce Richardson 	}
83098a1f377SBruce Richardson 	if (attr_ctx == NULL) {
8314eba244bSAdrien Mazarguil 		mlx4_glue->free_device_list(list);
83298a1f377SBruce Richardson 		switch (err) {
83398a1f377SBruce Richardson 		case 0:
8349d14b273SAdrien Mazarguil 			rte_errno = ENODEV;
8359e09761bSGaetan Rivet 			ERROR("cannot access device, is mlx4_ib loaded?");
8369d14b273SAdrien Mazarguil 			return -rte_errno;
83798a1f377SBruce Richardson 		case EINVAL:
8389d14b273SAdrien Mazarguil 			rte_errno = EINVAL;
8399e09761bSGaetan Rivet 			ERROR("cannot use device, are drivers up to date?");
8409d14b273SAdrien Mazarguil 			return -rte_errno;
84198a1f377SBruce Richardson 		}
8428e08df22SAlexander Kozyrev 		MLX4_ASSERT(err > 0);
8439d14b273SAdrien Mazarguil 		rte_errno = err;
8449d14b273SAdrien Mazarguil 		return -rte_errno;
84598a1f377SBruce Richardson 	}
84698a1f377SBruce Richardson 	ibv_dev = list[i];
84798a1f377SBruce Richardson 	DEBUG("device opened");
8484eba244bSAdrien Mazarguil 	if (mlx4_glue->query_device(attr_ctx, &device_attr)) {
84966f2ac0fSAdrien Mazarguil 		err = ENODEV;
85098a1f377SBruce Richardson 		goto error;
8518d0f8016SMatan Azrad 	}
85298a1f377SBruce Richardson 	INFO("%u port(s) detected", device_attr.phys_port_cnt);
85382642799SAdrien Mazarguil 	conf.ports.present |= (UINT64_C(1) << device_attr.phys_port_cnt) - 1;
854001a520eSGaetan Rivet 	if (mlx4_args(pci_dev->device.devargs, &conf)) {
855001a520eSGaetan Rivet 		ERROR("failed to process device arguments");
85666f2ac0fSAdrien Mazarguil 		err = EINVAL;
857001a520eSGaetan Rivet 		goto error;
858001a520eSGaetan Rivet 	}
859001a520eSGaetan Rivet 	/* Use all ports when none are defined */
86082642799SAdrien Mazarguil 	if (!conf.ports.enabled)
86182642799SAdrien Mazarguil 		conf.ports.enabled = conf.ports.present;
862024e87beSAdrien Mazarguil 	/* Retrieve extended device attributes. */
8634eba244bSAdrien Mazarguil 	if (mlx4_glue->query_device_ex(attr_ctx, NULL, &device_attr_ex)) {
86466f2ac0fSAdrien Mazarguil 		err = ENODEV;
865024e87beSAdrien Mazarguil 		goto error;
866024e87beSAdrien Mazarguil 	}
8678e08df22SAlexander Kozyrev 	MLX4_ASSERT(device_attr.max_sge >= MLX4_MAX_SGE);
86898a1f377SBruce Richardson 	for (i = 0; i < device_attr.phys_port_cnt; i++) {
86998a1f377SBruce Richardson 		uint32_t port = i + 1; /* ports are indexed from one */
87098a1f377SBruce Richardson 		struct ibv_context *ctx = NULL;
87198a1f377SBruce Richardson 		struct ibv_port_attr port_attr;
87298a1f377SBruce Richardson 		struct ibv_pd *pd = NULL;
873dbeba4cfSThomas Monjalon 		struct mlx4_priv *priv = NULL;
874e9d39be8SOr Ami 		struct rte_eth_dev *eth_dev = NULL;
8756d13ea8eSOlivier Matz 		struct rte_ether_addr mac;
8760203d33aSYongseok Koh 		char name[RTE_ETH_NAME_MAX_LEN];
87798a1f377SBruce Richardson 
87882642799SAdrien Mazarguil 		/* If port is not enabled, skip. */
87982642799SAdrien Mazarguil 		if (!(conf.ports.enabled & (1 << i)))
880001a520eSGaetan Rivet 			continue;
881f2318196SAdrien Mazarguil 		DEBUG("using port %u", port);
8824eba244bSAdrien Mazarguil 		ctx = mlx4_glue->open_device(ibv_dev);
8838d0f8016SMatan Azrad 		if (ctx == NULL) {
88466f2ac0fSAdrien Mazarguil 			err = ENODEV;
88598a1f377SBruce Richardson 			goto port_error;
8868d0f8016SMatan Azrad 		}
8870203d33aSYongseok Koh 		snprintf(name, sizeof(name), "%s port %u",
8880203d33aSYongseok Koh 			 mlx4_glue->get_device_name(ibv_dev), port);
8890203d33aSYongseok Koh 		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
890ff9c3548SLong Li 			int fd;
891ff9c3548SLong Li 
8920203d33aSYongseok Koh 			eth_dev = rte_eth_dev_attach_secondary(name);
8930203d33aSYongseok Koh 			if (eth_dev == NULL) {
8940203d33aSYongseok Koh 				ERROR("can not attach rte ethdev");
8950203d33aSYongseok Koh 				rte_errno = ENOMEM;
8960203d33aSYongseok Koh 				err = rte_errno;
897bcf58b64SMichael Baum 				goto err_secondary;
8980203d33aSYongseok Koh 			}
8990203d33aSYongseok Koh 			priv = eth_dev->data->dev_private;
9000203d33aSYongseok Koh 			if (!priv->verbs_alloc_ctx.enabled) {
9010203d33aSYongseok Koh 				ERROR("secondary process is not supported"
9020203d33aSYongseok Koh 				      " due to lack of external allocator"
9030203d33aSYongseok Koh 				      " from Verbs");
9040203d33aSYongseok Koh 				rte_errno = ENOTSUP;
9050203d33aSYongseok Koh 				err = rte_errno;
906bcf58b64SMichael Baum 				goto err_secondary;
9070203d33aSYongseok Koh 			}
9080203d33aSYongseok Koh 			eth_dev->device = &pci_dev->device;
9090203d33aSYongseok Koh 			eth_dev->dev_ops = &mlx4_dev_sec_ops;
91097d37d2cSYongseok Koh 			err = mlx4_proc_priv_init(eth_dev);
91197d37d2cSYongseok Koh 			if (err)
912bcf58b64SMichael Baum 				goto err_secondary;
9130203d33aSYongseok Koh 			/* Receive command fd from primary process. */
914ff9c3548SLong Li 			fd = mlx4_mp_req_verbs_cmd_fd(eth_dev);
915ff9c3548SLong Li 			if (fd < 0) {
9160203d33aSYongseok Koh 				err = rte_errno;
917bcf58b64SMichael Baum 				goto err_secondary;
9180203d33aSYongseok Koh 			}
9190203d33aSYongseok Koh 			/* Remap UAR for Tx queues. */
920ff9c3548SLong Li 			err = mlx4_tx_uar_init_secondary(eth_dev, fd);
921ff9c3548SLong Li 			close(fd);
9220203d33aSYongseok Koh 			if (err) {
9230203d33aSYongseok Koh 				err = rte_errno;
924bcf58b64SMichael Baum 				goto err_secondary;
9250203d33aSYongseok Koh 			}
9260203d33aSYongseok Koh 			/*
9270203d33aSYongseok Koh 			 * Ethdev pointer is still required as input since
9280203d33aSYongseok Koh 			 * the primary device is not accessible from the
9290203d33aSYongseok Koh 			 * secondary process.
9300203d33aSYongseok Koh 			 */
9310203d33aSYongseok Koh 			eth_dev->tx_pkt_burst = mlx4_tx_burst;
9320203d33aSYongseok Koh 			eth_dev->rx_pkt_burst = mlx4_rx_burst;
9330203d33aSYongseok Koh 			claim_zero(mlx4_glue->close_device(ctx));
9340203d33aSYongseok Koh 			rte_eth_copy_pci_info(eth_dev, pci_dev);
9350203d33aSYongseok Koh 			rte_eth_dev_probing_finish(eth_dev);
936bcf58b64SMichael Baum 			prev_dev = eth_dev;
9370203d33aSYongseok Koh 			continue;
938bcf58b64SMichael Baum err_secondary:
939bcf58b64SMichael Baum 			claim_zero(mlx4_glue->close_device(ctx));
940bcf58b64SMichael Baum 			rte_eth_dev_release_port(eth_dev);
941bcf58b64SMichael Baum 			if (prev_dev)
942bcf58b64SMichael Baum 				rte_eth_dev_release_port(prev_dev);
943bcf58b64SMichael Baum 			break;
9440203d33aSYongseok Koh 		}
94598a1f377SBruce Richardson 		/* Check port status. */
9464eba244bSAdrien Mazarguil 		err = mlx4_glue->query_port(ctx, port, &port_attr);
94798a1f377SBruce Richardson 		if (err) {
94866f2ac0fSAdrien Mazarguil 			err = ENODEV;
94966f2ac0fSAdrien Mazarguil 			ERROR("port query failed: %s", strerror(err));
95098a1f377SBruce Richardson 			goto port_error;
95198a1f377SBruce Richardson 		}
9527b066153SAdrien Mazarguil 		if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
95366f2ac0fSAdrien Mazarguil 			err = ENOTSUP;
9547b066153SAdrien Mazarguil 			ERROR("port %d is not configured in Ethernet mode",
9557b066153SAdrien Mazarguil 			      port);
9567b066153SAdrien Mazarguil 			goto port_error;
9577b066153SAdrien Mazarguil 		}
95898a1f377SBruce Richardson 		if (port_attr.state != IBV_PORT_ACTIVE)
9599df03dd7SAdrien Mazarguil 			DEBUG("port %d is not active: \"%s\" (%d)",
9604eba244bSAdrien Mazarguil 			      port, mlx4_glue->port_state_str(port_attr.state),
96198a1f377SBruce Richardson 			      port_attr.state);
96235d02c54SAdrien Mazarguil 		/* Make asynchronous FD non-blocking to handle interrupts. */
96366f2ac0fSAdrien Mazarguil 		err = mlx4_fd_set_non_blocking(ctx->async_fd);
96466f2ac0fSAdrien Mazarguil 		if (err) {
96535d02c54SAdrien Mazarguil 			ERROR("cannot make asynchronous FD non-blocking: %s",
96666f2ac0fSAdrien Mazarguil 			      strerror(err));
96735d02c54SAdrien Mazarguil 			goto port_error;
96835d02c54SAdrien Mazarguil 		}
96998a1f377SBruce Richardson 		/* Allocate protection domain. */
9704eba244bSAdrien Mazarguil 		pd = mlx4_glue->alloc_pd(ctx);
97198a1f377SBruce Richardson 		if (pd == NULL) {
97266f2ac0fSAdrien Mazarguil 			err = ENOMEM;
97398a1f377SBruce Richardson 			ERROR("PD allocation failure");
97498a1f377SBruce Richardson 			goto port_error;
97598a1f377SBruce Richardson 		}
97698a1f377SBruce Richardson 		/* from rte_ethdev.c */
97798a1f377SBruce Richardson 		priv = rte_zmalloc("ethdev private structure",
97898a1f377SBruce Richardson 				   sizeof(*priv),
97998a1f377SBruce Richardson 				   RTE_CACHE_LINE_SIZE);
98098a1f377SBruce Richardson 		if (priv == NULL) {
98166f2ac0fSAdrien Mazarguil 			err = ENOMEM;
98298a1f377SBruce Richardson 			ERROR("priv allocation failure");
98398a1f377SBruce Richardson 			goto port_error;
98498a1f377SBruce Richardson 		}
98598a1f377SBruce Richardson 		priv->ctx = ctx;
98698a1f377SBruce Richardson 		priv->device_attr = device_attr;
98798a1f377SBruce Richardson 		priv->port = port;
98898a1f377SBruce Richardson 		priv->pd = pd;
98935b2d13fSOlivier Matz 		priv->mtu = RTE_ETHER_MTU;
99098a1f377SBruce Richardson 		priv->vf = vf;
9915db1d364SMoti Haimovsky 		priv->hw_csum =	!!(device_attr.device_cap_flags &
9925db1d364SMoti Haimovsky 				   IBV_DEVICE_RAW_IP_CSUM);
9935db1d364SMoti Haimovsky 		DEBUG("checksum offloading is %ssupported",
9945db1d364SMoti Haimovsky 		      (priv->hw_csum ? "" : "not "));
9955db1d364SMoti Haimovsky 		/* Only ConnectX-3 Pro supports tunneling. */
9965db1d364SMoti Haimovsky 		priv->hw_csum_l2tun =
9975db1d364SMoti Haimovsky 			priv->hw_csum &&
9985db1d364SMoti Haimovsky 			(device_attr.vendor_part_id ==
9995db1d364SMoti Haimovsky 			 PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO);
10005db1d364SMoti Haimovsky 		DEBUG("L2 tunnel checksum offloads are %ssupported",
1001de1df14eSOphir Munk 		      priv->hw_csum_l2tun ? "" : "not ");
1002828a4ce3SAdrien Mazarguil 		priv->hw_rss_sup = mlx4_hw_rss_sup(priv->ctx, priv->pd,
1003828a4ce3SAdrien Mazarguil 						   &device_attr_ex);
1004024e87beSAdrien Mazarguil 		DEBUG("supported RSS hash fields mask: %016" PRIx64,
1005024e87beSAdrien Mazarguil 		      priv->hw_rss_sup);
10062b4e423fSMoti Haimovsky 		priv->hw_rss_max_qps =
10072b4e423fSMoti Haimovsky 			device_attr_ex.rss_caps.max_rwq_indirection_table_size;
10082b4e423fSMoti Haimovsky 		DEBUG("MAX RSS queues %d", priv->hw_rss_max_qps);
1009de1df14eSOphir Munk 		priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps &
1010de1df14eSOphir Munk 					IBV_RAW_PACKET_CAP_SCATTER_FCS);
1011de1df14eSOphir Munk 		DEBUG("FCS stripping toggling is %ssupported",
1012de1df14eSOphir Munk 		      priv->hw_fcs_strip ? "" : "not ");
1013ba576975SMoti Haimovsky 		priv->tso =
1014ba576975SMoti Haimovsky 			((device_attr_ex.tso_caps.max_tso > 0) &&
1015ba576975SMoti Haimovsky 			 (device_attr_ex.tso_caps.supported_qpts &
1016ba576975SMoti Haimovsky 			  (1 << IBV_QPT_RAW_PACKET)));
1017ba576975SMoti Haimovsky 		if (priv->tso)
1018ba576975SMoti Haimovsky 			priv->tso_max_payload_sz =
1019ba576975SMoti Haimovsky 					device_attr_ex.tso_caps.max_tso;
1020ba576975SMoti Haimovsky 		DEBUG("TSO is %ssupported",
1021ba576975SMoti Haimovsky 		      priv->tso ? "" : "not ");
1022f4efc0ebSYongseok Koh 		priv->mr_ext_memseg_en = conf.mr_ext_memseg_en;
1023fec36086SOr Ami 		/* Configure the first MAC address by default. */
102466f2ac0fSAdrien Mazarguil 		err = mlx4_get_mac(priv, &mac.addr_bytes);
102566f2ac0fSAdrien Mazarguil 		if (err) {
1026fec36086SOr Ami 			ERROR("cannot get MAC address, is mlx4_en loaded?"
102766f2ac0fSAdrien Mazarguil 			      " (error: %s)", strerror(err));
102898a1f377SBruce Richardson 			goto port_error;
102998a1f377SBruce Richardson 		}
1030c2c4f87bSAman Deep Singh 		INFO("port %u MAC address is " RTE_ETHER_ADDR_PRT_FMT,
1031a7db3afcSAman Deep Singh 		     priv->port, RTE_ETHER_ADDR_BYTES(&mac));
1032320dc09fSAdrien Mazarguil 		/* Register MAC address. */
10331437784bSAdrien Mazarguil 		priv->mac[0] = mac;
103498a1f377SBruce Richardson 
1035164cad78SStephen Hemminger 		if (mlx4_get_ifname(priv, &ifname) == 0) {
103698a1f377SBruce Richardson 			DEBUG("port %u ifname is \"%s\"",
103798a1f377SBruce Richardson 			      priv->port, ifname);
1038164cad78SStephen Hemminger 			priv->if_index = if_nametoindex(ifname);
1039164cad78SStephen Hemminger 		} else {
104098a1f377SBruce Richardson 			DEBUG("port %u ifname is unknown", priv->port);
104198a1f377SBruce Richardson 		}
1042164cad78SStephen Hemminger 
104398a1f377SBruce Richardson 		/* Get actual MTU if possible. */
104461cbdd41SAdrien Mazarguil 		mlx4_mtu_get(priv, &priv->mtu);
104598a1f377SBruce Richardson 		DEBUG("port %u MTU is %u", priv->port, priv->mtu);
10466751f6deSDavid Marchand 		eth_dev = rte_eth_dev_allocate(name);
104798a1f377SBruce Richardson 		if (eth_dev == NULL) {
104866f2ac0fSAdrien Mazarguil 			err = ENOMEM;
104998a1f377SBruce Richardson 			ERROR("can not allocate rte ethdev");
105098a1f377SBruce Richardson 			goto port_error;
105198a1f377SBruce Richardson 		}
105298a1f377SBruce Richardson 		eth_dev->data->dev_private = priv;
10531437784bSAdrien Mazarguil 		eth_dev->data->mac_addrs = priv->mac;
1054eac901ceSJan Blunck 		eth_dev->device = &pci_dev->device;
1055eeefe73fSBernard Iremonger 		rte_eth_copy_pci_info(eth_dev, pci_dev);
1056f30e69b4SFerruh Yigit 		eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
105763c2f23cSAdrien Mazarguil 		/* Initialize local interrupt handle for current port. */
1058d61138d4SHarman Kalra 		priv->intr_handle =
1059d61138d4SHarman Kalra 			rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
1060d61138d4SHarman Kalra 		if (priv->intr_handle == NULL) {
106130d38a71SDavid Marchand 			ERROR("can not allocate intr_handle");
1062d61138d4SHarman Kalra 			goto port_error;
1063d61138d4SHarman Kalra 		}
1064d61138d4SHarman Kalra 
1065d61138d4SHarman Kalra 		if (rte_intr_fd_set(priv->intr_handle, -1))
1066d61138d4SHarman Kalra 			goto port_error;
1067d61138d4SHarman Kalra 
1068d61138d4SHarman Kalra 		if (rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT))
1069d61138d4SHarman Kalra 			goto port_error;
1070d61138d4SHarman Kalra 
10713c560ec3SAdrien Mazarguil 		/*
107263c2f23cSAdrien Mazarguil 		 * Override ethdev interrupt handle pointer with private
107363c2f23cSAdrien Mazarguil 		 * handle instead of that of the parent PCI device used by
107463c2f23cSAdrien Mazarguil 		 * default. This prevents it from being shared between all
107563c2f23cSAdrien Mazarguil 		 * ports of the same PCI device since each of them is
107663c2f23cSAdrien Mazarguil 		 * associated its own Verbs context.
107763c2f23cSAdrien Mazarguil 		 *
107863c2f23cSAdrien Mazarguil 		 * Rx interrupts in particular require this as the PMD has
107963c2f23cSAdrien Mazarguil 		 * no control over the registration of queue interrupts
108063c2f23cSAdrien Mazarguil 		 * besides setting up eth_dev->intr_handle, the rest is
108163c2f23cSAdrien Mazarguil 		 * handled by rte_intr_rx_ctl().
10823c560ec3SAdrien Mazarguil 		 */
1083d61138d4SHarman Kalra 		eth_dev->intr_handle = priv->intr_handle;
1084099c2c53SYongseok Koh 		priv->dev_data = eth_dev->data;
108598a1f377SBruce Richardson 		eth_dev->dev_ops = &mlx4_dev_ops;
10868e493764SYongseok Koh #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
10878e493764SYongseok Koh 		/* Hint libmlx4 to use PMD allocator for data plane resources */
108850e24943SMichael Baum 		err = mlx4_glue->dv_set_context_attr
108950e24943SMichael Baum 			(ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS,
109050e24943SMichael Baum 			 (void *)((uintptr_t)&(struct mlx4dv_ctx_allocators){
10918e493764SYongseok Koh 				 .alloc = &mlx4_alloc_verbs_buf,
10928e493764SYongseok Koh 				 .free = &mlx4_free_verbs_buf,
10938e493764SYongseok Koh 				 .data = priv,
109450e24943SMichael Baum 			}));
10950203d33aSYongseok Koh 		if (err)
10960203d33aSYongseok Koh 			WARN("Verbs external allocator is not supported");
10970203d33aSYongseok Koh 		else
10980203d33aSYongseok Koh 			priv->verbs_alloc_ctx.enabled = 1;
10998e493764SYongseok Koh #endif
110098a1f377SBruce Richardson 		/* Bring Ethernet device up. */
110198a1f377SBruce Richardson 		DEBUG("forcing Ethernet interface up");
1102099c2c53SYongseok Koh 		mlx4_dev_set_link_up(eth_dev);
11033fca2ab5SGaetan Rivet 		/* Update link status once if waiting for LSC. */
11043fca2ab5SGaetan Rivet 		if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
11053fca2ab5SGaetan Rivet 			mlx4_link_update(eth_dev, 0);
11064423d4a1SYongseok Koh 		/*
11074423d4a1SYongseok Koh 		 * Once the device is added to the list of memory event
11084423d4a1SYongseok Koh 		 * callback, its global MR cache table cannot be expanded
11094423d4a1SYongseok Koh 		 * on the fly because of deadlock. If it overflows, lookup
11104423d4a1SYongseok Koh 		 * should be done by searching MR list linearly, which is slow.
11114423d4a1SYongseok Koh 		 */
11124423d4a1SYongseok Koh 		err = mlx4_mr_btree_init(&priv->mr.cache,
11134423d4a1SYongseok Koh 					 MLX4_MR_BTREE_CACHE_N * 2,
11144423d4a1SYongseok Koh 					 eth_dev->device->numa_node);
11154423d4a1SYongseok Koh 		if (err) {
11164423d4a1SYongseok Koh 			/* rte_errno is already set. */
11174423d4a1SYongseok Koh 			goto port_error;
11184423d4a1SYongseok Koh 		}
11194423d4a1SYongseok Koh 		/* Add device to memory callback list. */
11200203d33aSYongseok Koh 		rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
11210203d33aSYongseok Koh 		LIST_INSERT_HEAD(&mlx4_shared_data->mem_event_cb_list,
11220203d33aSYongseok Koh 				 priv, mem_event_cb);
11230203d33aSYongseok Koh 		rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
1124fbe90cddSThomas Monjalon 		rte_eth_dev_probing_finish(eth_dev);
1125bcf58b64SMichael Baum 		prev_dev = eth_dev;
112698a1f377SBruce Richardson 		continue;
112798a1f377SBruce Richardson port_error:
112849fdb0aeSHarman Kalra 		if (priv != NULL)
1129d61138d4SHarman Kalra 			rte_intr_instance_free(priv->intr_handle);
113098a1f377SBruce Richardson 		rte_free(priv);
1131e16adf08SThomas Monjalon 		if (eth_dev != NULL)
1132e16adf08SThomas Monjalon 			eth_dev->data->dev_private = NULL;
113398a1f377SBruce Richardson 		if (pd)
11344eba244bSAdrien Mazarguil 			claim_zero(mlx4_glue->dealloc_pd(pd));
113598a1f377SBruce Richardson 		if (ctx)
11364eba244bSAdrien Mazarguil 			claim_zero(mlx4_glue->close_device(ctx));
1137e16adf08SThomas Monjalon 		if (eth_dev != NULL) {
1138e16adf08SThomas Monjalon 			/* mac_addrs must not be freed because part of dev_private */
1139e16adf08SThomas Monjalon 			eth_dev->data->mac_addrs = NULL;
1140e9d39be8SOr Ami 			rte_eth_dev_release_port(eth_dev);
1141e16adf08SThomas Monjalon 		}
1142bcf58b64SMichael Baum 		if (prev_dev)
1143bcf58b64SMichael Baum 			mlx4_dev_close(prev_dev);
114498a1f377SBruce Richardson 		break;
114598a1f377SBruce Richardson 	}
114698a1f377SBruce Richardson error:
114798a1f377SBruce Richardson 	if (attr_ctx)
11484eba244bSAdrien Mazarguil 		claim_zero(mlx4_glue->close_device(attr_ctx));
114998a1f377SBruce Richardson 	if (list)
11504eba244bSAdrien Mazarguil 		mlx4_glue->free_device_list(list);
115166f2ac0fSAdrien Mazarguil 	if (err)
115266f2ac0fSAdrien Mazarguil 		rte_errno = err;
115366f2ac0fSAdrien Mazarguil 	return -err;
115498a1f377SBruce Richardson }
115598a1f377SBruce Richardson 
11568e1630e0SMichael Baum /**
11578e1630e0SMichael Baum  * DPDK callback to remove a PCI device.
11588e1630e0SMichael Baum  *
11598e1630e0SMichael Baum  * This function removes all Ethernet devices belong to a given PCI device.
11608e1630e0SMichael Baum  *
11618e1630e0SMichael Baum  * @param[in] pci_dev
11628e1630e0SMichael Baum  *   Pointer to the PCI device.
11638e1630e0SMichael Baum  *
11648e1630e0SMichael Baum  * @return
11658e1630e0SMichael Baum  *   0 on success, the function cannot fail.
11668e1630e0SMichael Baum  */
11678e1630e0SMichael Baum static int
11688e1630e0SMichael Baum mlx4_pci_remove(struct rte_pci_device *pci_dev)
11698e1630e0SMichael Baum {
11708e1630e0SMichael Baum 	uint16_t port_id;
11718e1630e0SMichael Baum 	int ret = 0;
11728e1630e0SMichael Baum 
11738e1630e0SMichael Baum 	RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) {
11748e1630e0SMichael Baum 		/*
11758e1630e0SMichael Baum 		 * mlx4_dev_close() is not registered to secondary process,
11768e1630e0SMichael Baum 		 * call the close function explicitly for secondary process.
11778e1630e0SMichael Baum 		 */
11788e1630e0SMichael Baum 		if (rte_eal_process_type() == RTE_PROC_SECONDARY)
11798e1630e0SMichael Baum 			ret |= mlx4_dev_close(&rte_eth_devices[port_id]);
11808e1630e0SMichael Baum 		else
11818e1630e0SMichael Baum 			ret |= rte_eth_dev_close(port_id);
11828e1630e0SMichael Baum 	}
11838e1630e0SMichael Baum 	return ret == 0 ? 0 : -EIO;
11848e1630e0SMichael Baum }
11858e1630e0SMichael Baum 
118698a1f377SBruce Richardson static const struct rte_pci_id mlx4_pci_id_map[] = {
118798a1f377SBruce Richardson 	{
11881d1bc870SNélio Laranjeiro 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
11891d1bc870SNélio Laranjeiro 			       PCI_DEVICE_ID_MELLANOX_CONNECTX3)
119098a1f377SBruce Richardson 	},
119198a1f377SBruce Richardson 	{
11921d1bc870SNélio Laranjeiro 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
11931d1bc870SNélio Laranjeiro 			       PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO)
119498a1f377SBruce Richardson 	},
119598a1f377SBruce Richardson 	{
11961d1bc870SNélio Laranjeiro 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
11971d1bc870SNélio Laranjeiro 			       PCI_DEVICE_ID_MELLANOX_CONNECTX3VF)
119898a1f377SBruce Richardson 	},
119998a1f377SBruce Richardson 	{
120098a1f377SBruce Richardson 		.vendor_id = 0
120198a1f377SBruce Richardson 	}
120298a1f377SBruce Richardson };
120398a1f377SBruce Richardson 
1204fdf91e0fSJan Blunck static struct rte_pci_driver mlx4_driver = {
12052f3193cfSJan Viktorin 	.driver = {
12062f3193cfSJan Viktorin 		.name = MLX4_DRIVER_NAME
12072f3193cfSJan Viktorin 	},
120898a1f377SBruce Richardson 	.id_table = mlx4_pci_id_map,
1209af424af8SShreyansh Jain 	.probe = mlx4_pci_probe,
12108e1630e0SMichael Baum 	.remove = mlx4_pci_remove,
1211b76fafb1SDavid Marchand 	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
121298a1f377SBruce Richardson };
121398a1f377SBruce Richardson 
121472b934adSThomas Monjalon #ifdef RTE_IBVERBS_LINK_DLOPEN
121527cea116SAdrien Mazarguil 
121627cea116SAdrien Mazarguil /**
121708c028d0SAdrien Mazarguil  * Suffix RTE_EAL_PMD_PATH with "-glue".
121808c028d0SAdrien Mazarguil  *
121908c028d0SAdrien Mazarguil  * This function performs a sanity check on RTE_EAL_PMD_PATH before
122008c028d0SAdrien Mazarguil  * suffixing its last component.
122108c028d0SAdrien Mazarguil  *
122208c028d0SAdrien Mazarguil  * @param buf[out]
122308c028d0SAdrien Mazarguil  *   Output buffer, should be large enough otherwise NULL is returned.
122408c028d0SAdrien Mazarguil  * @param size
122508c028d0SAdrien Mazarguil  *   Size of @p out.
122608c028d0SAdrien Mazarguil  *
122708c028d0SAdrien Mazarguil  * @return
122808c028d0SAdrien Mazarguil  *   Pointer to @p buf or @p NULL in case suffix cannot be appended.
122908c028d0SAdrien Mazarguil  */
123008c028d0SAdrien Mazarguil static char *
123108c028d0SAdrien Mazarguil mlx4_glue_path(char *buf, size_t size)
123208c028d0SAdrien Mazarguil {
123308c028d0SAdrien Mazarguil 	static const char *const bad[] = { "/", ".", "..", NULL };
123408c028d0SAdrien Mazarguil 	const char *path = RTE_EAL_PMD_PATH;
123508c028d0SAdrien Mazarguil 	size_t len = strlen(path);
123608c028d0SAdrien Mazarguil 	size_t off;
123708c028d0SAdrien Mazarguil 	int i;
123808c028d0SAdrien Mazarguil 
123908c028d0SAdrien Mazarguil 	while (len && path[len - 1] == '/')
124008c028d0SAdrien Mazarguil 		--len;
124108c028d0SAdrien Mazarguil 	for (off = len; off && path[off - 1] != '/'; --off)
124208c028d0SAdrien Mazarguil 		;
124308c028d0SAdrien Mazarguil 	for (i = 0; bad[i]; ++i)
124408c028d0SAdrien Mazarguil 		if (!strncmp(path + off, bad[i], (int)(len - off)))
124508c028d0SAdrien Mazarguil 			goto error;
124608c028d0SAdrien Mazarguil 	i = snprintf(buf, size, "%.*s-glue", (int)len, path);
124708c028d0SAdrien Mazarguil 	if (i == -1 || (size_t)i >= size)
124808c028d0SAdrien Mazarguil 		goto error;
124908c028d0SAdrien Mazarguil 	return buf;
125008c028d0SAdrien Mazarguil error:
125108c028d0SAdrien Mazarguil 	ERROR("unable to append \"-glue\" to last component of"
125208c028d0SAdrien Mazarguil 	      " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"),"
125308c028d0SAdrien Mazarguil 	      " please re-configure DPDK");
125408c028d0SAdrien Mazarguil 	return NULL;
125508c028d0SAdrien Mazarguil }
125608c028d0SAdrien Mazarguil 
125708c028d0SAdrien Mazarguil /**
125827cea116SAdrien Mazarguil  * Initialization routine for run-time dependency on rdma-core.
125927cea116SAdrien Mazarguil  */
126027cea116SAdrien Mazarguil static int
126127cea116SAdrien Mazarguil mlx4_glue_init(void)
126227cea116SAdrien Mazarguil {
126308c028d0SAdrien Mazarguil 	char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")];
1264f6242d06SAdrien Mazarguil 	const char *path[] = {
1265f6242d06SAdrien Mazarguil 		/*
1266f6242d06SAdrien Mazarguil 		 * A basic security check is necessary before trusting
1267f6242d06SAdrien Mazarguil 		 * MLX4_GLUE_PATH, which may override RTE_EAL_PMD_PATH.
1268f6242d06SAdrien Mazarguil 		 */
1269f6242d06SAdrien Mazarguil 		(geteuid() == getuid() && getegid() == getgid() ?
1270f6242d06SAdrien Mazarguil 		 getenv("MLX4_GLUE_PATH") : NULL),
127108c028d0SAdrien Mazarguil 		/*
127208c028d0SAdrien Mazarguil 		 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed
127308c028d0SAdrien Mazarguil 		 * variant, otherwise let dlopen() look up libraries on its
127408c028d0SAdrien Mazarguil 		 * own.
127508c028d0SAdrien Mazarguil 		 */
127608c028d0SAdrien Mazarguil 		(*RTE_EAL_PMD_PATH ?
127708c028d0SAdrien Mazarguil 		 mlx4_glue_path(glue_path, sizeof(glue_path)) : ""),
1278f6242d06SAdrien Mazarguil 	};
1279f6242d06SAdrien Mazarguil 	unsigned int i = 0;
128027cea116SAdrien Mazarguil 	void *handle = NULL;
128127cea116SAdrien Mazarguil 	void **sym;
128227cea116SAdrien Mazarguil 	const char *dlmsg;
128327cea116SAdrien Mazarguil 
1284f6242d06SAdrien Mazarguil 	while (!handle && i != RTE_DIM(path)) {
1285f6242d06SAdrien Mazarguil 		const char *end;
1286f6242d06SAdrien Mazarguil 		size_t len;
1287f6242d06SAdrien Mazarguil 		int ret;
1288f6242d06SAdrien Mazarguil 
1289f6242d06SAdrien Mazarguil 		if (!path[i]) {
1290f6242d06SAdrien Mazarguil 			++i;
1291f6242d06SAdrien Mazarguil 			continue;
1292f6242d06SAdrien Mazarguil 		}
1293f6242d06SAdrien Mazarguil 		end = strpbrk(path[i], ":;");
1294f6242d06SAdrien Mazarguil 		if (!end)
1295f6242d06SAdrien Mazarguil 			end = path[i] + strlen(path[i]);
1296f6242d06SAdrien Mazarguil 		len = end - path[i];
1297f6242d06SAdrien Mazarguil 		ret = 0;
1298f6242d06SAdrien Mazarguil 		do {
1299f6242d06SAdrien Mazarguil 			char name[ret + 1];
1300f6242d06SAdrien Mazarguil 
1301f6242d06SAdrien Mazarguil 			ret = snprintf(name, sizeof(name), "%.*s%s" MLX4_GLUE,
1302f6242d06SAdrien Mazarguil 				       (int)len, path[i],
1303f6242d06SAdrien Mazarguil 				       (!len || *(end - 1) == '/') ? "" : "/");
1304f6242d06SAdrien Mazarguil 			if (ret == -1)
1305f6242d06SAdrien Mazarguil 				break;
1306f6242d06SAdrien Mazarguil 			if (sizeof(name) != (size_t)ret + 1)
1307f6242d06SAdrien Mazarguil 				continue;
1308f6242d06SAdrien Mazarguil 			DEBUG("looking for rdma-core glue as \"%s\"", name);
1309f6242d06SAdrien Mazarguil 			handle = dlopen(name, RTLD_LAZY);
1310f6242d06SAdrien Mazarguil 			break;
1311f6242d06SAdrien Mazarguil 		} while (1);
1312f6242d06SAdrien Mazarguil 		path[i] = end + 1;
1313f6242d06SAdrien Mazarguil 		if (!*end)
1314f6242d06SAdrien Mazarguil 			++i;
1315f6242d06SAdrien Mazarguil 	}
131627cea116SAdrien Mazarguil 	if (!handle) {
131727cea116SAdrien Mazarguil 		rte_errno = EINVAL;
131827cea116SAdrien Mazarguil 		dlmsg = dlerror();
131927cea116SAdrien Mazarguil 		if (dlmsg)
132027cea116SAdrien Mazarguil 			WARN("cannot load glue library: %s", dlmsg);
132127cea116SAdrien Mazarguil 		goto glue_error;
132227cea116SAdrien Mazarguil 	}
132327cea116SAdrien Mazarguil 	sym = dlsym(handle, "mlx4_glue");
132427cea116SAdrien Mazarguil 	if (!sym || !*sym) {
132527cea116SAdrien Mazarguil 		rte_errno = EINVAL;
132627cea116SAdrien Mazarguil 		dlmsg = dlerror();
132727cea116SAdrien Mazarguil 		if (dlmsg)
132827cea116SAdrien Mazarguil 			ERROR("cannot resolve glue symbol: %s", dlmsg);
132927cea116SAdrien Mazarguil 		goto glue_error;
133027cea116SAdrien Mazarguil 	}
133127cea116SAdrien Mazarguil 	mlx4_glue = *sym;
133227cea116SAdrien Mazarguil 	return 0;
133327cea116SAdrien Mazarguil glue_error:
133427cea116SAdrien Mazarguil 	if (handle)
133527cea116SAdrien Mazarguil 		dlclose(handle);
133627cea116SAdrien Mazarguil 	WARN("cannot initialize PMD due to missing run-time"
133727cea116SAdrien Mazarguil 	     " dependency on rdma-core libraries (libibverbs,"
133827cea116SAdrien Mazarguil 	     " libmlx4)");
133927cea116SAdrien Mazarguil 	return -rte_errno;
134027cea116SAdrien Mazarguil }
134127cea116SAdrien Mazarguil 
134227cea116SAdrien Mazarguil #endif
134327cea116SAdrien Mazarguil 
/* Register the driver log type (mlx4_logtype), NOTICE level by default. */
RTE_LOG_REGISTER_DEFAULT(mlx4_logtype, NOTICE)
13469c99878aSJerin Jacob 
134798a1f377SBruce Richardson /**
134898a1f377SBruce Richardson  * Driver initialization routine.
134998a1f377SBruce Richardson  */
1350f8e99896SThomas Monjalon RTE_INIT(rte_mlx4_pmd_init)
135198a1f377SBruce Richardson {
1352643777a1SOlga Shern 	/*
1353c2b3dba8SMatan Azrad 	 * MLX4_DEVICE_FATAL_CLEANUP tells ibv_destroy functions we
1354c2b3dba8SMatan Azrad 	 * want to get success errno value in case of calling them
1355c2b3dba8SMatan Azrad 	 * when the device was removed.
1356c2b3dba8SMatan Azrad 	 */
1357c2b3dba8SMatan Azrad 	setenv("MLX4_DEVICE_FATAL_CLEANUP", "1", 1);
1358c2b3dba8SMatan Azrad 	/*
1359643777a1SOlga Shern 	 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
1360643777a1SOlga Shern 	 * huge pages. Calling ibv_fork_init() during init allows
1361643777a1SOlga Shern 	 * applications to use fork() safely for purposes other than
1362643777a1SOlga Shern 	 * using this PMD, which is not supported in forked processes.
1363643777a1SOlga Shern 	 */
1364643777a1SOlga Shern 	setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
136572b934adSThomas Monjalon #ifdef RTE_IBVERBS_LINK_DLOPEN
136627cea116SAdrien Mazarguil 	if (mlx4_glue_init())
136727cea116SAdrien Mazarguil 		return;
13688e08df22SAlexander Kozyrev 	MLX4_ASSERT(mlx4_glue);
136927cea116SAdrien Mazarguil #endif
1370e99fdaa7SAlexander Kozyrev #ifdef RTE_LIBRTE_MLX4_DEBUG
13712a3b0097SAdrien Mazarguil 	/* Glue structure must not contain any NULL pointers. */
13722a3b0097SAdrien Mazarguil 	{
13732a3b0097SAdrien Mazarguil 		unsigned int i;
13742a3b0097SAdrien Mazarguil 
13752a3b0097SAdrien Mazarguil 		for (i = 0; i != sizeof(*mlx4_glue) / sizeof(void *); ++i)
13768e08df22SAlexander Kozyrev 			MLX4_ASSERT(((const void *const *)mlx4_glue)[i]);
13772a3b0097SAdrien Mazarguil 	}
13782a3b0097SAdrien Mazarguil #endif
13796d5df2eaSAdrien Mazarguil 	if (strcmp(mlx4_glue->version, MLX4_GLUE_VERSION)) {
13806d5df2eaSAdrien Mazarguil 		ERROR("rdma-core glue \"%s\" mismatch: \"%s\" is required",
13816d5df2eaSAdrien Mazarguil 		      mlx4_glue->version, MLX4_GLUE_VERSION);
13826d5df2eaSAdrien Mazarguil 		return;
13836d5df2eaSAdrien Mazarguil 	}
13844eba244bSAdrien Mazarguil 	mlx4_glue->fork_init();
13853dcfe039SThomas Monjalon 	rte_pci_register(&mlx4_driver);
138698a1f377SBruce Richardson }
138798a1f377SBruce Richardson 
138801f19227SShreyansh Jain RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__);
138901f19227SShreyansh Jain RTE_PMD_REGISTER_PCI_TABLE(net_mlx4, mlx4_pci_id_map);
13900880c401SOlivier Matz RTE_PMD_REGISTER_KMOD_DEP(net_mlx4,
13910880c401SOlivier Matz 	"* ib_uverbs & mlx4_en & mlx4_core & mlx4_ib");
1392