182092c87SOlivier Matz /* SPDX-License-Identifier: BSD-3-Clause 23b47f9acSAdrien Mazarguil * Copyright 2012 6WIND S.A. 35feecc57SShahaf Shuler * Copyright 2012 Mellanox Technologies, Ltd 498a1f377SBruce Richardson */ 598a1f377SBruce Richardson 637491c7fSAdrien Mazarguil /** 737491c7fSAdrien Mazarguil * @file 837491c7fSAdrien Mazarguil * mlx4 driver initialization. 937491c7fSAdrien Mazarguil */ 1037491c7fSAdrien Mazarguil 1137491c7fSAdrien Mazarguil #include <errno.h> 1237491c7fSAdrien Mazarguil #include <inttypes.h> 1398a1f377SBruce Richardson #include <stddef.h> 1437491c7fSAdrien Mazarguil #include <stdint.h> 1598a1f377SBruce Richardson #include <stdio.h> 1698a1f377SBruce Richardson #include <stdlib.h> 1798a1f377SBruce Richardson #include <string.h> 180203d33aSYongseok Koh #include <sys/mman.h> 1927cea116SAdrien Mazarguil #include <unistd.h> 20a6e7cd81STonghao Zhang #ifdef RTE_IBVERBS_LINK_DLOPEN 21a6e7cd81STonghao Zhang #include <dlfcn.h> 22a6e7cd81STonghao Zhang #endif 2398a1f377SBruce Richardson 2437491c7fSAdrien Mazarguil /* Verbs headers do not support -pedantic. */ 2537491c7fSAdrien Mazarguil #ifdef PEDANTIC 2637491c7fSAdrien Mazarguil #pragma GCC diagnostic ignored "-Wpedantic" 2737491c7fSAdrien Mazarguil #endif 2837491c7fSAdrien Mazarguil #include <infiniband/verbs.h> 2937491c7fSAdrien Mazarguil #ifdef PEDANTIC 3037491c7fSAdrien Mazarguil #pragma GCC diagnostic error "-Wpedantic" 3137491c7fSAdrien Mazarguil #endif 3237491c7fSAdrien Mazarguil 3337491c7fSAdrien Mazarguil #include <rte_common.h> 341acb7f54SDavid Marchand #include <dev_driver.h> 3537491c7fSAdrien Mazarguil #include <rte_errno.h> 36df96fd0dSBruce Richardson #include <ethdev_driver.h> 37df96fd0dSBruce Richardson #include <ethdev_pci.h> 3837491c7fSAdrien Mazarguil #include <rte_ether.h> 39fee75e14SAdrien Mazarguil #include <rte_flow.h> 409f05a4b8SMoti Haimovsky #include <rte_interrupts.h> 4137491c7fSAdrien Mazarguil #include <rte_kvargs.h> 4237491c7fSAdrien Mazarguil #include <rte_malloc.h> 4337491c7fSAdrien Mazarguil #include <rte_mbuf.h> 4498a1f377SBruce Richardson 4598a1f377SBruce Richardson #include "mlx4.h" 464eba244bSAdrien Mazarguil #include "mlx4_glue.h" 4746d5736aSVasily Philipov #include "mlx4_flow.h" 489797bfccSYongseok Koh #include "mlx4_mr.h" 493d555728SAdrien Mazarguil #include "mlx4_rxtx.h" 5076df01ffSAdrien Mazarguil #include "mlx4_utils.h" 5198a1f377SBruce Richardson 52a5e5af7fSThomas Monjalon #ifdef MLX4_GLUE 53a5e5af7fSThomas Monjalon const struct mlx4_glue *mlx4_glue; 54a5e5af7fSThomas Monjalon #endif 55a5e5af7fSThomas Monjalon 560203d33aSYongseok Koh static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data"; 579797bfccSYongseok Koh 580203d33aSYongseok Koh /* Shared memory between primary and secondary processes. */ 590203d33aSYongseok Koh struct mlx4_shared_data *mlx4_shared_data; 600203d33aSYongseok Koh 610203d33aSYongseok Koh /* Spinlock for mlx4_shared_data allocation. */ 620203d33aSYongseok Koh static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER; 630203d33aSYongseok Koh 640203d33aSYongseok Koh /* Process local data for secondary processes. */ 650203d33aSYongseok Koh static struct mlx4_local_data mlx4_local_data; 669797bfccSYongseok Koh 6782642799SAdrien Mazarguil /** Configuration structure for device arguments. */ 68001a520eSGaetan Rivet struct mlx4_conf { 6982642799SAdrien Mazarguil struct { 7082642799SAdrien Mazarguil uint32_t present; /**< Bit-field for existing ports. */ 7182642799SAdrien Mazarguil uint32_t enabled; /**< Bit-field for user-enabled ports. */ 7282642799SAdrien Mazarguil } ports; 73f4efc0ebSYongseok Koh int mr_ext_memseg_en; 74f4efc0ebSYongseok Koh /** Whether memseg should be extended for MR creation. */ 75001a520eSGaetan Rivet }; 76001a520eSGaetan Rivet 77001a520eSGaetan Rivet /* Available parameters list. */ 78001a520eSGaetan Rivet const char *pmd_mlx4_init_params[] = { 79001a520eSGaetan Rivet MLX4_PMD_PORT_KVARG, 80f4efc0ebSYongseok Koh MLX4_MR_EXT_MEMSEG_EN_KVARG, 81001a520eSGaetan Rivet NULL, 82001a520eSGaetan Rivet }; 83001a520eSGaetan Rivet 8462024eb8SIvan Ilchenko static int mlx4_dev_stop(struct rte_eth_dev *dev); 8584a68486SAdrien Mazarguil 860203d33aSYongseok Koh /** 870203d33aSYongseok Koh * Initialize shared data between primary and secondary process. 880203d33aSYongseok Koh * 890203d33aSYongseok Koh * A memzone is reserved by primary process and secondary processes attach to 900203d33aSYongseok Koh * the memzone. 910203d33aSYongseok Koh * 920203d33aSYongseok Koh * @return 930203d33aSYongseok Koh * 0 on success, a negative errno value otherwise and rte_errno is set. 940203d33aSYongseok Koh */ 950203d33aSYongseok Koh static int 960203d33aSYongseok Koh mlx4_init_shared_data(void) 970203d33aSYongseok Koh { 980203d33aSYongseok Koh const struct rte_memzone *mz; 990203d33aSYongseok Koh int ret = 0; 1000203d33aSYongseok Koh 1010203d33aSYongseok Koh rte_spinlock_lock(&mlx4_shared_data_lock); 1020203d33aSYongseok Koh if (mlx4_shared_data == NULL) { 1030203d33aSYongseok Koh if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 1040203d33aSYongseok Koh /* Allocate shared memory. */ 1050203d33aSYongseok Koh mz = rte_memzone_reserve(MZ_MLX4_PMD_SHARED_DATA, 1060203d33aSYongseok Koh sizeof(*mlx4_shared_data), 1070203d33aSYongseok Koh SOCKET_ID_ANY, 0); 1080203d33aSYongseok Koh if (mz == NULL) { 109*f665790aSDavid Marchand ERROR("Cannot allocate mlx4 shared data"); 1100203d33aSYongseok Koh ret = -rte_errno; 1110203d33aSYongseok Koh goto error; 1120203d33aSYongseok Koh } 1130203d33aSYongseok Koh mlx4_shared_data = mz->addr; 1140203d33aSYongseok Koh memset(mlx4_shared_data, 0, sizeof(*mlx4_shared_data)); 1150203d33aSYongseok Koh rte_spinlock_init(&mlx4_shared_data->lock); 1160203d33aSYongseok Koh } else { 1170203d33aSYongseok Koh /* Lookup allocated shared memory. */ 1180203d33aSYongseok Koh mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA); 1190203d33aSYongseok Koh if (mz == NULL) { 120*f665790aSDavid Marchand ERROR("Cannot attach mlx4 shared data"); 1210203d33aSYongseok Koh ret = -rte_errno; 1220203d33aSYongseok Koh goto error; 1230203d33aSYongseok Koh } 1240203d33aSYongseok Koh mlx4_shared_data = mz->addr; 1250203d33aSYongseok Koh memset(&mlx4_local_data, 0, sizeof(mlx4_local_data)); 1260203d33aSYongseok Koh } 1270203d33aSYongseok Koh } 1280203d33aSYongseok Koh error: 1290203d33aSYongseok Koh rte_spinlock_unlock(&mlx4_shared_data_lock); 1300203d33aSYongseok Koh return ret; 1310203d33aSYongseok Koh } 1320203d33aSYongseok Koh 1338e493764SYongseok Koh #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS 1348e493764SYongseok Koh /** 1358e493764SYongseok Koh * Verbs callback to allocate a memory. This function should allocate the space 1368e493764SYongseok Koh * according to the size provided residing inside a huge page. 1378e493764SYongseok Koh * Please note that all allocation must respect the alignment from libmlx4 1388e493764SYongseok Koh * (i.e. currently sysconf(_SC_PAGESIZE)). 1398e493764SYongseok Koh * 1408e493764SYongseok Koh * @param[in] size 1418e493764SYongseok Koh * The size in bytes of the memory to allocate. 1428e493764SYongseok Koh * @param[in] data 1438e493764SYongseok Koh * A pointer to the callback data. 1448e493764SYongseok Koh * 1458e493764SYongseok Koh * @return 1468e493764SYongseok Koh * Allocated buffer, NULL otherwise and rte_errno is set. 1478e493764SYongseok Koh */ 1488e493764SYongseok Koh static void * 1498e493764SYongseok Koh mlx4_alloc_verbs_buf(size_t size, void *data) 1508e493764SYongseok Koh { 1518e493764SYongseok Koh struct mlx4_priv *priv = data; 1528e493764SYongseok Koh void *ret; 1538e493764SYongseok Koh size_t alignment = sysconf(_SC_PAGESIZE); 1548e493764SYongseok Koh unsigned int socket = SOCKET_ID_ANY; 1558e493764SYongseok Koh 1568e493764SYongseok Koh if (priv->verbs_alloc_ctx.type == MLX4_VERBS_ALLOC_TYPE_TX_QUEUE) { 1578e493764SYongseok Koh const struct txq *txq = priv->verbs_alloc_ctx.obj; 1588e493764SYongseok Koh 1598e493764SYongseok Koh socket = txq->socket; 1608e493764SYongseok Koh } else if (priv->verbs_alloc_ctx.type == 1618e493764SYongseok Koh MLX4_VERBS_ALLOC_TYPE_RX_QUEUE) { 1628e493764SYongseok Koh const struct rxq *rxq = priv->verbs_alloc_ctx.obj; 1638e493764SYongseok Koh 1648e493764SYongseok Koh socket = rxq->socket; 1658e493764SYongseok Koh } 1668e08df22SAlexander Kozyrev MLX4_ASSERT(data != NULL); 1678e493764SYongseok Koh ret = rte_malloc_socket(__func__, size, alignment, socket); 1688e493764SYongseok Koh if (!ret && size) 1698e493764SYongseok Koh rte_errno = ENOMEM; 1708e493764SYongseok Koh return ret; 1718e493764SYongseok Koh } 1728e493764SYongseok Koh 1738e493764SYongseok Koh /** 1748e493764SYongseok Koh * Verbs callback to free a memory. 1758e493764SYongseok Koh * 1768e493764SYongseok Koh * @param[in] ptr 1778e493764SYongseok Koh * A pointer to the memory to free. 1788e493764SYongseok Koh * @param[in] data 1798e493764SYongseok Koh * A pointer to the callback data. 1808e493764SYongseok Koh */ 1818e493764SYongseok Koh static void 1828e493764SYongseok Koh mlx4_free_verbs_buf(void *ptr, void *data __rte_unused) 1838e493764SYongseok Koh { 1848e08df22SAlexander Kozyrev MLX4_ASSERT(data != NULL); 1858e493764SYongseok Koh rte_free(ptr); 1868e493764SYongseok Koh } 1878e493764SYongseok Koh #endif 1888e493764SYongseok Koh 18998a1f377SBruce Richardson /** 19097d37d2cSYongseok Koh * Initialize process private data structure. 19197d37d2cSYongseok Koh * 19297d37d2cSYongseok Koh * @param dev 19397d37d2cSYongseok Koh * Pointer to Ethernet device structure. 19497d37d2cSYongseok Koh * 19597d37d2cSYongseok Koh * @return 19697d37d2cSYongseok Koh * 0 on success, a negative errno value otherwise and rte_errno is set. 19797d37d2cSYongseok Koh */ 198ed879addSSuanming Mou int 19997d37d2cSYongseok Koh mlx4_proc_priv_init(struct rte_eth_dev *dev) 20097d37d2cSYongseok Koh { 20197d37d2cSYongseok Koh struct mlx4_proc_priv *ppriv; 20297d37d2cSYongseok Koh size_t ppriv_size; 20397d37d2cSYongseok Koh 2046f14d4d7SYunjian Wang mlx4_proc_priv_uninit(dev); 20597d37d2cSYongseok Koh /* 20697d37d2cSYongseok Koh * UAR register table follows the process private structure. BlueFlame 20797d37d2cSYongseok Koh * registers for Tx queues are stored in the table. 20897d37d2cSYongseok Koh */ 20997d37d2cSYongseok Koh ppriv_size = sizeof(struct mlx4_proc_priv) + 21097d37d2cSYongseok Koh dev->data->nb_tx_queues * sizeof(void *); 211ed879addSSuanming Mou ppriv = rte_zmalloc_socket("mlx4_proc_priv", ppriv_size, 21297d37d2cSYongseok Koh RTE_CACHE_LINE_SIZE, dev->device->numa_node); 21397d37d2cSYongseok Koh if (!ppriv) { 21497d37d2cSYongseok Koh rte_errno = ENOMEM; 21597d37d2cSYongseok Koh return -rte_errno; 21697d37d2cSYongseok Koh } 217ed879addSSuanming Mou ppriv->uar_table_sz = dev->data->nb_tx_queues; 21897d37d2cSYongseok Koh dev->process_private = ppriv; 21997d37d2cSYongseok Koh return 0; 22097d37d2cSYongseok Koh } 22197d37d2cSYongseok Koh 22297d37d2cSYongseok Koh /** 22397d37d2cSYongseok Koh * Un-initialize process private data structure. 22497d37d2cSYongseok Koh * 22597d37d2cSYongseok Koh * @param dev 22697d37d2cSYongseok Koh * Pointer to Ethernet device structure. 22797d37d2cSYongseok Koh */ 228ed879addSSuanming Mou void 22997d37d2cSYongseok Koh mlx4_proc_priv_uninit(struct rte_eth_dev *dev) 23097d37d2cSYongseok Koh { 23197d37d2cSYongseok Koh if (!dev->process_private) 23297d37d2cSYongseok Koh return; 23397d37d2cSYongseok Koh rte_free(dev->process_private); 23497d37d2cSYongseok Koh dev->process_private = NULL; 23597d37d2cSYongseok Koh } 23697d37d2cSYongseok Koh 23797d37d2cSYongseok Koh /** 2383cf06ceaSAdrien Mazarguil * DPDK callback for Ethernet device configuration. 23998a1f377SBruce Richardson * 24098a1f377SBruce Richardson * @param dev 24198a1f377SBruce Richardson * Pointer to Ethernet device structure. 24298a1f377SBruce Richardson * 24398a1f377SBruce Richardson * @return 2449d14b273SAdrien Mazarguil * 0 on success, negative errno value otherwise and rte_errno is set. 24598a1f377SBruce Richardson */ 24698a1f377SBruce Richardson static int 2473cf06ceaSAdrien Mazarguil mlx4_dev_configure(struct rte_eth_dev *dev) 24898a1f377SBruce Richardson { 249dbeba4cfSThomas Monjalon struct mlx4_priv *priv = dev->data->dev_private; 250fee75e14SAdrien Mazarguil struct rte_flow_error error; 251bdcad2f4SAdrien Mazarguil int ret; 252bdcad2f4SAdrien Mazarguil 253bdcad2f4SAdrien Mazarguil /* Prepare internal flow rules. */ 254fee75e14SAdrien Mazarguil ret = mlx4_flow_sync(priv, &error); 255fee75e14SAdrien Mazarguil if (ret) { 256fee75e14SAdrien Mazarguil ERROR("cannot set up internal flow rules (code %d, \"%s\")," 257fee75e14SAdrien Mazarguil " flow error type %d, cause %p, message: %s", 258fee75e14SAdrien Mazarguil -ret, strerror(-ret), error.type, error.cause, 259fee75e14SAdrien Mazarguil error.message ? error.message : "(unspecified)"); 260fc1b5ec5SMoti Haimovsky goto exit; 261fee75e14SAdrien Mazarguil } 262fc1b5ec5SMoti Haimovsky ret = mlx4_intr_install(priv); 26397d37d2cSYongseok Koh if (ret) { 264fc1b5ec5SMoti Haimovsky ERROR("%p: interrupt handler installation failed", 265fc1b5ec5SMoti Haimovsky (void *)dev); 26697d37d2cSYongseok Koh goto exit; 26797d37d2cSYongseok Koh } 26897d37d2cSYongseok Koh ret = mlx4_proc_priv_init(dev); 26997d37d2cSYongseok Koh if (ret) { 27097d37d2cSYongseok Koh ERROR("%p: process private data allocation failed", 27197d37d2cSYongseok Koh (void *)dev); 27297d37d2cSYongseok Koh goto exit; 27397d37d2cSYongseok Koh } 274fc1b5ec5SMoti Haimovsky exit: 275bdcad2f4SAdrien Mazarguil return ret; 27698a1f377SBruce Richardson } 27798a1f377SBruce Richardson 27898a1f377SBruce Richardson /** 27998a1f377SBruce Richardson * DPDK callback to start the device. 28098a1f377SBruce Richardson * 2815697a414SAdrien Mazarguil * Simulate device start by initializing common RSS resources and attaching 2825697a414SAdrien Mazarguil * all configured flows. 28398a1f377SBruce Richardson * 28498a1f377SBruce Richardson * @param dev 28598a1f377SBruce Richardson * Pointer to Ethernet device structure. 28698a1f377SBruce Richardson * 28798a1f377SBruce Richardson * @return 2889d14b273SAdrien Mazarguil * 0 on success, negative errno value otherwise and rte_errno is set. 28998a1f377SBruce Richardson */ 29098a1f377SBruce Richardson static int 29198a1f377SBruce Richardson mlx4_dev_start(struct rte_eth_dev *dev) 29298a1f377SBruce Richardson { 293dbeba4cfSThomas Monjalon struct mlx4_priv *priv = dev->data->dev_private; 294fee75e14SAdrien Mazarguil struct rte_flow_error error; 2957cc3ea89SJie Hai uint16_t i; 29646d5736aSVasily Philipov int ret; 29798a1f377SBruce Richardson 298e4dff4d8SAdrien Mazarguil if (priv->started) 29998a1f377SBruce Richardson return 0; 30098a1f377SBruce Richardson DEBUG("%p: attaching configured flows to all RX queues", (void *)dev); 30198a1f377SBruce Richardson priv->started = 1; 3025697a414SAdrien Mazarguil ret = mlx4_rss_init(priv); 3035697a414SAdrien Mazarguil if (ret) { 3045697a414SAdrien Mazarguil ERROR("%p: cannot initialize RSS resources: %s", 3055697a414SAdrien Mazarguil (void *)dev, strerror(-ret)); 3065697a414SAdrien Mazarguil goto err; 3075697a414SAdrien Mazarguil } 308e99fdaa7SAlexander Kozyrev #ifdef RTE_LIBRTE_MLX4_DEBUG 3099797bfccSYongseok Koh mlx4_mr_dump_dev(dev); 3109797bfccSYongseok Koh #endif 311fc1b5ec5SMoti Haimovsky ret = mlx4_rxq_intr_enable(priv); 3126dd7b705SGaetan Rivet if (ret) { 313a6e8b01cSAdrien Mazarguil ERROR("%p: interrupt handler installation failed", 3140a2ae703SAdrien Mazarguil (void *)dev); 3150a2ae703SAdrien Mazarguil goto err; 3169f05a4b8SMoti Haimovsky } 317fee75e14SAdrien Mazarguil ret = mlx4_flow_sync(priv, &error); 31846d5736aSVasily Philipov if (ret) { 319fee75e14SAdrien Mazarguil ERROR("%p: cannot attach flow rules (code %d, \"%s\")," 320fee75e14SAdrien Mazarguil " flow error type %d, cause %p, message: %s", 321fee75e14SAdrien Mazarguil (void *)dev, 322fee75e14SAdrien Mazarguil -ret, strerror(-ret), error.type, error.cause, 323fee75e14SAdrien Mazarguil error.message ? error.message : "(unspecified)"); 32446d5736aSVasily Philipov goto err; 32546d5736aSVasily Philipov } 32667e6cce6SAdrien Mazarguil rte_wmb(); 32767e6cce6SAdrien Mazarguil dev->tx_pkt_burst = mlx4_tx_burst; 32867e6cce6SAdrien Mazarguil dev->rx_pkt_burst = mlx4_rx_burst; 3290203d33aSYongseok Koh /* Enable datapath on secondary process. */ 3300203d33aSYongseok Koh mlx4_mp_req_start_rxtx(dev); 3317cc3ea89SJie Hai 3327cc3ea89SJie Hai for (i = 0; i < dev->data->nb_rx_queues; i++) 3337cc3ea89SJie Hai dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 3347cc3ea89SJie Hai for (i = 0; i < dev->data->nb_tx_queues; i++) 3357cc3ea89SJie Hai dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 3367cc3ea89SJie Hai 33746d5736aSVasily Philipov return 0; 33846d5736aSVasily Philipov err: 33984a68486SAdrien Mazarguil mlx4_dev_stop(dev); 3409d14b273SAdrien Mazarguil return ret; 34198a1f377SBruce Richardson } 34298a1f377SBruce Richardson 34398a1f377SBruce Richardson /** 34498a1f377SBruce Richardson * DPDK callback to stop the device. 34598a1f377SBruce Richardson * 34698a1f377SBruce Richardson * Simulate device stop by detaching all configured flows. 34798a1f377SBruce Richardson * 34898a1f377SBruce Richardson * @param dev 34998a1f377SBruce Richardson * Pointer to Ethernet device structure. 35098a1f377SBruce Richardson */ 35162024eb8SIvan Ilchenko static int 35298a1f377SBruce Richardson mlx4_dev_stop(struct rte_eth_dev *dev) 35398a1f377SBruce Richardson { 354dbeba4cfSThomas Monjalon struct mlx4_priv *priv = dev->data->dev_private; 3557cc3ea89SJie Hai uint16_t i; 35698a1f377SBruce Richardson 357e4dff4d8SAdrien Mazarguil if (!priv->started) 35862024eb8SIvan Ilchenko return 0; 35998a1f377SBruce Richardson DEBUG("%p: detaching flows from all RX queues", (void *)dev); 36098a1f377SBruce Richardson priv->started = 0; 361a41f593fSFerruh Yigit dev->tx_pkt_burst = rte_eth_pkt_burst_dummy; 362a41f593fSFerruh Yigit dev->rx_pkt_burst = rte_eth_pkt_burst_dummy; 36367e6cce6SAdrien Mazarguil rte_wmb(); 3640203d33aSYongseok Koh /* Disable datapath on secondary process. */ 3650203d33aSYongseok Koh mlx4_mp_req_stop_rxtx(dev); 366fee75e14SAdrien Mazarguil mlx4_flow_sync(priv, NULL); 367fc1b5ec5SMoti Haimovsky mlx4_rxq_intr_disable(priv); 3685697a414SAdrien Mazarguil mlx4_rss_deinit(priv); 36962024eb8SIvan Ilchenko 3707cc3ea89SJie Hai for (i = 0; i < dev->data->nb_rx_queues; i++) 3717cc3ea89SJie Hai dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 3727cc3ea89SJie Hai for (i = 0; i < dev->data->nb_tx_queues; i++) 3737cc3ea89SJie Hai dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 3747cc3ea89SJie Hai 37562024eb8SIvan Ilchenko return 0; 37698a1f377SBruce Richardson } 37798a1f377SBruce Richardson 37898a1f377SBruce Richardson /** 37998a1f377SBruce Richardson * DPDK callback to close the device. 38098a1f377SBruce Richardson * 38198a1f377SBruce Richardson * Destroy all queues and objects, free memory. 38298a1f377SBruce Richardson * 38398a1f377SBruce Richardson * @param dev 38498a1f377SBruce Richardson * Pointer to Ethernet device structure. 38598a1f377SBruce Richardson */ 386b142387bSThomas Monjalon static int 38798a1f377SBruce Richardson mlx4_dev_close(struct rte_eth_dev *dev) 38898a1f377SBruce Richardson { 389dbeba4cfSThomas Monjalon struct mlx4_priv *priv = dev->data->dev_private; 39098a1f377SBruce Richardson unsigned int i; 39198a1f377SBruce Richardson 3928e1630e0SMichael Baum if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 3938e1630e0SMichael Baum rte_eth_dev_release_port(dev); 39430410493SThomas Monjalon return 0; 3958e1630e0SMichael Baum } 39698a1f377SBruce Richardson DEBUG("%p: closing device \"%s\"", 39798a1f377SBruce Richardson (void *)dev, 39898a1f377SBruce Richardson ((priv->ctx != NULL) ? priv->ctx->device->name : "")); 399a41f593fSFerruh Yigit dev->rx_pkt_burst = rte_eth_pkt_burst_dummy; 400a41f593fSFerruh Yigit dev->tx_pkt_burst = rte_eth_pkt_burst_dummy; 40167e6cce6SAdrien Mazarguil rte_wmb(); 4020203d33aSYongseok Koh /* Disable datapath on secondary process. */ 4030203d33aSYongseok Koh mlx4_mp_req_stop_rxtx(dev); 40467e6cce6SAdrien Mazarguil mlx4_flow_clean(priv); 40584a68486SAdrien Mazarguil mlx4_rss_deinit(priv); 406be65fdcbSAdrien Mazarguil for (i = 0; i != dev->data->nb_rx_queues; ++i) 4077483341aSXueming Li mlx4_rx_queue_release(dev, i); 408be65fdcbSAdrien Mazarguil for (i = 0; i != dev->data->nb_tx_queues; ++i) 4097483341aSXueming Li mlx4_tx_queue_release(dev, i); 41097d37d2cSYongseok Koh mlx4_proc_priv_uninit(dev); 4119797bfccSYongseok Koh mlx4_mr_release(dev); 41298a1f377SBruce Richardson if (priv->pd != NULL) { 4138e08df22SAlexander Kozyrev MLX4_ASSERT(priv->ctx != NULL); 4144eba244bSAdrien Mazarguil claim_zero(mlx4_glue->dealloc_pd(priv->pd)); 4154eba244bSAdrien Mazarguil claim_zero(mlx4_glue->close_device(priv->ctx)); 41698a1f377SBruce Richardson } else 4178e08df22SAlexander Kozyrev MLX4_ASSERT(priv->ctx == NULL); 418b62579d4SAdrien Mazarguil mlx4_intr_uninstall(priv); 41998a1f377SBruce Richardson memset(priv, 0, sizeof(*priv)); 420c0722108SThomas Monjalon /* mac_addrs must not be freed because part of dev_private */ 421c0722108SThomas Monjalon dev->data->mac_addrs = NULL; 422b142387bSThomas Monjalon return 0; 42398a1f377SBruce Richardson } 42498a1f377SBruce Richardson 42598a1f377SBruce Richardson static const struct eth_dev_ops mlx4_dev_ops = { 42698a1f377SBruce Richardson .dev_configure = mlx4_dev_configure, 42798a1f377SBruce Richardson .dev_start = mlx4_dev_start, 42898a1f377SBruce Richardson .dev_stop = mlx4_dev_stop, 42961cbdd41SAdrien Mazarguil .dev_set_link_down = mlx4_dev_set_link_down, 43061cbdd41SAdrien Mazarguil .dev_set_link_up = mlx4_dev_set_link_up, 43198a1f377SBruce Richardson .dev_close = mlx4_dev_close, 43298a1f377SBruce Richardson .link_update = mlx4_link_update, 433eacaac7bSAdrien Mazarguil .promiscuous_enable = mlx4_promiscuous_enable, 434eacaac7bSAdrien Mazarguil .promiscuous_disable = mlx4_promiscuous_disable, 435eacaac7bSAdrien Mazarguil .allmulticast_enable = mlx4_allmulticast_enable, 436eacaac7bSAdrien Mazarguil .allmulticast_disable = mlx4_allmulticast_disable, 4371437784bSAdrien Mazarguil .mac_addr_remove = mlx4_mac_addr_remove, 4381437784bSAdrien Mazarguil .mac_addr_add = mlx4_mac_addr_add, 4391437784bSAdrien Mazarguil .mac_addr_set = mlx4_mac_addr_set, 440138a740cSAdrien Mazarguil .set_mc_addr_list = mlx4_set_mc_addr_list, 44198a1f377SBruce Richardson .stats_get = mlx4_stats_get, 44298a1f377SBruce Richardson .stats_reset = mlx4_stats_reset, 443714bf46eSThomas Monjalon .fw_version_get = mlx4_fw_version_get, 44498a1f377SBruce Richardson .dev_infos_get = mlx4_dev_infos_get, 445aee4a03fSMoti Haimovsky .dev_supported_ptypes_get = mlx4_dev_supported_ptypes_get, 44630695adbSAdrien Mazarguil .vlan_filter_set = mlx4_vlan_filter_set, 44798a1f377SBruce Richardson .rx_queue_setup = mlx4_rx_queue_setup, 44898a1f377SBruce Richardson .tx_queue_setup = mlx4_tx_queue_setup, 44998a1f377SBruce Richardson .rx_queue_release = mlx4_rx_queue_release, 45098a1f377SBruce Richardson .tx_queue_release = mlx4_tx_queue_release, 45161cbdd41SAdrien Mazarguil .flow_ctrl_get = mlx4_flow_ctrl_get, 45261cbdd41SAdrien Mazarguil .flow_ctrl_set = mlx4_flow_ctrl_set, 45361cbdd41SAdrien Mazarguil .mtu_set = mlx4_mtu_set, 454fb7ad441SThomas Monjalon .flow_ops_get = mlx4_flow_ops_get, 4559f05a4b8SMoti Haimovsky .rx_queue_intr_enable = mlx4_rx_intr_enable, 4569f05a4b8SMoti Haimovsky .rx_queue_intr_disable = mlx4_rx_intr_disable, 457cdf4ec6eSMatan Azrad .is_removed = mlx4_is_removed, 45898a1f377SBruce Richardson }; 45998a1f377SBruce Richardson 4600203d33aSYongseok Koh /* Available operations from secondary process. */ 4610203d33aSYongseok Koh static const struct eth_dev_ops mlx4_dev_sec_ops = { 4620203d33aSYongseok Koh .stats_get = mlx4_stats_get, 4630203d33aSYongseok Koh .stats_reset = mlx4_stats_reset, 4640203d33aSYongseok Koh .fw_version_get = mlx4_fw_version_get, 4650203d33aSYongseok Koh .dev_infos_get = mlx4_dev_infos_get, 4660203d33aSYongseok Koh }; 4670203d33aSYongseok Koh 46898a1f377SBruce Richardson /** 46998a1f377SBruce Richardson * Get PCI information from struct ibv_device. 47098a1f377SBruce Richardson * 47198a1f377SBruce Richardson * @param device 47298a1f377SBruce Richardson * Pointer to Ethernet device structure. 47398a1f377SBruce Richardson * @param[out] pci_addr 47498a1f377SBruce Richardson * PCI bus address output buffer. 47598a1f377SBruce Richardson * 47698a1f377SBruce Richardson * @return 4779d14b273SAdrien Mazarguil * 0 on success, negative errno value otherwise and rte_errno is set. 47898a1f377SBruce Richardson */ 47998a1f377SBruce Richardson static int 48098a1f377SBruce Richardson mlx4_ibv_device_to_pci_addr(const struct ibv_device *device, 48198a1f377SBruce Richardson struct rte_pci_addr *pci_addr) 48298a1f377SBruce Richardson { 48398a1f377SBruce Richardson FILE *file; 48498a1f377SBruce Richardson char line[32]; 48598a1f377SBruce Richardson MKSTR(path, "%s/device/uevent", device->ibdev_path); 48698a1f377SBruce Richardson 48798a1f377SBruce Richardson file = fopen(path, "rb"); 4889d14b273SAdrien Mazarguil if (file == NULL) { 4899d14b273SAdrien Mazarguil rte_errno = errno; 4909d14b273SAdrien Mazarguil return -rte_errno; 4919d14b273SAdrien Mazarguil } 49298a1f377SBruce Richardson while (fgets(line, sizeof(line), file) == line) { 49398a1f377SBruce Richardson size_t len = strlen(line); 49498a1f377SBruce Richardson int ret; 49598a1f377SBruce Richardson 49698a1f377SBruce Richardson /* Truncate long lines. */ 49798a1f377SBruce Richardson if (len == (sizeof(line) - 1)) 49898a1f377SBruce Richardson while (line[(len - 1)] != '\n') { 49998a1f377SBruce Richardson ret = fgetc(file); 50098a1f377SBruce Richardson if (ret == EOF) 50198a1f377SBruce Richardson break; 50298a1f377SBruce Richardson line[(len - 1)] = ret; 50398a1f377SBruce Richardson } 50498a1f377SBruce Richardson /* Extract information. */ 50598a1f377SBruce Richardson if (sscanf(line, 50698a1f377SBruce Richardson "PCI_SLOT_NAME=" 507463ced95SStephen Hemminger "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 50898a1f377SBruce Richardson &pci_addr->domain, 50998a1f377SBruce Richardson &pci_addr->bus, 51098a1f377SBruce Richardson &pci_addr->devid, 51198a1f377SBruce Richardson &pci_addr->function) == 4) { 51298a1f377SBruce Richardson break; 51398a1f377SBruce Richardson } 51498a1f377SBruce Richardson } 51598a1f377SBruce Richardson fclose(file); 51698a1f377SBruce Richardson return 0; 51798a1f377SBruce Richardson } 51898a1f377SBruce Richardson 51998a1f377SBruce Richardson /** 520001a520eSGaetan Rivet * Verify and store value for device argument. 521001a520eSGaetan Rivet * 522001a520eSGaetan Rivet * @param[in] key 523001a520eSGaetan Rivet * Key argument to verify. 524001a520eSGaetan Rivet * @param[in] val 525001a520eSGaetan Rivet * Value associated with key. 52682642799SAdrien Mazarguil * @param[in, out] conf 52782642799SAdrien Mazarguil * Shared configuration data. 528001a520eSGaetan Rivet * 529001a520eSGaetan Rivet * @return 5309d14b273SAdrien Mazarguil * 0 on success, negative errno value otherwise and rte_errno is set. 531001a520eSGaetan Rivet */ 532001a520eSGaetan Rivet static int 53382642799SAdrien Mazarguil mlx4_arg_parse(const char *key, const char *val, struct mlx4_conf *conf) 534001a520eSGaetan Rivet { 535001a520eSGaetan Rivet unsigned long tmp; 536001a520eSGaetan Rivet 537001a520eSGaetan Rivet errno = 0; 538001a520eSGaetan Rivet tmp = strtoul(val, NULL, 0); 539001a520eSGaetan Rivet if (errno) { 5409d14b273SAdrien Mazarguil rte_errno = errno; 541001a520eSGaetan Rivet WARN("%s: \"%s\" is not a valid integer", key, val); 5429d14b273SAdrien Mazarguil return -rte_errno; 543001a520eSGaetan Rivet } 544001a520eSGaetan Rivet if (strcmp(MLX4_PMD_PORT_KVARG, key) == 0) { 545a43fba2cSOphir Munk uint32_t ports = rte_log2_u32(conf->ports.present + 1); 54682642799SAdrien Mazarguil 54782642799SAdrien Mazarguil if (tmp >= ports) { 54882642799SAdrien Mazarguil ERROR("port index %lu outside range [0,%" PRIu32 ")", 54982642799SAdrien Mazarguil tmp, ports); 550001a520eSGaetan Rivet return -EINVAL; 551001a520eSGaetan Rivet } 55282642799SAdrien Mazarguil if (!(conf->ports.present & (1 << tmp))) { 5539d14b273SAdrien Mazarguil rte_errno = EINVAL; 55482642799SAdrien Mazarguil ERROR("invalid port index %lu", tmp); 5559d14b273SAdrien Mazarguil return -rte_errno; 55682642799SAdrien Mazarguil } 55782642799SAdrien Mazarguil conf->ports.enabled |= 1 << tmp; 558f4efc0ebSYongseok Koh } else if (strcmp(MLX4_MR_EXT_MEMSEG_EN_KVARG, key) == 0) { 559f4efc0ebSYongseok Koh conf->mr_ext_memseg_en = !!tmp; 560001a520eSGaetan Rivet } else { 5619d14b273SAdrien Mazarguil rte_errno = EINVAL; 562001a520eSGaetan Rivet WARN("%s: unknown parameter", key); 5639d14b273SAdrien Mazarguil return -rte_errno; 564001a520eSGaetan Rivet } 565001a520eSGaetan Rivet return 0; 566001a520eSGaetan Rivet } 567001a520eSGaetan Rivet 568001a520eSGaetan Rivet /** 569001a520eSGaetan Rivet * Parse device parameters. 570001a520eSGaetan Rivet * 571001a520eSGaetan Rivet * @param devargs 572001a520eSGaetan Rivet * Device arguments structure. 573001a520eSGaetan Rivet * 574001a520eSGaetan Rivet * @return 5759d14b273SAdrien Mazarguil * 0 on success, negative errno value otherwise and rte_errno is set. 576001a520eSGaetan Rivet */ 577001a520eSGaetan Rivet static int 578001a520eSGaetan Rivet mlx4_args(struct rte_devargs *devargs, struct mlx4_conf *conf) 579001a520eSGaetan Rivet { 580001a520eSGaetan Rivet struct rte_kvargs *kvlist; 581001a520eSGaetan Rivet unsigned int arg_count; 582001a520eSGaetan Rivet int ret = 0; 583001a520eSGaetan Rivet int i; 584001a520eSGaetan Rivet 585001a520eSGaetan Rivet if (devargs == NULL) 586001a520eSGaetan Rivet return 0; 587001a520eSGaetan Rivet kvlist = rte_kvargs_parse(devargs->args, pmd_mlx4_init_params); 588001a520eSGaetan Rivet if (kvlist == NULL) { 5899d14b273SAdrien Mazarguil rte_errno = EINVAL; 590001a520eSGaetan Rivet ERROR("failed to parse kvargs"); 5919d14b273SAdrien Mazarguil return -rte_errno; 592001a520eSGaetan Rivet } 593001a520eSGaetan Rivet /* Process parameters. */ 594001a520eSGaetan Rivet for (i = 0; pmd_mlx4_init_params[i]; ++i) { 595f4efc0ebSYongseok Koh arg_count = rte_kvargs_count(kvlist, pmd_mlx4_init_params[i]); 596001a520eSGaetan Rivet while (arg_count-- > 0) { 59782642799SAdrien Mazarguil ret = rte_kvargs_process(kvlist, 598f4efc0ebSYongseok Koh pmd_mlx4_init_params[i], 59982642799SAdrien Mazarguil (int (*)(const char *, 60082642799SAdrien Mazarguil const char *, 60182642799SAdrien Mazarguil void *)) 60282642799SAdrien Mazarguil mlx4_arg_parse, 60382642799SAdrien Mazarguil conf); 604001a520eSGaetan Rivet if (ret != 0) 605001a520eSGaetan Rivet goto free_kvlist; 606001a520eSGaetan Rivet } 607001a520eSGaetan Rivet } 608001a520eSGaetan Rivet free_kvlist: 609001a520eSGaetan Rivet rte_kvargs_free(kvlist); 610001a520eSGaetan Rivet return ret; 611001a520eSGaetan Rivet } 612001a520eSGaetan Rivet 613828a4ce3SAdrien Mazarguil /** 614828a4ce3SAdrien Mazarguil * Interpret RSS capabilities reported by device. 615828a4ce3SAdrien Mazarguil * 616828a4ce3SAdrien Mazarguil * This function returns the set of usable Verbs RSS hash fields, kernel 617828a4ce3SAdrien Mazarguil * quirks taken into account. 618828a4ce3SAdrien Mazarguil * 619828a4ce3SAdrien Mazarguil * @param ctx 620828a4ce3SAdrien Mazarguil * Verbs context. 621828a4ce3SAdrien Mazarguil * @param pd 622828a4ce3SAdrien Mazarguil * Verbs protection domain. 623828a4ce3SAdrien Mazarguil * @param device_attr_ex 624828a4ce3SAdrien Mazarguil * Extended device attributes to interpret. 625828a4ce3SAdrien Mazarguil * 626828a4ce3SAdrien Mazarguil * @return 627828a4ce3SAdrien Mazarguil * Usable RSS hash fields mask in Verbs format. 628828a4ce3SAdrien Mazarguil */ 629828a4ce3SAdrien Mazarguil static uint64_t 630828a4ce3SAdrien Mazarguil mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd, 631828a4ce3SAdrien Mazarguil struct ibv_device_attr_ex *device_attr_ex) 632828a4ce3SAdrien Mazarguil { 633828a4ce3SAdrien Mazarguil uint64_t hw_rss_sup = device_attr_ex->rss_caps.rx_hash_fields_mask; 634828a4ce3SAdrien Mazarguil struct ibv_cq *cq = NULL; 635828a4ce3SAdrien Mazarguil struct ibv_wq *wq = NULL; 636828a4ce3SAdrien Mazarguil struct ibv_rwq_ind_table *ind = NULL; 637828a4ce3SAdrien Mazarguil struct ibv_qp *qp = NULL; 638828a4ce3SAdrien Mazarguil 639828a4ce3SAdrien Mazarguil if (!hw_rss_sup) { 640828a4ce3SAdrien Mazarguil WARN("no RSS capabilities reported; disabling support for UDP" 641828a4ce3SAdrien Mazarguil " RSS and inner VXLAN RSS"); 642828a4ce3SAdrien Mazarguil return IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 | 643828a4ce3SAdrien Mazarguil IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6 | 644828a4ce3SAdrien Mazarguil IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP; 645828a4ce3SAdrien Mazarguil } 646828a4ce3SAdrien Mazarguil if (!(hw_rss_sup & IBV_RX_HASH_INNER)) 647828a4ce3SAdrien Mazarguil return hw_rss_sup; 648828a4ce3SAdrien Mazarguil /* 649828a4ce3SAdrien Mazarguil * Although reported as supported, missing code in some Linux 650828a4ce3SAdrien Mazarguil * versions (v4.15, v4.16) prevents the creation of hash QPs with 651828a4ce3SAdrien Mazarguil * inner capability. 652828a4ce3SAdrien Mazarguil * 653828a4ce3SAdrien Mazarguil * There is no choice but to attempt to instantiate a temporary RSS 654828a4ce3SAdrien Mazarguil * context in order to confirm its support. 655828a4ce3SAdrien Mazarguil */ 656828a4ce3SAdrien Mazarguil cq = mlx4_glue->create_cq(ctx, 1, NULL, NULL, 0); 657828a4ce3SAdrien Mazarguil wq = cq ? mlx4_glue->create_wq 658828a4ce3SAdrien Mazarguil (ctx, 659828a4ce3SAdrien Mazarguil &(struct ibv_wq_init_attr){ 660828a4ce3SAdrien Mazarguil .wq_type = IBV_WQT_RQ, 661828a4ce3SAdrien Mazarguil .max_wr = 1, 662828a4ce3SAdrien Mazarguil .max_sge = 1, 663828a4ce3SAdrien Mazarguil .pd = pd, 664828a4ce3SAdrien Mazarguil .cq = cq, 665828a4ce3SAdrien Mazarguil }) : NULL; 666828a4ce3SAdrien Mazarguil ind = wq ? mlx4_glue->create_rwq_ind_table 667828a4ce3SAdrien Mazarguil (ctx, 668828a4ce3SAdrien Mazarguil &(struct ibv_rwq_ind_table_init_attr){ 669828a4ce3SAdrien Mazarguil .log_ind_tbl_size = 0, 670828a4ce3SAdrien Mazarguil .ind_tbl = &wq, 671828a4ce3SAdrien Mazarguil .comp_mask = 0, 672828a4ce3SAdrien Mazarguil }) : NULL; 673828a4ce3SAdrien Mazarguil qp = ind ? mlx4_glue->create_qp_ex 674828a4ce3SAdrien Mazarguil (ctx, 675828a4ce3SAdrien Mazarguil &(struct ibv_qp_init_attr_ex){ 676828a4ce3SAdrien Mazarguil .comp_mask = 677828a4ce3SAdrien Mazarguil (IBV_QP_INIT_ATTR_PD | 678828a4ce3SAdrien Mazarguil IBV_QP_INIT_ATTR_RX_HASH | 679828a4ce3SAdrien Mazarguil IBV_QP_INIT_ATTR_IND_TABLE), 680828a4ce3SAdrien Mazarguil .qp_type = IBV_QPT_RAW_PACKET, 681828a4ce3SAdrien Mazarguil .pd = pd, 682828a4ce3SAdrien Mazarguil .rwq_ind_tbl = ind, 683828a4ce3SAdrien Mazarguil .rx_hash_conf = { 684828a4ce3SAdrien Mazarguil .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 685828a4ce3SAdrien Mazarguil .rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE, 686828a4ce3SAdrien Mazarguil .rx_hash_key = mlx4_rss_hash_key_default, 687828a4ce3SAdrien Mazarguil .rx_hash_fields_mask = hw_rss_sup, 688828a4ce3SAdrien Mazarguil }, 689828a4ce3SAdrien Mazarguil }) : NULL; 690828a4ce3SAdrien Mazarguil if (!qp) { 691828a4ce3SAdrien Mazarguil WARN("disabling unusable inner RSS capability due to kernel" 692828a4ce3SAdrien Mazarguil " quirk"); 693828a4ce3SAdrien Mazarguil hw_rss_sup &= ~IBV_RX_HASH_INNER; 694828a4ce3SAdrien Mazarguil } else { 695828a4ce3SAdrien Mazarguil claim_zero(mlx4_glue->destroy_qp(qp)); 696828a4ce3SAdrien Mazarguil } 697828a4ce3SAdrien Mazarguil if (ind) 698828a4ce3SAdrien Mazarguil claim_zero(mlx4_glue->destroy_rwq_ind_table(ind)); 699828a4ce3SAdrien Mazarguil if (wq) 700828a4ce3SAdrien Mazarguil claim_zero(mlx4_glue->destroy_wq(wq)); 701828a4ce3SAdrien Mazarguil if (cq) 702828a4ce3SAdrien Mazarguil claim_zero(mlx4_glue->destroy_cq(cq)); 703828a4ce3SAdrien Mazarguil return hw_rss_sup; 704828a4ce3SAdrien Mazarguil } 705828a4ce3SAdrien Mazarguil 706fdf91e0fSJan Blunck static struct rte_pci_driver mlx4_driver; 70798a1f377SBruce Richardson 7080203d33aSYongseok Koh /** 7090203d33aSYongseok Koh * PMD global initialization. 7100203d33aSYongseok Koh * 7110203d33aSYongseok Koh * Independent from individual device, this function initializes global 7120203d33aSYongseok Koh * per-PMD data structures distinguishing primary and secondary processes. 7130203d33aSYongseok Koh * Hence, each initialization is called once per a process. 7140203d33aSYongseok Koh * 7150203d33aSYongseok Koh * @return 7160203d33aSYongseok Koh * 0 on success, a negative errno value otherwise and rte_errno is set. 7170203d33aSYongseok Koh */ 7180203d33aSYongseok Koh static int 7190203d33aSYongseok Koh mlx4_init_once(void) 7200203d33aSYongseok Koh { 7210203d33aSYongseok Koh struct mlx4_shared_data *sd; 7220203d33aSYongseok Koh struct mlx4_local_data *ld = &mlx4_local_data; 723edf73dd3SAnatoly Burakov int ret = 0; 7240203d33aSYongseok Koh 7250203d33aSYongseok Koh if (mlx4_init_shared_data()) 7260203d33aSYongseok Koh return -rte_errno; 7270203d33aSYongseok Koh sd = mlx4_shared_data; 7288e08df22SAlexander Kozyrev MLX4_ASSERT(sd); 7290203d33aSYongseok Koh rte_spinlock_lock(&sd->lock); 7300203d33aSYongseok Koh switch (rte_eal_process_type()) { 7310203d33aSYongseok Koh case RTE_PROC_PRIMARY: 7320203d33aSYongseok Koh if (sd->init_done) 7330203d33aSYongseok Koh break; 7340203d33aSYongseok Koh LIST_INIT(&sd->mem_event_cb_list); 7350203d33aSYongseok Koh rte_rwlock_init(&sd->mem_event_rwlock); 7360203d33aSYongseok Koh rte_mem_event_callback_register("MLX4_MEM_EVENT_CB", 7370203d33aSYongseok Koh mlx4_mr_mem_event_cb, NULL); 738edf73dd3SAnatoly Burakov ret = mlx4_mp_init_primary(); 739edf73dd3SAnatoly Burakov if (ret) 740edf73dd3SAnatoly Burakov goto out; 741028669bcSAnatoly Burakov sd->init_done = 1; 7420203d33aSYongseok Koh break; 7430203d33aSYongseok Koh case RTE_PROC_SECONDARY: 7440203d33aSYongseok Koh if (ld->init_done) 7450203d33aSYongseok Koh break; 746edf73dd3SAnatoly Burakov ret = mlx4_mp_init_secondary(); 747edf73dd3SAnatoly Burakov if (ret) 748edf73dd3SAnatoly Burakov goto out; 7490203d33aSYongseok Koh ++sd->secondary_cnt; 750028669bcSAnatoly Burakov ld->init_done = 1; 7510203d33aSYongseok Koh break; 7520203d33aSYongseok Koh default: 7530203d33aSYongseok Koh break; 7540203d33aSYongseok Koh } 755edf73dd3SAnatoly Burakov out: 7560203d33aSYongseok Koh rte_spinlock_unlock(&sd->lock); 757edf73dd3SAnatoly Burakov return ret; 7580203d33aSYongseok Koh } 7590203d33aSYongseok Koh 76098a1f377SBruce Richardson /** 76198a1f377SBruce Richardson * DPDK callback to register a PCI device. 76298a1f377SBruce Richardson * 76398a1f377SBruce Richardson * This function creates an Ethernet device for each port of a given 76498a1f377SBruce Richardson * PCI device. 76598a1f377SBruce Richardson * 76698a1f377SBruce Richardson * @param[in] pci_drv 76798a1f377SBruce Richardson * PCI driver structure (mlx4_driver). 76898a1f377SBruce Richardson * @param[in] pci_dev 76998a1f377SBruce Richardson * PCI device information. 77098a1f377SBruce Richardson * 77198a1f377SBruce Richardson * @return 7729d14b273SAdrien Mazarguil * 0 on success, negative errno value otherwise and rte_errno is set. 77398a1f377SBruce Richardson */ 77498a1f377SBruce Richardson static int 775af424af8SShreyansh Jain mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) 77698a1f377SBruce Richardson { 77798a1f377SBruce Richardson struct ibv_device **list; 77898a1f377SBruce Richardson struct ibv_device *ibv_dev; 77998a1f377SBruce Richardson int err = 0; 78098a1f377SBruce Richardson struct ibv_context *attr_ctx = NULL; 78198a1f377SBruce Richardson struct ibv_device_attr device_attr; 782024e87beSAdrien Mazarguil struct ibv_device_attr_ex device_attr_ex; 783bcf58b64SMichael Baum struct rte_eth_dev *prev_dev = NULL; 784001a520eSGaetan Rivet struct mlx4_conf conf = { 78582642799SAdrien Mazarguil .ports.present = 0, 786f4efc0ebSYongseok Koh .mr_ext_memseg_en = 1, 787001a520eSGaetan Rivet }; 78898a1f377SBruce Richardson unsigned int vf; 78998a1f377SBruce Richardson int i; 790164cad78SStephen Hemminger char ifname[IF_NAMESIZE]; 79198a1f377SBruce Richardson 79298a1f377SBruce Richardson (void)pci_drv; 7930203d33aSYongseok Koh err = mlx4_init_once(); 7940203d33aSYongseok Koh if (err) { 7950203d33aSYongseok Koh ERROR("unable to init PMD global data: %s", 7960203d33aSYongseok Koh strerror(rte_errno)); 7970203d33aSYongseok Koh return -rte_errno; 7980203d33aSYongseok Koh } 7998e08df22SAlexander Kozyrev MLX4_ASSERT(pci_drv == &mlx4_driver); 8004eba244bSAdrien Mazarguil list = mlx4_glue->get_device_list(&i); 80198a1f377SBruce Richardson if (list == NULL) { 8029d14b273SAdrien Mazarguil rte_errno = errno; 8038e08df22SAlexander Kozyrev MLX4_ASSERT(rte_errno); 8049d14b273SAdrien Mazarguil if (rte_errno == ENOSYS) 8059e09761bSGaetan Rivet ERROR("cannot list devices, is ib_uverbs loaded?"); 8069d14b273SAdrien Mazarguil return -rte_errno; 80798a1f377SBruce Richardson } 8088e08df22SAlexander Kozyrev MLX4_ASSERT(i >= 0); 80998a1f377SBruce Richardson /* 81098a1f377SBruce Richardson * For each listed device, check related sysfs entry against 81198a1f377SBruce Richardson * the provided PCI ID. 81298a1f377SBruce Richardson */ 81398a1f377SBruce Richardson while (i != 0) { 81498a1f377SBruce Richardson struct rte_pci_addr pci_addr; 81598a1f377SBruce Richardson 81698a1f377SBruce Richardson --i; 81798a1f377SBruce Richardson DEBUG("checking device \"%s\"", list[i]->name); 81898a1f377SBruce Richardson if (mlx4_ibv_device_to_pci_addr(list[i], &pci_addr)) 81998a1f377SBruce Richardson continue; 8208fa22e1fSThomas Monjalon if (rte_pci_addr_cmp(&pci_dev->addr, &pci_addr) != 0) 82198a1f377SBruce Richardson continue; 82298a1f377SBruce Richardson vf = (pci_dev->id.device_id == 82398a1f377SBruce Richardson PCI_DEVICE_ID_MELLANOX_CONNECTX3VF); 82498a1f377SBruce Richardson INFO("PCI information matches, using device \"%s\" (VF: %s)", 82598a1f377SBruce Richardson list[i]->name, (vf ? "true" : "false")); 8264eba244bSAdrien Mazarguil attr_ctx = mlx4_glue->open_device(list[i]); 82798a1f377SBruce Richardson err = errno; 82898a1f377SBruce Richardson break; 82998a1f377SBruce Richardson } 83098a1f377SBruce Richardson if (attr_ctx == NULL) { 8314eba244bSAdrien Mazarguil mlx4_glue->free_device_list(list); 83298a1f377SBruce Richardson switch (err) { 83398a1f377SBruce Richardson case 0: 8349d14b273SAdrien Mazarguil rte_errno = ENODEV; 8359e09761bSGaetan Rivet ERROR("cannot access device, is mlx4_ib loaded?"); 8369d14b273SAdrien Mazarguil return -rte_errno; 83798a1f377SBruce Richardson case EINVAL: 8389d14b273SAdrien Mazarguil rte_errno = EINVAL; 8399e09761bSGaetan Rivet ERROR("cannot use device, are drivers up to date?"); 8409d14b273SAdrien Mazarguil return -rte_errno; 84198a1f377SBruce Richardson } 8428e08df22SAlexander Kozyrev MLX4_ASSERT(err > 0); 8439d14b273SAdrien Mazarguil rte_errno = err; 8449d14b273SAdrien Mazarguil return -rte_errno; 84598a1f377SBruce Richardson } 84698a1f377SBruce Richardson ibv_dev = list[i]; 84798a1f377SBruce Richardson DEBUG("device opened"); 8484eba244bSAdrien Mazarguil if (mlx4_glue->query_device(attr_ctx, &device_attr)) { 84966f2ac0fSAdrien Mazarguil err = ENODEV; 85098a1f377SBruce Richardson goto error; 8518d0f8016SMatan Azrad } 85298a1f377SBruce Richardson INFO("%u port(s) detected", device_attr.phys_port_cnt); 85382642799SAdrien Mazarguil conf.ports.present |= (UINT64_C(1) << device_attr.phys_port_cnt) - 1; 854001a520eSGaetan Rivet if (mlx4_args(pci_dev->device.devargs, &conf)) { 855001a520eSGaetan Rivet ERROR("failed to process device arguments"); 85666f2ac0fSAdrien Mazarguil err = EINVAL; 857001a520eSGaetan Rivet goto error; 858001a520eSGaetan Rivet } 859001a520eSGaetan Rivet /* Use all ports when none are defined */ 86082642799SAdrien Mazarguil if (!conf.ports.enabled) 86182642799SAdrien Mazarguil conf.ports.enabled = conf.ports.present; 862024e87beSAdrien Mazarguil /* Retrieve extended device attributes. */ 8634eba244bSAdrien Mazarguil if (mlx4_glue->query_device_ex(attr_ctx, NULL, &device_attr_ex)) { 86466f2ac0fSAdrien Mazarguil err = ENODEV; 865024e87beSAdrien Mazarguil goto error; 866024e87beSAdrien Mazarguil } 8678e08df22SAlexander Kozyrev MLX4_ASSERT(device_attr.max_sge >= MLX4_MAX_SGE); 86898a1f377SBruce Richardson for (i = 0; i < device_attr.phys_port_cnt; i++) { 86998a1f377SBruce Richardson uint32_t port = i + 1; /* ports are indexed from one */ 87098a1f377SBruce Richardson struct ibv_context *ctx = NULL; 87198a1f377SBruce Richardson struct ibv_port_attr port_attr; 87298a1f377SBruce Richardson struct ibv_pd *pd = NULL; 873dbeba4cfSThomas Monjalon struct mlx4_priv *priv = NULL; 874e9d39be8SOr Ami struct rte_eth_dev *eth_dev = NULL; 8756d13ea8eSOlivier Matz struct rte_ether_addr mac; 8760203d33aSYongseok Koh char name[RTE_ETH_NAME_MAX_LEN]; 87798a1f377SBruce Richardson 87882642799SAdrien Mazarguil /* If port is not enabled, skip. */ 87982642799SAdrien Mazarguil if (!(conf.ports.enabled & (1 << i))) 880001a520eSGaetan Rivet continue; 881f2318196SAdrien Mazarguil DEBUG("using port %u", port); 8824eba244bSAdrien Mazarguil ctx = mlx4_glue->open_device(ibv_dev); 8838d0f8016SMatan Azrad if (ctx == NULL) { 88466f2ac0fSAdrien Mazarguil err = ENODEV; 88598a1f377SBruce Richardson goto port_error; 8868d0f8016SMatan Azrad } 8870203d33aSYongseok Koh snprintf(name, sizeof(name), "%s port %u", 8880203d33aSYongseok Koh mlx4_glue->get_device_name(ibv_dev), port); 8890203d33aSYongseok Koh if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 890ff9c3548SLong Li int fd; 891ff9c3548SLong Li 8920203d33aSYongseok Koh eth_dev = rte_eth_dev_attach_secondary(name); 8930203d33aSYongseok Koh if (eth_dev == NULL) { 8940203d33aSYongseok Koh ERROR("can not attach rte ethdev"); 8950203d33aSYongseok Koh rte_errno = ENOMEM; 8960203d33aSYongseok Koh err = rte_errno; 897bcf58b64SMichael Baum goto err_secondary; 8980203d33aSYongseok Koh } 8990203d33aSYongseok Koh priv = eth_dev->data->dev_private; 9000203d33aSYongseok Koh if (!priv->verbs_alloc_ctx.enabled) { 9010203d33aSYongseok Koh ERROR("secondary process is not supported" 9020203d33aSYongseok Koh " due to lack of external allocator" 9030203d33aSYongseok Koh " from Verbs"); 9040203d33aSYongseok Koh rte_errno = ENOTSUP; 9050203d33aSYongseok Koh err = rte_errno; 906bcf58b64SMichael Baum goto err_secondary; 9070203d33aSYongseok Koh } 9080203d33aSYongseok Koh eth_dev->device = &pci_dev->device; 9090203d33aSYongseok Koh eth_dev->dev_ops = &mlx4_dev_sec_ops; 91097d37d2cSYongseok Koh err = mlx4_proc_priv_init(eth_dev); 91197d37d2cSYongseok Koh if (err) 912bcf58b64SMichael Baum goto err_secondary; 9130203d33aSYongseok Koh /* Receive command fd from primary process. */ 914ff9c3548SLong Li fd = mlx4_mp_req_verbs_cmd_fd(eth_dev); 915ff9c3548SLong Li if (fd < 0) { 9160203d33aSYongseok Koh err = rte_errno; 917bcf58b64SMichael Baum goto err_secondary; 9180203d33aSYongseok Koh } 9190203d33aSYongseok Koh /* Remap UAR for Tx queues. */ 920ff9c3548SLong Li err = mlx4_tx_uar_init_secondary(eth_dev, fd); 921ff9c3548SLong Li close(fd); 9220203d33aSYongseok Koh if (err) { 9230203d33aSYongseok Koh err = rte_errno; 924bcf58b64SMichael Baum goto err_secondary; 9250203d33aSYongseok Koh } 9260203d33aSYongseok Koh /* 9270203d33aSYongseok Koh * Ethdev pointer is still required as input since 9280203d33aSYongseok Koh * the primary device is not accessible from the 9290203d33aSYongseok Koh * secondary process. 9300203d33aSYongseok Koh */ 9310203d33aSYongseok Koh eth_dev->tx_pkt_burst = mlx4_tx_burst; 9320203d33aSYongseok Koh eth_dev->rx_pkt_burst = mlx4_rx_burst; 9330203d33aSYongseok Koh claim_zero(mlx4_glue->close_device(ctx)); 9340203d33aSYongseok Koh rte_eth_copy_pci_info(eth_dev, pci_dev); 9350203d33aSYongseok Koh rte_eth_dev_probing_finish(eth_dev); 936bcf58b64SMichael Baum prev_dev = eth_dev; 9370203d33aSYongseok Koh continue; 938bcf58b64SMichael Baum err_secondary: 939bcf58b64SMichael Baum claim_zero(mlx4_glue->close_device(ctx)); 940bcf58b64SMichael Baum rte_eth_dev_release_port(eth_dev); 941bcf58b64SMichael Baum if (prev_dev) 942bcf58b64SMichael Baum rte_eth_dev_release_port(prev_dev); 943bcf58b64SMichael Baum break; 9440203d33aSYongseok Koh } 94598a1f377SBruce Richardson /* Check port status. */ 9464eba244bSAdrien Mazarguil err = mlx4_glue->query_port(ctx, port, &port_attr); 94798a1f377SBruce Richardson if (err) { 94866f2ac0fSAdrien Mazarguil err = ENODEV; 94966f2ac0fSAdrien Mazarguil ERROR("port query failed: %s", strerror(err)); 95098a1f377SBruce Richardson goto port_error; 95198a1f377SBruce Richardson } 9527b066153SAdrien Mazarguil if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { 95366f2ac0fSAdrien Mazarguil err = ENOTSUP; 9547b066153SAdrien Mazarguil ERROR("port %d is not configured in Ethernet mode", 9557b066153SAdrien Mazarguil port); 9567b066153SAdrien Mazarguil goto port_error; 9577b066153SAdrien Mazarguil } 95898a1f377SBruce Richardson if (port_attr.state != IBV_PORT_ACTIVE) 9599df03dd7SAdrien Mazarguil DEBUG("port %d is not active: \"%s\" (%d)", 9604eba244bSAdrien Mazarguil port, mlx4_glue->port_state_str(port_attr.state), 96198a1f377SBruce Richardson port_attr.state); 96235d02c54SAdrien Mazarguil /* Make asynchronous FD non-blocking to handle interrupts. */ 96366f2ac0fSAdrien Mazarguil err = mlx4_fd_set_non_blocking(ctx->async_fd); 96466f2ac0fSAdrien Mazarguil if (err) { 96535d02c54SAdrien Mazarguil ERROR("cannot make asynchronous FD non-blocking: %s", 96666f2ac0fSAdrien Mazarguil strerror(err)); 96735d02c54SAdrien Mazarguil goto port_error; 96835d02c54SAdrien Mazarguil } 96998a1f377SBruce Richardson /* Allocate protection domain. */ 9704eba244bSAdrien Mazarguil pd = mlx4_glue->alloc_pd(ctx); 97198a1f377SBruce Richardson if (pd == NULL) { 97266f2ac0fSAdrien Mazarguil err = ENOMEM; 97398a1f377SBruce Richardson ERROR("PD allocation failure"); 97498a1f377SBruce Richardson goto port_error; 97598a1f377SBruce Richardson } 97698a1f377SBruce Richardson /* from rte_ethdev.c */ 97798a1f377SBruce Richardson priv = rte_zmalloc("ethdev private structure", 97898a1f377SBruce Richardson sizeof(*priv), 97998a1f377SBruce Richardson RTE_CACHE_LINE_SIZE); 98098a1f377SBruce Richardson if (priv == NULL) { 98166f2ac0fSAdrien Mazarguil err = ENOMEM; 98298a1f377SBruce Richardson ERROR("priv allocation failure"); 98398a1f377SBruce Richardson goto port_error; 98498a1f377SBruce Richardson } 98598a1f377SBruce Richardson priv->ctx = ctx; 98698a1f377SBruce Richardson priv->device_attr = device_attr; 98798a1f377SBruce Richardson priv->port = port; 98898a1f377SBruce Richardson priv->pd = pd; 98935b2d13fSOlivier Matz priv->mtu = RTE_ETHER_MTU; 99098a1f377SBruce Richardson priv->vf = vf; 9915db1d364SMoti Haimovsky priv->hw_csum = !!(device_attr.device_cap_flags & 9925db1d364SMoti Haimovsky IBV_DEVICE_RAW_IP_CSUM); 9935db1d364SMoti Haimovsky DEBUG("checksum offloading is %ssupported", 9945db1d364SMoti Haimovsky (priv->hw_csum ? "" : "not ")); 9955db1d364SMoti Haimovsky /* Only ConnectX-3 Pro supports tunneling. */ 9965db1d364SMoti Haimovsky priv->hw_csum_l2tun = 9975db1d364SMoti Haimovsky priv->hw_csum && 9985db1d364SMoti Haimovsky (device_attr.vendor_part_id == 9995db1d364SMoti Haimovsky PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO); 10005db1d364SMoti Haimovsky DEBUG("L2 tunnel checksum offloads are %ssupported", 1001de1df14eSOphir Munk priv->hw_csum_l2tun ? "" : "not "); 1002828a4ce3SAdrien Mazarguil priv->hw_rss_sup = mlx4_hw_rss_sup(priv->ctx, priv->pd, 1003828a4ce3SAdrien Mazarguil &device_attr_ex); 1004024e87beSAdrien Mazarguil DEBUG("supported RSS hash fields mask: %016" PRIx64, 1005024e87beSAdrien Mazarguil priv->hw_rss_sup); 10062b4e423fSMoti Haimovsky priv->hw_rss_max_qps = 10072b4e423fSMoti Haimovsky device_attr_ex.rss_caps.max_rwq_indirection_table_size; 10082b4e423fSMoti Haimovsky DEBUG("MAX RSS queues %d", priv->hw_rss_max_qps); 1009de1df14eSOphir Munk priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps & 1010de1df14eSOphir Munk IBV_RAW_PACKET_CAP_SCATTER_FCS); 1011de1df14eSOphir Munk DEBUG("FCS stripping toggling is %ssupported", 1012de1df14eSOphir Munk priv->hw_fcs_strip ? "" : "not "); 1013ba576975SMoti Haimovsky priv->tso = 1014ba576975SMoti Haimovsky ((device_attr_ex.tso_caps.max_tso > 0) && 1015ba576975SMoti Haimovsky (device_attr_ex.tso_caps.supported_qpts & 1016ba576975SMoti Haimovsky (1 << IBV_QPT_RAW_PACKET))); 1017ba576975SMoti Haimovsky if (priv->tso) 1018ba576975SMoti Haimovsky priv->tso_max_payload_sz = 1019ba576975SMoti Haimovsky device_attr_ex.tso_caps.max_tso; 1020ba576975SMoti Haimovsky DEBUG("TSO is %ssupported", 1021ba576975SMoti Haimovsky priv->tso ? "" : "not "); 1022f4efc0ebSYongseok Koh priv->mr_ext_memseg_en = conf.mr_ext_memseg_en; 1023fec36086SOr Ami /* Configure the first MAC address by default. */ 102466f2ac0fSAdrien Mazarguil err = mlx4_get_mac(priv, &mac.addr_bytes); 102566f2ac0fSAdrien Mazarguil if (err) { 1026fec36086SOr Ami ERROR("cannot get MAC address, is mlx4_en loaded?" 102766f2ac0fSAdrien Mazarguil " (error: %s)", strerror(err)); 102898a1f377SBruce Richardson goto port_error; 102998a1f377SBruce Richardson } 1030c2c4f87bSAman Deep Singh INFO("port %u MAC address is " RTE_ETHER_ADDR_PRT_FMT, 1031a7db3afcSAman Deep Singh priv->port, RTE_ETHER_ADDR_BYTES(&mac)); 1032320dc09fSAdrien Mazarguil /* Register MAC address. */ 10331437784bSAdrien Mazarguil priv->mac[0] = mac; 103498a1f377SBruce Richardson 1035164cad78SStephen Hemminger if (mlx4_get_ifname(priv, &ifname) == 0) { 103698a1f377SBruce Richardson DEBUG("port %u ifname is \"%s\"", 103798a1f377SBruce Richardson priv->port, ifname); 1038164cad78SStephen Hemminger priv->if_index = if_nametoindex(ifname); 1039164cad78SStephen Hemminger } else { 104098a1f377SBruce Richardson DEBUG("port %u ifname is unknown", priv->port); 104198a1f377SBruce Richardson } 1042164cad78SStephen Hemminger 104398a1f377SBruce Richardson /* Get actual MTU if possible. */ 104461cbdd41SAdrien Mazarguil mlx4_mtu_get(priv, &priv->mtu); 104598a1f377SBruce Richardson DEBUG("port %u MTU is %u", priv->port, priv->mtu); 10466751f6deSDavid Marchand eth_dev = rte_eth_dev_allocate(name); 104798a1f377SBruce Richardson if (eth_dev == NULL) { 104866f2ac0fSAdrien Mazarguil err = ENOMEM; 104998a1f377SBruce Richardson ERROR("can not allocate rte ethdev"); 105098a1f377SBruce Richardson goto port_error; 105198a1f377SBruce Richardson } 105298a1f377SBruce Richardson eth_dev->data->dev_private = priv; 10531437784bSAdrien Mazarguil eth_dev->data->mac_addrs = priv->mac; 1054eac901ceSJan Blunck eth_dev->device = &pci_dev->device; 1055eeefe73fSBernard Iremonger rte_eth_copy_pci_info(eth_dev, pci_dev); 1056f30e69b4SFerruh Yigit eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 105763c2f23cSAdrien Mazarguil /* Initialize local interrupt handle for current port. */ 1058d61138d4SHarman Kalra priv->intr_handle = 1059d61138d4SHarman Kalra rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED); 1060d61138d4SHarman Kalra if (priv->intr_handle == NULL) { 106130d38a71SDavid Marchand ERROR("can not allocate intr_handle"); 1062d61138d4SHarman Kalra goto port_error; 1063d61138d4SHarman Kalra } 1064d61138d4SHarman Kalra 1065d61138d4SHarman Kalra if (rte_intr_fd_set(priv->intr_handle, -1)) 1066d61138d4SHarman Kalra goto port_error; 1067d61138d4SHarman Kalra 1068d61138d4SHarman Kalra if (rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT)) 1069d61138d4SHarman Kalra goto port_error; 1070d61138d4SHarman Kalra 10713c560ec3SAdrien Mazarguil /* 107263c2f23cSAdrien Mazarguil * Override ethdev interrupt handle pointer with private 107363c2f23cSAdrien Mazarguil * handle instead of that of the parent PCI device used by 107463c2f23cSAdrien Mazarguil * default. This prevents it from being shared between all 107563c2f23cSAdrien Mazarguil * ports of the same PCI device since each of them is 107663c2f23cSAdrien Mazarguil * associated its own Verbs context. 107763c2f23cSAdrien Mazarguil * 107863c2f23cSAdrien Mazarguil * Rx interrupts in particular require this as the PMD has 107963c2f23cSAdrien Mazarguil * no control over the registration of queue interrupts 108063c2f23cSAdrien Mazarguil * besides setting up eth_dev->intr_handle, the rest is 108163c2f23cSAdrien Mazarguil * handled by rte_intr_rx_ctl(). 10823c560ec3SAdrien Mazarguil */ 1083d61138d4SHarman Kalra eth_dev->intr_handle = priv->intr_handle; 1084099c2c53SYongseok Koh priv->dev_data = eth_dev->data; 108598a1f377SBruce Richardson eth_dev->dev_ops = &mlx4_dev_ops; 10868e493764SYongseok Koh #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS 10878e493764SYongseok Koh /* Hint libmlx4 to use PMD allocator for data plane resources */ 108850e24943SMichael Baum err = mlx4_glue->dv_set_context_attr 108950e24943SMichael Baum (ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS, 109050e24943SMichael Baum (void *)((uintptr_t)&(struct mlx4dv_ctx_allocators){ 10918e493764SYongseok Koh .alloc = &mlx4_alloc_verbs_buf, 10928e493764SYongseok Koh .free = &mlx4_free_verbs_buf, 10938e493764SYongseok Koh .data = priv, 109450e24943SMichael Baum })); 10950203d33aSYongseok Koh if (err) 10960203d33aSYongseok Koh WARN("Verbs external allocator is not supported"); 10970203d33aSYongseok Koh else 10980203d33aSYongseok Koh priv->verbs_alloc_ctx.enabled = 1; 10998e493764SYongseok Koh #endif 110098a1f377SBruce Richardson /* Bring Ethernet device up. */ 110198a1f377SBruce Richardson DEBUG("forcing Ethernet interface up"); 1102099c2c53SYongseok Koh mlx4_dev_set_link_up(eth_dev); 11033fca2ab5SGaetan Rivet /* Update link status once if waiting for LSC. */ 11043fca2ab5SGaetan Rivet if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) 11053fca2ab5SGaetan Rivet mlx4_link_update(eth_dev, 0); 11064423d4a1SYongseok Koh /* 11074423d4a1SYongseok Koh * Once the device is added to the list of memory event 11084423d4a1SYongseok Koh * callback, its global MR cache table cannot be expanded 11094423d4a1SYongseok Koh * on the fly because of deadlock. If it overflows, lookup 11104423d4a1SYongseok Koh * should be done by searching MR list linearly, which is slow. 11114423d4a1SYongseok Koh */ 11124423d4a1SYongseok Koh err = mlx4_mr_btree_init(&priv->mr.cache, 11134423d4a1SYongseok Koh MLX4_MR_BTREE_CACHE_N * 2, 11144423d4a1SYongseok Koh eth_dev->device->numa_node); 11154423d4a1SYongseok Koh if (err) { 11164423d4a1SYongseok Koh /* rte_errno is already set. */ 11174423d4a1SYongseok Koh goto port_error; 11184423d4a1SYongseok Koh } 11194423d4a1SYongseok Koh /* Add device to memory callback list. */ 11200203d33aSYongseok Koh rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock); 11210203d33aSYongseok Koh LIST_INSERT_HEAD(&mlx4_shared_data->mem_event_cb_list, 11220203d33aSYongseok Koh priv, mem_event_cb); 11230203d33aSYongseok Koh rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock); 1124fbe90cddSThomas Monjalon rte_eth_dev_probing_finish(eth_dev); 1125bcf58b64SMichael Baum prev_dev = eth_dev; 112698a1f377SBruce Richardson continue; 112798a1f377SBruce Richardson port_error: 112849fdb0aeSHarman Kalra if (priv != NULL) 1129d61138d4SHarman Kalra rte_intr_instance_free(priv->intr_handle); 113098a1f377SBruce Richardson rte_free(priv); 1131e16adf08SThomas Monjalon if (eth_dev != NULL) 1132e16adf08SThomas Monjalon eth_dev->data->dev_private = NULL; 113398a1f377SBruce Richardson if (pd) 11344eba244bSAdrien Mazarguil claim_zero(mlx4_glue->dealloc_pd(pd)); 113598a1f377SBruce Richardson if (ctx) 11364eba244bSAdrien Mazarguil claim_zero(mlx4_glue->close_device(ctx)); 1137e16adf08SThomas Monjalon if (eth_dev != NULL) { 1138e16adf08SThomas Monjalon /* mac_addrs must not be freed because part of dev_private */ 1139e16adf08SThomas Monjalon eth_dev->data->mac_addrs = NULL; 1140e9d39be8SOr Ami rte_eth_dev_release_port(eth_dev); 1141e16adf08SThomas Monjalon } 1142bcf58b64SMichael Baum if (prev_dev) 1143bcf58b64SMichael Baum mlx4_dev_close(prev_dev); 114498a1f377SBruce Richardson break; 114598a1f377SBruce Richardson } 114698a1f377SBruce Richardson error: 114798a1f377SBruce Richardson if (attr_ctx) 11484eba244bSAdrien Mazarguil claim_zero(mlx4_glue->close_device(attr_ctx)); 114998a1f377SBruce Richardson if (list) 11504eba244bSAdrien Mazarguil mlx4_glue->free_device_list(list); 115166f2ac0fSAdrien Mazarguil if (err) 115266f2ac0fSAdrien Mazarguil rte_errno = err; 115366f2ac0fSAdrien Mazarguil return -err; 115498a1f377SBruce Richardson } 115598a1f377SBruce Richardson 11568e1630e0SMichael Baum /** 11578e1630e0SMichael Baum * DPDK callback to remove a PCI device. 11588e1630e0SMichael Baum * 11598e1630e0SMichael Baum * This function removes all Ethernet devices belong to a given PCI device. 11608e1630e0SMichael Baum * 11618e1630e0SMichael Baum * @param[in] pci_dev 11628e1630e0SMichael Baum * Pointer to the PCI device. 11638e1630e0SMichael Baum * 11648e1630e0SMichael Baum * @return 11658e1630e0SMichael Baum * 0 on success, the function cannot fail. 11668e1630e0SMichael Baum */ 11678e1630e0SMichael Baum static int 11688e1630e0SMichael Baum mlx4_pci_remove(struct rte_pci_device *pci_dev) 11698e1630e0SMichael Baum { 11708e1630e0SMichael Baum uint16_t port_id; 11718e1630e0SMichael Baum int ret = 0; 11728e1630e0SMichael Baum 11738e1630e0SMichael Baum RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) { 11748e1630e0SMichael Baum /* 11758e1630e0SMichael Baum * mlx4_dev_close() is not registered to secondary process, 11768e1630e0SMichael Baum * call the close function explicitly for secondary process. 11778e1630e0SMichael Baum */ 11788e1630e0SMichael Baum if (rte_eal_process_type() == RTE_PROC_SECONDARY) 11798e1630e0SMichael Baum ret |= mlx4_dev_close(&rte_eth_devices[port_id]); 11808e1630e0SMichael Baum else 11818e1630e0SMichael Baum ret |= rte_eth_dev_close(port_id); 11828e1630e0SMichael Baum } 11838e1630e0SMichael Baum return ret == 0 ? 0 : -EIO; 11848e1630e0SMichael Baum } 11858e1630e0SMichael Baum 118698a1f377SBruce Richardson static const struct rte_pci_id mlx4_pci_id_map[] = { 118798a1f377SBruce Richardson { 11881d1bc870SNélio Laranjeiro RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 11891d1bc870SNélio Laranjeiro PCI_DEVICE_ID_MELLANOX_CONNECTX3) 119098a1f377SBruce Richardson }, 119198a1f377SBruce Richardson { 11921d1bc870SNélio Laranjeiro RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 11931d1bc870SNélio Laranjeiro PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO) 119498a1f377SBruce Richardson }, 119598a1f377SBruce Richardson { 11961d1bc870SNélio Laranjeiro RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 11971d1bc870SNélio Laranjeiro PCI_DEVICE_ID_MELLANOX_CONNECTX3VF) 119898a1f377SBruce Richardson }, 119998a1f377SBruce Richardson { 120098a1f377SBruce Richardson .vendor_id = 0 120198a1f377SBruce Richardson } 120298a1f377SBruce Richardson }; 120398a1f377SBruce Richardson 1204fdf91e0fSJan Blunck static struct rte_pci_driver mlx4_driver = { 12052f3193cfSJan Viktorin .driver = { 12062f3193cfSJan Viktorin .name = MLX4_DRIVER_NAME 12072f3193cfSJan Viktorin }, 120898a1f377SBruce Richardson .id_table = mlx4_pci_id_map, 1209af424af8SShreyansh Jain .probe = mlx4_pci_probe, 12108e1630e0SMichael Baum .remove = mlx4_pci_remove, 1211b76fafb1SDavid Marchand .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV, 121298a1f377SBruce Richardson }; 121398a1f377SBruce Richardson 121472b934adSThomas Monjalon #ifdef RTE_IBVERBS_LINK_DLOPEN 121527cea116SAdrien Mazarguil 121627cea116SAdrien Mazarguil /** 121708c028d0SAdrien Mazarguil * Suffix RTE_EAL_PMD_PATH with "-glue". 121808c028d0SAdrien Mazarguil * 121908c028d0SAdrien Mazarguil * This function performs a sanity check on RTE_EAL_PMD_PATH before 122008c028d0SAdrien Mazarguil * suffixing its last component. 122108c028d0SAdrien Mazarguil * 122208c028d0SAdrien Mazarguil * @param buf[out] 122308c028d0SAdrien Mazarguil * Output buffer, should be large enough otherwise NULL is returned. 122408c028d0SAdrien Mazarguil * @param size 122508c028d0SAdrien Mazarguil * Size of @p out. 122608c028d0SAdrien Mazarguil * 122708c028d0SAdrien Mazarguil * @return 122808c028d0SAdrien Mazarguil * Pointer to @p buf or @p NULL in case suffix cannot be appended. 122908c028d0SAdrien Mazarguil */ 123008c028d0SAdrien Mazarguil static char * 123108c028d0SAdrien Mazarguil mlx4_glue_path(char *buf, size_t size) 123208c028d0SAdrien Mazarguil { 123308c028d0SAdrien Mazarguil static const char *const bad[] = { "/", ".", "..", NULL }; 123408c028d0SAdrien Mazarguil const char *path = RTE_EAL_PMD_PATH; 123508c028d0SAdrien Mazarguil size_t len = strlen(path); 123608c028d0SAdrien Mazarguil size_t off; 123708c028d0SAdrien Mazarguil int i; 123808c028d0SAdrien Mazarguil 123908c028d0SAdrien Mazarguil while (len && path[len - 1] == '/') 124008c028d0SAdrien Mazarguil --len; 124108c028d0SAdrien Mazarguil for (off = len; off && path[off - 1] != '/'; --off) 124208c028d0SAdrien Mazarguil ; 124308c028d0SAdrien Mazarguil for (i = 0; bad[i]; ++i) 124408c028d0SAdrien Mazarguil if (!strncmp(path + off, bad[i], (int)(len - off))) 124508c028d0SAdrien Mazarguil goto error; 124608c028d0SAdrien Mazarguil i = snprintf(buf, size, "%.*s-glue", (int)len, path); 124708c028d0SAdrien Mazarguil if (i == -1 || (size_t)i >= size) 124808c028d0SAdrien Mazarguil goto error; 124908c028d0SAdrien Mazarguil return buf; 125008c028d0SAdrien Mazarguil error: 125108c028d0SAdrien Mazarguil ERROR("unable to append \"-glue\" to last component of" 125208c028d0SAdrien Mazarguil " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\")," 125308c028d0SAdrien Mazarguil " please re-configure DPDK"); 125408c028d0SAdrien Mazarguil return NULL; 125508c028d0SAdrien Mazarguil } 125608c028d0SAdrien Mazarguil 125708c028d0SAdrien Mazarguil /** 125827cea116SAdrien Mazarguil * Initialization routine for run-time dependency on rdma-core. 125927cea116SAdrien Mazarguil */ 126027cea116SAdrien Mazarguil static int 126127cea116SAdrien Mazarguil mlx4_glue_init(void) 126227cea116SAdrien Mazarguil { 126308c028d0SAdrien Mazarguil char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; 1264f6242d06SAdrien Mazarguil const char *path[] = { 1265f6242d06SAdrien Mazarguil /* 1266f6242d06SAdrien Mazarguil * A basic security check is necessary before trusting 1267f6242d06SAdrien Mazarguil * MLX4_GLUE_PATH, which may override RTE_EAL_PMD_PATH. 1268f6242d06SAdrien Mazarguil */ 1269f6242d06SAdrien Mazarguil (geteuid() == getuid() && getegid() == getgid() ? 1270f6242d06SAdrien Mazarguil getenv("MLX4_GLUE_PATH") : NULL), 127108c028d0SAdrien Mazarguil /* 127208c028d0SAdrien Mazarguil * When RTE_EAL_PMD_PATH is set, use its glue-suffixed 127308c028d0SAdrien Mazarguil * variant, otherwise let dlopen() look up libraries on its 127408c028d0SAdrien Mazarguil * own. 127508c028d0SAdrien Mazarguil */ 127608c028d0SAdrien Mazarguil (*RTE_EAL_PMD_PATH ? 127708c028d0SAdrien Mazarguil mlx4_glue_path(glue_path, sizeof(glue_path)) : ""), 1278f6242d06SAdrien Mazarguil }; 1279f6242d06SAdrien Mazarguil unsigned int i = 0; 128027cea116SAdrien Mazarguil void *handle = NULL; 128127cea116SAdrien Mazarguil void **sym; 128227cea116SAdrien Mazarguil const char *dlmsg; 128327cea116SAdrien Mazarguil 1284f6242d06SAdrien Mazarguil while (!handle && i != RTE_DIM(path)) { 1285f6242d06SAdrien Mazarguil const char *end; 1286f6242d06SAdrien Mazarguil size_t len; 1287f6242d06SAdrien Mazarguil int ret; 1288f6242d06SAdrien Mazarguil 1289f6242d06SAdrien Mazarguil if (!path[i]) { 1290f6242d06SAdrien Mazarguil ++i; 1291f6242d06SAdrien Mazarguil continue; 1292f6242d06SAdrien Mazarguil } 1293f6242d06SAdrien Mazarguil end = strpbrk(path[i], ":;"); 1294f6242d06SAdrien Mazarguil if (!end) 1295f6242d06SAdrien Mazarguil end = path[i] + strlen(path[i]); 1296f6242d06SAdrien Mazarguil len = end - path[i]; 1297f6242d06SAdrien Mazarguil ret = 0; 1298f6242d06SAdrien Mazarguil do { 1299f6242d06SAdrien Mazarguil char name[ret + 1]; 1300f6242d06SAdrien Mazarguil 1301f6242d06SAdrien Mazarguil ret = snprintf(name, sizeof(name), "%.*s%s" MLX4_GLUE, 1302f6242d06SAdrien Mazarguil (int)len, path[i], 1303f6242d06SAdrien Mazarguil (!len || *(end - 1) == '/') ? "" : "/"); 1304f6242d06SAdrien Mazarguil if (ret == -1) 1305f6242d06SAdrien Mazarguil break; 1306f6242d06SAdrien Mazarguil if (sizeof(name) != (size_t)ret + 1) 1307f6242d06SAdrien Mazarguil continue; 1308f6242d06SAdrien Mazarguil DEBUG("looking for rdma-core glue as \"%s\"", name); 1309f6242d06SAdrien Mazarguil handle = dlopen(name, RTLD_LAZY); 1310f6242d06SAdrien Mazarguil break; 1311f6242d06SAdrien Mazarguil } while (1); 1312f6242d06SAdrien Mazarguil path[i] = end + 1; 1313f6242d06SAdrien Mazarguil if (!*end) 1314f6242d06SAdrien Mazarguil ++i; 1315f6242d06SAdrien Mazarguil } 131627cea116SAdrien Mazarguil if (!handle) { 131727cea116SAdrien Mazarguil rte_errno = EINVAL; 131827cea116SAdrien Mazarguil dlmsg = dlerror(); 131927cea116SAdrien Mazarguil if (dlmsg) 132027cea116SAdrien Mazarguil WARN("cannot load glue library: %s", dlmsg); 132127cea116SAdrien Mazarguil goto glue_error; 132227cea116SAdrien Mazarguil } 132327cea116SAdrien Mazarguil sym = dlsym(handle, "mlx4_glue"); 132427cea116SAdrien Mazarguil if (!sym || !*sym) { 132527cea116SAdrien Mazarguil rte_errno = EINVAL; 132627cea116SAdrien Mazarguil dlmsg = dlerror(); 132727cea116SAdrien Mazarguil if (dlmsg) 132827cea116SAdrien Mazarguil ERROR("cannot resolve glue symbol: %s", dlmsg); 132927cea116SAdrien Mazarguil goto glue_error; 133027cea116SAdrien Mazarguil } 133127cea116SAdrien Mazarguil mlx4_glue = *sym; 133227cea116SAdrien Mazarguil return 0; 133327cea116SAdrien Mazarguil glue_error: 133427cea116SAdrien Mazarguil if (handle) 133527cea116SAdrien Mazarguil dlclose(handle); 133627cea116SAdrien Mazarguil WARN("cannot initialize PMD due to missing run-time" 133727cea116SAdrien Mazarguil " dependency on rdma-core libraries (libibverbs," 133827cea116SAdrien Mazarguil " libmlx4)"); 133927cea116SAdrien Mazarguil return -rte_errno; 134027cea116SAdrien Mazarguil } 134127cea116SAdrien Mazarguil 134227cea116SAdrien Mazarguil #endif 134327cea116SAdrien Mazarguil 13449c99878aSJerin Jacob /* Initialize driver log type. */ 1345eeded204SDavid Marchand RTE_LOG_REGISTER_DEFAULT(mlx4_logtype, NOTICE) 13469c99878aSJerin Jacob 134798a1f377SBruce Richardson /** 134898a1f377SBruce Richardson * Driver initialization routine. 134998a1f377SBruce Richardson */ 1350f8e99896SThomas Monjalon RTE_INIT(rte_mlx4_pmd_init) 135198a1f377SBruce Richardson { 1352643777a1SOlga Shern /* 1353c2b3dba8SMatan Azrad * MLX4_DEVICE_FATAL_CLEANUP tells ibv_destroy functions we 1354c2b3dba8SMatan Azrad * want to get success errno value in case of calling them 1355c2b3dba8SMatan Azrad * when the device was removed. 1356c2b3dba8SMatan Azrad */ 1357c2b3dba8SMatan Azrad setenv("MLX4_DEVICE_FATAL_CLEANUP", "1", 1); 1358c2b3dba8SMatan Azrad /* 1359643777a1SOlga Shern * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use 1360643777a1SOlga Shern * huge pages. Calling ibv_fork_init() during init allows 1361643777a1SOlga Shern * applications to use fork() safely for purposes other than 1362643777a1SOlga Shern * using this PMD, which is not supported in forked processes. 1363643777a1SOlga Shern */ 1364643777a1SOlga Shern setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); 136572b934adSThomas Monjalon #ifdef RTE_IBVERBS_LINK_DLOPEN 136627cea116SAdrien Mazarguil if (mlx4_glue_init()) 136727cea116SAdrien Mazarguil return; 13688e08df22SAlexander Kozyrev MLX4_ASSERT(mlx4_glue); 136927cea116SAdrien Mazarguil #endif 1370e99fdaa7SAlexander Kozyrev #ifdef RTE_LIBRTE_MLX4_DEBUG 13712a3b0097SAdrien Mazarguil /* Glue structure must not contain any NULL pointers. */ 13722a3b0097SAdrien Mazarguil { 13732a3b0097SAdrien Mazarguil unsigned int i; 13742a3b0097SAdrien Mazarguil 13752a3b0097SAdrien Mazarguil for (i = 0; i != sizeof(*mlx4_glue) / sizeof(void *); ++i) 13768e08df22SAlexander Kozyrev MLX4_ASSERT(((const void *const *)mlx4_glue)[i]); 13772a3b0097SAdrien Mazarguil } 13782a3b0097SAdrien Mazarguil #endif 13796d5df2eaSAdrien Mazarguil if (strcmp(mlx4_glue->version, MLX4_GLUE_VERSION)) { 13806d5df2eaSAdrien Mazarguil ERROR("rdma-core glue \"%s\" mismatch: \"%s\" is required", 13816d5df2eaSAdrien Mazarguil mlx4_glue->version, MLX4_GLUE_VERSION); 13826d5df2eaSAdrien Mazarguil return; 13836d5df2eaSAdrien Mazarguil } 13844eba244bSAdrien Mazarguil mlx4_glue->fork_init(); 13853dcfe039SThomas Monjalon rte_pci_register(&mlx4_driver); 138698a1f377SBruce Richardson } 138798a1f377SBruce Richardson 138801f19227SShreyansh Jain RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__); 138901f19227SShreyansh Jain RTE_PMD_REGISTER_PCI_TABLE(net_mlx4, mlx4_pci_id_map); 13900880c401SOlivier Matz RTE_PMD_REGISTER_KMOD_DEP(net_mlx4, 13910880c401SOlivier Matz "* ib_uverbs & mlx4_en & mlx4_core & mlx4_ib"); 1392