1517ed6e2SLong Li /* SPDX-License-Identifier: BSD-3-Clause 2517ed6e2SLong Li * Copyright 2022 Microsoft Corporation 3517ed6e2SLong Li */ 4517ed6e2SLong Li 5517ed6e2SLong Li #include <unistd.h> 6517ed6e2SLong Li #include <dirent.h> 7517ed6e2SLong Li #include <fcntl.h> 8517ed6e2SLong Li #include <sys/mman.h> 984497839SLong Li #include <sys/ioctl.h> 1084497839SLong Li #include <net/if.h> 11517ed6e2SLong Li 12517ed6e2SLong Li #include <ethdev_driver.h> 13517ed6e2SLong Li #include <ethdev_pci.h> 14517ed6e2SLong Li #include <rte_kvargs.h> 15517ed6e2SLong Li #include <rte_eal_paging.h> 16*8fa22e1fSThomas Monjalon #include <rte_pci.h> 17517ed6e2SLong Li 18517ed6e2SLong Li #include <infiniband/verbs.h> 19517ed6e2SLong Li #include <infiniband/manadv.h> 20517ed6e2SLong Li 21517ed6e2SLong Li #include <assert.h> 22517ed6e2SLong Li 23517ed6e2SLong Li #include "mana.h" 24517ed6e2SLong Li 25517ed6e2SLong Li /* Shared memory between primary/secondary processes, per driver */ 26517ed6e2SLong Li /* Data to track primary/secondary usage */ 27517ed6e2SLong Li struct mana_shared_data *mana_shared_data; 28517ed6e2SLong Li static struct mana_shared_data mana_local_data; 29517ed6e2SLong Li 30517ed6e2SLong Li /* The memory region for the above data */ 31517ed6e2SLong Li static const struct rte_memzone *mana_shared_mz; 32517ed6e2SLong Li static const char *MZ_MANA_SHARED_DATA = "mana_shared_data"; 33517ed6e2SLong Li 34517ed6e2SLong Li /* Spinlock for mana_shared_data */ 35517ed6e2SLong Li static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER; 36517ed6e2SLong Li 37517ed6e2SLong Li /* Allocate a buffer on the stack and fill it with a printf format string. */ 38517ed6e2SLong Li #define MANA_MKSTR(name, ...) \ 39517ed6e2SLong Li int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \ 40517ed6e2SLong Li char name[mkstr_size_##name + 1]; \ 41517ed6e2SLong Li \ 42517ed6e2SLong Li memset(name, 0, mkstr_size_##name + 1); \ 43517ed6e2SLong Li snprintf(name, sizeof(name), "" __VA_ARGS__) 44517ed6e2SLong Li 45517ed6e2SLong Li int mana_logtype_driver; 46517ed6e2SLong Li int mana_logtype_init; 47517ed6e2SLong Li 480dbfecfeSLong Li /* 490dbfecfeSLong Li * Callback from rdma-core to allocate a buffer for a queue. 500dbfecfeSLong Li */ 510dbfecfeSLong Li void * 520dbfecfeSLong Li mana_alloc_verbs_buf(size_t size, void *data) 530dbfecfeSLong Li { 540dbfecfeSLong Li void *ret; 550dbfecfeSLong Li size_t alignment = rte_mem_page_size(); 560dbfecfeSLong Li int socket = (int)(uintptr_t)data; 570dbfecfeSLong Li 580dbfecfeSLong Li DRV_LOG(DEBUG, "size=%zu socket=%d", size, socket); 590dbfecfeSLong Li 600dbfecfeSLong Li if (alignment == (size_t)-1) { 610dbfecfeSLong Li DRV_LOG(ERR, "Failed to get mem page size"); 620dbfecfeSLong Li rte_errno = ENOMEM; 630dbfecfeSLong Li return NULL; 640dbfecfeSLong Li } 650dbfecfeSLong Li 660dbfecfeSLong Li ret = rte_zmalloc_socket("mana_verb_buf", size, alignment, socket); 670dbfecfeSLong Li if (!ret && size) 680dbfecfeSLong Li rte_errno = ENOMEM; 690dbfecfeSLong Li return ret; 700dbfecfeSLong Li } 710dbfecfeSLong Li 720dbfecfeSLong Li void 730dbfecfeSLong Li mana_free_verbs_buf(void *ptr, void *data __rte_unused) 740dbfecfeSLong Li { 750dbfecfeSLong Li rte_free(ptr); 760dbfecfeSLong Li } 770dbfecfeSLong Li 780dbfecfeSLong Li static int 790dbfecfeSLong Li mana_dev_configure(struct rte_eth_dev *dev) 800dbfecfeSLong Li { 810dbfecfeSLong Li struct mana_priv *priv = dev->data->dev_private; 820dbfecfeSLong Li struct rte_eth_conf *dev_conf = &dev->data->dev_conf; 830dbfecfeSLong Li 840dbfecfeSLong Li if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 850dbfecfeSLong Li dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 860dbfecfeSLong Li 870dbfecfeSLong Li if (dev->data->nb_rx_queues != dev->data->nb_tx_queues) { 880dbfecfeSLong Li DRV_LOG(ERR, "Only support equal number of rx/tx queues"); 890dbfecfeSLong Li return -EINVAL; 900dbfecfeSLong Li } 910dbfecfeSLong Li 920dbfecfeSLong Li if (!rte_is_power_of_2(dev->data->nb_rx_queues)) { 930dbfecfeSLong Li DRV_LOG(ERR, "number of TX/RX queues must be power of 2"); 940dbfecfeSLong Li return -EINVAL; 950dbfecfeSLong Li } 960dbfecfeSLong Li 970dbfecfeSLong Li priv->num_queues = dev->data->nb_rx_queues; 980dbfecfeSLong Li 990dbfecfeSLong Li manadv_set_context_attr(priv->ib_ctx, MANADV_CTX_ATTR_BUF_ALLOCATORS, 1000dbfecfeSLong Li (void *)((uintptr_t)&(struct manadv_ctx_allocators){ 1010dbfecfeSLong Li .alloc = &mana_alloc_verbs_buf, 1020dbfecfeSLong Li .free = &mana_free_verbs_buf, 1030dbfecfeSLong Li .data = 0, 1040dbfecfeSLong Li })); 1050dbfecfeSLong Li 1060dbfecfeSLong Li return 0; 1070dbfecfeSLong Li } 1080dbfecfeSLong Li 109afd5d170SLong Li static void 110afd5d170SLong Li rx_intr_vec_disable(struct mana_priv *priv) 111afd5d170SLong Li { 112afd5d170SLong Li struct rte_intr_handle *intr_handle = priv->intr_handle; 113afd5d170SLong Li 114afd5d170SLong Li rte_intr_free_epoll_fd(intr_handle); 115afd5d170SLong Li rte_intr_vec_list_free(intr_handle); 116afd5d170SLong Li rte_intr_nb_efd_set(intr_handle, 0); 117afd5d170SLong Li } 118afd5d170SLong Li 119afd5d170SLong Li static int 120afd5d170SLong Li rx_intr_vec_enable(struct mana_priv *priv) 121afd5d170SLong Li { 122afd5d170SLong Li unsigned int i; 123afd5d170SLong Li unsigned int rxqs_n = priv->dev_data->nb_rx_queues; 124afd5d170SLong Li unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 125afd5d170SLong Li struct rte_intr_handle *intr_handle = priv->intr_handle; 126afd5d170SLong Li int ret; 127afd5d170SLong Li 128afd5d170SLong Li rx_intr_vec_disable(priv); 129afd5d170SLong Li 130afd5d170SLong Li if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) { 131afd5d170SLong Li DRV_LOG(ERR, "Failed to allocate memory for interrupt vector"); 132afd5d170SLong Li return -ENOMEM; 133afd5d170SLong Li } 134afd5d170SLong Li 135afd5d170SLong Li for (i = 0; i < n; i++) { 136afd5d170SLong Li struct mana_rxq *rxq = priv->dev_data->rx_queues[i]; 137afd5d170SLong Li 138afd5d170SLong Li ret = rte_intr_vec_list_index_set(intr_handle, i, 139afd5d170SLong Li RTE_INTR_VEC_RXTX_OFFSET + i); 140afd5d170SLong Li if (ret) { 141afd5d170SLong Li DRV_LOG(ERR, "Failed to set intr vec %u", i); 142afd5d170SLong Li return ret; 143afd5d170SLong Li } 144afd5d170SLong Li 145afd5d170SLong Li ret = rte_intr_efds_index_set(intr_handle, i, rxq->channel->fd); 146afd5d170SLong Li if (ret) { 147afd5d170SLong Li DRV_LOG(ERR, "Failed to set FD at intr %u", i); 148afd5d170SLong Li return ret; 149afd5d170SLong Li } 150afd5d170SLong Li } 151afd5d170SLong Li 152afd5d170SLong Li return rte_intr_nb_efd_set(intr_handle, n); 153afd5d170SLong Li } 154afd5d170SLong Li 155afd5d170SLong Li static void 156afd5d170SLong Li rxq_intr_disable(struct mana_priv *priv) 157afd5d170SLong Li { 158afd5d170SLong Li int err = rte_errno; 159afd5d170SLong Li 160afd5d170SLong Li rx_intr_vec_disable(priv); 161afd5d170SLong Li rte_errno = err; 162afd5d170SLong Li } 163afd5d170SLong Li 164afd5d170SLong Li static int 165afd5d170SLong Li rxq_intr_enable(struct mana_priv *priv) 166afd5d170SLong Li { 167afd5d170SLong Li const struct rte_eth_intr_conf *const intr_conf = 168afd5d170SLong Li &priv->dev_data->dev_conf.intr_conf; 169afd5d170SLong Li 170afd5d170SLong Li if (!intr_conf->rxq) 171afd5d170SLong Li return 0; 172afd5d170SLong Li 173afd5d170SLong Li return rx_intr_vec_enable(priv); 174afd5d170SLong Li } 175bd15f237SLong Li 1760dbfecfeSLong Li static int 17737544700SLong Li mana_dev_start(struct rte_eth_dev *dev) 17837544700SLong Li { 17937544700SLong Li int ret; 18037544700SLong Li struct mana_priv *priv = dev->data->dev_private; 18137544700SLong Li 18237544700SLong Li rte_spinlock_init(&priv->mr_btree_lock); 18337544700SLong Li ret = mana_mr_btree_init(&priv->mr_btree, MANA_MR_BTREE_CACHE_N, 18437544700SLong Li dev->device->numa_node); 18537544700SLong Li if (ret) { 18637544700SLong Li DRV_LOG(ERR, "Failed to init device MR btree %d", ret); 18737544700SLong Li return ret; 18837544700SLong Li } 18937544700SLong Li 19037544700SLong Li ret = mana_start_tx_queues(dev); 19137544700SLong Li if (ret) { 19237544700SLong Li DRV_LOG(ERR, "failed to start tx queues %d", ret); 19337544700SLong Li goto failed_tx; 19437544700SLong Li } 19537544700SLong Li 19637544700SLong Li ret = mana_start_rx_queues(dev); 19737544700SLong Li if (ret) { 19837544700SLong Li DRV_LOG(ERR, "failed to start rx queues %d", ret); 19937544700SLong Li goto failed_rx; 20037544700SLong Li } 20137544700SLong Li 20237544700SLong Li rte_wmb(); 20337544700SLong Li 20437544700SLong Li dev->tx_pkt_burst = mana_tx_burst; 20537544700SLong Li dev->rx_pkt_burst = mana_rx_burst; 20637544700SLong Li 20737544700SLong Li DRV_LOG(INFO, "TX/RX queues have started"); 20837544700SLong Li 20937544700SLong Li /* Enable datapath for secondary processes */ 21037544700SLong Li mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX); 21137544700SLong Li 212afd5d170SLong Li ret = rxq_intr_enable(priv); 213afd5d170SLong Li if (ret) { 214afd5d170SLong Li DRV_LOG(ERR, "Failed to enable RX interrupts"); 215afd5d170SLong Li goto failed_intr; 216afd5d170SLong Li } 217afd5d170SLong Li 21837544700SLong Li return 0; 21937544700SLong Li 220afd5d170SLong Li failed_intr: 221afd5d170SLong Li mana_stop_rx_queues(dev); 222afd5d170SLong Li 22337544700SLong Li failed_rx: 22437544700SLong Li mana_stop_tx_queues(dev); 22537544700SLong Li 22637544700SLong Li failed_tx: 22737544700SLong Li mana_mr_btree_free(&priv->mr_btree); 22837544700SLong Li 22937544700SLong Li return ret; 23037544700SLong Li } 23137544700SLong Li 23237544700SLong Li static int 233afd5d170SLong Li mana_dev_stop(struct rte_eth_dev *dev) 23437544700SLong Li { 23537544700SLong Li int ret; 236afd5d170SLong Li struct mana_priv *priv = dev->data->dev_private; 237afd5d170SLong Li 238afd5d170SLong Li rxq_intr_disable(priv); 23937544700SLong Li 24037544700SLong Li dev->tx_pkt_burst = mana_tx_burst_removed; 24137544700SLong Li dev->rx_pkt_burst = mana_rx_burst_removed; 24237544700SLong Li 24337544700SLong Li /* Stop datapath on secondary processes */ 24437544700SLong Li mana_mp_req_on_rxtx(dev, MANA_MP_REQ_STOP_RXTX); 24537544700SLong Li 24637544700SLong Li rte_wmb(); 24737544700SLong Li 24837544700SLong Li ret = mana_stop_tx_queues(dev); 24937544700SLong Li if (ret) { 25037544700SLong Li DRV_LOG(ERR, "failed to stop tx queues"); 25137544700SLong Li return ret; 25237544700SLong Li } 25337544700SLong Li 25437544700SLong Li ret = mana_stop_rx_queues(dev); 25537544700SLong Li if (ret) { 25637544700SLong Li DRV_LOG(ERR, "failed to stop tx queues"); 25737544700SLong Li return ret; 25837544700SLong Li } 25937544700SLong Li 26037544700SLong Li return 0; 26137544700SLong Li } 26237544700SLong Li 263afd5d170SLong Li static int mana_intr_uninstall(struct mana_priv *priv); 264afd5d170SLong Li 26537544700SLong Li static int 2660dbfecfeSLong Li mana_dev_close(struct rte_eth_dev *dev) 2670dbfecfeSLong Li { 2680dbfecfeSLong Li struct mana_priv *priv = dev->data->dev_private; 2690dbfecfeSLong Li int ret; 2700dbfecfeSLong Li 2710f5db3c6SLong Li mana_remove_all_mr(priv); 2720f5db3c6SLong Li 273bd15f237SLong Li ret = mana_intr_uninstall(priv); 274bd15f237SLong Li if (ret) 275bd15f237SLong Li return ret; 276bd15f237SLong Li 2770dbfecfeSLong Li ret = ibv_close_device(priv->ib_ctx); 2780dbfecfeSLong Li if (ret) { 2790dbfecfeSLong Li ret = errno; 2800dbfecfeSLong Li return ret; 2810dbfecfeSLong Li } 2820dbfecfeSLong Li 2830dbfecfeSLong Li return 0; 2840dbfecfeSLong Li } 2850dbfecfeSLong Li 286d878cb09SLong Li static int 287d878cb09SLong Li mana_dev_info_get(struct rte_eth_dev *dev, 288d878cb09SLong Li struct rte_eth_dev_info *dev_info) 289d878cb09SLong Li { 290d878cb09SLong Li struct mana_priv *priv = dev->data->dev_private; 291d878cb09SLong Li 29284497839SLong Li dev_info->min_mtu = RTE_ETHER_MIN_MTU; 29384497839SLong Li dev_info->max_mtu = MANA_MAX_MTU; 294d878cb09SLong Li 295d878cb09SLong Li /* RX params */ 296d878cb09SLong Li dev_info->min_rx_bufsize = MIN_RX_BUF_SIZE; 29784497839SLong Li dev_info->max_rx_pktlen = MANA_MAX_MTU + RTE_ETHER_HDR_LEN; 298d878cb09SLong Li 299d878cb09SLong Li dev_info->max_rx_queues = priv->max_rx_queues; 300d878cb09SLong Li dev_info->max_tx_queues = priv->max_tx_queues; 301d878cb09SLong Li 302d878cb09SLong Li dev_info->max_mac_addrs = MANA_MAX_MAC_ADDR; 303d878cb09SLong Li dev_info->max_hash_mac_addrs = 0; 304d878cb09SLong Li 305d878cb09SLong Li dev_info->max_vfs = 1; 306d878cb09SLong Li 307d878cb09SLong Li /* Offload params */ 308d878cb09SLong Li dev_info->rx_offload_capa = MANA_DEV_RX_OFFLOAD_SUPPORT; 309d878cb09SLong Li 310d878cb09SLong Li dev_info->tx_offload_capa = MANA_DEV_TX_OFFLOAD_SUPPORT; 311d878cb09SLong Li 312d878cb09SLong Li /* RSS */ 313d878cb09SLong Li dev_info->reta_size = INDIRECTION_TABLE_NUM_ELEMENTS; 314d878cb09SLong Li dev_info->hash_key_size = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES; 315d878cb09SLong Li dev_info->flow_type_rss_offloads = MANA_ETH_RSS_SUPPORT; 316d878cb09SLong Li 317d878cb09SLong Li /* Thresholds */ 318d878cb09SLong Li dev_info->default_rxconf = (struct rte_eth_rxconf){ 319d878cb09SLong Li .rx_thresh = { 320d878cb09SLong Li .pthresh = 8, 321d878cb09SLong Li .hthresh = 8, 322d878cb09SLong Li .wthresh = 0, 323d878cb09SLong Li }, 324d878cb09SLong Li .rx_free_thresh = 32, 325d878cb09SLong Li /* If no descriptors available, pkts are dropped by default */ 326d878cb09SLong Li .rx_drop_en = 1, 327d878cb09SLong Li }; 328d878cb09SLong Li 329d878cb09SLong Li dev_info->default_txconf = (struct rte_eth_txconf){ 330d878cb09SLong Li .tx_thresh = { 331d878cb09SLong Li .pthresh = 32, 332d878cb09SLong Li .hthresh = 0, 333d878cb09SLong Li .wthresh = 0, 334d878cb09SLong Li }, 335d878cb09SLong Li .tx_rs_thresh = 32, 336d878cb09SLong Li .tx_free_thresh = 32, 337d878cb09SLong Li }; 338d878cb09SLong Li 339d878cb09SLong Li /* Buffer limits */ 340d878cb09SLong Li dev_info->rx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE; 341d878cb09SLong Li dev_info->rx_desc_lim.nb_max = priv->max_rx_desc; 342d878cb09SLong Li dev_info->rx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE; 343d878cb09SLong Li dev_info->rx_desc_lim.nb_seg_max = priv->max_recv_sge; 344d878cb09SLong Li dev_info->rx_desc_lim.nb_mtu_seg_max = priv->max_recv_sge; 345d878cb09SLong Li 346d878cb09SLong Li dev_info->tx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE; 347d878cb09SLong Li dev_info->tx_desc_lim.nb_max = priv->max_tx_desc; 348d878cb09SLong Li dev_info->tx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE; 349d878cb09SLong Li dev_info->tx_desc_lim.nb_seg_max = priv->max_send_sge; 350d878cb09SLong Li dev_info->rx_desc_lim.nb_mtu_seg_max = priv->max_recv_sge; 351d878cb09SLong Li 352d878cb09SLong Li /* Speed */ 353d878cb09SLong Li dev_info->speed_capa = RTE_ETH_LINK_SPEED_100G; 354d878cb09SLong Li 355d878cb09SLong Li /* RX params */ 356d878cb09SLong Li dev_info->default_rxportconf.burst_size = 1; 357d878cb09SLong Li dev_info->default_rxportconf.ring_size = MAX_RECEIVE_BUFFERS_PER_QUEUE; 358d878cb09SLong Li dev_info->default_rxportconf.nb_queues = 1; 359d878cb09SLong Li 360d878cb09SLong Li /* TX params */ 361d878cb09SLong Li dev_info->default_txportconf.burst_size = 1; 362d878cb09SLong Li dev_info->default_txportconf.ring_size = MAX_SEND_BUFFERS_PER_QUEUE; 363d878cb09SLong Li dev_info->default_txportconf.nb_queues = 1; 364d878cb09SLong Li 365d878cb09SLong Li return 0; 366d878cb09SLong Li } 367d878cb09SLong Li 3680c63c005SLong Li static void 369f7dc479aSLong Li mana_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id, 370f7dc479aSLong Li struct rte_eth_txq_info *qinfo) 371f7dc479aSLong Li { 372f7dc479aSLong Li struct mana_txq *txq = dev->data->tx_queues[queue_id]; 373f7dc479aSLong Li 374f7dc479aSLong Li qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads; 375f7dc479aSLong Li qinfo->nb_desc = txq->num_desc; 376f7dc479aSLong Li } 377f7dc479aSLong Li 378f7dc479aSLong Li static void 3790c63c005SLong Li mana_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id, 3800c63c005SLong Li struct rte_eth_rxq_info *qinfo) 3810c63c005SLong Li { 3820c63c005SLong Li struct mana_rxq *rxq = dev->data->rx_queues[queue_id]; 3830c63c005SLong Li 3840c63c005SLong Li qinfo->mp = rxq->mp; 3850c63c005SLong Li qinfo->nb_desc = rxq->num_desc; 3860c63c005SLong Li qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; 3870c63c005SLong Li } 3880c63c005SLong Li 389d9679c3aSLong Li static const uint32_t * 390d9679c3aSLong Li mana_supported_ptypes(struct rte_eth_dev *dev __rte_unused) 391d9679c3aSLong Li { 392d9679c3aSLong Li static const uint32_t ptypes[] = { 393d9679c3aSLong Li RTE_PTYPE_L2_ETHER, 394d9679c3aSLong Li RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 395d9679c3aSLong Li RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 396d9679c3aSLong Li RTE_PTYPE_L4_FRAG, 397d9679c3aSLong Li RTE_PTYPE_L4_TCP, 398d9679c3aSLong Li RTE_PTYPE_L4_UDP, 399d9679c3aSLong Li RTE_PTYPE_UNKNOWN 400d9679c3aSLong Li }; 401d9679c3aSLong Li 402d9679c3aSLong Li return ptypes; 403d9679c3aSLong Li } 404d9679c3aSLong Li 40521958568SLong Li static int 406a382177cSLong Li mana_rss_hash_update(struct rte_eth_dev *dev, 407a382177cSLong Li struct rte_eth_rss_conf *rss_conf) 408a382177cSLong Li { 409a382177cSLong Li struct mana_priv *priv = dev->data->dev_private; 410a382177cSLong Li 411a382177cSLong Li /* Currently can only update RSS hash when device is stopped */ 412a382177cSLong Li if (dev->data->dev_started) { 413a382177cSLong Li DRV_LOG(ERR, "Can't update RSS after device has started"); 414a382177cSLong Li return -ENODEV; 415a382177cSLong Li } 416a382177cSLong Li 417a382177cSLong Li if (rss_conf->rss_hf & ~MANA_ETH_RSS_SUPPORT) { 418a382177cSLong Li DRV_LOG(ERR, "Port %u invalid RSS HF 0x%" PRIx64, 419a382177cSLong Li dev->data->port_id, rss_conf->rss_hf); 420a382177cSLong Li return -EINVAL; 421a382177cSLong Li } 422a382177cSLong Li 423a382177cSLong Li if (rss_conf->rss_key && rss_conf->rss_key_len) { 424a382177cSLong Li if (rss_conf->rss_key_len != TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) { 425a382177cSLong Li DRV_LOG(ERR, "Port %u key len must be %u long", 426a382177cSLong Li dev->data->port_id, 427a382177cSLong Li TOEPLITZ_HASH_KEY_SIZE_IN_BYTES); 428a382177cSLong Li return -EINVAL; 429a382177cSLong Li } 430a382177cSLong Li 431a382177cSLong Li priv->rss_conf.rss_key_len = rss_conf->rss_key_len; 432a382177cSLong Li priv->rss_conf.rss_key = 433a382177cSLong Li rte_zmalloc("mana_rss", rss_conf->rss_key_len, 434a382177cSLong Li RTE_CACHE_LINE_SIZE); 435a382177cSLong Li if (!priv->rss_conf.rss_key) 436a382177cSLong Li return -ENOMEM; 437a382177cSLong Li memcpy(priv->rss_conf.rss_key, rss_conf->rss_key, 438a382177cSLong Li rss_conf->rss_key_len); 439a382177cSLong Li } 440a382177cSLong Li priv->rss_conf.rss_hf = rss_conf->rss_hf; 441a382177cSLong Li 442a382177cSLong Li return 0; 443a382177cSLong Li } 444a382177cSLong Li 445a382177cSLong Li static int 446a382177cSLong Li mana_rss_hash_conf_get(struct rte_eth_dev *dev, 447a382177cSLong Li struct rte_eth_rss_conf *rss_conf) 448a382177cSLong Li { 449a382177cSLong Li struct mana_priv *priv = dev->data->dev_private; 450a382177cSLong Li 451a382177cSLong Li if (!rss_conf) 452a382177cSLong Li return -EINVAL; 453a382177cSLong Li 454a382177cSLong Li if (rss_conf->rss_key && 455a382177cSLong Li rss_conf->rss_key_len >= priv->rss_conf.rss_key_len) { 456a382177cSLong Li memcpy(rss_conf->rss_key, priv->rss_conf.rss_key, 457a382177cSLong Li priv->rss_conf.rss_key_len); 458a382177cSLong Li } 459a382177cSLong Li 460a382177cSLong Li rss_conf->rss_key_len = priv->rss_conf.rss_key_len; 461a382177cSLong Li rss_conf->rss_hf = priv->rss_conf.rss_hf; 462a382177cSLong Li 463a382177cSLong Li return 0; 464a382177cSLong Li } 465a382177cSLong Li 466a382177cSLong Li static int 467f7dc479aSLong Li mana_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 468f7dc479aSLong Li uint16_t nb_desc, unsigned int socket_id, 469f7dc479aSLong Li const struct rte_eth_txconf *tx_conf __rte_unused) 470f7dc479aSLong Li 471f7dc479aSLong Li { 472f7dc479aSLong Li struct mana_priv *priv = dev->data->dev_private; 473f7dc479aSLong Li struct mana_txq *txq; 474f7dc479aSLong Li int ret; 475f7dc479aSLong Li 476f7dc479aSLong Li txq = rte_zmalloc_socket("mana_txq", sizeof(*txq), 0, socket_id); 477f7dc479aSLong Li if (!txq) { 478f7dc479aSLong Li DRV_LOG(ERR, "failed to allocate txq"); 479f7dc479aSLong Li return -ENOMEM; 480f7dc479aSLong Li } 481f7dc479aSLong Li 482f7dc479aSLong Li txq->socket = socket_id; 483f7dc479aSLong Li 484f7dc479aSLong Li txq->desc_ring = rte_malloc_socket("mana_tx_desc_ring", 485f7dc479aSLong Li sizeof(struct mana_txq_desc) * 486f7dc479aSLong Li nb_desc, 487f7dc479aSLong Li RTE_CACHE_LINE_SIZE, socket_id); 488f7dc479aSLong Li if (!txq->desc_ring) { 489f7dc479aSLong Li DRV_LOG(ERR, "failed to allocate txq desc_ring"); 490f7dc479aSLong Li ret = -ENOMEM; 491f7dc479aSLong Li goto fail; 492f7dc479aSLong Li } 493f7dc479aSLong Li 49431124619SLong Li txq->gdma_comp_buf = rte_malloc_socket("mana_txq_comp", 49531124619SLong Li sizeof(*txq->gdma_comp_buf) * nb_desc, 49631124619SLong Li RTE_CACHE_LINE_SIZE, socket_id); 49731124619SLong Li if (!txq->gdma_comp_buf) { 49831124619SLong Li DRV_LOG(ERR, "failed to allocate txq comp"); 49931124619SLong Li ret = -ENOMEM; 50031124619SLong Li goto fail; 50131124619SLong Li } 50231124619SLong Li 5030f5db3c6SLong Li ret = mana_mr_btree_init(&txq->mr_btree, 5040f5db3c6SLong Li MANA_MR_BTREE_PER_QUEUE_N, socket_id); 5050f5db3c6SLong Li if (ret) { 5060f5db3c6SLong Li DRV_LOG(ERR, "Failed to init TXQ MR btree"); 5070f5db3c6SLong Li goto fail; 5080f5db3c6SLong Li } 5090f5db3c6SLong Li 510f7dc479aSLong Li DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u txq->desc_ring %p", 511f7dc479aSLong Li queue_idx, nb_desc, socket_id, txq->desc_ring); 512f7dc479aSLong Li 513f7dc479aSLong Li txq->desc_ring_head = 0; 514f7dc479aSLong Li txq->desc_ring_tail = 0; 515f7dc479aSLong Li txq->priv = priv; 516f7dc479aSLong Li txq->num_desc = nb_desc; 517f7dc479aSLong Li dev->data->tx_queues[queue_idx] = txq; 518f7dc479aSLong Li 519f7dc479aSLong Li return 0; 520f7dc479aSLong Li 521f7dc479aSLong Li fail: 52231124619SLong Li rte_free(txq->gdma_comp_buf); 523f7dc479aSLong Li rte_free(txq->desc_ring); 524f7dc479aSLong Li rte_free(txq); 525f7dc479aSLong Li return ret; 526f7dc479aSLong Li } 527f7dc479aSLong Li 528f7dc479aSLong Li static void 529f7dc479aSLong Li mana_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 530f7dc479aSLong Li { 531f7dc479aSLong Li struct mana_txq *txq = dev->data->tx_queues[qid]; 532f7dc479aSLong Li 5330f5db3c6SLong Li mana_mr_btree_free(&txq->mr_btree); 5340f5db3c6SLong Li 53531124619SLong Li rte_free(txq->gdma_comp_buf); 536f7dc479aSLong Li rte_free(txq->desc_ring); 537f7dc479aSLong Li rte_free(txq); 538f7dc479aSLong Li } 539f7dc479aSLong Li 540f7dc479aSLong Li static int 5410c63c005SLong Li mana_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 5420c63c005SLong Li uint16_t nb_desc, unsigned int socket_id, 5430c63c005SLong Li const struct rte_eth_rxconf *rx_conf __rte_unused, 5440c63c005SLong Li struct rte_mempool *mp) 5450c63c005SLong Li { 5460c63c005SLong Li struct mana_priv *priv = dev->data->dev_private; 5470c63c005SLong Li struct mana_rxq *rxq; 5480c63c005SLong Li int ret; 5490c63c005SLong Li 5500c63c005SLong Li rxq = rte_zmalloc_socket("mana_rxq", sizeof(*rxq), 0, socket_id); 5510c63c005SLong Li if (!rxq) { 5520c63c005SLong Li DRV_LOG(ERR, "failed to allocate rxq"); 5530c63c005SLong Li return -ENOMEM; 5540c63c005SLong Li } 5550c63c005SLong Li 5560c63c005SLong Li DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u", 5570c63c005SLong Li queue_idx, nb_desc, socket_id); 5580c63c005SLong Li 5590c63c005SLong Li rxq->socket = socket_id; 5600c63c005SLong Li 5610c63c005SLong Li rxq->desc_ring = rte_zmalloc_socket("mana_rx_mbuf_ring", 5620c63c005SLong Li sizeof(struct mana_rxq_desc) * 5630c63c005SLong Li nb_desc, 5640c63c005SLong Li RTE_CACHE_LINE_SIZE, socket_id); 5650c63c005SLong Li 5660c63c005SLong Li if (!rxq->desc_ring) { 5670c63c005SLong Li DRV_LOG(ERR, "failed to allocate rxq desc_ring"); 5680c63c005SLong Li ret = -ENOMEM; 5690c63c005SLong Li goto fail; 5700c63c005SLong Li } 5710c63c005SLong Li 5720c63c005SLong Li rxq->desc_ring_head = 0; 5730c63c005SLong Li rxq->desc_ring_tail = 0; 5740c63c005SLong Li 57531124619SLong Li rxq->gdma_comp_buf = rte_malloc_socket("mana_rxq_comp", 57631124619SLong Li sizeof(*rxq->gdma_comp_buf) * nb_desc, 57731124619SLong Li RTE_CACHE_LINE_SIZE, socket_id); 57831124619SLong Li if (!rxq->gdma_comp_buf) { 57931124619SLong Li DRV_LOG(ERR, "failed to allocate rxq comp"); 58031124619SLong Li ret = -ENOMEM; 58131124619SLong Li goto fail; 58231124619SLong Li } 58331124619SLong Li 5840f5db3c6SLong Li ret = mana_mr_btree_init(&rxq->mr_btree, 5850f5db3c6SLong Li MANA_MR_BTREE_PER_QUEUE_N, socket_id); 5860f5db3c6SLong Li if (ret) { 5870f5db3c6SLong Li DRV_LOG(ERR, "Failed to init RXQ MR btree"); 5880f5db3c6SLong Li goto fail; 5890f5db3c6SLong Li } 5900f5db3c6SLong Li 5910c63c005SLong Li rxq->priv = priv; 5920c63c005SLong Li rxq->num_desc = nb_desc; 5930c63c005SLong Li rxq->mp = mp; 5940c63c005SLong Li dev->data->rx_queues[queue_idx] = rxq; 5950c63c005SLong Li 5960c63c005SLong Li return 0; 5970c63c005SLong Li 5980c63c005SLong Li fail: 59931124619SLong Li rte_free(rxq->gdma_comp_buf); 6000c63c005SLong Li rte_free(rxq->desc_ring); 6010c63c005SLong Li rte_free(rxq); 6020c63c005SLong Li return ret; 6030c63c005SLong Li } 6040c63c005SLong Li 6050c63c005SLong Li static void 6060c63c005SLong Li mana_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 6070c63c005SLong Li { 6080c63c005SLong Li struct mana_rxq *rxq = dev->data->rx_queues[qid]; 6090c63c005SLong Li 6100f5db3c6SLong Li mana_mr_btree_free(&rxq->mr_btree); 6110f5db3c6SLong Li 61231124619SLong Li rte_free(rxq->gdma_comp_buf); 6130c63c005SLong Li rte_free(rxq->desc_ring); 6140c63c005SLong Li rte_free(rxq); 6150c63c005SLong Li } 6160c63c005SLong Li 6170c63c005SLong Li static int 61821958568SLong Li mana_dev_link_update(struct rte_eth_dev *dev, 61921958568SLong Li int wait_to_complete __rte_unused) 62021958568SLong Li { 62121958568SLong Li struct rte_eth_link link; 62221958568SLong Li 62321958568SLong Li /* MANA has no concept of carrier state, always reporting UP */ 62421958568SLong Li link = (struct rte_eth_link) { 62521958568SLong Li .link_duplex = RTE_ETH_LINK_FULL_DUPLEX, 62621958568SLong Li .link_autoneg = RTE_ETH_LINK_SPEED_FIXED, 62721958568SLong Li .link_speed = RTE_ETH_SPEED_NUM_100G, 62821958568SLong Li .link_status = RTE_ETH_LINK_UP, 62921958568SLong Li }; 63021958568SLong Li 63121958568SLong Li return rte_eth_linkstatus_set(dev, &link); 63221958568SLong Li } 63321958568SLong Li 634e350b568SLong Li static int 635e350b568SLong Li mana_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 636e350b568SLong Li { 637e350b568SLong Li unsigned int i; 638e350b568SLong Li 639e350b568SLong Li for (i = 0; i < dev->data->nb_tx_queues; i++) { 640e350b568SLong Li struct mana_txq *txq = dev->data->tx_queues[i]; 641e350b568SLong Li 642e350b568SLong Li if (!txq) 643e350b568SLong Li continue; 644e350b568SLong Li 645eb6f507aSLong Li stats->opackets += txq->stats.packets; 646eb6f507aSLong Li stats->obytes += txq->stats.bytes; 647eb6f507aSLong Li stats->oerrors += txq->stats.errors; 648e350b568SLong Li 649e350b568SLong Li if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 650e350b568SLong Li stats->q_opackets[i] = txq->stats.packets; 651e350b568SLong Li stats->q_obytes[i] = txq->stats.bytes; 652e350b568SLong Li } 653e350b568SLong Li } 654e350b568SLong Li 655e350b568SLong Li stats->rx_nombuf = 0; 656e350b568SLong Li for (i = 0; i < dev->data->nb_rx_queues; i++) { 657e350b568SLong Li struct mana_rxq *rxq = dev->data->rx_queues[i]; 658e350b568SLong Li 659e350b568SLong Li if (!rxq) 660e350b568SLong Li continue; 661e350b568SLong Li 662eb6f507aSLong Li stats->ipackets += rxq->stats.packets; 663eb6f507aSLong Li stats->ibytes += rxq->stats.bytes; 664eb6f507aSLong Li stats->ierrors += rxq->stats.errors; 665e350b568SLong Li 666e350b568SLong Li /* There is no good way to get stats->imissed, not setting it */ 667e350b568SLong Li 668e350b568SLong Li if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 669e350b568SLong Li stats->q_ipackets[i] = rxq->stats.packets; 670e350b568SLong Li stats->q_ibytes[i] = rxq->stats.bytes; 671e350b568SLong Li } 672e350b568SLong Li 673e350b568SLong Li stats->rx_nombuf += rxq->stats.nombuf; 674e350b568SLong Li } 675e350b568SLong Li 676e350b568SLong Li return 0; 677e350b568SLong Li } 678e350b568SLong Li 679e350b568SLong Li static int 680e350b568SLong Li mana_dev_stats_reset(struct rte_eth_dev *dev __rte_unused) 681e350b568SLong Li { 682e350b568SLong Li unsigned int i; 683e350b568SLong Li 684e350b568SLong Li PMD_INIT_FUNC_TRACE(); 685e350b568SLong Li 686e350b568SLong Li for (i = 0; i < dev->data->nb_tx_queues; i++) { 687e350b568SLong Li struct mana_txq *txq = dev->data->tx_queues[i]; 688e350b568SLong Li 689e350b568SLong Li if (!txq) 690e350b568SLong Li continue; 691e350b568SLong Li 692e350b568SLong Li memset(&txq->stats, 0, sizeof(txq->stats)); 693e350b568SLong Li } 694e350b568SLong Li 695e350b568SLong Li for (i = 0; i < dev->data->nb_rx_queues; i++) { 696e350b568SLong Li struct mana_rxq *rxq = dev->data->rx_queues[i]; 697e350b568SLong Li 698e350b568SLong Li if (!rxq) 699e350b568SLong Li continue; 700e350b568SLong Li 701e350b568SLong Li memset(&rxq->stats, 0, sizeof(rxq->stats)); 702e350b568SLong Li } 703e350b568SLong Li 704e350b568SLong Li return 0; 705e350b568SLong Li } 706e350b568SLong Li 70784497839SLong Li static int 70884497839SLong Li mana_get_ifname(const struct mana_priv *priv, char (*ifname)[IF_NAMESIZE]) 70984497839SLong Li { 71084497839SLong Li int ret; 71184497839SLong Li DIR *dir; 71284497839SLong Li struct dirent *dent; 71384497839SLong Li 71484497839SLong Li MANA_MKSTR(dirpath, "%s/device/net", priv->ib_ctx->device->ibdev_path); 71584497839SLong Li 71684497839SLong Li dir = opendir(dirpath); 71784497839SLong Li if (dir == NULL) 71884497839SLong Li return -ENODEV; 71984497839SLong Li 72084497839SLong Li while ((dent = readdir(dir)) != NULL) { 72184497839SLong Li char *name = dent->d_name; 72284497839SLong Li FILE *file; 72384497839SLong Li struct rte_ether_addr addr; 72484497839SLong Li char *mac = NULL; 72584497839SLong Li 72684497839SLong Li if ((name[0] == '.') && 72784497839SLong Li ((name[1] == '\0') || 72884497839SLong Li ((name[1] == '.') && (name[2] == '\0')))) 72984497839SLong Li continue; 73084497839SLong Li 73184497839SLong Li MANA_MKSTR(path, "%s/%s/address", dirpath, name); 73284497839SLong Li 73384497839SLong Li file = fopen(path, "r"); 73484497839SLong Li if (!file) { 73584497839SLong Li ret = -ENODEV; 73684497839SLong Li break; 73784497839SLong Li } 73884497839SLong Li 73984497839SLong Li ret = fscanf(file, "%ms", &mac); 74084497839SLong Li fclose(file); 74184497839SLong Li 74284497839SLong Li if (ret <= 0) { 74384497839SLong Li ret = -EINVAL; 74484497839SLong Li break; 74584497839SLong Li } 74684497839SLong Li 74784497839SLong Li ret = rte_ether_unformat_addr(mac, &addr); 74884497839SLong Li free(mac); 74984497839SLong Li if (ret) 75084497839SLong Li break; 75184497839SLong Li 75284497839SLong Li if (rte_is_same_ether_addr(&addr, priv->dev_data->mac_addrs)) { 75384497839SLong Li strlcpy(*ifname, name, sizeof(*ifname)); 75484497839SLong Li ret = 0; 75584497839SLong Li break; 75684497839SLong Li } 75784497839SLong Li } 75884497839SLong Li 75984497839SLong Li closedir(dir); 76084497839SLong Li return ret; 76184497839SLong Li } 76284497839SLong Li 76384497839SLong Li static int 76484497839SLong Li mana_ifreq(const struct mana_priv *priv, int req, struct ifreq *ifr) 76584497839SLong Li { 76684497839SLong Li int sock, ret; 76784497839SLong Li 76884497839SLong Li sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 76984497839SLong Li if (sock == -1) 77084497839SLong Li return -errno; 77184497839SLong Li 77284497839SLong Li ret = mana_get_ifname(priv, &ifr->ifr_name); 77384497839SLong Li if (ret) { 77484497839SLong Li close(sock); 77584497839SLong Li return ret; 77684497839SLong Li } 77784497839SLong Li 77884497839SLong Li if (ioctl(sock, req, ifr) == -1) 77984497839SLong Li ret = -errno; 78084497839SLong Li 78184497839SLong Li close(sock); 78284497839SLong Li 78384497839SLong Li return ret; 78484497839SLong Li } 78584497839SLong Li 78684497839SLong Li static int 78784497839SLong Li mana_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 78884497839SLong Li { 78984497839SLong Li struct mana_priv *priv = dev->data->dev_private; 79084497839SLong Li struct ifreq request = { .ifr_mtu = mtu, }; 79184497839SLong Li 79284497839SLong Li return mana_ifreq(priv, SIOCSIFMTU, &request); 79384497839SLong Li } 79484497839SLong Li 795517ed6e2SLong Li static const struct eth_dev_ops mana_dev_ops = { 7960dbfecfeSLong Li .dev_configure = mana_dev_configure, 79737544700SLong Li .dev_start = mana_dev_start, 79837544700SLong Li .dev_stop = mana_dev_stop, 7990dbfecfeSLong Li .dev_close = mana_dev_close, 800d878cb09SLong Li .dev_infos_get = mana_dev_info_get, 801f7dc479aSLong Li .txq_info_get = mana_dev_tx_queue_info, 8020c63c005SLong Li .rxq_info_get = mana_dev_rx_queue_info, 803d9679c3aSLong Li .dev_supported_ptypes_get = mana_supported_ptypes, 804a382177cSLong Li .rss_hash_update = mana_rss_hash_update, 805a382177cSLong Li .rss_hash_conf_get = mana_rss_hash_conf_get, 806f7dc479aSLong Li .tx_queue_setup = mana_dev_tx_queue_setup, 807f7dc479aSLong Li .tx_queue_release = mana_dev_tx_queue_release, 8080c63c005SLong Li .rx_queue_setup = mana_dev_rx_queue_setup, 8090c63c005SLong Li .rx_queue_release = mana_dev_rx_queue_release, 810afd5d170SLong Li .rx_queue_intr_enable = mana_rx_intr_enable, 811afd5d170SLong Li .rx_queue_intr_disable = mana_rx_intr_disable, 81221958568SLong Li .link_update = mana_dev_link_update, 813e350b568SLong Li .stats_get = mana_dev_stats_get, 814e350b568SLong Li .stats_reset = mana_dev_stats_reset, 81584497839SLong Li .mtu_set = mana_mtu_set, 816517ed6e2SLong Li }; 817517ed6e2SLong Li 818517ed6e2SLong Li static const struct eth_dev_ops mana_dev_secondary_ops = { 819e350b568SLong Li .stats_get = mana_dev_stats_get, 820e350b568SLong Li .stats_reset = mana_dev_stats_reset, 821d878cb09SLong Li .dev_infos_get = mana_dev_info_get, 822517ed6e2SLong Li }; 823517ed6e2SLong Li 824517ed6e2SLong Li uint16_t 825517ed6e2SLong Li mana_rx_burst_removed(void *dpdk_rxq __rte_unused, 826517ed6e2SLong Li struct rte_mbuf **pkts __rte_unused, 827517ed6e2SLong Li uint16_t pkts_n __rte_unused) 828517ed6e2SLong Li { 829517ed6e2SLong Li rte_mb(); 830517ed6e2SLong Li return 0; 831517ed6e2SLong Li } 832517ed6e2SLong Li 833517ed6e2SLong Li uint16_t 834517ed6e2SLong Li mana_tx_burst_removed(void *dpdk_rxq __rte_unused, 835517ed6e2SLong Li struct rte_mbuf **pkts __rte_unused, 836517ed6e2SLong Li uint16_t pkts_n __rte_unused) 837517ed6e2SLong Li { 838517ed6e2SLong Li rte_mb(); 839517ed6e2SLong Li return 0; 840517ed6e2SLong Li } 841517ed6e2SLong Li 842517ed6e2SLong Li #define ETH_MANA_MAC_ARG "mac" 843517ed6e2SLong Li static const char * const mana_init_args[] = { 844517ed6e2SLong Li ETH_MANA_MAC_ARG, 845517ed6e2SLong Li NULL, 846517ed6e2SLong Li }; 847517ed6e2SLong Li 848517ed6e2SLong Li /* Support of parsing up to 8 mac address from EAL command line */ 849517ed6e2SLong Li #define MAX_NUM_ADDRESS 8 850517ed6e2SLong Li struct mana_conf { 851517ed6e2SLong Li struct rte_ether_addr mac_array[MAX_NUM_ADDRESS]; 852517ed6e2SLong Li unsigned int index; 853517ed6e2SLong Li }; 854517ed6e2SLong Li 855517ed6e2SLong Li static int 856517ed6e2SLong Li mana_arg_parse_callback(const char *key, const char *val, void *private) 857517ed6e2SLong Li { 858517ed6e2SLong Li struct mana_conf *conf = (struct mana_conf *)private; 859517ed6e2SLong Li int ret; 860517ed6e2SLong Li 861517ed6e2SLong Li DRV_LOG(INFO, "key=%s value=%s index=%d", key, val, conf->index); 862517ed6e2SLong Li 863517ed6e2SLong Li if (conf->index >= MAX_NUM_ADDRESS) { 864517ed6e2SLong Li DRV_LOG(ERR, "Exceeding max MAC address"); 865517ed6e2SLong Li return 1; 866517ed6e2SLong Li } 867517ed6e2SLong Li 868517ed6e2SLong Li ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]); 869517ed6e2SLong Li if (ret) { 870517ed6e2SLong Li DRV_LOG(ERR, "Invalid MAC address %s", val); 871517ed6e2SLong Li return ret; 872517ed6e2SLong Li } 873517ed6e2SLong Li 874517ed6e2SLong Li conf->index++; 875517ed6e2SLong Li 876517ed6e2SLong Li return 0; 877517ed6e2SLong Li } 878517ed6e2SLong Li 879517ed6e2SLong Li static int 880517ed6e2SLong Li mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf) 881517ed6e2SLong Li { 882517ed6e2SLong Li struct rte_kvargs *kvlist; 883517ed6e2SLong Li unsigned int arg_count; 884517ed6e2SLong Li int ret = 0; 885517ed6e2SLong Li 886517ed6e2SLong Li kvlist = rte_kvargs_parse(devargs->drv_str, mana_init_args); 887517ed6e2SLong Li if (!kvlist) { 888517ed6e2SLong Li DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str); 889517ed6e2SLong Li return -EINVAL; 890517ed6e2SLong Li } 891517ed6e2SLong Li 892517ed6e2SLong Li arg_count = rte_kvargs_count(kvlist, mana_init_args[0]); 893517ed6e2SLong Li if (arg_count > MAX_NUM_ADDRESS) { 894517ed6e2SLong Li ret = -EINVAL; 895517ed6e2SLong Li goto free_kvlist; 896517ed6e2SLong Li } 897517ed6e2SLong Li ret = rte_kvargs_process(kvlist, mana_init_args[0], 898517ed6e2SLong Li mana_arg_parse_callback, conf); 899517ed6e2SLong Li if (ret) { 900517ed6e2SLong Li DRV_LOG(ERR, "error parsing args"); 901517ed6e2SLong Li goto free_kvlist; 902517ed6e2SLong Li } 903517ed6e2SLong Li 904517ed6e2SLong Li free_kvlist: 905517ed6e2SLong Li rte_kvargs_free(kvlist); 906517ed6e2SLong Li return ret; 907517ed6e2SLong Li } 908517ed6e2SLong Li 909517ed6e2SLong Li static int 910517ed6e2SLong Li get_port_mac(struct ibv_device *device, unsigned int port, 911517ed6e2SLong Li struct rte_ether_addr *addr) 912517ed6e2SLong Li { 913517ed6e2SLong Li FILE *file; 914517ed6e2SLong Li int ret = 0; 915517ed6e2SLong Li DIR *dir; 916517ed6e2SLong Li struct dirent *dent; 917517ed6e2SLong Li unsigned int dev_port; 918517ed6e2SLong Li 919517ed6e2SLong Li MANA_MKSTR(path, "%s/device/net", device->ibdev_path); 920517ed6e2SLong Li 921517ed6e2SLong Li dir = opendir(path); 922517ed6e2SLong Li if (!dir) 923517ed6e2SLong Li return -ENOENT; 924517ed6e2SLong Li 925517ed6e2SLong Li while ((dent = readdir(dir))) { 926517ed6e2SLong Li char *name = dent->d_name; 92735d55f0fSLong Li char *mac = NULL; 928517ed6e2SLong Li 929517ed6e2SLong Li MANA_MKSTR(port_path, "%s/%s/dev_port", path, name); 930517ed6e2SLong Li 931517ed6e2SLong Li /* Ignore . and .. */ 932517ed6e2SLong Li if ((name[0] == '.') && 933517ed6e2SLong Li ((name[1] == '\0') || 934517ed6e2SLong Li ((name[1] == '.') && (name[2] == '\0')))) 935517ed6e2SLong Li continue; 936517ed6e2SLong Li 937517ed6e2SLong Li file = fopen(port_path, "r"); 938517ed6e2SLong Li if (!file) 939517ed6e2SLong Li continue; 940517ed6e2SLong Li 941517ed6e2SLong Li ret = fscanf(file, "%u", &dev_port); 942517ed6e2SLong Li fclose(file); 943517ed6e2SLong Li 944517ed6e2SLong Li if (ret != 1) 945517ed6e2SLong Li continue; 946517ed6e2SLong Li 947517ed6e2SLong Li /* Ethernet ports start at 0, IB port start at 1 */ 948517ed6e2SLong Li if (dev_port == port - 1) { 949517ed6e2SLong Li MANA_MKSTR(address_path, "%s/%s/address", path, name); 950517ed6e2SLong Li 951517ed6e2SLong Li file = fopen(address_path, "r"); 952517ed6e2SLong Li if (!file) 953517ed6e2SLong Li continue; 954517ed6e2SLong Li 95535d55f0fSLong Li ret = fscanf(file, "%ms", &mac); 956517ed6e2SLong Li fclose(file); 957517ed6e2SLong Li 958517ed6e2SLong Li if (ret < 0) 959517ed6e2SLong Li break; 960517ed6e2SLong Li 961517ed6e2SLong Li ret = rte_ether_unformat_addr(mac, addr); 962517ed6e2SLong Li if (ret) 963517ed6e2SLong Li DRV_LOG(ERR, "unrecognized mac addr %s", mac); 96435d55f0fSLong Li 96535d55f0fSLong Li free(mac); 966517ed6e2SLong Li break; 967517ed6e2SLong Li } 968517ed6e2SLong Li } 969517ed6e2SLong Li 970517ed6e2SLong Li closedir(dir); 971517ed6e2SLong Li return ret; 972517ed6e2SLong Li } 973517ed6e2SLong Li 974517ed6e2SLong Li static int 975517ed6e2SLong Li mana_ibv_device_to_pci_addr(const struct ibv_device *device, 976517ed6e2SLong Li struct rte_pci_addr *pci_addr) 977517ed6e2SLong Li { 978517ed6e2SLong Li FILE *file; 979517ed6e2SLong Li char *line = NULL; 980517ed6e2SLong Li size_t len = 0; 981517ed6e2SLong Li 982517ed6e2SLong Li MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path); 983517ed6e2SLong Li 984517ed6e2SLong Li file = fopen(path, "r"); 985517ed6e2SLong Li if (!file) 986517ed6e2SLong Li return -errno; 987517ed6e2SLong Li 988517ed6e2SLong Li while (getline(&line, &len, file) != -1) { 989517ed6e2SLong Li /* Extract information. */ 990517ed6e2SLong Li if (sscanf(line, 991517ed6e2SLong Li "PCI_SLOT_NAME=" 992517ed6e2SLong Li "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 993517ed6e2SLong Li &pci_addr->domain, 994517ed6e2SLong Li &pci_addr->bus, 995517ed6e2SLong Li &pci_addr->devid, 996517ed6e2SLong Li &pci_addr->function) == 4) { 997517ed6e2SLong Li break; 998517ed6e2SLong Li } 999517ed6e2SLong Li } 1000517ed6e2SLong Li 1001517ed6e2SLong Li free(line); 1002517ed6e2SLong Li fclose(file); 1003517ed6e2SLong Li return 0; 1004517ed6e2SLong Li } 1005517ed6e2SLong Li 1006bd15f237SLong Li /* 1007bd15f237SLong Li * Interrupt handler from IB layer to notify this device is being removed. 1008bd15f237SLong Li */ 1009bd15f237SLong Li static void 1010bd15f237SLong Li mana_intr_handler(void *arg) 1011bd15f237SLong Li { 1012bd15f237SLong Li struct mana_priv *priv = arg; 1013bd15f237SLong Li struct ibv_context *ctx = priv->ib_ctx; 1014bd15f237SLong Li struct ibv_async_event event; 1015bd15f237SLong Li 1016bd15f237SLong Li /* Read and ack all messages from IB device */ 1017bd15f237SLong Li while (true) { 1018bd15f237SLong Li if (ibv_get_async_event(ctx, &event)) 1019bd15f237SLong Li break; 1020bd15f237SLong Li 1021bd15f237SLong Li if (event.event_type == IBV_EVENT_DEVICE_FATAL) { 1022bd15f237SLong Li struct rte_eth_dev *dev; 1023bd15f237SLong Li 1024bd15f237SLong Li dev = &rte_eth_devices[priv->port_id]; 1025bd15f237SLong Li if (dev->data->dev_conf.intr_conf.rmv) 1026bd15f237SLong Li rte_eth_dev_callback_process(dev, 1027bd15f237SLong Li RTE_ETH_EVENT_INTR_RMV, NULL); 1028bd15f237SLong Li } 1029bd15f237SLong Li 1030bd15f237SLong Li ibv_ack_async_event(&event); 1031bd15f237SLong Li } 1032bd15f237SLong Li } 1033bd15f237SLong Li 1034bd15f237SLong Li static int 1035bd15f237SLong Li mana_intr_uninstall(struct mana_priv *priv) 1036bd15f237SLong Li { 1037bd15f237SLong Li int ret; 1038bd15f237SLong Li 1039bd15f237SLong Li ret = rte_intr_callback_unregister(priv->intr_handle, 1040bd15f237SLong Li mana_intr_handler, priv); 1041bd15f237SLong Li if (ret <= 0) { 1042bd15f237SLong Li DRV_LOG(ERR, "Failed to unregister intr callback ret %d", ret); 1043bd15f237SLong Li return ret; 1044bd15f237SLong Li } 1045bd15f237SLong Li 1046bd15f237SLong Li rte_intr_instance_free(priv->intr_handle); 1047bd15f237SLong Li 1048bd15f237SLong Li return 0; 1049bd15f237SLong Li } 1050bd15f237SLong Li 1051afd5d170SLong Li int 1052afd5d170SLong Li mana_fd_set_non_blocking(int fd) 1053bd15f237SLong Li { 1054afd5d170SLong Li int ret = fcntl(fd, F_GETFL); 1055afd5d170SLong Li 1056afd5d170SLong Li if (ret != -1 && !fcntl(fd, F_SETFL, ret | O_NONBLOCK)) 1057afd5d170SLong Li return 0; 1058afd5d170SLong Li 1059afd5d170SLong Li rte_errno = errno; 1060afd5d170SLong Li return -rte_errno; 1061afd5d170SLong Li } 1062afd5d170SLong Li 1063afd5d170SLong Li static int 1064afd5d170SLong Li mana_intr_install(struct rte_eth_dev *eth_dev, struct mana_priv *priv) 1065afd5d170SLong Li { 1066afd5d170SLong Li int ret; 1067bd15f237SLong Li struct ibv_context *ctx = priv->ib_ctx; 1068bd15f237SLong Li 1069bd15f237SLong Li priv->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED); 1070bd15f237SLong Li if (!priv->intr_handle) { 1071bd15f237SLong Li DRV_LOG(ERR, "Failed to allocate intr_handle"); 1072bd15f237SLong Li rte_errno = ENOMEM; 1073bd15f237SLong Li return -ENOMEM; 1074bd15f237SLong Li } 1075bd15f237SLong Li 1076afd5d170SLong Li ret = rte_intr_fd_set(priv->intr_handle, -1); 1077afd5d170SLong Li if (ret) 1078afd5d170SLong Li goto free_intr; 1079bd15f237SLong Li 1080afd5d170SLong Li ret = mana_fd_set_non_blocking(ctx->async_fd); 1081bd15f237SLong Li if (ret) { 1082bd15f237SLong Li DRV_LOG(ERR, "Failed to change async_fd to NONBLOCK"); 1083bd15f237SLong Li goto free_intr; 1084bd15f237SLong Li } 1085bd15f237SLong Li 1086afd5d170SLong Li ret = rte_intr_fd_set(priv->intr_handle, ctx->async_fd); 1087afd5d170SLong Li if (ret) 1088afd5d170SLong Li goto free_intr; 1089afd5d170SLong Li 1090afd5d170SLong Li ret = rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT); 1091afd5d170SLong Li if (ret) 1092afd5d170SLong Li goto free_intr; 1093bd15f237SLong Li 1094bd15f237SLong Li ret = rte_intr_callback_register(priv->intr_handle, 1095bd15f237SLong Li mana_intr_handler, priv); 1096bd15f237SLong Li if (ret) { 1097bd15f237SLong Li DRV_LOG(ERR, "Failed to register intr callback"); 1098bd15f237SLong Li rte_intr_fd_set(priv->intr_handle, -1); 1099afd5d170SLong Li goto free_intr; 1100bd15f237SLong Li } 1101bd15f237SLong Li 1102afd5d170SLong Li eth_dev->intr_handle = priv->intr_handle; 1103bd15f237SLong Li return 0; 1104bd15f237SLong Li 1105bd15f237SLong Li free_intr: 1106bd15f237SLong Li rte_intr_instance_free(priv->intr_handle); 1107bd15f237SLong Li priv->intr_handle = NULL; 1108bd15f237SLong Li 1109bd15f237SLong Li return ret; 1110bd15f237SLong Li } 1111bd15f237SLong Li 1112517ed6e2SLong Li static int 1113517ed6e2SLong Li mana_proc_priv_init(struct rte_eth_dev *dev) 1114517ed6e2SLong Li { 1115517ed6e2SLong Li struct mana_process_priv *priv; 1116517ed6e2SLong Li 1117517ed6e2SLong Li priv = rte_zmalloc_socket("mana_proc_priv", 1118517ed6e2SLong Li sizeof(struct mana_process_priv), 1119517ed6e2SLong Li RTE_CACHE_LINE_SIZE, 1120517ed6e2SLong Li dev->device->numa_node); 1121517ed6e2SLong Li if (!priv) 1122517ed6e2SLong Li return -ENOMEM; 1123517ed6e2SLong Li 1124517ed6e2SLong Li dev->process_private = priv; 1125517ed6e2SLong Li return 0; 1126517ed6e2SLong Li } 1127517ed6e2SLong Li 1128517ed6e2SLong Li /* 1129517ed6e2SLong Li * Map the doorbell page for the secondary process through IB device handle. 1130517ed6e2SLong Li */ 1131517ed6e2SLong Li static int 1132517ed6e2SLong Li mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd) 1133517ed6e2SLong Li { 1134517ed6e2SLong Li struct mana_process_priv *priv = eth_dev->process_private; 1135517ed6e2SLong Li 1136517ed6e2SLong Li void *addr; 1137517ed6e2SLong Li 1138517ed6e2SLong Li addr = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0); 1139517ed6e2SLong Li if (addr == MAP_FAILED) { 1140517ed6e2SLong Li DRV_LOG(ERR, "Failed to map secondary doorbell port %u", 1141517ed6e2SLong Li eth_dev->data->port_id); 1142517ed6e2SLong Li return -ENOMEM; 1143517ed6e2SLong Li } 1144517ed6e2SLong Li 1145517ed6e2SLong Li DRV_LOG(INFO, "Secondary doorbell mapped to %p", addr); 1146517ed6e2SLong Li 1147517ed6e2SLong Li priv->db_page = addr; 1148517ed6e2SLong Li 1149517ed6e2SLong Li return 0; 1150517ed6e2SLong Li } 1151517ed6e2SLong Li 1152517ed6e2SLong Li /* Initialize shared data for the driver (all devices) */ 1153517ed6e2SLong Li static int 1154517ed6e2SLong Li mana_init_shared_data(void) 1155517ed6e2SLong Li { 1156517ed6e2SLong Li int ret = 0; 1157517ed6e2SLong Li const struct rte_memzone *secondary_mz; 1158517ed6e2SLong Li 1159517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data_lock); 1160517ed6e2SLong Li 1161517ed6e2SLong Li /* Skip if shared data is already initialized */ 1162517ed6e2SLong Li if (mana_shared_data) 1163517ed6e2SLong Li goto exit; 1164517ed6e2SLong Li 1165517ed6e2SLong Li if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 1166517ed6e2SLong Li mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA, 1167517ed6e2SLong Li sizeof(*mana_shared_data), 1168517ed6e2SLong Li SOCKET_ID_ANY, 0); 1169517ed6e2SLong Li if (!mana_shared_mz) { 1170517ed6e2SLong Li DRV_LOG(ERR, "Cannot allocate mana shared data"); 1171517ed6e2SLong Li ret = -rte_errno; 1172517ed6e2SLong Li goto exit; 1173517ed6e2SLong Li } 1174517ed6e2SLong Li 1175517ed6e2SLong Li mana_shared_data = mana_shared_mz->addr; 1176517ed6e2SLong Li memset(mana_shared_data, 0, sizeof(*mana_shared_data)); 1177517ed6e2SLong Li rte_spinlock_init(&mana_shared_data->lock); 1178517ed6e2SLong Li } else { 1179517ed6e2SLong Li secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA); 1180517ed6e2SLong Li if (!secondary_mz) { 1181517ed6e2SLong Li DRV_LOG(ERR, "Cannot attach mana shared data"); 1182517ed6e2SLong Li ret = -rte_errno; 1183517ed6e2SLong Li goto exit; 1184517ed6e2SLong Li } 1185517ed6e2SLong Li 1186517ed6e2SLong Li mana_shared_data = secondary_mz->addr; 1187517ed6e2SLong Li memset(&mana_local_data, 0, sizeof(mana_local_data)); 1188517ed6e2SLong Li } 1189517ed6e2SLong Li 1190517ed6e2SLong Li exit: 1191517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data_lock); 1192517ed6e2SLong Li 1193517ed6e2SLong Li return ret; 1194517ed6e2SLong Li } 1195517ed6e2SLong Li 1196517ed6e2SLong Li /* 1197517ed6e2SLong Li * Init the data structures for use in primary and secondary processes. 1198517ed6e2SLong Li */ 1199517ed6e2SLong Li static int 1200517ed6e2SLong Li mana_init_once(void) 1201517ed6e2SLong Li { 1202517ed6e2SLong Li int ret; 1203517ed6e2SLong Li 1204517ed6e2SLong Li ret = mana_init_shared_data(); 1205517ed6e2SLong Li if (ret) 1206517ed6e2SLong Li return ret; 1207517ed6e2SLong Li 1208517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock); 1209517ed6e2SLong Li 1210517ed6e2SLong Li switch (rte_eal_process_type()) { 1211517ed6e2SLong Li case RTE_PROC_PRIMARY: 1212517ed6e2SLong Li if (mana_shared_data->init_done) 1213517ed6e2SLong Li break; 1214517ed6e2SLong Li 1215517ed6e2SLong Li ret = mana_mp_init_primary(); 1216517ed6e2SLong Li if (ret) 1217517ed6e2SLong Li break; 1218517ed6e2SLong Li DRV_LOG(ERR, "MP INIT PRIMARY"); 1219517ed6e2SLong Li 1220517ed6e2SLong Li mana_shared_data->init_done = 1; 1221517ed6e2SLong Li break; 1222517ed6e2SLong Li 1223517ed6e2SLong Li case RTE_PROC_SECONDARY: 1224517ed6e2SLong Li 1225517ed6e2SLong Li if (mana_local_data.init_done) 1226517ed6e2SLong Li break; 1227517ed6e2SLong Li 1228517ed6e2SLong Li ret = mana_mp_init_secondary(); 1229517ed6e2SLong Li if (ret) 1230517ed6e2SLong Li break; 1231517ed6e2SLong Li 1232517ed6e2SLong Li DRV_LOG(ERR, "MP INIT SECONDARY"); 1233517ed6e2SLong Li 1234517ed6e2SLong Li mana_local_data.init_done = 1; 1235517ed6e2SLong Li break; 1236517ed6e2SLong Li 1237517ed6e2SLong Li default: 1238517ed6e2SLong Li /* Impossible, internal error */ 1239517ed6e2SLong Li ret = -EPROTO; 1240517ed6e2SLong Li break; 1241517ed6e2SLong Li } 1242517ed6e2SLong Li 1243517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock); 1244517ed6e2SLong Li 1245517ed6e2SLong Li return ret; 1246517ed6e2SLong Li } 1247517ed6e2SLong Li 1248517ed6e2SLong Li /* 1249517ed6e2SLong Li * Probe an IB port 1250517ed6e2SLong Li * Return value: 1251517ed6e2SLong Li * positive value: successfully probed port 1252517ed6e2SLong Li * 0: port not matching specified MAC address 1253517ed6e2SLong Li * negative value: error code 1254517ed6e2SLong Li */ 1255517ed6e2SLong Li static int 1256517ed6e2SLong Li mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr, 1257517ed6e2SLong Li uint8_t port, struct rte_pci_device *pci_dev, struct rte_ether_addr *addr) 1258517ed6e2SLong Li { 1259517ed6e2SLong Li struct mana_priv *priv = NULL; 1260517ed6e2SLong Li struct rte_eth_dev *eth_dev = NULL; 1261517ed6e2SLong Li struct ibv_parent_domain_init_attr attr = {0}; 1262517ed6e2SLong Li char address[64]; 1263517ed6e2SLong Li char name[RTE_ETH_NAME_MAX_LEN]; 1264517ed6e2SLong Li int ret; 1265517ed6e2SLong Li struct ibv_context *ctx = NULL; 1266517ed6e2SLong Li 1267517ed6e2SLong Li rte_ether_format_addr(address, sizeof(address), addr); 1268517ed6e2SLong Li DRV_LOG(INFO, "device located port %u address %s", port, address); 1269517ed6e2SLong Li 1270517ed6e2SLong Li priv = rte_zmalloc_socket(NULL, sizeof(*priv), RTE_CACHE_LINE_SIZE, 1271517ed6e2SLong Li SOCKET_ID_ANY); 1272517ed6e2SLong Li if (!priv) 1273517ed6e2SLong Li return -ENOMEM; 1274517ed6e2SLong Li 1275517ed6e2SLong Li snprintf(name, sizeof(name), "%s_port%d", pci_dev->device.name, port); 1276517ed6e2SLong Li 1277517ed6e2SLong Li if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1278517ed6e2SLong Li int fd; 1279517ed6e2SLong Li 1280517ed6e2SLong Li eth_dev = rte_eth_dev_attach_secondary(name); 1281517ed6e2SLong Li if (!eth_dev) { 1282517ed6e2SLong Li DRV_LOG(ERR, "Can't attach to dev %s", name); 1283517ed6e2SLong Li ret = -ENOMEM; 1284517ed6e2SLong Li goto failed; 1285517ed6e2SLong Li } 1286517ed6e2SLong Li 1287517ed6e2SLong Li eth_dev->device = &pci_dev->device; 1288517ed6e2SLong Li eth_dev->dev_ops = &mana_dev_secondary_ops; 1289517ed6e2SLong Li ret = mana_proc_priv_init(eth_dev); 1290517ed6e2SLong Li if (ret) 1291517ed6e2SLong Li goto failed; 1292517ed6e2SLong Li priv->process_priv = eth_dev->process_private; 1293517ed6e2SLong Li 1294517ed6e2SLong Li /* Get the IB FD from the primary process */ 1295517ed6e2SLong Li fd = mana_mp_req_verbs_cmd_fd(eth_dev); 1296517ed6e2SLong Li if (fd < 0) { 1297517ed6e2SLong Li DRV_LOG(ERR, "Failed to get FD %d", fd); 1298517ed6e2SLong Li ret = -ENODEV; 1299517ed6e2SLong Li goto failed; 1300517ed6e2SLong Li } 1301517ed6e2SLong Li 1302517ed6e2SLong Li ret = mana_map_doorbell_secondary(eth_dev, fd); 1303517ed6e2SLong Li if (ret) { 1304517ed6e2SLong Li DRV_LOG(ERR, "Failed secondary map %d", fd); 1305517ed6e2SLong Li goto failed; 1306517ed6e2SLong Li } 1307517ed6e2SLong Li 1308517ed6e2SLong Li /* fd is no not used after mapping doorbell */ 1309517ed6e2SLong Li close(fd); 1310517ed6e2SLong Li 1311517ed6e2SLong Li eth_dev->tx_pkt_burst = mana_tx_burst_removed; 1312517ed6e2SLong Li eth_dev->rx_pkt_burst = mana_rx_burst_removed; 1313517ed6e2SLong Li 1314517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock); 1315517ed6e2SLong Li mana_shared_data->secondary_cnt++; 1316517ed6e2SLong Li mana_local_data.secondary_cnt++; 1317517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock); 1318517ed6e2SLong Li 1319517ed6e2SLong Li rte_eth_copy_pci_info(eth_dev, pci_dev); 1320517ed6e2SLong Li rte_eth_dev_probing_finish(eth_dev); 1321517ed6e2SLong Li 1322517ed6e2SLong Li return 0; 1323517ed6e2SLong Li } 1324517ed6e2SLong Li 1325517ed6e2SLong Li ctx = ibv_open_device(ibdev); 1326517ed6e2SLong Li if (!ctx) { 1327517ed6e2SLong Li DRV_LOG(ERR, "Failed to open IB device %s", ibdev->name); 1328517ed6e2SLong Li ret = -ENODEV; 1329517ed6e2SLong Li goto failed; 1330517ed6e2SLong Li } 1331517ed6e2SLong Li 1332517ed6e2SLong Li eth_dev = rte_eth_dev_allocate(name); 1333517ed6e2SLong Li if (!eth_dev) { 1334517ed6e2SLong Li ret = -ENOMEM; 1335517ed6e2SLong Li goto failed; 1336517ed6e2SLong Li } 1337517ed6e2SLong Li 1338517ed6e2SLong Li eth_dev->data->mac_addrs = 1339517ed6e2SLong Li rte_calloc("mana_mac", 1, 1340517ed6e2SLong Li sizeof(struct rte_ether_addr), 0); 1341517ed6e2SLong Li if (!eth_dev->data->mac_addrs) { 1342517ed6e2SLong Li ret = -ENOMEM; 1343517ed6e2SLong Li goto failed; 1344517ed6e2SLong Li } 1345517ed6e2SLong Li 1346517ed6e2SLong Li rte_ether_addr_copy(addr, eth_dev->data->mac_addrs); 1347517ed6e2SLong Li 1348517ed6e2SLong Li priv->ib_pd = ibv_alloc_pd(ctx); 1349517ed6e2SLong Li if (!priv->ib_pd) { 1350517ed6e2SLong Li DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port); 1351517ed6e2SLong Li ret = -ENOMEM; 1352517ed6e2SLong Li goto failed; 1353517ed6e2SLong Li } 1354517ed6e2SLong Li 1355517ed6e2SLong Li /* Create a parent domain with the port number */ 1356517ed6e2SLong Li attr.pd = priv->ib_pd; 1357517ed6e2SLong Li attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT; 135874decf3bSWei Hu attr.pd_context = (void *)(uintptr_t)port; 1359517ed6e2SLong Li priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr); 1360517ed6e2SLong Li if (!priv->ib_parent_pd) { 1361517ed6e2SLong Li DRV_LOG(ERR, "ibv_alloc_parent_domain failed port %d", port); 1362517ed6e2SLong Li ret = -ENOMEM; 1363517ed6e2SLong Li goto failed; 1364517ed6e2SLong Li } 1365517ed6e2SLong Li 1366517ed6e2SLong Li priv->ib_ctx = ctx; 1367517ed6e2SLong Li priv->port_id = eth_dev->data->port_id; 1368517ed6e2SLong Li priv->dev_port = port; 1369517ed6e2SLong Li eth_dev->data->dev_private = priv; 1370517ed6e2SLong Li priv->dev_data = eth_dev->data; 1371517ed6e2SLong Li 1372517ed6e2SLong Li priv->max_rx_queues = dev_attr->orig_attr.max_qp; 1373517ed6e2SLong Li priv->max_tx_queues = dev_attr->orig_attr.max_qp; 1374517ed6e2SLong Li 1375517ed6e2SLong Li priv->max_rx_desc = 1376517ed6e2SLong Li RTE_MIN(dev_attr->orig_attr.max_qp_wr, 1377517ed6e2SLong Li dev_attr->orig_attr.max_cqe); 1378517ed6e2SLong Li priv->max_tx_desc = 1379517ed6e2SLong Li RTE_MIN(dev_attr->orig_attr.max_qp_wr, 1380517ed6e2SLong Li dev_attr->orig_attr.max_cqe); 1381517ed6e2SLong Li 1382517ed6e2SLong Li priv->max_send_sge = dev_attr->orig_attr.max_sge; 1383517ed6e2SLong Li priv->max_recv_sge = dev_attr->orig_attr.max_sge; 1384517ed6e2SLong Li 1385517ed6e2SLong Li priv->max_mr = dev_attr->orig_attr.max_mr; 1386517ed6e2SLong Li priv->max_mr_size = dev_attr->orig_attr.max_mr_size; 1387517ed6e2SLong Li 1388517ed6e2SLong Li DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d", 1389517ed6e2SLong Li name, priv->max_rx_queues, priv->max_rx_desc, 1390517ed6e2SLong Li priv->max_send_sge); 1391517ed6e2SLong Li 1392517ed6e2SLong Li rte_eth_copy_pci_info(eth_dev, pci_dev); 1393517ed6e2SLong Li 1394bd15f237SLong Li /* Create async interrupt handler */ 1395afd5d170SLong Li ret = mana_intr_install(eth_dev, priv); 1396bd15f237SLong Li if (ret) { 1397bd15f237SLong Li DRV_LOG(ERR, "Failed to install intr handler"); 1398bd15f237SLong Li goto failed; 1399bd15f237SLong Li } 1400bd15f237SLong Li 1401517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock); 1402517ed6e2SLong Li mana_shared_data->primary_cnt++; 1403517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock); 1404517ed6e2SLong Li 1405517ed6e2SLong Li eth_dev->device = &pci_dev->device; 1406517ed6e2SLong Li 1407517ed6e2SLong Li DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id); 1408517ed6e2SLong Li 1409517ed6e2SLong Li eth_dev->rx_pkt_burst = mana_rx_burst_removed; 1410517ed6e2SLong Li eth_dev->tx_pkt_burst = mana_tx_burst_removed; 1411517ed6e2SLong Li eth_dev->dev_ops = &mana_dev_ops; 1412517ed6e2SLong Li 1413517ed6e2SLong Li rte_eth_dev_probing_finish(eth_dev); 1414517ed6e2SLong Li 1415517ed6e2SLong Li return 0; 1416517ed6e2SLong Li 1417517ed6e2SLong Li failed: 1418517ed6e2SLong Li /* Free the resource for the port failed */ 1419517ed6e2SLong Li if (priv) { 1420517ed6e2SLong Li if (priv->ib_parent_pd) 1421517ed6e2SLong Li ibv_dealloc_pd(priv->ib_parent_pd); 1422517ed6e2SLong Li 1423517ed6e2SLong Li if (priv->ib_pd) 1424517ed6e2SLong Li ibv_dealloc_pd(priv->ib_pd); 1425517ed6e2SLong Li } 1426517ed6e2SLong Li 1427517ed6e2SLong Li if (eth_dev) 1428517ed6e2SLong Li rte_eth_dev_release_port(eth_dev); 1429517ed6e2SLong Li 1430517ed6e2SLong Li rte_free(priv); 1431517ed6e2SLong Li 1432517ed6e2SLong Li if (ctx) 1433517ed6e2SLong Li ibv_close_device(ctx); 1434517ed6e2SLong Li 1435517ed6e2SLong Li return ret; 1436517ed6e2SLong Li } 1437517ed6e2SLong Li 1438517ed6e2SLong Li /* 1439517ed6e2SLong Li * Goes through the IB device list to look for the IB port matching the 1440517ed6e2SLong Li * mac_addr. If found, create a rte_eth_dev for it. 14410595702eSLong Li * Return value: number of successfully probed devices 1442517ed6e2SLong Li */ 1443517ed6e2SLong Li static int 1444517ed6e2SLong Li mana_pci_probe_mac(struct rte_pci_device *pci_dev, 1445517ed6e2SLong Li struct rte_ether_addr *mac_addr) 1446517ed6e2SLong Li { 1447517ed6e2SLong Li struct ibv_device **ibv_list; 1448517ed6e2SLong Li int ibv_idx; 1449517ed6e2SLong Li struct ibv_context *ctx; 1450517ed6e2SLong Li int num_devices; 14510595702eSLong Li int ret; 1452517ed6e2SLong Li uint8_t port; 14530595702eSLong Li int count = 0; 1454517ed6e2SLong Li 1455517ed6e2SLong Li ibv_list = ibv_get_device_list(&num_devices); 1456517ed6e2SLong Li for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) { 1457517ed6e2SLong Li struct ibv_device *ibdev = ibv_list[ibv_idx]; 1458517ed6e2SLong Li struct rte_pci_addr pci_addr; 1459517ed6e2SLong Li struct ibv_device_attr_ex dev_attr; 1460517ed6e2SLong Li 1461517ed6e2SLong Li DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s", 1462517ed6e2SLong Li ibdev->name, ibdev->dev_name, ibdev->ibdev_path); 1463517ed6e2SLong Li 1464517ed6e2SLong Li if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr)) 1465517ed6e2SLong Li continue; 1466517ed6e2SLong Li 1467517ed6e2SLong Li /* Ignore if this IB device is not this PCI device */ 1468*8fa22e1fSThomas Monjalon if (rte_pci_addr_cmp(&pci_dev->addr, &pci_addr) != 0) 1469517ed6e2SLong Li continue; 1470517ed6e2SLong Li 1471517ed6e2SLong Li ctx = ibv_open_device(ibdev); 1472517ed6e2SLong Li if (!ctx) { 1473517ed6e2SLong Li DRV_LOG(ERR, "Failed to open IB device %s", 1474517ed6e2SLong Li ibdev->name); 1475517ed6e2SLong Li continue; 1476517ed6e2SLong Li } 1477517ed6e2SLong Li ret = ibv_query_device_ex(ctx, NULL, &dev_attr); 1478517ed6e2SLong Li ibv_close_device(ctx); 1479517ed6e2SLong Li 14800595702eSLong Li if (ret) { 14810595702eSLong Li DRV_LOG(ERR, "Failed to query IB device %s", 14820595702eSLong Li ibdev->name); 14830595702eSLong Li continue; 14840595702eSLong Li } 14850595702eSLong Li 1486517ed6e2SLong Li for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt; 1487517ed6e2SLong Li port++) { 1488517ed6e2SLong Li struct rte_ether_addr addr; 1489517ed6e2SLong Li ret = get_port_mac(ibdev, port, &addr); 1490517ed6e2SLong Li if (ret) 1491517ed6e2SLong Li continue; 1492517ed6e2SLong Li 1493517ed6e2SLong Li if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr)) 1494517ed6e2SLong Li continue; 1495517ed6e2SLong Li 1496517ed6e2SLong Li ret = mana_probe_port(ibdev, &dev_attr, port, pci_dev, &addr); 14970595702eSLong Li if (ret) { 1498517ed6e2SLong Li DRV_LOG(ERR, "Probe on IB port %u failed %d", port, ret); 14990595702eSLong Li } else { 15000595702eSLong Li count++; 1501517ed6e2SLong Li DRV_LOG(INFO, "Successfully probed on IB port %u", port); 1502517ed6e2SLong Li } 1503517ed6e2SLong Li } 15040595702eSLong Li } 1505517ed6e2SLong Li 1506517ed6e2SLong Li ibv_free_device_list(ibv_list); 15070595702eSLong Li return count; 1508517ed6e2SLong Li } 1509517ed6e2SLong Li 1510517ed6e2SLong Li /* 1511517ed6e2SLong Li * Main callback function from PCI bus to probe a device. 1512517ed6e2SLong Li */ 1513517ed6e2SLong Li static int 1514517ed6e2SLong Li mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 1515517ed6e2SLong Li struct rte_pci_device *pci_dev) 1516517ed6e2SLong Li { 1517517ed6e2SLong Li struct rte_devargs *args = pci_dev->device.devargs; 1518517ed6e2SLong Li struct mana_conf conf = {0}; 1519517ed6e2SLong Li unsigned int i; 1520517ed6e2SLong Li int ret; 15210595702eSLong Li int count = 0; 1522517ed6e2SLong Li 1523517ed6e2SLong Li if (args && args->drv_str) { 1524517ed6e2SLong Li ret = mana_parse_args(args, &conf); 1525517ed6e2SLong Li if (ret) { 1526517ed6e2SLong Li DRV_LOG(ERR, "Failed to parse parameters args = %s", 1527517ed6e2SLong Li args->drv_str); 1528517ed6e2SLong Li return ret; 1529517ed6e2SLong Li } 1530517ed6e2SLong Li } 1531517ed6e2SLong Li 1532517ed6e2SLong Li ret = mana_init_once(); 1533517ed6e2SLong Li if (ret) { 1534517ed6e2SLong Li DRV_LOG(ERR, "Failed to init PMD global data %d", ret); 1535517ed6e2SLong Li return ret; 1536517ed6e2SLong Li } 1537517ed6e2SLong Li 1538517ed6e2SLong Li /* If there are no driver parameters, probe on all ports */ 15390595702eSLong Li if (conf.index) { 15400595702eSLong Li for (i = 0; i < conf.index; i++) 15410595702eSLong Li count += mana_pci_probe_mac(pci_dev, 15420595702eSLong Li &conf.mac_array[i]); 15430595702eSLong Li } else { 15440595702eSLong Li count = mana_pci_probe_mac(pci_dev, NULL); 1545517ed6e2SLong Li } 1546517ed6e2SLong Li 15470595702eSLong Li if (!count) { 15480595702eSLong Li rte_memzone_free(mana_shared_mz); 15490595702eSLong Li mana_shared_mz = NULL; 15500595702eSLong Li ret = -ENODEV; 15510595702eSLong Li } 15520595702eSLong Li 15530595702eSLong Li return ret; 1554517ed6e2SLong Li } 1555517ed6e2SLong Li 1556517ed6e2SLong Li static int 1557517ed6e2SLong Li mana_dev_uninit(struct rte_eth_dev *dev) 1558517ed6e2SLong Li { 15590dbfecfeSLong Li return mana_dev_close(dev); 1560517ed6e2SLong Li } 1561517ed6e2SLong Li 1562517ed6e2SLong Li /* 1563517ed6e2SLong Li * Callback from PCI to remove this device. 1564517ed6e2SLong Li */ 1565517ed6e2SLong Li static int 1566517ed6e2SLong Li mana_pci_remove(struct rte_pci_device *pci_dev) 1567517ed6e2SLong Li { 1568517ed6e2SLong Li if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 1569517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data_lock); 1570517ed6e2SLong Li 1571517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock); 1572517ed6e2SLong Li 1573517ed6e2SLong Li RTE_VERIFY(mana_shared_data->primary_cnt > 0); 1574517ed6e2SLong Li mana_shared_data->primary_cnt--; 1575517ed6e2SLong Li if (!mana_shared_data->primary_cnt) { 1576517ed6e2SLong Li DRV_LOG(DEBUG, "mp uninit primary"); 1577517ed6e2SLong Li mana_mp_uninit_primary(); 1578517ed6e2SLong Li } 1579517ed6e2SLong Li 1580517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock); 1581517ed6e2SLong Li 1582517ed6e2SLong Li /* Also free the shared memory if this is the last */ 1583517ed6e2SLong Li if (!mana_shared_data->primary_cnt) { 1584517ed6e2SLong Li DRV_LOG(DEBUG, "free shared memezone data"); 1585517ed6e2SLong Li rte_memzone_free(mana_shared_mz); 15860595702eSLong Li mana_shared_mz = NULL; 1587517ed6e2SLong Li } 1588517ed6e2SLong Li 1589517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data_lock); 1590517ed6e2SLong Li } else { 1591517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data_lock); 1592517ed6e2SLong Li 1593517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock); 1594517ed6e2SLong Li RTE_VERIFY(mana_shared_data->secondary_cnt > 0); 1595517ed6e2SLong Li mana_shared_data->secondary_cnt--; 1596517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock); 1597517ed6e2SLong Li 1598517ed6e2SLong Li RTE_VERIFY(mana_local_data.secondary_cnt > 0); 1599517ed6e2SLong Li mana_local_data.secondary_cnt--; 1600517ed6e2SLong Li if (!mana_local_data.secondary_cnt) { 1601517ed6e2SLong Li DRV_LOG(DEBUG, "mp uninit secondary"); 1602517ed6e2SLong Li mana_mp_uninit_secondary(); 1603517ed6e2SLong Li } 1604517ed6e2SLong Li 1605517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data_lock); 1606517ed6e2SLong Li } 1607517ed6e2SLong Li 1608517ed6e2SLong Li return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit); 1609517ed6e2SLong Li } 1610517ed6e2SLong Li 1611517ed6e2SLong Li static const struct rte_pci_id mana_pci_id_map[] = { 1612517ed6e2SLong Li { 1613517ed6e2SLong Li RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, 1614517ed6e2SLong Li PCI_DEVICE_ID_MICROSOFT_MANA) 1615517ed6e2SLong Li }, 1616517ed6e2SLong Li { 1617517ed6e2SLong Li .vendor_id = 0 1618517ed6e2SLong Li }, 1619517ed6e2SLong Li }; 1620517ed6e2SLong Li 1621517ed6e2SLong Li static struct rte_pci_driver mana_pci_driver = { 1622517ed6e2SLong Li .id_table = mana_pci_id_map, 1623517ed6e2SLong Li .probe = mana_pci_probe, 1624517ed6e2SLong Li .remove = mana_pci_remove, 1625517ed6e2SLong Li .drv_flags = RTE_PCI_DRV_INTR_RMV, 1626517ed6e2SLong Li }; 1627517ed6e2SLong Li 1628517ed6e2SLong Li RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver); 1629517ed6e2SLong Li RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map); 1630517ed6e2SLong Li RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib"); 1631517ed6e2SLong Li RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE); 1632517ed6e2SLong Li RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE); 1633517ed6e2SLong Li RTE_PMD_REGISTER_PARAM_STRING(net_mana, ETH_MANA_MAC_ARG "=<mac_addr>"); 1634