1517ed6e2SLong Li /* SPDX-License-Identifier: BSD-3-Clause 2517ed6e2SLong Li * Copyright 2022 Microsoft Corporation 3517ed6e2SLong Li */ 4517ed6e2SLong Li 5517ed6e2SLong Li #include <unistd.h> 6517ed6e2SLong Li #include <dirent.h> 7517ed6e2SLong Li #include <fcntl.h> 8517ed6e2SLong Li #include <sys/mman.h> 9517ed6e2SLong Li 10517ed6e2SLong Li #include <ethdev_driver.h> 11517ed6e2SLong Li #include <ethdev_pci.h> 12517ed6e2SLong Li #include <rte_kvargs.h> 13517ed6e2SLong Li #include <rte_eal_paging.h> 14517ed6e2SLong Li 15517ed6e2SLong Li #include <infiniband/verbs.h> 16517ed6e2SLong Li #include <infiniband/manadv.h> 17517ed6e2SLong Li 18517ed6e2SLong Li #include <assert.h> 19517ed6e2SLong Li 20517ed6e2SLong Li #include "mana.h" 21517ed6e2SLong Li 22517ed6e2SLong Li /* Shared memory between primary/secondary processes, per driver */ 23517ed6e2SLong Li /* Data to track primary/secondary usage */ 24517ed6e2SLong Li struct mana_shared_data *mana_shared_data; 25517ed6e2SLong Li static struct mana_shared_data mana_local_data; 26517ed6e2SLong Li 27517ed6e2SLong Li /* The memory region for the above data */ 28517ed6e2SLong Li static const struct rte_memzone *mana_shared_mz; 29517ed6e2SLong Li static const char *MZ_MANA_SHARED_DATA = "mana_shared_data"; 30517ed6e2SLong Li 31517ed6e2SLong Li /* Spinlock for mana_shared_data */ 32517ed6e2SLong Li static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER; 33517ed6e2SLong Li 34517ed6e2SLong Li /* Allocate a buffer on the stack and fill it with a printf format string. */ 35517ed6e2SLong Li #define MANA_MKSTR(name, ...) \ 36517ed6e2SLong Li int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \ 37517ed6e2SLong Li char name[mkstr_size_##name + 1]; \ 38517ed6e2SLong Li \ 39517ed6e2SLong Li memset(name, 0, mkstr_size_##name + 1); \ 40517ed6e2SLong Li snprintf(name, sizeof(name), "" __VA_ARGS__) 41517ed6e2SLong Li 42517ed6e2SLong Li int mana_logtype_driver; 43517ed6e2SLong Li int mana_logtype_init; 44517ed6e2SLong Li 450dbfecfeSLong Li /* 460dbfecfeSLong Li * Callback from rdma-core to allocate a buffer for a queue. 470dbfecfeSLong Li */ 480dbfecfeSLong Li void * 490dbfecfeSLong Li mana_alloc_verbs_buf(size_t size, void *data) 500dbfecfeSLong Li { 510dbfecfeSLong Li void *ret; 520dbfecfeSLong Li size_t alignment = rte_mem_page_size(); 530dbfecfeSLong Li int socket = (int)(uintptr_t)data; 540dbfecfeSLong Li 550dbfecfeSLong Li DRV_LOG(DEBUG, "size=%zu socket=%d", size, socket); 560dbfecfeSLong Li 570dbfecfeSLong Li if (alignment == (size_t)-1) { 580dbfecfeSLong Li DRV_LOG(ERR, "Failed to get mem page size"); 590dbfecfeSLong Li rte_errno = ENOMEM; 600dbfecfeSLong Li return NULL; 610dbfecfeSLong Li } 620dbfecfeSLong Li 630dbfecfeSLong Li ret = rte_zmalloc_socket("mana_verb_buf", size, alignment, socket); 640dbfecfeSLong Li if (!ret && size) 650dbfecfeSLong Li rte_errno = ENOMEM; 660dbfecfeSLong Li return ret; 670dbfecfeSLong Li } 680dbfecfeSLong Li 690dbfecfeSLong Li void 700dbfecfeSLong Li mana_free_verbs_buf(void *ptr, void *data __rte_unused) 710dbfecfeSLong Li { 720dbfecfeSLong Li rte_free(ptr); 730dbfecfeSLong Li } 740dbfecfeSLong Li 750dbfecfeSLong Li static int 760dbfecfeSLong Li mana_dev_configure(struct rte_eth_dev *dev) 770dbfecfeSLong Li { 780dbfecfeSLong Li struct mana_priv *priv = dev->data->dev_private; 790dbfecfeSLong Li struct rte_eth_conf *dev_conf = &dev->data->dev_conf; 800dbfecfeSLong Li 810dbfecfeSLong Li if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 820dbfecfeSLong Li dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 830dbfecfeSLong Li 840dbfecfeSLong Li if (dev->data->nb_rx_queues != dev->data->nb_tx_queues) { 850dbfecfeSLong Li DRV_LOG(ERR, "Only support equal number of rx/tx queues"); 860dbfecfeSLong Li return -EINVAL; 870dbfecfeSLong Li } 880dbfecfeSLong Li 890dbfecfeSLong Li if (!rte_is_power_of_2(dev->data->nb_rx_queues)) { 900dbfecfeSLong Li DRV_LOG(ERR, "number of TX/RX queues must be power of 2"); 910dbfecfeSLong Li return -EINVAL; 920dbfecfeSLong Li } 930dbfecfeSLong Li 940dbfecfeSLong Li priv->num_queues = dev->data->nb_rx_queues; 950dbfecfeSLong Li 960dbfecfeSLong Li manadv_set_context_attr(priv->ib_ctx, MANADV_CTX_ATTR_BUF_ALLOCATORS, 970dbfecfeSLong Li (void *)((uintptr_t)&(struct manadv_ctx_allocators){ 980dbfecfeSLong Li .alloc = &mana_alloc_verbs_buf, 990dbfecfeSLong Li .free = &mana_free_verbs_buf, 1000dbfecfeSLong Li .data = 0, 1010dbfecfeSLong Li })); 1020dbfecfeSLong Li 1030dbfecfeSLong Li return 0; 1040dbfecfeSLong Li } 1050dbfecfeSLong Li 106bd15f237SLong Li static int mana_intr_uninstall(struct mana_priv *priv); 107bd15f237SLong Li 1080dbfecfeSLong Li static int 1090dbfecfeSLong Li mana_dev_close(struct rte_eth_dev *dev) 1100dbfecfeSLong Li { 1110dbfecfeSLong Li struct mana_priv *priv = dev->data->dev_private; 1120dbfecfeSLong Li int ret; 1130dbfecfeSLong Li 114bd15f237SLong Li ret = mana_intr_uninstall(priv); 115bd15f237SLong Li if (ret) 116bd15f237SLong Li return ret; 117bd15f237SLong Li 1180dbfecfeSLong Li ret = ibv_close_device(priv->ib_ctx); 1190dbfecfeSLong Li if (ret) { 1200dbfecfeSLong Li ret = errno; 1210dbfecfeSLong Li return ret; 1220dbfecfeSLong Li } 1230dbfecfeSLong Li 1240dbfecfeSLong Li return 0; 1250dbfecfeSLong Li } 1260dbfecfeSLong Li 127d878cb09SLong Li static int 128d878cb09SLong Li mana_dev_info_get(struct rte_eth_dev *dev, 129d878cb09SLong Li struct rte_eth_dev_info *dev_info) 130d878cb09SLong Li { 131d878cb09SLong Li struct mana_priv *priv = dev->data->dev_private; 132d878cb09SLong Li 133d878cb09SLong Li dev_info->max_mtu = RTE_ETHER_MTU; 134d878cb09SLong Li 135d878cb09SLong Li /* RX params */ 136d878cb09SLong Li dev_info->min_rx_bufsize = MIN_RX_BUF_SIZE; 137d878cb09SLong Li dev_info->max_rx_pktlen = MAX_FRAME_SIZE; 138d878cb09SLong Li 139d878cb09SLong Li dev_info->max_rx_queues = priv->max_rx_queues; 140d878cb09SLong Li dev_info->max_tx_queues = priv->max_tx_queues; 141d878cb09SLong Li 142d878cb09SLong Li dev_info->max_mac_addrs = MANA_MAX_MAC_ADDR; 143d878cb09SLong Li dev_info->max_hash_mac_addrs = 0; 144d878cb09SLong Li 145d878cb09SLong Li dev_info->max_vfs = 1; 146d878cb09SLong Li 147d878cb09SLong Li /* Offload params */ 148d878cb09SLong Li dev_info->rx_offload_capa = MANA_DEV_RX_OFFLOAD_SUPPORT; 149d878cb09SLong Li 150d878cb09SLong Li dev_info->tx_offload_capa = MANA_DEV_TX_OFFLOAD_SUPPORT; 151d878cb09SLong Li 152d878cb09SLong Li /* RSS */ 153d878cb09SLong Li dev_info->reta_size = INDIRECTION_TABLE_NUM_ELEMENTS; 154d878cb09SLong Li dev_info->hash_key_size = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES; 155d878cb09SLong Li dev_info->flow_type_rss_offloads = MANA_ETH_RSS_SUPPORT; 156d878cb09SLong Li 157d878cb09SLong Li /* Thresholds */ 158d878cb09SLong Li dev_info->default_rxconf = (struct rte_eth_rxconf){ 159d878cb09SLong Li .rx_thresh = { 160d878cb09SLong Li .pthresh = 8, 161d878cb09SLong Li .hthresh = 8, 162d878cb09SLong Li .wthresh = 0, 163d878cb09SLong Li }, 164d878cb09SLong Li .rx_free_thresh = 32, 165d878cb09SLong Li /* If no descriptors available, pkts are dropped by default */ 166d878cb09SLong Li .rx_drop_en = 1, 167d878cb09SLong Li }; 168d878cb09SLong Li 169d878cb09SLong Li dev_info->default_txconf = (struct rte_eth_txconf){ 170d878cb09SLong Li .tx_thresh = { 171d878cb09SLong Li .pthresh = 32, 172d878cb09SLong Li .hthresh = 0, 173d878cb09SLong Li .wthresh = 0, 174d878cb09SLong Li }, 175d878cb09SLong Li .tx_rs_thresh = 32, 176d878cb09SLong Li .tx_free_thresh = 32, 177d878cb09SLong Li }; 178d878cb09SLong Li 179d878cb09SLong Li /* Buffer limits */ 180d878cb09SLong Li dev_info->rx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE; 181d878cb09SLong Li dev_info->rx_desc_lim.nb_max = priv->max_rx_desc; 182d878cb09SLong Li dev_info->rx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE; 183d878cb09SLong Li dev_info->rx_desc_lim.nb_seg_max = priv->max_recv_sge; 184d878cb09SLong Li dev_info->rx_desc_lim.nb_mtu_seg_max = priv->max_recv_sge; 185d878cb09SLong Li 186d878cb09SLong Li dev_info->tx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE; 187d878cb09SLong Li dev_info->tx_desc_lim.nb_max = priv->max_tx_desc; 188d878cb09SLong Li dev_info->tx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE; 189d878cb09SLong Li dev_info->tx_desc_lim.nb_seg_max = priv->max_send_sge; 190d878cb09SLong Li dev_info->rx_desc_lim.nb_mtu_seg_max = priv->max_recv_sge; 191d878cb09SLong Li 192d878cb09SLong Li /* Speed */ 193d878cb09SLong Li dev_info->speed_capa = RTE_ETH_LINK_SPEED_100G; 194d878cb09SLong Li 195d878cb09SLong Li /* RX params */ 196d878cb09SLong Li dev_info->default_rxportconf.burst_size = 1; 197d878cb09SLong Li dev_info->default_rxportconf.ring_size = MAX_RECEIVE_BUFFERS_PER_QUEUE; 198d878cb09SLong Li dev_info->default_rxportconf.nb_queues = 1; 199d878cb09SLong Li 200d878cb09SLong Li /* TX params */ 201d878cb09SLong Li dev_info->default_txportconf.burst_size = 1; 202d878cb09SLong Li dev_info->default_txportconf.ring_size = MAX_SEND_BUFFERS_PER_QUEUE; 203d878cb09SLong Li dev_info->default_txportconf.nb_queues = 1; 204d878cb09SLong Li 205d878cb09SLong Li return 0; 206d878cb09SLong Li } 207d878cb09SLong Li 208*0c63c005SLong Li static void 209*0c63c005SLong Li mana_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id, 210*0c63c005SLong Li struct rte_eth_rxq_info *qinfo) 211*0c63c005SLong Li { 212*0c63c005SLong Li struct mana_rxq *rxq = dev->data->rx_queues[queue_id]; 213*0c63c005SLong Li 214*0c63c005SLong Li qinfo->mp = rxq->mp; 215*0c63c005SLong Li qinfo->nb_desc = rxq->num_desc; 216*0c63c005SLong Li qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; 217*0c63c005SLong Li } 218*0c63c005SLong Li 219d9679c3aSLong Li static const uint32_t * 220d9679c3aSLong Li mana_supported_ptypes(struct rte_eth_dev *dev __rte_unused) 221d9679c3aSLong Li { 222d9679c3aSLong Li static const uint32_t ptypes[] = { 223d9679c3aSLong Li RTE_PTYPE_L2_ETHER, 224d9679c3aSLong Li RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 225d9679c3aSLong Li RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 226d9679c3aSLong Li RTE_PTYPE_L4_FRAG, 227d9679c3aSLong Li RTE_PTYPE_L4_TCP, 228d9679c3aSLong Li RTE_PTYPE_L4_UDP, 229d9679c3aSLong Li RTE_PTYPE_UNKNOWN 230d9679c3aSLong Li }; 231d9679c3aSLong Li 232d9679c3aSLong Li return ptypes; 233d9679c3aSLong Li } 234d9679c3aSLong Li 23521958568SLong Li static int 236a382177cSLong Li mana_rss_hash_update(struct rte_eth_dev *dev, 237a382177cSLong Li struct rte_eth_rss_conf *rss_conf) 238a382177cSLong Li { 239a382177cSLong Li struct mana_priv *priv = dev->data->dev_private; 240a382177cSLong Li 241a382177cSLong Li /* Currently can only update RSS hash when device is stopped */ 242a382177cSLong Li if (dev->data->dev_started) { 243a382177cSLong Li DRV_LOG(ERR, "Can't update RSS after device has started"); 244a382177cSLong Li return -ENODEV; 245a382177cSLong Li } 246a382177cSLong Li 247a382177cSLong Li if (rss_conf->rss_hf & ~MANA_ETH_RSS_SUPPORT) { 248a382177cSLong Li DRV_LOG(ERR, "Port %u invalid RSS HF 0x%" PRIx64, 249a382177cSLong Li dev->data->port_id, rss_conf->rss_hf); 250a382177cSLong Li return -EINVAL; 251a382177cSLong Li } 252a382177cSLong Li 253a382177cSLong Li if (rss_conf->rss_key && rss_conf->rss_key_len) { 254a382177cSLong Li if (rss_conf->rss_key_len != TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) { 255a382177cSLong Li DRV_LOG(ERR, "Port %u key len must be %u long", 256a382177cSLong Li dev->data->port_id, 257a382177cSLong Li TOEPLITZ_HASH_KEY_SIZE_IN_BYTES); 258a382177cSLong Li return -EINVAL; 259a382177cSLong Li } 260a382177cSLong Li 261a382177cSLong Li priv->rss_conf.rss_key_len = rss_conf->rss_key_len; 262a382177cSLong Li priv->rss_conf.rss_key = 263a382177cSLong Li rte_zmalloc("mana_rss", rss_conf->rss_key_len, 264a382177cSLong Li RTE_CACHE_LINE_SIZE); 265a382177cSLong Li if (!priv->rss_conf.rss_key) 266a382177cSLong Li return -ENOMEM; 267a382177cSLong Li memcpy(priv->rss_conf.rss_key, rss_conf->rss_key, 268a382177cSLong Li rss_conf->rss_key_len); 269a382177cSLong Li } 270a382177cSLong Li priv->rss_conf.rss_hf = rss_conf->rss_hf; 271a382177cSLong Li 272a382177cSLong Li return 0; 273a382177cSLong Li } 274a382177cSLong Li 275a382177cSLong Li static int 276a382177cSLong Li mana_rss_hash_conf_get(struct rte_eth_dev *dev, 277a382177cSLong Li struct rte_eth_rss_conf *rss_conf) 278a382177cSLong Li { 279a382177cSLong Li struct mana_priv *priv = dev->data->dev_private; 280a382177cSLong Li 281a382177cSLong Li if (!rss_conf) 282a382177cSLong Li return -EINVAL; 283a382177cSLong Li 284a382177cSLong Li if (rss_conf->rss_key && 285a382177cSLong Li rss_conf->rss_key_len >= priv->rss_conf.rss_key_len) { 286a382177cSLong Li memcpy(rss_conf->rss_key, priv->rss_conf.rss_key, 287a382177cSLong Li priv->rss_conf.rss_key_len); 288a382177cSLong Li } 289a382177cSLong Li 290a382177cSLong Li rss_conf->rss_key_len = priv->rss_conf.rss_key_len; 291a382177cSLong Li rss_conf->rss_hf = priv->rss_conf.rss_hf; 292a382177cSLong Li 293a382177cSLong Li return 0; 294a382177cSLong Li } 295a382177cSLong Li 296a382177cSLong Li static int 297*0c63c005SLong Li mana_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 298*0c63c005SLong Li uint16_t nb_desc, unsigned int socket_id, 299*0c63c005SLong Li const struct rte_eth_rxconf *rx_conf __rte_unused, 300*0c63c005SLong Li struct rte_mempool *mp) 301*0c63c005SLong Li { 302*0c63c005SLong Li struct mana_priv *priv = dev->data->dev_private; 303*0c63c005SLong Li struct mana_rxq *rxq; 304*0c63c005SLong Li int ret; 305*0c63c005SLong Li 306*0c63c005SLong Li rxq = rte_zmalloc_socket("mana_rxq", sizeof(*rxq), 0, socket_id); 307*0c63c005SLong Li if (!rxq) { 308*0c63c005SLong Li DRV_LOG(ERR, "failed to allocate rxq"); 309*0c63c005SLong Li return -ENOMEM; 310*0c63c005SLong Li } 311*0c63c005SLong Li 312*0c63c005SLong Li DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u", 313*0c63c005SLong Li queue_idx, nb_desc, socket_id); 314*0c63c005SLong Li 315*0c63c005SLong Li rxq->socket = socket_id; 316*0c63c005SLong Li 317*0c63c005SLong Li rxq->desc_ring = rte_zmalloc_socket("mana_rx_mbuf_ring", 318*0c63c005SLong Li sizeof(struct mana_rxq_desc) * 319*0c63c005SLong Li nb_desc, 320*0c63c005SLong Li RTE_CACHE_LINE_SIZE, socket_id); 321*0c63c005SLong Li 322*0c63c005SLong Li if (!rxq->desc_ring) { 323*0c63c005SLong Li DRV_LOG(ERR, "failed to allocate rxq desc_ring"); 324*0c63c005SLong Li ret = -ENOMEM; 325*0c63c005SLong Li goto fail; 326*0c63c005SLong Li } 327*0c63c005SLong Li 328*0c63c005SLong Li rxq->desc_ring_head = 0; 329*0c63c005SLong Li rxq->desc_ring_tail = 0; 330*0c63c005SLong Li 331*0c63c005SLong Li rxq->priv = priv; 332*0c63c005SLong Li rxq->num_desc = nb_desc; 333*0c63c005SLong Li rxq->mp = mp; 334*0c63c005SLong Li dev->data->rx_queues[queue_idx] = rxq; 335*0c63c005SLong Li 336*0c63c005SLong Li return 0; 337*0c63c005SLong Li 338*0c63c005SLong Li fail: 339*0c63c005SLong Li rte_free(rxq->desc_ring); 340*0c63c005SLong Li rte_free(rxq); 341*0c63c005SLong Li return ret; 342*0c63c005SLong Li } 343*0c63c005SLong Li 344*0c63c005SLong Li static void 345*0c63c005SLong Li mana_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 346*0c63c005SLong Li { 347*0c63c005SLong Li struct mana_rxq *rxq = dev->data->rx_queues[qid]; 348*0c63c005SLong Li 349*0c63c005SLong Li rte_free(rxq->desc_ring); 350*0c63c005SLong Li rte_free(rxq); 351*0c63c005SLong Li } 352*0c63c005SLong Li 353*0c63c005SLong Li static int 35421958568SLong Li mana_dev_link_update(struct rte_eth_dev *dev, 35521958568SLong Li int wait_to_complete __rte_unused) 35621958568SLong Li { 35721958568SLong Li struct rte_eth_link link; 35821958568SLong Li 35921958568SLong Li /* MANA has no concept of carrier state, always reporting UP */ 36021958568SLong Li link = (struct rte_eth_link) { 36121958568SLong Li .link_duplex = RTE_ETH_LINK_FULL_DUPLEX, 36221958568SLong Li .link_autoneg = RTE_ETH_LINK_SPEED_FIXED, 36321958568SLong Li .link_speed = RTE_ETH_SPEED_NUM_100G, 36421958568SLong Li .link_status = RTE_ETH_LINK_UP, 36521958568SLong Li }; 36621958568SLong Li 36721958568SLong Li return rte_eth_linkstatus_set(dev, &link); 36821958568SLong Li } 36921958568SLong Li 370517ed6e2SLong Li static const struct eth_dev_ops mana_dev_ops = { 3710dbfecfeSLong Li .dev_configure = mana_dev_configure, 3720dbfecfeSLong Li .dev_close = mana_dev_close, 373d878cb09SLong Li .dev_infos_get = mana_dev_info_get, 374*0c63c005SLong Li .rxq_info_get = mana_dev_rx_queue_info, 375d9679c3aSLong Li .dev_supported_ptypes_get = mana_supported_ptypes, 376a382177cSLong Li .rss_hash_update = mana_rss_hash_update, 377a382177cSLong Li .rss_hash_conf_get = mana_rss_hash_conf_get, 378*0c63c005SLong Li .rx_queue_setup = mana_dev_rx_queue_setup, 379*0c63c005SLong Li .rx_queue_release = mana_dev_rx_queue_release, 38021958568SLong Li .link_update = mana_dev_link_update, 381517ed6e2SLong Li }; 382517ed6e2SLong Li 383517ed6e2SLong Li static const struct eth_dev_ops mana_dev_secondary_ops = { 384d878cb09SLong Li .dev_infos_get = mana_dev_info_get, 385517ed6e2SLong Li }; 386517ed6e2SLong Li 387517ed6e2SLong Li uint16_t 388517ed6e2SLong Li mana_rx_burst_removed(void *dpdk_rxq __rte_unused, 389517ed6e2SLong Li struct rte_mbuf **pkts __rte_unused, 390517ed6e2SLong Li uint16_t pkts_n __rte_unused) 391517ed6e2SLong Li { 392517ed6e2SLong Li rte_mb(); 393517ed6e2SLong Li return 0; 394517ed6e2SLong Li } 395517ed6e2SLong Li 396517ed6e2SLong Li uint16_t 397517ed6e2SLong Li mana_tx_burst_removed(void *dpdk_rxq __rte_unused, 398517ed6e2SLong Li struct rte_mbuf **pkts __rte_unused, 399517ed6e2SLong Li uint16_t pkts_n __rte_unused) 400517ed6e2SLong Li { 401517ed6e2SLong Li rte_mb(); 402517ed6e2SLong Li return 0; 403517ed6e2SLong Li } 404517ed6e2SLong Li 405517ed6e2SLong Li #define ETH_MANA_MAC_ARG "mac" 406517ed6e2SLong Li static const char * const mana_init_args[] = { 407517ed6e2SLong Li ETH_MANA_MAC_ARG, 408517ed6e2SLong Li NULL, 409517ed6e2SLong Li }; 410517ed6e2SLong Li 411517ed6e2SLong Li /* Support of parsing up to 8 mac address from EAL command line */ 412517ed6e2SLong Li #define MAX_NUM_ADDRESS 8 413517ed6e2SLong Li struct mana_conf { 414517ed6e2SLong Li struct rte_ether_addr mac_array[MAX_NUM_ADDRESS]; 415517ed6e2SLong Li unsigned int index; 416517ed6e2SLong Li }; 417517ed6e2SLong Li 418517ed6e2SLong Li static int 419517ed6e2SLong Li mana_arg_parse_callback(const char *key, const char *val, void *private) 420517ed6e2SLong Li { 421517ed6e2SLong Li struct mana_conf *conf = (struct mana_conf *)private; 422517ed6e2SLong Li int ret; 423517ed6e2SLong Li 424517ed6e2SLong Li DRV_LOG(INFO, "key=%s value=%s index=%d", key, val, conf->index); 425517ed6e2SLong Li 426517ed6e2SLong Li if (conf->index >= MAX_NUM_ADDRESS) { 427517ed6e2SLong Li DRV_LOG(ERR, "Exceeding max MAC address"); 428517ed6e2SLong Li return 1; 429517ed6e2SLong Li } 430517ed6e2SLong Li 431517ed6e2SLong Li ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]); 432517ed6e2SLong Li if (ret) { 433517ed6e2SLong Li DRV_LOG(ERR, "Invalid MAC address %s", val); 434517ed6e2SLong Li return ret; 435517ed6e2SLong Li } 436517ed6e2SLong Li 437517ed6e2SLong Li conf->index++; 438517ed6e2SLong Li 439517ed6e2SLong Li return 0; 440517ed6e2SLong Li } 441517ed6e2SLong Li 442517ed6e2SLong Li static int 443517ed6e2SLong Li mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf) 444517ed6e2SLong Li { 445517ed6e2SLong Li struct rte_kvargs *kvlist; 446517ed6e2SLong Li unsigned int arg_count; 447517ed6e2SLong Li int ret = 0; 448517ed6e2SLong Li 449517ed6e2SLong Li kvlist = rte_kvargs_parse(devargs->drv_str, mana_init_args); 450517ed6e2SLong Li if (!kvlist) { 451517ed6e2SLong Li DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str); 452517ed6e2SLong Li return -EINVAL; 453517ed6e2SLong Li } 454517ed6e2SLong Li 455517ed6e2SLong Li arg_count = rte_kvargs_count(kvlist, mana_init_args[0]); 456517ed6e2SLong Li if (arg_count > MAX_NUM_ADDRESS) { 457517ed6e2SLong Li ret = -EINVAL; 458517ed6e2SLong Li goto free_kvlist; 459517ed6e2SLong Li } 460517ed6e2SLong Li ret = rte_kvargs_process(kvlist, mana_init_args[0], 461517ed6e2SLong Li mana_arg_parse_callback, conf); 462517ed6e2SLong Li if (ret) { 463517ed6e2SLong Li DRV_LOG(ERR, "error parsing args"); 464517ed6e2SLong Li goto free_kvlist; 465517ed6e2SLong Li } 466517ed6e2SLong Li 467517ed6e2SLong Li free_kvlist: 468517ed6e2SLong Li rte_kvargs_free(kvlist); 469517ed6e2SLong Li return ret; 470517ed6e2SLong Li } 471517ed6e2SLong Li 472517ed6e2SLong Li static int 473517ed6e2SLong Li get_port_mac(struct ibv_device *device, unsigned int port, 474517ed6e2SLong Li struct rte_ether_addr *addr) 475517ed6e2SLong Li { 476517ed6e2SLong Li FILE *file; 477517ed6e2SLong Li int ret = 0; 478517ed6e2SLong Li DIR *dir; 479517ed6e2SLong Li struct dirent *dent; 480517ed6e2SLong Li unsigned int dev_port; 481517ed6e2SLong Li char mac[20]; 482517ed6e2SLong Li 483517ed6e2SLong Li MANA_MKSTR(path, "%s/device/net", device->ibdev_path); 484517ed6e2SLong Li 485517ed6e2SLong Li dir = opendir(path); 486517ed6e2SLong Li if (!dir) 487517ed6e2SLong Li return -ENOENT; 488517ed6e2SLong Li 489517ed6e2SLong Li while ((dent = readdir(dir))) { 490517ed6e2SLong Li char *name = dent->d_name; 491517ed6e2SLong Li 492517ed6e2SLong Li MANA_MKSTR(port_path, "%s/%s/dev_port", path, name); 493517ed6e2SLong Li 494517ed6e2SLong Li /* Ignore . and .. */ 495517ed6e2SLong Li if ((name[0] == '.') && 496517ed6e2SLong Li ((name[1] == '\0') || 497517ed6e2SLong Li ((name[1] == '.') && (name[2] == '\0')))) 498517ed6e2SLong Li continue; 499517ed6e2SLong Li 500517ed6e2SLong Li file = fopen(port_path, "r"); 501517ed6e2SLong Li if (!file) 502517ed6e2SLong Li continue; 503517ed6e2SLong Li 504517ed6e2SLong Li ret = fscanf(file, "%u", &dev_port); 505517ed6e2SLong Li fclose(file); 506517ed6e2SLong Li 507517ed6e2SLong Li if (ret != 1) 508517ed6e2SLong Li continue; 509517ed6e2SLong Li 510517ed6e2SLong Li /* Ethernet ports start at 0, IB port start at 1 */ 511517ed6e2SLong Li if (dev_port == port - 1) { 512517ed6e2SLong Li MANA_MKSTR(address_path, "%s/%s/address", path, name); 513517ed6e2SLong Li 514517ed6e2SLong Li file = fopen(address_path, "r"); 515517ed6e2SLong Li if (!file) 516517ed6e2SLong Li continue; 517517ed6e2SLong Li 518517ed6e2SLong Li ret = fscanf(file, "%s", mac); 519517ed6e2SLong Li fclose(file); 520517ed6e2SLong Li 521517ed6e2SLong Li if (ret < 0) 522517ed6e2SLong Li break; 523517ed6e2SLong Li 524517ed6e2SLong Li ret = rte_ether_unformat_addr(mac, addr); 525517ed6e2SLong Li if (ret) 526517ed6e2SLong Li DRV_LOG(ERR, "unrecognized mac addr %s", mac); 527517ed6e2SLong Li break; 528517ed6e2SLong Li } 529517ed6e2SLong Li } 530517ed6e2SLong Li 531517ed6e2SLong Li closedir(dir); 532517ed6e2SLong Li return ret; 533517ed6e2SLong Li } 534517ed6e2SLong Li 535517ed6e2SLong Li static int 536517ed6e2SLong Li mana_ibv_device_to_pci_addr(const struct ibv_device *device, 537517ed6e2SLong Li struct rte_pci_addr *pci_addr) 538517ed6e2SLong Li { 539517ed6e2SLong Li FILE *file; 540517ed6e2SLong Li char *line = NULL; 541517ed6e2SLong Li size_t len = 0; 542517ed6e2SLong Li 543517ed6e2SLong Li MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path); 544517ed6e2SLong Li 545517ed6e2SLong Li file = fopen(path, "r"); 546517ed6e2SLong Li if (!file) 547517ed6e2SLong Li return -errno; 548517ed6e2SLong Li 549517ed6e2SLong Li while (getline(&line, &len, file) != -1) { 550517ed6e2SLong Li /* Extract information. */ 551517ed6e2SLong Li if (sscanf(line, 552517ed6e2SLong Li "PCI_SLOT_NAME=" 553517ed6e2SLong Li "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 554517ed6e2SLong Li &pci_addr->domain, 555517ed6e2SLong Li &pci_addr->bus, 556517ed6e2SLong Li &pci_addr->devid, 557517ed6e2SLong Li &pci_addr->function) == 4) { 558517ed6e2SLong Li break; 559517ed6e2SLong Li } 560517ed6e2SLong Li } 561517ed6e2SLong Li 562517ed6e2SLong Li free(line); 563517ed6e2SLong Li fclose(file); 564517ed6e2SLong Li return 0; 565517ed6e2SLong Li } 566517ed6e2SLong Li 567bd15f237SLong Li /* 568bd15f237SLong Li * Interrupt handler from IB layer to notify this device is being removed. 569bd15f237SLong Li */ 570bd15f237SLong Li static void 571bd15f237SLong Li mana_intr_handler(void *arg) 572bd15f237SLong Li { 573bd15f237SLong Li struct mana_priv *priv = arg; 574bd15f237SLong Li struct ibv_context *ctx = priv->ib_ctx; 575bd15f237SLong Li struct ibv_async_event event; 576bd15f237SLong Li 577bd15f237SLong Li /* Read and ack all messages from IB device */ 578bd15f237SLong Li while (true) { 579bd15f237SLong Li if (ibv_get_async_event(ctx, &event)) 580bd15f237SLong Li break; 581bd15f237SLong Li 582bd15f237SLong Li if (event.event_type == IBV_EVENT_DEVICE_FATAL) { 583bd15f237SLong Li struct rte_eth_dev *dev; 584bd15f237SLong Li 585bd15f237SLong Li dev = &rte_eth_devices[priv->port_id]; 586bd15f237SLong Li if (dev->data->dev_conf.intr_conf.rmv) 587bd15f237SLong Li rte_eth_dev_callback_process(dev, 588bd15f237SLong Li RTE_ETH_EVENT_INTR_RMV, NULL); 589bd15f237SLong Li } 590bd15f237SLong Li 591bd15f237SLong Li ibv_ack_async_event(&event); 592bd15f237SLong Li } 593bd15f237SLong Li } 594bd15f237SLong Li 595bd15f237SLong Li static int 596bd15f237SLong Li mana_intr_uninstall(struct mana_priv *priv) 597bd15f237SLong Li { 598bd15f237SLong Li int ret; 599bd15f237SLong Li 600bd15f237SLong Li ret = rte_intr_callback_unregister(priv->intr_handle, 601bd15f237SLong Li mana_intr_handler, priv); 602bd15f237SLong Li if (ret <= 0) { 603bd15f237SLong Li DRV_LOG(ERR, "Failed to unregister intr callback ret %d", ret); 604bd15f237SLong Li return ret; 605bd15f237SLong Li } 606bd15f237SLong Li 607bd15f237SLong Li rte_intr_instance_free(priv->intr_handle); 608bd15f237SLong Li 609bd15f237SLong Li return 0; 610bd15f237SLong Li } 611bd15f237SLong Li 612bd15f237SLong Li static int 613bd15f237SLong Li mana_intr_install(struct mana_priv *priv) 614bd15f237SLong Li { 615bd15f237SLong Li int ret, flags; 616bd15f237SLong Li struct ibv_context *ctx = priv->ib_ctx; 617bd15f237SLong Li 618bd15f237SLong Li priv->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED); 619bd15f237SLong Li if (!priv->intr_handle) { 620bd15f237SLong Li DRV_LOG(ERR, "Failed to allocate intr_handle"); 621bd15f237SLong Li rte_errno = ENOMEM; 622bd15f237SLong Li return -ENOMEM; 623bd15f237SLong Li } 624bd15f237SLong Li 625bd15f237SLong Li rte_intr_fd_set(priv->intr_handle, -1); 626bd15f237SLong Li 627bd15f237SLong Li flags = fcntl(ctx->async_fd, F_GETFL); 628bd15f237SLong Li ret = fcntl(ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 629bd15f237SLong Li if (ret) { 630bd15f237SLong Li DRV_LOG(ERR, "Failed to change async_fd to NONBLOCK"); 631bd15f237SLong Li goto free_intr; 632bd15f237SLong Li } 633bd15f237SLong Li 634bd15f237SLong Li rte_intr_fd_set(priv->intr_handle, ctx->async_fd); 635bd15f237SLong Li rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT); 636bd15f237SLong Li 637bd15f237SLong Li ret = rte_intr_callback_register(priv->intr_handle, 638bd15f237SLong Li mana_intr_handler, priv); 639bd15f237SLong Li if (ret) { 640bd15f237SLong Li DRV_LOG(ERR, "Failed to register intr callback"); 641bd15f237SLong Li rte_intr_fd_set(priv->intr_handle, -1); 642bd15f237SLong Li goto restore_fd; 643bd15f237SLong Li } 644bd15f237SLong Li 645bd15f237SLong Li return 0; 646bd15f237SLong Li 647bd15f237SLong Li restore_fd: 648bd15f237SLong Li fcntl(ctx->async_fd, F_SETFL, flags); 649bd15f237SLong Li 650bd15f237SLong Li free_intr: 651bd15f237SLong Li rte_intr_instance_free(priv->intr_handle); 652bd15f237SLong Li priv->intr_handle = NULL; 653bd15f237SLong Li 654bd15f237SLong Li return ret; 655bd15f237SLong Li } 656bd15f237SLong Li 657517ed6e2SLong Li static int 658517ed6e2SLong Li mana_proc_priv_init(struct rte_eth_dev *dev) 659517ed6e2SLong Li { 660517ed6e2SLong Li struct mana_process_priv *priv; 661517ed6e2SLong Li 662517ed6e2SLong Li priv = rte_zmalloc_socket("mana_proc_priv", 663517ed6e2SLong Li sizeof(struct mana_process_priv), 664517ed6e2SLong Li RTE_CACHE_LINE_SIZE, 665517ed6e2SLong Li dev->device->numa_node); 666517ed6e2SLong Li if (!priv) 667517ed6e2SLong Li return -ENOMEM; 668517ed6e2SLong Li 669517ed6e2SLong Li dev->process_private = priv; 670517ed6e2SLong Li return 0; 671517ed6e2SLong Li } 672517ed6e2SLong Li 673517ed6e2SLong Li /* 674517ed6e2SLong Li * Map the doorbell page for the secondary process through IB device handle. 675517ed6e2SLong Li */ 676517ed6e2SLong Li static int 677517ed6e2SLong Li mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd) 678517ed6e2SLong Li { 679517ed6e2SLong Li struct mana_process_priv *priv = eth_dev->process_private; 680517ed6e2SLong Li 681517ed6e2SLong Li void *addr; 682517ed6e2SLong Li 683517ed6e2SLong Li addr = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0); 684517ed6e2SLong Li if (addr == MAP_FAILED) { 685517ed6e2SLong Li DRV_LOG(ERR, "Failed to map secondary doorbell port %u", 686517ed6e2SLong Li eth_dev->data->port_id); 687517ed6e2SLong Li return -ENOMEM; 688517ed6e2SLong Li } 689517ed6e2SLong Li 690517ed6e2SLong Li DRV_LOG(INFO, "Secondary doorbell mapped to %p", addr); 691517ed6e2SLong Li 692517ed6e2SLong Li priv->db_page = addr; 693517ed6e2SLong Li 694517ed6e2SLong Li return 0; 695517ed6e2SLong Li } 696517ed6e2SLong Li 697517ed6e2SLong Li /* Initialize shared data for the driver (all devices) */ 698517ed6e2SLong Li static int 699517ed6e2SLong Li mana_init_shared_data(void) 700517ed6e2SLong Li { 701517ed6e2SLong Li int ret = 0; 702517ed6e2SLong Li const struct rte_memzone *secondary_mz; 703517ed6e2SLong Li 704517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data_lock); 705517ed6e2SLong Li 706517ed6e2SLong Li /* Skip if shared data is already initialized */ 707517ed6e2SLong Li if (mana_shared_data) 708517ed6e2SLong Li goto exit; 709517ed6e2SLong Li 710517ed6e2SLong Li if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 711517ed6e2SLong Li mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA, 712517ed6e2SLong Li sizeof(*mana_shared_data), 713517ed6e2SLong Li SOCKET_ID_ANY, 0); 714517ed6e2SLong Li if (!mana_shared_mz) { 715517ed6e2SLong Li DRV_LOG(ERR, "Cannot allocate mana shared data"); 716517ed6e2SLong Li ret = -rte_errno; 717517ed6e2SLong Li goto exit; 718517ed6e2SLong Li } 719517ed6e2SLong Li 720517ed6e2SLong Li mana_shared_data = mana_shared_mz->addr; 721517ed6e2SLong Li memset(mana_shared_data, 0, sizeof(*mana_shared_data)); 722517ed6e2SLong Li rte_spinlock_init(&mana_shared_data->lock); 723517ed6e2SLong Li } else { 724517ed6e2SLong Li secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA); 725517ed6e2SLong Li if (!secondary_mz) { 726517ed6e2SLong Li DRV_LOG(ERR, "Cannot attach mana shared data"); 727517ed6e2SLong Li ret = -rte_errno; 728517ed6e2SLong Li goto exit; 729517ed6e2SLong Li } 730517ed6e2SLong Li 731517ed6e2SLong Li mana_shared_data = secondary_mz->addr; 732517ed6e2SLong Li memset(&mana_local_data, 0, sizeof(mana_local_data)); 733517ed6e2SLong Li } 734517ed6e2SLong Li 735517ed6e2SLong Li exit: 736517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data_lock); 737517ed6e2SLong Li 738517ed6e2SLong Li return ret; 739517ed6e2SLong Li } 740517ed6e2SLong Li 741517ed6e2SLong Li /* 742517ed6e2SLong Li * Init the data structures for use in primary and secondary processes. 743517ed6e2SLong Li */ 744517ed6e2SLong Li static int 745517ed6e2SLong Li mana_init_once(void) 746517ed6e2SLong Li { 747517ed6e2SLong Li int ret; 748517ed6e2SLong Li 749517ed6e2SLong Li ret = mana_init_shared_data(); 750517ed6e2SLong Li if (ret) 751517ed6e2SLong Li return ret; 752517ed6e2SLong Li 753517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock); 754517ed6e2SLong Li 755517ed6e2SLong Li switch (rte_eal_process_type()) { 756517ed6e2SLong Li case RTE_PROC_PRIMARY: 757517ed6e2SLong Li if (mana_shared_data->init_done) 758517ed6e2SLong Li break; 759517ed6e2SLong Li 760517ed6e2SLong Li ret = mana_mp_init_primary(); 761517ed6e2SLong Li if (ret) 762517ed6e2SLong Li break; 763517ed6e2SLong Li DRV_LOG(ERR, "MP INIT PRIMARY"); 764517ed6e2SLong Li 765517ed6e2SLong Li mana_shared_data->init_done = 1; 766517ed6e2SLong Li break; 767517ed6e2SLong Li 768517ed6e2SLong Li case RTE_PROC_SECONDARY: 769517ed6e2SLong Li 770517ed6e2SLong Li if (mana_local_data.init_done) 771517ed6e2SLong Li break; 772517ed6e2SLong Li 773517ed6e2SLong Li ret = mana_mp_init_secondary(); 774517ed6e2SLong Li if (ret) 775517ed6e2SLong Li break; 776517ed6e2SLong Li 777517ed6e2SLong Li DRV_LOG(ERR, "MP INIT SECONDARY"); 778517ed6e2SLong Li 779517ed6e2SLong Li mana_local_data.init_done = 1; 780517ed6e2SLong Li break; 781517ed6e2SLong Li 782517ed6e2SLong Li default: 783517ed6e2SLong Li /* Impossible, internal error */ 784517ed6e2SLong Li ret = -EPROTO; 785517ed6e2SLong Li break; 786517ed6e2SLong Li } 787517ed6e2SLong Li 788517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock); 789517ed6e2SLong Li 790517ed6e2SLong Li return ret; 791517ed6e2SLong Li } 792517ed6e2SLong Li 793517ed6e2SLong Li /* 794517ed6e2SLong Li * Probe an IB port 795517ed6e2SLong Li * Return value: 796517ed6e2SLong Li * positive value: successfully probed port 797517ed6e2SLong Li * 0: port not matching specified MAC address 798517ed6e2SLong Li * negative value: error code 799517ed6e2SLong Li */ 800517ed6e2SLong Li static int 801517ed6e2SLong Li mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr, 802517ed6e2SLong Li uint8_t port, struct rte_pci_device *pci_dev, struct rte_ether_addr *addr) 803517ed6e2SLong Li { 804517ed6e2SLong Li struct mana_priv *priv = NULL; 805517ed6e2SLong Li struct rte_eth_dev *eth_dev = NULL; 806517ed6e2SLong Li struct ibv_parent_domain_init_attr attr = {0}; 807517ed6e2SLong Li char address[64]; 808517ed6e2SLong Li char name[RTE_ETH_NAME_MAX_LEN]; 809517ed6e2SLong Li int ret; 810517ed6e2SLong Li struct ibv_context *ctx = NULL; 811517ed6e2SLong Li 812517ed6e2SLong Li rte_ether_format_addr(address, sizeof(address), addr); 813517ed6e2SLong Li DRV_LOG(INFO, "device located port %u address %s", port, address); 814517ed6e2SLong Li 815517ed6e2SLong Li priv = rte_zmalloc_socket(NULL, sizeof(*priv), RTE_CACHE_LINE_SIZE, 816517ed6e2SLong Li SOCKET_ID_ANY); 817517ed6e2SLong Li if (!priv) 818517ed6e2SLong Li return -ENOMEM; 819517ed6e2SLong Li 820517ed6e2SLong Li snprintf(name, sizeof(name), "%s_port%d", pci_dev->device.name, port); 821517ed6e2SLong Li 822517ed6e2SLong Li if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 823517ed6e2SLong Li int fd; 824517ed6e2SLong Li 825517ed6e2SLong Li eth_dev = rte_eth_dev_attach_secondary(name); 826517ed6e2SLong Li if (!eth_dev) { 827517ed6e2SLong Li DRV_LOG(ERR, "Can't attach to dev %s", name); 828517ed6e2SLong Li ret = -ENOMEM; 829517ed6e2SLong Li goto failed; 830517ed6e2SLong Li } 831517ed6e2SLong Li 832517ed6e2SLong Li eth_dev->device = &pci_dev->device; 833517ed6e2SLong Li eth_dev->dev_ops = &mana_dev_secondary_ops; 834517ed6e2SLong Li ret = mana_proc_priv_init(eth_dev); 835517ed6e2SLong Li if (ret) 836517ed6e2SLong Li goto failed; 837517ed6e2SLong Li priv->process_priv = eth_dev->process_private; 838517ed6e2SLong Li 839517ed6e2SLong Li /* Get the IB FD from the primary process */ 840517ed6e2SLong Li fd = mana_mp_req_verbs_cmd_fd(eth_dev); 841517ed6e2SLong Li if (fd < 0) { 842517ed6e2SLong Li DRV_LOG(ERR, "Failed to get FD %d", fd); 843517ed6e2SLong Li ret = -ENODEV; 844517ed6e2SLong Li goto failed; 845517ed6e2SLong Li } 846517ed6e2SLong Li 847517ed6e2SLong Li ret = mana_map_doorbell_secondary(eth_dev, fd); 848517ed6e2SLong Li if (ret) { 849517ed6e2SLong Li DRV_LOG(ERR, "Failed secondary map %d", fd); 850517ed6e2SLong Li goto failed; 851517ed6e2SLong Li } 852517ed6e2SLong Li 853517ed6e2SLong Li /* fd is no not used after mapping doorbell */ 854517ed6e2SLong Li close(fd); 855517ed6e2SLong Li 856517ed6e2SLong Li eth_dev->tx_pkt_burst = mana_tx_burst_removed; 857517ed6e2SLong Li eth_dev->rx_pkt_burst = mana_rx_burst_removed; 858517ed6e2SLong Li 859517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock); 860517ed6e2SLong Li mana_shared_data->secondary_cnt++; 861517ed6e2SLong Li mana_local_data.secondary_cnt++; 862517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock); 863517ed6e2SLong Li 864517ed6e2SLong Li rte_eth_copy_pci_info(eth_dev, pci_dev); 865517ed6e2SLong Li rte_eth_dev_probing_finish(eth_dev); 866517ed6e2SLong Li 867517ed6e2SLong Li return 0; 868517ed6e2SLong Li } 869517ed6e2SLong Li 870517ed6e2SLong Li ctx = ibv_open_device(ibdev); 871517ed6e2SLong Li if (!ctx) { 872517ed6e2SLong Li DRV_LOG(ERR, "Failed to open IB device %s", ibdev->name); 873517ed6e2SLong Li ret = -ENODEV; 874517ed6e2SLong Li goto failed; 875517ed6e2SLong Li } 876517ed6e2SLong Li 877517ed6e2SLong Li eth_dev = rte_eth_dev_allocate(name); 878517ed6e2SLong Li if (!eth_dev) { 879517ed6e2SLong Li ret = -ENOMEM; 880517ed6e2SLong Li goto failed; 881517ed6e2SLong Li } 882517ed6e2SLong Li 883517ed6e2SLong Li eth_dev->data->mac_addrs = 884517ed6e2SLong Li rte_calloc("mana_mac", 1, 885517ed6e2SLong Li sizeof(struct rte_ether_addr), 0); 886517ed6e2SLong Li if (!eth_dev->data->mac_addrs) { 887517ed6e2SLong Li ret = -ENOMEM; 888517ed6e2SLong Li goto failed; 889517ed6e2SLong Li } 890517ed6e2SLong Li 891517ed6e2SLong Li rte_ether_addr_copy(addr, eth_dev->data->mac_addrs); 892517ed6e2SLong Li 893517ed6e2SLong Li priv->ib_pd = ibv_alloc_pd(ctx); 894517ed6e2SLong Li if (!priv->ib_pd) { 895517ed6e2SLong Li DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port); 896517ed6e2SLong Li ret = -ENOMEM; 897517ed6e2SLong Li goto failed; 898517ed6e2SLong Li } 899517ed6e2SLong Li 900517ed6e2SLong Li /* Create a parent domain with the port number */ 901517ed6e2SLong Li attr.pd = priv->ib_pd; 902517ed6e2SLong Li attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT; 903517ed6e2SLong Li attr.pd_context = (void *)(uint64_t)port; 904517ed6e2SLong Li priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr); 905517ed6e2SLong Li if (!priv->ib_parent_pd) { 906517ed6e2SLong Li DRV_LOG(ERR, "ibv_alloc_parent_domain failed port %d", port); 907517ed6e2SLong Li ret = -ENOMEM; 908517ed6e2SLong Li goto failed; 909517ed6e2SLong Li } 910517ed6e2SLong Li 911517ed6e2SLong Li priv->ib_ctx = ctx; 912517ed6e2SLong Li priv->port_id = eth_dev->data->port_id; 913517ed6e2SLong Li priv->dev_port = port; 914517ed6e2SLong Li eth_dev->data->dev_private = priv; 915517ed6e2SLong Li priv->dev_data = eth_dev->data; 916517ed6e2SLong Li 917517ed6e2SLong Li priv->max_rx_queues = dev_attr->orig_attr.max_qp; 918517ed6e2SLong Li priv->max_tx_queues = dev_attr->orig_attr.max_qp; 919517ed6e2SLong Li 920517ed6e2SLong Li priv->max_rx_desc = 921517ed6e2SLong Li RTE_MIN(dev_attr->orig_attr.max_qp_wr, 922517ed6e2SLong Li dev_attr->orig_attr.max_cqe); 923517ed6e2SLong Li priv->max_tx_desc = 924517ed6e2SLong Li RTE_MIN(dev_attr->orig_attr.max_qp_wr, 925517ed6e2SLong Li dev_attr->orig_attr.max_cqe); 926517ed6e2SLong Li 927517ed6e2SLong Li priv->max_send_sge = dev_attr->orig_attr.max_sge; 928517ed6e2SLong Li priv->max_recv_sge = dev_attr->orig_attr.max_sge; 929517ed6e2SLong Li 930517ed6e2SLong Li priv->max_mr = dev_attr->orig_attr.max_mr; 931517ed6e2SLong Li priv->max_mr_size = dev_attr->orig_attr.max_mr_size; 932517ed6e2SLong Li 933517ed6e2SLong Li DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d", 934517ed6e2SLong Li name, priv->max_rx_queues, priv->max_rx_desc, 935517ed6e2SLong Li priv->max_send_sge); 936517ed6e2SLong Li 937517ed6e2SLong Li rte_eth_copy_pci_info(eth_dev, pci_dev); 938517ed6e2SLong Li 939bd15f237SLong Li /* Create async interrupt handler */ 940bd15f237SLong Li ret = mana_intr_install(priv); 941bd15f237SLong Li if (ret) { 942bd15f237SLong Li DRV_LOG(ERR, "Failed to install intr handler"); 943bd15f237SLong Li goto failed; 944bd15f237SLong Li } 945bd15f237SLong Li 946517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock); 947517ed6e2SLong Li mana_shared_data->primary_cnt++; 948517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock); 949517ed6e2SLong Li 950517ed6e2SLong Li eth_dev->device = &pci_dev->device; 951517ed6e2SLong Li 952517ed6e2SLong Li DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id); 953517ed6e2SLong Li 954517ed6e2SLong Li eth_dev->rx_pkt_burst = mana_rx_burst_removed; 955517ed6e2SLong Li eth_dev->tx_pkt_burst = mana_tx_burst_removed; 956517ed6e2SLong Li eth_dev->dev_ops = &mana_dev_ops; 957517ed6e2SLong Li 958517ed6e2SLong Li rte_eth_dev_probing_finish(eth_dev); 959517ed6e2SLong Li 960517ed6e2SLong Li return 0; 961517ed6e2SLong Li 962517ed6e2SLong Li failed: 963517ed6e2SLong Li /* Free the resource for the port failed */ 964517ed6e2SLong Li if (priv) { 965517ed6e2SLong Li if (priv->ib_parent_pd) 966517ed6e2SLong Li ibv_dealloc_pd(priv->ib_parent_pd); 967517ed6e2SLong Li 968517ed6e2SLong Li if (priv->ib_pd) 969517ed6e2SLong Li ibv_dealloc_pd(priv->ib_pd); 970517ed6e2SLong Li } 971517ed6e2SLong Li 972517ed6e2SLong Li if (eth_dev) 973517ed6e2SLong Li rte_eth_dev_release_port(eth_dev); 974517ed6e2SLong Li 975517ed6e2SLong Li rte_free(priv); 976517ed6e2SLong Li 977517ed6e2SLong Li if (ctx) 978517ed6e2SLong Li ibv_close_device(ctx); 979517ed6e2SLong Li 980517ed6e2SLong Li return ret; 981517ed6e2SLong Li } 982517ed6e2SLong Li 983517ed6e2SLong Li /* 984517ed6e2SLong Li * Goes through the IB device list to look for the IB port matching the 985517ed6e2SLong Li * mac_addr. If found, create a rte_eth_dev for it. 986517ed6e2SLong Li */ 987517ed6e2SLong Li static int 988517ed6e2SLong Li mana_pci_probe_mac(struct rte_pci_device *pci_dev, 989517ed6e2SLong Li struct rte_ether_addr *mac_addr) 990517ed6e2SLong Li { 991517ed6e2SLong Li struct ibv_device **ibv_list; 992517ed6e2SLong Li int ibv_idx; 993517ed6e2SLong Li struct ibv_context *ctx; 994517ed6e2SLong Li int num_devices; 995517ed6e2SLong Li int ret = 0; 996517ed6e2SLong Li uint8_t port; 997517ed6e2SLong Li 998517ed6e2SLong Li ibv_list = ibv_get_device_list(&num_devices); 999517ed6e2SLong Li for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) { 1000517ed6e2SLong Li struct ibv_device *ibdev = ibv_list[ibv_idx]; 1001517ed6e2SLong Li struct rte_pci_addr pci_addr; 1002517ed6e2SLong Li struct ibv_device_attr_ex dev_attr; 1003517ed6e2SLong Li 1004517ed6e2SLong Li DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s", 1005517ed6e2SLong Li ibdev->name, ibdev->dev_name, ibdev->ibdev_path); 1006517ed6e2SLong Li 1007517ed6e2SLong Li if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr)) 1008517ed6e2SLong Li continue; 1009517ed6e2SLong Li 1010517ed6e2SLong Li /* Ignore if this IB device is not this PCI device */ 1011517ed6e2SLong Li if (pci_dev->addr.domain != pci_addr.domain || 1012517ed6e2SLong Li pci_dev->addr.bus != pci_addr.bus || 1013517ed6e2SLong Li pci_dev->addr.devid != pci_addr.devid || 1014517ed6e2SLong Li pci_dev->addr.function != pci_addr.function) 1015517ed6e2SLong Li continue; 1016517ed6e2SLong Li 1017517ed6e2SLong Li ctx = ibv_open_device(ibdev); 1018517ed6e2SLong Li if (!ctx) { 1019517ed6e2SLong Li DRV_LOG(ERR, "Failed to open IB device %s", 1020517ed6e2SLong Li ibdev->name); 1021517ed6e2SLong Li continue; 1022517ed6e2SLong Li } 1023517ed6e2SLong Li ret = ibv_query_device_ex(ctx, NULL, &dev_attr); 1024517ed6e2SLong Li ibv_close_device(ctx); 1025517ed6e2SLong Li 1026517ed6e2SLong Li for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt; 1027517ed6e2SLong Li port++) { 1028517ed6e2SLong Li struct rte_ether_addr addr; 1029517ed6e2SLong Li ret = get_port_mac(ibdev, port, &addr); 1030517ed6e2SLong Li if (ret) 1031517ed6e2SLong Li continue; 1032517ed6e2SLong Li 1033517ed6e2SLong Li if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr)) 1034517ed6e2SLong Li continue; 1035517ed6e2SLong Li 1036517ed6e2SLong Li ret = mana_probe_port(ibdev, &dev_attr, port, pci_dev, &addr); 1037517ed6e2SLong Li if (ret) 1038517ed6e2SLong Li DRV_LOG(ERR, "Probe on IB port %u failed %d", port, ret); 1039517ed6e2SLong Li else 1040517ed6e2SLong Li DRV_LOG(INFO, "Successfully probed on IB port %u", port); 1041517ed6e2SLong Li } 1042517ed6e2SLong Li } 1043517ed6e2SLong Li 1044517ed6e2SLong Li ibv_free_device_list(ibv_list); 1045517ed6e2SLong Li return ret; 1046517ed6e2SLong Li } 1047517ed6e2SLong Li 1048517ed6e2SLong Li /* 1049517ed6e2SLong Li * Main callback function from PCI bus to probe a device. 1050517ed6e2SLong Li */ 1051517ed6e2SLong Li static int 1052517ed6e2SLong Li mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 1053517ed6e2SLong Li struct rte_pci_device *pci_dev) 1054517ed6e2SLong Li { 1055517ed6e2SLong Li struct rte_devargs *args = pci_dev->device.devargs; 1056517ed6e2SLong Li struct mana_conf conf = {0}; 1057517ed6e2SLong Li unsigned int i; 1058517ed6e2SLong Li int ret; 1059517ed6e2SLong Li 1060517ed6e2SLong Li if (args && args->drv_str) { 1061517ed6e2SLong Li ret = mana_parse_args(args, &conf); 1062517ed6e2SLong Li if (ret) { 1063517ed6e2SLong Li DRV_LOG(ERR, "Failed to parse parameters args = %s", 1064517ed6e2SLong Li args->drv_str); 1065517ed6e2SLong Li return ret; 1066517ed6e2SLong Li } 1067517ed6e2SLong Li } 1068517ed6e2SLong Li 1069517ed6e2SLong Li ret = mana_init_once(); 1070517ed6e2SLong Li if (ret) { 1071517ed6e2SLong Li DRV_LOG(ERR, "Failed to init PMD global data %d", ret); 1072517ed6e2SLong Li return ret; 1073517ed6e2SLong Li } 1074517ed6e2SLong Li 1075517ed6e2SLong Li /* If there are no driver parameters, probe on all ports */ 1076517ed6e2SLong Li if (!conf.index) 1077517ed6e2SLong Li return mana_pci_probe_mac(pci_dev, NULL); 1078517ed6e2SLong Li 1079517ed6e2SLong Li for (i = 0; i < conf.index; i++) { 1080517ed6e2SLong Li ret = mana_pci_probe_mac(pci_dev, &conf.mac_array[i]); 1081517ed6e2SLong Li if (ret) 1082517ed6e2SLong Li return ret; 1083517ed6e2SLong Li } 1084517ed6e2SLong Li 1085517ed6e2SLong Li return 0; 1086517ed6e2SLong Li } 1087517ed6e2SLong Li 1088517ed6e2SLong Li static int 1089517ed6e2SLong Li mana_dev_uninit(struct rte_eth_dev *dev) 1090517ed6e2SLong Li { 10910dbfecfeSLong Li return mana_dev_close(dev); 1092517ed6e2SLong Li } 1093517ed6e2SLong Li 1094517ed6e2SLong Li /* 1095517ed6e2SLong Li * Callback from PCI to remove this device. 1096517ed6e2SLong Li */ 1097517ed6e2SLong Li static int 1098517ed6e2SLong Li mana_pci_remove(struct rte_pci_device *pci_dev) 1099517ed6e2SLong Li { 1100517ed6e2SLong Li if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 1101517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data_lock); 1102517ed6e2SLong Li 1103517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock); 1104517ed6e2SLong Li 1105517ed6e2SLong Li RTE_VERIFY(mana_shared_data->primary_cnt > 0); 1106517ed6e2SLong Li mana_shared_data->primary_cnt--; 1107517ed6e2SLong Li if (!mana_shared_data->primary_cnt) { 1108517ed6e2SLong Li DRV_LOG(DEBUG, "mp uninit primary"); 1109517ed6e2SLong Li mana_mp_uninit_primary(); 1110517ed6e2SLong Li } 1111517ed6e2SLong Li 1112517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock); 1113517ed6e2SLong Li 1114517ed6e2SLong Li /* Also free the shared memory if this is the last */ 1115517ed6e2SLong Li if (!mana_shared_data->primary_cnt) { 1116517ed6e2SLong Li DRV_LOG(DEBUG, "free shared memezone data"); 1117517ed6e2SLong Li rte_memzone_free(mana_shared_mz); 1118517ed6e2SLong Li } 1119517ed6e2SLong Li 1120517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data_lock); 1121517ed6e2SLong Li } else { 1122517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data_lock); 1123517ed6e2SLong Li 1124517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock); 1125517ed6e2SLong Li RTE_VERIFY(mana_shared_data->secondary_cnt > 0); 1126517ed6e2SLong Li mana_shared_data->secondary_cnt--; 1127517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock); 1128517ed6e2SLong Li 1129517ed6e2SLong Li RTE_VERIFY(mana_local_data.secondary_cnt > 0); 1130517ed6e2SLong Li mana_local_data.secondary_cnt--; 1131517ed6e2SLong Li if (!mana_local_data.secondary_cnt) { 1132517ed6e2SLong Li DRV_LOG(DEBUG, "mp uninit secondary"); 1133517ed6e2SLong Li mana_mp_uninit_secondary(); 1134517ed6e2SLong Li } 1135517ed6e2SLong Li 1136517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data_lock); 1137517ed6e2SLong Li } 1138517ed6e2SLong Li 1139517ed6e2SLong Li return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit); 1140517ed6e2SLong Li } 1141517ed6e2SLong Li 1142517ed6e2SLong Li static const struct rte_pci_id mana_pci_id_map[] = { 1143517ed6e2SLong Li { 1144517ed6e2SLong Li RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, 1145517ed6e2SLong Li PCI_DEVICE_ID_MICROSOFT_MANA) 1146517ed6e2SLong Li }, 1147517ed6e2SLong Li { 1148517ed6e2SLong Li .vendor_id = 0 1149517ed6e2SLong Li }, 1150517ed6e2SLong Li }; 1151517ed6e2SLong Li 1152517ed6e2SLong Li static struct rte_pci_driver mana_pci_driver = { 1153517ed6e2SLong Li .id_table = mana_pci_id_map, 1154517ed6e2SLong Li .probe = mana_pci_probe, 1155517ed6e2SLong Li .remove = mana_pci_remove, 1156517ed6e2SLong Li .drv_flags = RTE_PCI_DRV_INTR_RMV, 1157517ed6e2SLong Li }; 1158517ed6e2SLong Li 1159517ed6e2SLong Li RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver); 1160517ed6e2SLong Li RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map); 1161517ed6e2SLong Li RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib"); 1162517ed6e2SLong Li RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE); 1163517ed6e2SLong Li RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE); 1164517ed6e2SLong Li RTE_PMD_REGISTER_PARAM_STRING(net_mana, ETH_MANA_MAC_ARG "=<mac_addr>"); 1165