1517ed6e2SLong Li /* SPDX-License-Identifier: BSD-3-Clause
2517ed6e2SLong Li * Copyright 2022 Microsoft Corporation
3517ed6e2SLong Li */
4517ed6e2SLong Li
5517ed6e2SLong Li #include <unistd.h>
6517ed6e2SLong Li #include <dirent.h>
7517ed6e2SLong Li #include <fcntl.h>
8517ed6e2SLong Li #include <sys/mman.h>
984497839SLong Li #include <sys/ioctl.h>
1084497839SLong Li #include <net/if.h>
11517ed6e2SLong Li
12517ed6e2SLong Li #include <ethdev_driver.h>
13517ed6e2SLong Li #include <ethdev_pci.h>
14517ed6e2SLong Li #include <rte_kvargs.h>
15517ed6e2SLong Li #include <rte_eal_paging.h>
168fa22e1fSThomas Monjalon #include <rte_pci.h>
17517ed6e2SLong Li
18517ed6e2SLong Li #include <infiniband/verbs.h>
19517ed6e2SLong Li #include <infiniband/manadv.h>
20517ed6e2SLong Li
21517ed6e2SLong Li #include <assert.h>
22517ed6e2SLong Li
23517ed6e2SLong Li #include "mana.h"
24517ed6e2SLong Li
25517ed6e2SLong Li /* Shared memory between primary/secondary processes, per driver */
26517ed6e2SLong Li /* Data to track primary/secondary usage */
27517ed6e2SLong Li struct mana_shared_data *mana_shared_data;
28517ed6e2SLong Li static struct mana_shared_data mana_local_data;
29517ed6e2SLong Li
30517ed6e2SLong Li /* The memory region for the above data */
31517ed6e2SLong Li static const struct rte_memzone *mana_shared_mz;
32517ed6e2SLong Li static const char *MZ_MANA_SHARED_DATA = "mana_shared_data";
33517ed6e2SLong Li
34517ed6e2SLong Li /* Spinlock for mana_shared_data */
35517ed6e2SLong Li static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
36517ed6e2SLong Li
37517ed6e2SLong Li /* Allocate a buffer on the stack and fill it with a printf format string. */
38517ed6e2SLong Li #define MANA_MKSTR(name, ...) \
39517ed6e2SLong Li int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
40517ed6e2SLong Li char name[mkstr_size_##name + 1]; \
41517ed6e2SLong Li \
42517ed6e2SLong Li memset(name, 0, mkstr_size_##name + 1); \
43517ed6e2SLong Li snprintf(name, sizeof(name), "" __VA_ARGS__)
44517ed6e2SLong Li
45517ed6e2SLong Li int mana_logtype_driver;
46517ed6e2SLong Li int mana_logtype_init;
47517ed6e2SLong Li
480dbfecfeSLong Li /*
490dbfecfeSLong Li * Callback from rdma-core to allocate a buffer for a queue.
500dbfecfeSLong Li */
510dbfecfeSLong Li void *
mana_alloc_verbs_buf(size_t size,void * data)520dbfecfeSLong Li mana_alloc_verbs_buf(size_t size, void *data)
530dbfecfeSLong Li {
540dbfecfeSLong Li void *ret;
550dbfecfeSLong Li size_t alignment = rte_mem_page_size();
560dbfecfeSLong Li int socket = (int)(uintptr_t)data;
570dbfecfeSLong Li
580dbfecfeSLong Li DRV_LOG(DEBUG, "size=%zu socket=%d", size, socket);
590dbfecfeSLong Li
600dbfecfeSLong Li if (alignment == (size_t)-1) {
610dbfecfeSLong Li DRV_LOG(ERR, "Failed to get mem page size");
620dbfecfeSLong Li rte_errno = ENOMEM;
630dbfecfeSLong Li return NULL;
640dbfecfeSLong Li }
650dbfecfeSLong Li
660dbfecfeSLong Li ret = rte_zmalloc_socket("mana_verb_buf", size, alignment, socket);
670dbfecfeSLong Li if (!ret && size)
680dbfecfeSLong Li rte_errno = ENOMEM;
690dbfecfeSLong Li return ret;
700dbfecfeSLong Li }
710dbfecfeSLong Li
720dbfecfeSLong Li void
mana_free_verbs_buf(void * ptr,void * data __rte_unused)730dbfecfeSLong Li mana_free_verbs_buf(void *ptr, void *data __rte_unused)
740dbfecfeSLong Li {
750dbfecfeSLong Li rte_free(ptr);
760dbfecfeSLong Li }
770dbfecfeSLong Li
780dbfecfeSLong Li static int
mana_dev_configure(struct rte_eth_dev * dev)790dbfecfeSLong Li mana_dev_configure(struct rte_eth_dev *dev)
800dbfecfeSLong Li {
810dbfecfeSLong Li struct mana_priv *priv = dev->data->dev_private;
820dbfecfeSLong Li struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
830dbfecfeSLong Li
840dbfecfeSLong Li if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
850dbfecfeSLong Li dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
860dbfecfeSLong Li
870dbfecfeSLong Li if (dev->data->nb_rx_queues != dev->data->nb_tx_queues) {
880dbfecfeSLong Li DRV_LOG(ERR, "Only support equal number of rx/tx queues");
890dbfecfeSLong Li return -EINVAL;
900dbfecfeSLong Li }
910dbfecfeSLong Li
920dbfecfeSLong Li if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
930dbfecfeSLong Li DRV_LOG(ERR, "number of TX/RX queues must be power of 2");
940dbfecfeSLong Li return -EINVAL;
950dbfecfeSLong Li }
960dbfecfeSLong Li
97f8a4217dSWei Hu priv->vlan_strip = !!(dev_conf->rxmode.offloads &
98f8a4217dSWei Hu RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
99f8a4217dSWei Hu
1000dbfecfeSLong Li priv->num_queues = dev->data->nb_rx_queues;
1010dbfecfeSLong Li
1020dbfecfeSLong Li manadv_set_context_attr(priv->ib_ctx, MANADV_CTX_ATTR_BUF_ALLOCATORS,
1030dbfecfeSLong Li (void *)((uintptr_t)&(struct manadv_ctx_allocators){
1040dbfecfeSLong Li .alloc = &mana_alloc_verbs_buf,
1050dbfecfeSLong Li .free = &mana_free_verbs_buf,
1060dbfecfeSLong Li .data = 0,
1070dbfecfeSLong Li }));
1080dbfecfeSLong Li
1090dbfecfeSLong Li return 0;
1100dbfecfeSLong Li }
1110dbfecfeSLong Li
112afd5d170SLong Li static void
rx_intr_vec_disable(struct mana_priv * priv)113afd5d170SLong Li rx_intr_vec_disable(struct mana_priv *priv)
114afd5d170SLong Li {
115afd5d170SLong Li struct rte_intr_handle *intr_handle = priv->intr_handle;
116afd5d170SLong Li
117afd5d170SLong Li rte_intr_free_epoll_fd(intr_handle);
118afd5d170SLong Li rte_intr_vec_list_free(intr_handle);
119afd5d170SLong Li rte_intr_nb_efd_set(intr_handle, 0);
120afd5d170SLong Li }
121afd5d170SLong Li
122afd5d170SLong Li static int
rx_intr_vec_enable(struct mana_priv * priv)123afd5d170SLong Li rx_intr_vec_enable(struct mana_priv *priv)
124afd5d170SLong Li {
125afd5d170SLong Li unsigned int i;
126afd5d170SLong Li unsigned int rxqs_n = priv->dev_data->nb_rx_queues;
127afd5d170SLong Li unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
128afd5d170SLong Li struct rte_intr_handle *intr_handle = priv->intr_handle;
129afd5d170SLong Li int ret;
130afd5d170SLong Li
131afd5d170SLong Li rx_intr_vec_disable(priv);
132afd5d170SLong Li
133afd5d170SLong Li if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) {
134afd5d170SLong Li DRV_LOG(ERR, "Failed to allocate memory for interrupt vector");
135afd5d170SLong Li return -ENOMEM;
136afd5d170SLong Li }
137afd5d170SLong Li
138afd5d170SLong Li for (i = 0; i < n; i++) {
139afd5d170SLong Li struct mana_rxq *rxq = priv->dev_data->rx_queues[i];
140afd5d170SLong Li
141afd5d170SLong Li ret = rte_intr_vec_list_index_set(intr_handle, i,
142afd5d170SLong Li RTE_INTR_VEC_RXTX_OFFSET + i);
143afd5d170SLong Li if (ret) {
144afd5d170SLong Li DRV_LOG(ERR, "Failed to set intr vec %u", i);
145afd5d170SLong Li return ret;
146afd5d170SLong Li }
147afd5d170SLong Li
148afd5d170SLong Li ret = rte_intr_efds_index_set(intr_handle, i, rxq->channel->fd);
149afd5d170SLong Li if (ret) {
150afd5d170SLong Li DRV_LOG(ERR, "Failed to set FD at intr %u", i);
151afd5d170SLong Li return ret;
152afd5d170SLong Li }
153afd5d170SLong Li }
154afd5d170SLong Li
155afd5d170SLong Li return rte_intr_nb_efd_set(intr_handle, n);
156afd5d170SLong Li }
157afd5d170SLong Li
158afd5d170SLong Li static void
rxq_intr_disable(struct mana_priv * priv)159afd5d170SLong Li rxq_intr_disable(struct mana_priv *priv)
160afd5d170SLong Li {
161afd5d170SLong Li int err = rte_errno;
162afd5d170SLong Li
163afd5d170SLong Li rx_intr_vec_disable(priv);
164afd5d170SLong Li rte_errno = err;
165afd5d170SLong Li }
166afd5d170SLong Li
167afd5d170SLong Li static int
rxq_intr_enable(struct mana_priv * priv)168afd5d170SLong Li rxq_intr_enable(struct mana_priv *priv)
169afd5d170SLong Li {
170afd5d170SLong Li const struct rte_eth_intr_conf *const intr_conf =
171afd5d170SLong Li &priv->dev_data->dev_conf.intr_conf;
172afd5d170SLong Li
173afd5d170SLong Li if (!intr_conf->rxq)
174afd5d170SLong Li return 0;
175afd5d170SLong Li
176afd5d170SLong Li return rx_intr_vec_enable(priv);
177afd5d170SLong Li }
178bd15f237SLong Li
1790dbfecfeSLong Li static int
mana_dev_start(struct rte_eth_dev * dev)18037544700SLong Li mana_dev_start(struct rte_eth_dev *dev)
18137544700SLong Li {
18237544700SLong Li int ret;
18337544700SLong Li struct mana_priv *priv = dev->data->dev_private;
18437544700SLong Li
18537544700SLong Li rte_spinlock_init(&priv->mr_btree_lock);
18637544700SLong Li ret = mana_mr_btree_init(&priv->mr_btree, MANA_MR_BTREE_CACHE_N,
18737544700SLong Li dev->device->numa_node);
18837544700SLong Li if (ret) {
18937544700SLong Li DRV_LOG(ERR, "Failed to init device MR btree %d", ret);
19037544700SLong Li return ret;
19137544700SLong Li }
19237544700SLong Li
19337544700SLong Li ret = mana_start_tx_queues(dev);
19437544700SLong Li if (ret) {
19537544700SLong Li DRV_LOG(ERR, "failed to start tx queues %d", ret);
19637544700SLong Li goto failed_tx;
19737544700SLong Li }
19837544700SLong Li
19937544700SLong Li ret = mana_start_rx_queues(dev);
20037544700SLong Li if (ret) {
20137544700SLong Li DRV_LOG(ERR, "failed to start rx queues %d", ret);
20237544700SLong Li goto failed_rx;
20337544700SLong Li }
20437544700SLong Li
20537544700SLong Li rte_wmb();
20637544700SLong Li
20737544700SLong Li dev->tx_pkt_burst = mana_tx_burst;
20837544700SLong Li dev->rx_pkt_burst = mana_rx_burst;
20937544700SLong Li
21037544700SLong Li DRV_LOG(INFO, "TX/RX queues have started");
21137544700SLong Li
21237544700SLong Li /* Enable datapath for secondary processes */
21337544700SLong Li mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX);
21437544700SLong Li
215afd5d170SLong Li ret = rxq_intr_enable(priv);
216afd5d170SLong Li if (ret) {
217afd5d170SLong Li DRV_LOG(ERR, "Failed to enable RX interrupts");
218afd5d170SLong Li goto failed_intr;
219afd5d170SLong Li }
220afd5d170SLong Li
22137544700SLong Li return 0;
22237544700SLong Li
223afd5d170SLong Li failed_intr:
224afd5d170SLong Li mana_stop_rx_queues(dev);
225afd5d170SLong Li
22637544700SLong Li failed_rx:
22737544700SLong Li mana_stop_tx_queues(dev);
22837544700SLong Li
22937544700SLong Li failed_tx:
23037544700SLong Li mana_mr_btree_free(&priv->mr_btree);
23137544700SLong Li
23237544700SLong Li return ret;
23337544700SLong Li }
23437544700SLong Li
23537544700SLong Li static int
mana_dev_stop(struct rte_eth_dev * dev)236afd5d170SLong Li mana_dev_stop(struct rte_eth_dev *dev)
23737544700SLong Li {
23837544700SLong Li int ret;
239afd5d170SLong Li struct mana_priv *priv = dev->data->dev_private;
240afd5d170SLong Li
241afd5d170SLong Li rxq_intr_disable(priv);
24237544700SLong Li
24337544700SLong Li dev->tx_pkt_burst = mana_tx_burst_removed;
24437544700SLong Li dev->rx_pkt_burst = mana_rx_burst_removed;
24537544700SLong Li
24637544700SLong Li /* Stop datapath on secondary processes */
24737544700SLong Li mana_mp_req_on_rxtx(dev, MANA_MP_REQ_STOP_RXTX);
24837544700SLong Li
24937544700SLong Li rte_wmb();
25037544700SLong Li
25137544700SLong Li ret = mana_stop_tx_queues(dev);
25237544700SLong Li if (ret) {
25337544700SLong Li DRV_LOG(ERR, "failed to stop tx queues");
25437544700SLong Li return ret;
25537544700SLong Li }
25637544700SLong Li
25737544700SLong Li ret = mana_stop_rx_queues(dev);
25837544700SLong Li if (ret) {
25937544700SLong Li DRV_LOG(ERR, "failed to stop tx queues");
26037544700SLong Li return ret;
26137544700SLong Li }
26237544700SLong Li
26337544700SLong Li return 0;
26437544700SLong Li }
26537544700SLong Li
266afd5d170SLong Li static int mana_intr_uninstall(struct mana_priv *priv);
267afd5d170SLong Li
26837544700SLong Li static int
mana_dev_close(struct rte_eth_dev * dev)2690dbfecfeSLong Li mana_dev_close(struct rte_eth_dev *dev)
2700dbfecfeSLong Li {
2710dbfecfeSLong Li struct mana_priv *priv = dev->data->dev_private;
2720dbfecfeSLong Li int ret;
2730dbfecfeSLong Li
2740f5db3c6SLong Li mana_remove_all_mr(priv);
2750f5db3c6SLong Li
276bd15f237SLong Li ret = mana_intr_uninstall(priv);
277bd15f237SLong Li if (ret)
278bd15f237SLong Li return ret;
279bd15f237SLong Li
2800dbfecfeSLong Li ret = ibv_close_device(priv->ib_ctx);
2810dbfecfeSLong Li if (ret) {
2820dbfecfeSLong Li ret = errno;
2830dbfecfeSLong Li return ret;
2840dbfecfeSLong Li }
2850dbfecfeSLong Li
2860dbfecfeSLong Li return 0;
2870dbfecfeSLong Li }
2880dbfecfeSLong Li
289d878cb09SLong Li static int
mana_dev_info_get(struct rte_eth_dev * dev,struct rte_eth_dev_info * dev_info)290d878cb09SLong Li mana_dev_info_get(struct rte_eth_dev *dev,
291d878cb09SLong Li struct rte_eth_dev_info *dev_info)
292d878cb09SLong Li {
293d878cb09SLong Li struct mana_priv *priv = dev->data->dev_private;
294d878cb09SLong Li
29584497839SLong Li dev_info->min_mtu = RTE_ETHER_MIN_MTU;
29684497839SLong Li dev_info->max_mtu = MANA_MAX_MTU;
297d878cb09SLong Li
298d878cb09SLong Li /* RX params */
299d878cb09SLong Li dev_info->min_rx_bufsize = MIN_RX_BUF_SIZE;
30084497839SLong Li dev_info->max_rx_pktlen = MANA_MAX_MTU + RTE_ETHER_HDR_LEN;
301d878cb09SLong Li
302b7e79896SLong Li dev_info->max_rx_queues = RTE_MIN(priv->max_rx_queues, UINT16_MAX);
303b7e79896SLong Li dev_info->max_tx_queues = RTE_MIN(priv->max_tx_queues, UINT16_MAX);
304d878cb09SLong Li
305d878cb09SLong Li dev_info->max_mac_addrs = MANA_MAX_MAC_ADDR;
306d878cb09SLong Li dev_info->max_hash_mac_addrs = 0;
307d878cb09SLong Li
308d878cb09SLong Li dev_info->max_vfs = 1;
309d878cb09SLong Li
310d878cb09SLong Li /* Offload params */
311d878cb09SLong Li dev_info->rx_offload_capa = MANA_DEV_RX_OFFLOAD_SUPPORT;
312d878cb09SLong Li
313d878cb09SLong Li dev_info->tx_offload_capa = MANA_DEV_TX_OFFLOAD_SUPPORT;
314d878cb09SLong Li
315d878cb09SLong Li /* RSS */
316d878cb09SLong Li dev_info->reta_size = INDIRECTION_TABLE_NUM_ELEMENTS;
317d878cb09SLong Li dev_info->hash_key_size = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES;
318d878cb09SLong Li dev_info->flow_type_rss_offloads = MANA_ETH_RSS_SUPPORT;
319d878cb09SLong Li
320d878cb09SLong Li /* Thresholds */
321d878cb09SLong Li dev_info->default_rxconf = (struct rte_eth_rxconf){
322d878cb09SLong Li .rx_thresh = {
323d878cb09SLong Li .pthresh = 8,
324d878cb09SLong Li .hthresh = 8,
325d878cb09SLong Li .wthresh = 0,
326d878cb09SLong Li },
327d878cb09SLong Li .rx_free_thresh = 32,
328d878cb09SLong Li /* If no descriptors available, pkts are dropped by default */
329d878cb09SLong Li .rx_drop_en = 1,
330d878cb09SLong Li };
331d878cb09SLong Li
332d878cb09SLong Li dev_info->default_txconf = (struct rte_eth_txconf){
333d878cb09SLong Li .tx_thresh = {
334d878cb09SLong Li .pthresh = 32,
335d878cb09SLong Li .hthresh = 0,
336d878cb09SLong Li .wthresh = 0,
337d878cb09SLong Li },
338d878cb09SLong Li .tx_rs_thresh = 32,
339d878cb09SLong Li .tx_free_thresh = 32,
340d878cb09SLong Li };
341d878cb09SLong Li
342d878cb09SLong Li /* Buffer limits */
343d878cb09SLong Li dev_info->rx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
344b7e79896SLong Li dev_info->rx_desc_lim.nb_max = RTE_MIN(priv->max_rx_desc, UINT16_MAX);
345d878cb09SLong Li dev_info->rx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
346b7e79896SLong Li dev_info->rx_desc_lim.nb_seg_max =
347b7e79896SLong Li RTE_MIN(priv->max_recv_sge, UINT16_MAX);
348b7e79896SLong Li dev_info->rx_desc_lim.nb_mtu_seg_max =
349b7e79896SLong Li RTE_MIN(priv->max_recv_sge, UINT16_MAX);
350d878cb09SLong Li
351d878cb09SLong Li dev_info->tx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
352b7e79896SLong Li dev_info->tx_desc_lim.nb_max = RTE_MIN(priv->max_tx_desc, UINT16_MAX);
353d878cb09SLong Li dev_info->tx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
354b7e79896SLong Li dev_info->tx_desc_lim.nb_seg_max =
355b7e79896SLong Li RTE_MIN(priv->max_send_sge, UINT16_MAX);
356b7e79896SLong Li dev_info->tx_desc_lim.nb_mtu_seg_max =
357b7e79896SLong Li RTE_MIN(priv->max_send_sge, UINT16_MAX);
358d878cb09SLong Li
359d878cb09SLong Li /* Speed */
360d878cb09SLong Li dev_info->speed_capa = RTE_ETH_LINK_SPEED_100G;
361d878cb09SLong Li
362d878cb09SLong Li /* RX params */
363d878cb09SLong Li dev_info->default_rxportconf.burst_size = 1;
364d878cb09SLong Li dev_info->default_rxportconf.ring_size = MAX_RECEIVE_BUFFERS_PER_QUEUE;
365d878cb09SLong Li dev_info->default_rxportconf.nb_queues = 1;
366d878cb09SLong Li
367d878cb09SLong Li /* TX params */
368d878cb09SLong Li dev_info->default_txportconf.burst_size = 1;
369d878cb09SLong Li dev_info->default_txportconf.ring_size = MAX_SEND_BUFFERS_PER_QUEUE;
370d878cb09SLong Li dev_info->default_txportconf.nb_queues = 1;
371d878cb09SLong Li
372d878cb09SLong Li return 0;
373d878cb09SLong Li }
374d878cb09SLong Li
3750c63c005SLong Li static void
mana_dev_tx_queue_info(struct rte_eth_dev * dev,uint16_t queue_id,struct rte_eth_txq_info * qinfo)376f7dc479aSLong Li mana_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
377f7dc479aSLong Li struct rte_eth_txq_info *qinfo)
378f7dc479aSLong Li {
379f7dc479aSLong Li struct mana_txq *txq = dev->data->tx_queues[queue_id];
380f7dc479aSLong Li
381f7dc479aSLong Li qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
382f7dc479aSLong Li qinfo->nb_desc = txq->num_desc;
383f7dc479aSLong Li }
384f7dc479aSLong Li
385f7dc479aSLong Li static void
mana_dev_rx_queue_info(struct rte_eth_dev * dev,uint16_t queue_id,struct rte_eth_rxq_info * qinfo)3860c63c005SLong Li mana_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
3870c63c005SLong Li struct rte_eth_rxq_info *qinfo)
3880c63c005SLong Li {
3890c63c005SLong Li struct mana_rxq *rxq = dev->data->rx_queues[queue_id];
3900c63c005SLong Li
3910c63c005SLong Li qinfo->mp = rxq->mp;
3920c63c005SLong Li qinfo->nb_desc = rxq->num_desc;
3930c63c005SLong Li qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
3940c63c005SLong Li }
3950c63c005SLong Li
396d9679c3aSLong Li static const uint32_t *
mana_supported_ptypes(struct rte_eth_dev * dev __rte_unused,size_t * no_of_elements)397ba6a168aSSivaramakrishnan Venkat mana_supported_ptypes(struct rte_eth_dev *dev __rte_unused,
398ba6a168aSSivaramakrishnan Venkat size_t *no_of_elements)
399d9679c3aSLong Li {
400d9679c3aSLong Li static const uint32_t ptypes[] = {
401d9679c3aSLong Li RTE_PTYPE_L2_ETHER,
402d9679c3aSLong Li RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
403d9679c3aSLong Li RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
404d9679c3aSLong Li RTE_PTYPE_L4_FRAG,
405d9679c3aSLong Li RTE_PTYPE_L4_TCP,
406d9679c3aSLong Li RTE_PTYPE_L4_UDP,
407d9679c3aSLong Li };
408d9679c3aSLong Li
409ba6a168aSSivaramakrishnan Venkat *no_of_elements = RTE_DIM(ptypes);
410d9679c3aSLong Li return ptypes;
411d9679c3aSLong Li }
412d9679c3aSLong Li
41321958568SLong Li static int
mana_rss_hash_update(struct rte_eth_dev * dev,struct rte_eth_rss_conf * rss_conf)414a382177cSLong Li mana_rss_hash_update(struct rte_eth_dev *dev,
415a382177cSLong Li struct rte_eth_rss_conf *rss_conf)
416a382177cSLong Li {
417a382177cSLong Li struct mana_priv *priv = dev->data->dev_private;
418a382177cSLong Li
419a382177cSLong Li /* Currently can only update RSS hash when device is stopped */
420a382177cSLong Li if (dev->data->dev_started) {
421a382177cSLong Li DRV_LOG(ERR, "Can't update RSS after device has started");
422a382177cSLong Li return -ENODEV;
423a382177cSLong Li }
424a382177cSLong Li
425a382177cSLong Li if (rss_conf->rss_hf & ~MANA_ETH_RSS_SUPPORT) {
426a382177cSLong Li DRV_LOG(ERR, "Port %u invalid RSS HF 0x%" PRIx64,
427a382177cSLong Li dev->data->port_id, rss_conf->rss_hf);
428a382177cSLong Li return -EINVAL;
429a382177cSLong Li }
430a382177cSLong Li
431a382177cSLong Li if (rss_conf->rss_key && rss_conf->rss_key_len) {
432a382177cSLong Li if (rss_conf->rss_key_len != TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) {
433a382177cSLong Li DRV_LOG(ERR, "Port %u key len must be %u long",
434a382177cSLong Li dev->data->port_id,
435a382177cSLong Li TOEPLITZ_HASH_KEY_SIZE_IN_BYTES);
436a382177cSLong Li return -EINVAL;
437a382177cSLong Li }
438a382177cSLong Li
439a382177cSLong Li priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
440a382177cSLong Li priv->rss_conf.rss_key =
441a382177cSLong Li rte_zmalloc("mana_rss", rss_conf->rss_key_len,
442a382177cSLong Li RTE_CACHE_LINE_SIZE);
443a382177cSLong Li if (!priv->rss_conf.rss_key)
444a382177cSLong Li return -ENOMEM;
445a382177cSLong Li memcpy(priv->rss_conf.rss_key, rss_conf->rss_key,
446a382177cSLong Li rss_conf->rss_key_len);
447a382177cSLong Li }
448a382177cSLong Li priv->rss_conf.rss_hf = rss_conf->rss_hf;
449a382177cSLong Li
450a382177cSLong Li return 0;
451a382177cSLong Li }
452a382177cSLong Li
453a382177cSLong Li static int
mana_rss_hash_conf_get(struct rte_eth_dev * dev,struct rte_eth_rss_conf * rss_conf)454a382177cSLong Li mana_rss_hash_conf_get(struct rte_eth_dev *dev,
455a382177cSLong Li struct rte_eth_rss_conf *rss_conf)
456a382177cSLong Li {
457a382177cSLong Li struct mana_priv *priv = dev->data->dev_private;
458a382177cSLong Li
459a382177cSLong Li if (!rss_conf)
460a382177cSLong Li return -EINVAL;
461a382177cSLong Li
462a382177cSLong Li if (rss_conf->rss_key &&
463a382177cSLong Li rss_conf->rss_key_len >= priv->rss_conf.rss_key_len) {
464a382177cSLong Li memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
465a382177cSLong Li priv->rss_conf.rss_key_len);
466a382177cSLong Li }
467a382177cSLong Li
468a382177cSLong Li rss_conf->rss_key_len = priv->rss_conf.rss_key_len;
469a382177cSLong Li rss_conf->rss_hf = priv->rss_conf.rss_hf;
470a382177cSLong Li
471a382177cSLong Li return 0;
472a382177cSLong Li }
473a382177cSLong Li
474a382177cSLong Li static int
mana_dev_tx_queue_setup(struct rte_eth_dev * dev,uint16_t queue_idx,uint16_t nb_desc,unsigned int socket_id,const struct rte_eth_txconf * tx_conf __rte_unused)475f7dc479aSLong Li mana_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
476f7dc479aSLong Li uint16_t nb_desc, unsigned int socket_id,
477f7dc479aSLong Li const struct rte_eth_txconf *tx_conf __rte_unused)
478f7dc479aSLong Li
479f7dc479aSLong Li {
480f7dc479aSLong Li struct mana_priv *priv = dev->data->dev_private;
481f7dc479aSLong Li struct mana_txq *txq;
482f7dc479aSLong Li int ret;
483f7dc479aSLong Li
484f7dc479aSLong Li txq = rte_zmalloc_socket("mana_txq", sizeof(*txq), 0, socket_id);
485f7dc479aSLong Li if (!txq) {
486f7dc479aSLong Li DRV_LOG(ERR, "failed to allocate txq");
487f7dc479aSLong Li return -ENOMEM;
488f7dc479aSLong Li }
489f7dc479aSLong Li
490f7dc479aSLong Li txq->socket = socket_id;
491f7dc479aSLong Li
492f7dc479aSLong Li txq->desc_ring = rte_malloc_socket("mana_tx_desc_ring",
493f7dc479aSLong Li sizeof(struct mana_txq_desc) *
494f7dc479aSLong Li nb_desc,
495f7dc479aSLong Li RTE_CACHE_LINE_SIZE, socket_id);
496f7dc479aSLong Li if (!txq->desc_ring) {
497f7dc479aSLong Li DRV_LOG(ERR, "failed to allocate txq desc_ring");
498f7dc479aSLong Li ret = -ENOMEM;
499f7dc479aSLong Li goto fail;
500f7dc479aSLong Li }
501f7dc479aSLong Li
50231124619SLong Li txq->gdma_comp_buf = rte_malloc_socket("mana_txq_comp",
50331124619SLong Li sizeof(*txq->gdma_comp_buf) * nb_desc,
50431124619SLong Li RTE_CACHE_LINE_SIZE, socket_id);
50531124619SLong Li if (!txq->gdma_comp_buf) {
50631124619SLong Li DRV_LOG(ERR, "failed to allocate txq comp");
50731124619SLong Li ret = -ENOMEM;
50831124619SLong Li goto fail;
50931124619SLong Li }
51031124619SLong Li
5110f5db3c6SLong Li ret = mana_mr_btree_init(&txq->mr_btree,
5120f5db3c6SLong Li MANA_MR_BTREE_PER_QUEUE_N, socket_id);
5130f5db3c6SLong Li if (ret) {
5140f5db3c6SLong Li DRV_LOG(ERR, "Failed to init TXQ MR btree");
5150f5db3c6SLong Li goto fail;
5160f5db3c6SLong Li }
5170f5db3c6SLong Li
518f7dc479aSLong Li DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u txq->desc_ring %p",
519f7dc479aSLong Li queue_idx, nb_desc, socket_id, txq->desc_ring);
520f7dc479aSLong Li
521f7dc479aSLong Li txq->desc_ring_head = 0;
522f7dc479aSLong Li txq->desc_ring_tail = 0;
523f7dc479aSLong Li txq->priv = priv;
524f7dc479aSLong Li txq->num_desc = nb_desc;
525f7dc479aSLong Li dev->data->tx_queues[queue_idx] = txq;
526f7dc479aSLong Li
527f7dc479aSLong Li return 0;
528f7dc479aSLong Li
529f7dc479aSLong Li fail:
53031124619SLong Li rte_free(txq->gdma_comp_buf);
531f7dc479aSLong Li rte_free(txq->desc_ring);
532f7dc479aSLong Li rte_free(txq);
533f7dc479aSLong Li return ret;
534f7dc479aSLong Li }
535f7dc479aSLong Li
536f7dc479aSLong Li static void
mana_dev_tx_queue_release(struct rte_eth_dev * dev,uint16_t qid)537f7dc479aSLong Li mana_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
538f7dc479aSLong Li {
539f7dc479aSLong Li struct mana_txq *txq = dev->data->tx_queues[qid];
540f7dc479aSLong Li
5410f5db3c6SLong Li mana_mr_btree_free(&txq->mr_btree);
5420f5db3c6SLong Li
54331124619SLong Li rte_free(txq->gdma_comp_buf);
544f7dc479aSLong Li rte_free(txq->desc_ring);
545f7dc479aSLong Li rte_free(txq);
546f7dc479aSLong Li }
547f7dc479aSLong Li
548f7dc479aSLong Li static int
mana_dev_rx_queue_setup(struct rte_eth_dev * dev,uint16_t queue_idx,uint16_t nb_desc,unsigned int socket_id,const struct rte_eth_rxconf * rx_conf __rte_unused,struct rte_mempool * mp)5490c63c005SLong Li mana_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
5500c63c005SLong Li uint16_t nb_desc, unsigned int socket_id,
5510c63c005SLong Li const struct rte_eth_rxconf *rx_conf __rte_unused,
5520c63c005SLong Li struct rte_mempool *mp)
5530c63c005SLong Li {
5540c63c005SLong Li struct mana_priv *priv = dev->data->dev_private;
5550c63c005SLong Li struct mana_rxq *rxq;
5560c63c005SLong Li int ret;
5570c63c005SLong Li
5580c63c005SLong Li rxq = rte_zmalloc_socket("mana_rxq", sizeof(*rxq), 0, socket_id);
5590c63c005SLong Li if (!rxq) {
5600c63c005SLong Li DRV_LOG(ERR, "failed to allocate rxq");
5610c63c005SLong Li return -ENOMEM;
5620c63c005SLong Li }
5630c63c005SLong Li
5640c63c005SLong Li DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u",
5650c63c005SLong Li queue_idx, nb_desc, socket_id);
5660c63c005SLong Li
5670c63c005SLong Li rxq->socket = socket_id;
5680c63c005SLong Li
5690c63c005SLong Li rxq->desc_ring = rte_zmalloc_socket("mana_rx_mbuf_ring",
5700c63c005SLong Li sizeof(struct mana_rxq_desc) *
5710c63c005SLong Li nb_desc,
5720c63c005SLong Li RTE_CACHE_LINE_SIZE, socket_id);
5730c63c005SLong Li
5740c63c005SLong Li if (!rxq->desc_ring) {
5750c63c005SLong Li DRV_LOG(ERR, "failed to allocate rxq desc_ring");
5760c63c005SLong Li ret = -ENOMEM;
5770c63c005SLong Li goto fail;
5780c63c005SLong Li }
5790c63c005SLong Li
5800c63c005SLong Li rxq->desc_ring_head = 0;
5810c63c005SLong Li rxq->desc_ring_tail = 0;
5820c63c005SLong Li
58331124619SLong Li rxq->gdma_comp_buf = rte_malloc_socket("mana_rxq_comp",
58431124619SLong Li sizeof(*rxq->gdma_comp_buf) * nb_desc,
58531124619SLong Li RTE_CACHE_LINE_SIZE, socket_id);
58631124619SLong Li if (!rxq->gdma_comp_buf) {
58731124619SLong Li DRV_LOG(ERR, "failed to allocate rxq comp");
58831124619SLong Li ret = -ENOMEM;
58931124619SLong Li goto fail;
59031124619SLong Li }
59131124619SLong Li
5920f5db3c6SLong Li ret = mana_mr_btree_init(&rxq->mr_btree,
5930f5db3c6SLong Li MANA_MR_BTREE_PER_QUEUE_N, socket_id);
5940f5db3c6SLong Li if (ret) {
5950f5db3c6SLong Li DRV_LOG(ERR, "Failed to init RXQ MR btree");
5960f5db3c6SLong Li goto fail;
5970f5db3c6SLong Li }
5980f5db3c6SLong Li
5990c63c005SLong Li rxq->priv = priv;
6000c63c005SLong Li rxq->num_desc = nb_desc;
6010c63c005SLong Li rxq->mp = mp;
6020c63c005SLong Li dev->data->rx_queues[queue_idx] = rxq;
6030c63c005SLong Li
6040c63c005SLong Li return 0;
6050c63c005SLong Li
6060c63c005SLong Li fail:
60731124619SLong Li rte_free(rxq->gdma_comp_buf);
6080c63c005SLong Li rte_free(rxq->desc_ring);
6090c63c005SLong Li rte_free(rxq);
6100c63c005SLong Li return ret;
6110c63c005SLong Li }
6120c63c005SLong Li
6130c63c005SLong Li static void
mana_dev_rx_queue_release(struct rte_eth_dev * dev,uint16_t qid)6140c63c005SLong Li mana_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
6150c63c005SLong Li {
6160c63c005SLong Li struct mana_rxq *rxq = dev->data->rx_queues[qid];
6170c63c005SLong Li
6180f5db3c6SLong Li mana_mr_btree_free(&rxq->mr_btree);
6190f5db3c6SLong Li
62031124619SLong Li rte_free(rxq->gdma_comp_buf);
6210c63c005SLong Li rte_free(rxq->desc_ring);
6220c63c005SLong Li rte_free(rxq);
6230c63c005SLong Li }
6240c63c005SLong Li
6250c63c005SLong Li static int
mana_dev_link_update(struct rte_eth_dev * dev,int wait_to_complete __rte_unused)62621958568SLong Li mana_dev_link_update(struct rte_eth_dev *dev,
62721958568SLong Li int wait_to_complete __rte_unused)
62821958568SLong Li {
62921958568SLong Li struct rte_eth_link link;
63021958568SLong Li
63121958568SLong Li /* MANA has no concept of carrier state, always reporting UP */
63221958568SLong Li link = (struct rte_eth_link) {
63321958568SLong Li .link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
63421958568SLong Li .link_autoneg = RTE_ETH_LINK_SPEED_FIXED,
63521958568SLong Li .link_speed = RTE_ETH_SPEED_NUM_100G,
63621958568SLong Li .link_status = RTE_ETH_LINK_UP,
63721958568SLong Li };
63821958568SLong Li
63921958568SLong Li return rte_eth_linkstatus_set(dev, &link);
64021958568SLong Li }
64121958568SLong Li
642e350b568SLong Li static int
mana_dev_stats_get(struct rte_eth_dev * dev,struct rte_eth_stats * stats)643e350b568SLong Li mana_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
644e350b568SLong Li {
645e350b568SLong Li unsigned int i;
646e350b568SLong Li
647e350b568SLong Li for (i = 0; i < dev->data->nb_tx_queues; i++) {
648e350b568SLong Li struct mana_txq *txq = dev->data->tx_queues[i];
649e350b568SLong Li
650e350b568SLong Li if (!txq)
651e350b568SLong Li continue;
652e350b568SLong Li
653eb6f507aSLong Li stats->opackets += txq->stats.packets;
654eb6f507aSLong Li stats->obytes += txq->stats.bytes;
655eb6f507aSLong Li stats->oerrors += txq->stats.errors;
656e350b568SLong Li
657e350b568SLong Li if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
658e350b568SLong Li stats->q_opackets[i] = txq->stats.packets;
659e350b568SLong Li stats->q_obytes[i] = txq->stats.bytes;
660e350b568SLong Li }
661e350b568SLong Li }
662e350b568SLong Li
663e350b568SLong Li stats->rx_nombuf = 0;
664e350b568SLong Li for (i = 0; i < dev->data->nb_rx_queues; i++) {
665e350b568SLong Li struct mana_rxq *rxq = dev->data->rx_queues[i];
666e350b568SLong Li
667e350b568SLong Li if (!rxq)
668e350b568SLong Li continue;
669e350b568SLong Li
670eb6f507aSLong Li stats->ipackets += rxq->stats.packets;
671eb6f507aSLong Li stats->ibytes += rxq->stats.bytes;
672eb6f507aSLong Li stats->ierrors += rxq->stats.errors;
673e350b568SLong Li
674e350b568SLong Li /* There is no good way to get stats->imissed, not setting it */
675e350b568SLong Li
676e350b568SLong Li if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
677e350b568SLong Li stats->q_ipackets[i] = rxq->stats.packets;
678e350b568SLong Li stats->q_ibytes[i] = rxq->stats.bytes;
679e350b568SLong Li }
680e350b568SLong Li
681e350b568SLong Li stats->rx_nombuf += rxq->stats.nombuf;
682e350b568SLong Li }
683e350b568SLong Li
684e350b568SLong Li return 0;
685e350b568SLong Li }
686e350b568SLong Li
687e350b568SLong Li static int
mana_dev_stats_reset(struct rte_eth_dev * dev __rte_unused)688e350b568SLong Li mana_dev_stats_reset(struct rte_eth_dev *dev __rte_unused)
689e350b568SLong Li {
690e350b568SLong Li unsigned int i;
691e350b568SLong Li
692e350b568SLong Li PMD_INIT_FUNC_TRACE();
693e350b568SLong Li
694e350b568SLong Li for (i = 0; i < dev->data->nb_tx_queues; i++) {
695e350b568SLong Li struct mana_txq *txq = dev->data->tx_queues[i];
696e350b568SLong Li
697e350b568SLong Li if (!txq)
698e350b568SLong Li continue;
699e350b568SLong Li
700e350b568SLong Li memset(&txq->stats, 0, sizeof(txq->stats));
701e350b568SLong Li }
702e350b568SLong Li
703e350b568SLong Li for (i = 0; i < dev->data->nb_rx_queues; i++) {
704e350b568SLong Li struct mana_rxq *rxq = dev->data->rx_queues[i];
705e350b568SLong Li
706e350b568SLong Li if (!rxq)
707e350b568SLong Li continue;
708e350b568SLong Li
709e350b568SLong Li memset(&rxq->stats, 0, sizeof(rxq->stats));
710e350b568SLong Li }
711e350b568SLong Li
712e350b568SLong Li return 0;
713e350b568SLong Li }
714e350b568SLong Li
71584497839SLong Li static int
mana_get_ifname(const struct mana_priv * priv,char (* ifname)[IF_NAMESIZE])71684497839SLong Li mana_get_ifname(const struct mana_priv *priv, char (*ifname)[IF_NAMESIZE])
71784497839SLong Li {
718*c2c0c8afSMahmoud Maatuq int ret = -ENODEV;
71984497839SLong Li DIR *dir;
72084497839SLong Li struct dirent *dent;
72184497839SLong Li
72284497839SLong Li MANA_MKSTR(dirpath, "%s/device/net", priv->ib_ctx->device->ibdev_path);
72384497839SLong Li
72484497839SLong Li dir = opendir(dirpath);
72584497839SLong Li if (dir == NULL)
72684497839SLong Li return -ENODEV;
72784497839SLong Li
72884497839SLong Li while ((dent = readdir(dir)) != NULL) {
72984497839SLong Li char *name = dent->d_name;
73084497839SLong Li FILE *file;
73184497839SLong Li struct rte_ether_addr addr;
73284497839SLong Li char *mac = NULL;
73384497839SLong Li
73484497839SLong Li if ((name[0] == '.') &&
73584497839SLong Li ((name[1] == '\0') ||
73684497839SLong Li ((name[1] == '.') && (name[2] == '\0'))))
73784497839SLong Li continue;
73884497839SLong Li
73984497839SLong Li MANA_MKSTR(path, "%s/%s/address", dirpath, name);
74084497839SLong Li
74184497839SLong Li file = fopen(path, "r");
74284497839SLong Li if (!file) {
74384497839SLong Li ret = -ENODEV;
74484497839SLong Li break;
74584497839SLong Li }
74684497839SLong Li
74784497839SLong Li ret = fscanf(file, "%ms", &mac);
74884497839SLong Li fclose(file);
74984497839SLong Li
75084497839SLong Li if (ret <= 0) {
75184497839SLong Li ret = -EINVAL;
75284497839SLong Li break;
75384497839SLong Li }
75484497839SLong Li
75584497839SLong Li ret = rte_ether_unformat_addr(mac, &addr);
75684497839SLong Li free(mac);
75784497839SLong Li if (ret)
75884497839SLong Li break;
75984497839SLong Li
76084497839SLong Li if (rte_is_same_ether_addr(&addr, priv->dev_data->mac_addrs)) {
76184497839SLong Li strlcpy(*ifname, name, sizeof(*ifname));
76284497839SLong Li ret = 0;
76384497839SLong Li break;
76484497839SLong Li }
76584497839SLong Li }
76684497839SLong Li
76784497839SLong Li closedir(dir);
76884497839SLong Li return ret;
76984497839SLong Li }
77084497839SLong Li
77184497839SLong Li static int
mana_ifreq(const struct mana_priv * priv,int req,struct ifreq * ifr)77284497839SLong Li mana_ifreq(const struct mana_priv *priv, int req, struct ifreq *ifr)
77384497839SLong Li {
77484497839SLong Li int sock, ret;
77584497839SLong Li
77684497839SLong Li sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
77784497839SLong Li if (sock == -1)
77884497839SLong Li return -errno;
77984497839SLong Li
78084497839SLong Li ret = mana_get_ifname(priv, &ifr->ifr_name);
78184497839SLong Li if (ret) {
78284497839SLong Li close(sock);
78384497839SLong Li return ret;
78484497839SLong Li }
78584497839SLong Li
78684497839SLong Li if (ioctl(sock, req, ifr) == -1)
78784497839SLong Li ret = -errno;
78884497839SLong Li
78984497839SLong Li close(sock);
79084497839SLong Li
79184497839SLong Li return ret;
79284497839SLong Li }
79384497839SLong Li
79484497839SLong Li static int
mana_mtu_set(struct rte_eth_dev * dev,uint16_t mtu)79584497839SLong Li mana_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
79684497839SLong Li {
79784497839SLong Li struct mana_priv *priv = dev->data->dev_private;
79884497839SLong Li struct ifreq request = { .ifr_mtu = mtu, };
79984497839SLong Li
80084497839SLong Li return mana_ifreq(priv, SIOCSIFMTU, &request);
80184497839SLong Li }
80284497839SLong Li
803517ed6e2SLong Li static const struct eth_dev_ops mana_dev_ops = {
8040dbfecfeSLong Li .dev_configure = mana_dev_configure,
80537544700SLong Li .dev_start = mana_dev_start,
80637544700SLong Li .dev_stop = mana_dev_stop,
8070dbfecfeSLong Li .dev_close = mana_dev_close,
808d878cb09SLong Li .dev_infos_get = mana_dev_info_get,
809f7dc479aSLong Li .txq_info_get = mana_dev_tx_queue_info,
8100c63c005SLong Li .rxq_info_get = mana_dev_rx_queue_info,
811d9679c3aSLong Li .dev_supported_ptypes_get = mana_supported_ptypes,
812a382177cSLong Li .rss_hash_update = mana_rss_hash_update,
813a382177cSLong Li .rss_hash_conf_get = mana_rss_hash_conf_get,
814f7dc479aSLong Li .tx_queue_setup = mana_dev_tx_queue_setup,
815f7dc479aSLong Li .tx_queue_release = mana_dev_tx_queue_release,
8160c63c005SLong Li .rx_queue_setup = mana_dev_rx_queue_setup,
8170c63c005SLong Li .rx_queue_release = mana_dev_rx_queue_release,
818afd5d170SLong Li .rx_queue_intr_enable = mana_rx_intr_enable,
819afd5d170SLong Li .rx_queue_intr_disable = mana_rx_intr_disable,
82021958568SLong Li .link_update = mana_dev_link_update,
821e350b568SLong Li .stats_get = mana_dev_stats_get,
822e350b568SLong Li .stats_reset = mana_dev_stats_reset,
82384497839SLong Li .mtu_set = mana_mtu_set,
824517ed6e2SLong Li };
825517ed6e2SLong Li
826517ed6e2SLong Li static const struct eth_dev_ops mana_dev_secondary_ops = {
827e350b568SLong Li .stats_get = mana_dev_stats_get,
828e350b568SLong Li .stats_reset = mana_dev_stats_reset,
829d878cb09SLong Li .dev_infos_get = mana_dev_info_get,
830517ed6e2SLong Li };
831517ed6e2SLong Li
832517ed6e2SLong Li uint16_t
mana_rx_burst_removed(void * dpdk_rxq __rte_unused,struct rte_mbuf ** pkts __rte_unused,uint16_t pkts_n __rte_unused)833517ed6e2SLong Li mana_rx_burst_removed(void *dpdk_rxq __rte_unused,
834517ed6e2SLong Li struct rte_mbuf **pkts __rte_unused,
835517ed6e2SLong Li uint16_t pkts_n __rte_unused)
836517ed6e2SLong Li {
837517ed6e2SLong Li rte_mb();
838517ed6e2SLong Li return 0;
839517ed6e2SLong Li }
840517ed6e2SLong Li
841517ed6e2SLong Li uint16_t
mana_tx_burst_removed(void * dpdk_rxq __rte_unused,struct rte_mbuf ** pkts __rte_unused,uint16_t pkts_n __rte_unused)842517ed6e2SLong Li mana_tx_burst_removed(void *dpdk_rxq __rte_unused,
843517ed6e2SLong Li struct rte_mbuf **pkts __rte_unused,
844517ed6e2SLong Li uint16_t pkts_n __rte_unused)
845517ed6e2SLong Li {
846517ed6e2SLong Li rte_mb();
847517ed6e2SLong Li return 0;
848517ed6e2SLong Li }
849517ed6e2SLong Li
850517ed6e2SLong Li #define ETH_MANA_MAC_ARG "mac"
851517ed6e2SLong Li static const char * const mana_init_args[] = {
852517ed6e2SLong Li ETH_MANA_MAC_ARG,
853517ed6e2SLong Li NULL,
854517ed6e2SLong Li };
855517ed6e2SLong Li
856517ed6e2SLong Li /* Support of parsing up to 8 mac address from EAL command line */
857517ed6e2SLong Li #define MAX_NUM_ADDRESS 8
858517ed6e2SLong Li struct mana_conf {
859517ed6e2SLong Li struct rte_ether_addr mac_array[MAX_NUM_ADDRESS];
860517ed6e2SLong Li unsigned int index;
861517ed6e2SLong Li };
862517ed6e2SLong Li
863517ed6e2SLong Li static int
mana_arg_parse_callback(const char * key,const char * val,void * private)864517ed6e2SLong Li mana_arg_parse_callback(const char *key, const char *val, void *private)
865517ed6e2SLong Li {
866517ed6e2SLong Li struct mana_conf *conf = (struct mana_conf *)private;
867517ed6e2SLong Li int ret;
868517ed6e2SLong Li
869517ed6e2SLong Li DRV_LOG(INFO, "key=%s value=%s index=%d", key, val, conf->index);
870517ed6e2SLong Li
871517ed6e2SLong Li if (conf->index >= MAX_NUM_ADDRESS) {
872517ed6e2SLong Li DRV_LOG(ERR, "Exceeding max MAC address");
873517ed6e2SLong Li return 1;
874517ed6e2SLong Li }
875517ed6e2SLong Li
876517ed6e2SLong Li ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]);
877517ed6e2SLong Li if (ret) {
878517ed6e2SLong Li DRV_LOG(ERR, "Invalid MAC address %s", val);
879517ed6e2SLong Li return ret;
880517ed6e2SLong Li }
881517ed6e2SLong Li
882517ed6e2SLong Li conf->index++;
883517ed6e2SLong Li
884517ed6e2SLong Li return 0;
885517ed6e2SLong Li }
886517ed6e2SLong Li
887517ed6e2SLong Li static int
mana_parse_args(struct rte_devargs * devargs,struct mana_conf * conf)888517ed6e2SLong Li mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf)
889517ed6e2SLong Li {
890517ed6e2SLong Li struct rte_kvargs *kvlist;
891517ed6e2SLong Li unsigned int arg_count;
892517ed6e2SLong Li int ret = 0;
893517ed6e2SLong Li
894517ed6e2SLong Li kvlist = rte_kvargs_parse(devargs->drv_str, mana_init_args);
895517ed6e2SLong Li if (!kvlist) {
896517ed6e2SLong Li DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str);
897517ed6e2SLong Li return -EINVAL;
898517ed6e2SLong Li }
899517ed6e2SLong Li
900517ed6e2SLong Li arg_count = rte_kvargs_count(kvlist, mana_init_args[0]);
901517ed6e2SLong Li if (arg_count > MAX_NUM_ADDRESS) {
902517ed6e2SLong Li ret = -EINVAL;
903517ed6e2SLong Li goto free_kvlist;
904517ed6e2SLong Li }
905517ed6e2SLong Li ret = rte_kvargs_process(kvlist, mana_init_args[0],
906517ed6e2SLong Li mana_arg_parse_callback, conf);
907517ed6e2SLong Li if (ret) {
908517ed6e2SLong Li DRV_LOG(ERR, "error parsing args");
909517ed6e2SLong Li goto free_kvlist;
910517ed6e2SLong Li }
911517ed6e2SLong Li
912517ed6e2SLong Li free_kvlist:
913517ed6e2SLong Li rte_kvargs_free(kvlist);
914517ed6e2SLong Li return ret;
915517ed6e2SLong Li }
916517ed6e2SLong Li
917517ed6e2SLong Li static int
get_port_mac(struct ibv_device * device,unsigned int port,struct rte_ether_addr * addr)918517ed6e2SLong Li get_port_mac(struct ibv_device *device, unsigned int port,
919517ed6e2SLong Li struct rte_ether_addr *addr)
920517ed6e2SLong Li {
921517ed6e2SLong Li FILE *file;
922517ed6e2SLong Li int ret = 0;
923517ed6e2SLong Li DIR *dir;
924517ed6e2SLong Li struct dirent *dent;
925517ed6e2SLong Li unsigned int dev_port;
926517ed6e2SLong Li
927517ed6e2SLong Li MANA_MKSTR(path, "%s/device/net", device->ibdev_path);
928517ed6e2SLong Li
929517ed6e2SLong Li dir = opendir(path);
930517ed6e2SLong Li if (!dir)
931517ed6e2SLong Li return -ENOENT;
932517ed6e2SLong Li
933517ed6e2SLong Li while ((dent = readdir(dir))) {
934517ed6e2SLong Li char *name = dent->d_name;
93535d55f0fSLong Li char *mac = NULL;
936517ed6e2SLong Li
937517ed6e2SLong Li MANA_MKSTR(port_path, "%s/%s/dev_port", path, name);
938517ed6e2SLong Li
939517ed6e2SLong Li /* Ignore . and .. */
940517ed6e2SLong Li if ((name[0] == '.') &&
941517ed6e2SLong Li ((name[1] == '\0') ||
942517ed6e2SLong Li ((name[1] == '.') && (name[2] == '\0'))))
943517ed6e2SLong Li continue;
944517ed6e2SLong Li
945517ed6e2SLong Li file = fopen(port_path, "r");
946517ed6e2SLong Li if (!file)
947517ed6e2SLong Li continue;
948517ed6e2SLong Li
949517ed6e2SLong Li ret = fscanf(file, "%u", &dev_port);
950517ed6e2SLong Li fclose(file);
951517ed6e2SLong Li
952517ed6e2SLong Li if (ret != 1)
953517ed6e2SLong Li continue;
954517ed6e2SLong Li
955517ed6e2SLong Li /* Ethernet ports start at 0, IB port start at 1 */
956517ed6e2SLong Li if (dev_port == port - 1) {
957517ed6e2SLong Li MANA_MKSTR(address_path, "%s/%s/address", path, name);
958517ed6e2SLong Li
959517ed6e2SLong Li file = fopen(address_path, "r");
960517ed6e2SLong Li if (!file)
961517ed6e2SLong Li continue;
962517ed6e2SLong Li
96335d55f0fSLong Li ret = fscanf(file, "%ms", &mac);
964517ed6e2SLong Li fclose(file);
965517ed6e2SLong Li
966517ed6e2SLong Li if (ret < 0)
967517ed6e2SLong Li break;
968517ed6e2SLong Li
969517ed6e2SLong Li ret = rte_ether_unformat_addr(mac, addr);
970517ed6e2SLong Li if (ret)
971517ed6e2SLong Li DRV_LOG(ERR, "unrecognized mac addr %s", mac);
97235d55f0fSLong Li
97335d55f0fSLong Li free(mac);
974517ed6e2SLong Li break;
975517ed6e2SLong Li }
976517ed6e2SLong Li }
977517ed6e2SLong Li
978517ed6e2SLong Li closedir(dir);
979517ed6e2SLong Li return ret;
980517ed6e2SLong Li }
981517ed6e2SLong Li
982517ed6e2SLong Li static int
mana_ibv_device_to_pci_addr(const struct ibv_device * device,struct rte_pci_addr * pci_addr)983517ed6e2SLong Li mana_ibv_device_to_pci_addr(const struct ibv_device *device,
984517ed6e2SLong Li struct rte_pci_addr *pci_addr)
985517ed6e2SLong Li {
986517ed6e2SLong Li FILE *file;
987517ed6e2SLong Li char *line = NULL;
988517ed6e2SLong Li size_t len = 0;
989517ed6e2SLong Li
990517ed6e2SLong Li MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path);
991517ed6e2SLong Li
992517ed6e2SLong Li file = fopen(path, "r");
993517ed6e2SLong Li if (!file)
994517ed6e2SLong Li return -errno;
995517ed6e2SLong Li
996517ed6e2SLong Li while (getline(&line, &len, file) != -1) {
997517ed6e2SLong Li /* Extract information. */
998517ed6e2SLong Li if (sscanf(line,
999517ed6e2SLong Li "PCI_SLOT_NAME="
1000517ed6e2SLong Li "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
1001517ed6e2SLong Li &pci_addr->domain,
1002517ed6e2SLong Li &pci_addr->bus,
1003517ed6e2SLong Li &pci_addr->devid,
1004517ed6e2SLong Li &pci_addr->function) == 4) {
1005517ed6e2SLong Li break;
1006517ed6e2SLong Li }
1007517ed6e2SLong Li }
1008517ed6e2SLong Li
1009517ed6e2SLong Li free(line);
1010517ed6e2SLong Li fclose(file);
1011517ed6e2SLong Li return 0;
1012517ed6e2SLong Li }
1013517ed6e2SLong Li
1014bd15f237SLong Li /*
1015bd15f237SLong Li * Interrupt handler from IB layer to notify this device is being removed.
1016bd15f237SLong Li */
1017bd15f237SLong Li static void
mana_intr_handler(void * arg)1018bd15f237SLong Li mana_intr_handler(void *arg)
1019bd15f237SLong Li {
1020bd15f237SLong Li struct mana_priv *priv = arg;
1021bd15f237SLong Li struct ibv_context *ctx = priv->ib_ctx;
1022bd15f237SLong Li struct ibv_async_event event;
1023bd15f237SLong Li
1024bd15f237SLong Li /* Read and ack all messages from IB device */
1025bd15f237SLong Li while (true) {
1026bd15f237SLong Li if (ibv_get_async_event(ctx, &event))
1027bd15f237SLong Li break;
1028bd15f237SLong Li
1029bd15f237SLong Li if (event.event_type == IBV_EVENT_DEVICE_FATAL) {
1030bd15f237SLong Li struct rte_eth_dev *dev;
1031bd15f237SLong Li
1032bd15f237SLong Li dev = &rte_eth_devices[priv->port_id];
1033bd15f237SLong Li if (dev->data->dev_conf.intr_conf.rmv)
1034bd15f237SLong Li rte_eth_dev_callback_process(dev,
1035bd15f237SLong Li RTE_ETH_EVENT_INTR_RMV, NULL);
1036bd15f237SLong Li }
1037bd15f237SLong Li
1038bd15f237SLong Li ibv_ack_async_event(&event);
1039bd15f237SLong Li }
1040bd15f237SLong Li }
1041bd15f237SLong Li
1042bd15f237SLong Li static int
mana_intr_uninstall(struct mana_priv * priv)1043bd15f237SLong Li mana_intr_uninstall(struct mana_priv *priv)
1044bd15f237SLong Li {
1045bd15f237SLong Li int ret;
1046bd15f237SLong Li
1047bd15f237SLong Li ret = rte_intr_callback_unregister(priv->intr_handle,
1048bd15f237SLong Li mana_intr_handler, priv);
1049bd15f237SLong Li if (ret <= 0) {
1050bd15f237SLong Li DRV_LOG(ERR, "Failed to unregister intr callback ret %d", ret);
1051bd15f237SLong Li return ret;
1052bd15f237SLong Li }
1053bd15f237SLong Li
1054bd15f237SLong Li rte_intr_instance_free(priv->intr_handle);
1055bd15f237SLong Li
1056bd15f237SLong Li return 0;
1057bd15f237SLong Li }
1058bd15f237SLong Li
1059afd5d170SLong Li int
mana_fd_set_non_blocking(int fd)1060afd5d170SLong Li mana_fd_set_non_blocking(int fd)
1061bd15f237SLong Li {
1062afd5d170SLong Li int ret = fcntl(fd, F_GETFL);
1063afd5d170SLong Li
1064afd5d170SLong Li if (ret != -1 && !fcntl(fd, F_SETFL, ret | O_NONBLOCK))
1065afd5d170SLong Li return 0;
1066afd5d170SLong Li
1067afd5d170SLong Li rte_errno = errno;
1068afd5d170SLong Li return -rte_errno;
1069afd5d170SLong Li }
1070afd5d170SLong Li
1071afd5d170SLong Li static int
mana_intr_install(struct rte_eth_dev * eth_dev,struct mana_priv * priv)1072afd5d170SLong Li mana_intr_install(struct rte_eth_dev *eth_dev, struct mana_priv *priv)
1073afd5d170SLong Li {
1074afd5d170SLong Li int ret;
1075bd15f237SLong Li struct ibv_context *ctx = priv->ib_ctx;
1076bd15f237SLong Li
1077bd15f237SLong Li priv->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
1078bd15f237SLong Li if (!priv->intr_handle) {
1079bd15f237SLong Li DRV_LOG(ERR, "Failed to allocate intr_handle");
1080bd15f237SLong Li rte_errno = ENOMEM;
1081bd15f237SLong Li return -ENOMEM;
1082bd15f237SLong Li }
1083bd15f237SLong Li
1084afd5d170SLong Li ret = rte_intr_fd_set(priv->intr_handle, -1);
1085afd5d170SLong Li if (ret)
1086afd5d170SLong Li goto free_intr;
1087bd15f237SLong Li
1088afd5d170SLong Li ret = mana_fd_set_non_blocking(ctx->async_fd);
1089bd15f237SLong Li if (ret) {
1090bd15f237SLong Li DRV_LOG(ERR, "Failed to change async_fd to NONBLOCK");
1091bd15f237SLong Li goto free_intr;
1092bd15f237SLong Li }
1093bd15f237SLong Li
1094afd5d170SLong Li ret = rte_intr_fd_set(priv->intr_handle, ctx->async_fd);
1095afd5d170SLong Li if (ret)
1096afd5d170SLong Li goto free_intr;
1097afd5d170SLong Li
1098afd5d170SLong Li ret = rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT);
1099afd5d170SLong Li if (ret)
1100afd5d170SLong Li goto free_intr;
1101bd15f237SLong Li
1102bd15f237SLong Li ret = rte_intr_callback_register(priv->intr_handle,
1103bd15f237SLong Li mana_intr_handler, priv);
1104bd15f237SLong Li if (ret) {
1105bd15f237SLong Li DRV_LOG(ERR, "Failed to register intr callback");
1106bd15f237SLong Li rte_intr_fd_set(priv->intr_handle, -1);
1107afd5d170SLong Li goto free_intr;
1108bd15f237SLong Li }
1109bd15f237SLong Li
1110afd5d170SLong Li eth_dev->intr_handle = priv->intr_handle;
1111bd15f237SLong Li return 0;
1112bd15f237SLong Li
1113bd15f237SLong Li free_intr:
1114bd15f237SLong Li rte_intr_instance_free(priv->intr_handle);
1115bd15f237SLong Li priv->intr_handle = NULL;
1116bd15f237SLong Li
1117bd15f237SLong Li return ret;
1118bd15f237SLong Li }
1119bd15f237SLong Li
1120517ed6e2SLong Li static int
mana_proc_priv_init(struct rte_eth_dev * dev)1121517ed6e2SLong Li mana_proc_priv_init(struct rte_eth_dev *dev)
1122517ed6e2SLong Li {
1123517ed6e2SLong Li struct mana_process_priv *priv;
1124517ed6e2SLong Li
1125517ed6e2SLong Li priv = rte_zmalloc_socket("mana_proc_priv",
1126517ed6e2SLong Li sizeof(struct mana_process_priv),
1127517ed6e2SLong Li RTE_CACHE_LINE_SIZE,
1128517ed6e2SLong Li dev->device->numa_node);
1129517ed6e2SLong Li if (!priv)
1130517ed6e2SLong Li return -ENOMEM;
1131517ed6e2SLong Li
1132517ed6e2SLong Li dev->process_private = priv;
1133517ed6e2SLong Li return 0;
1134517ed6e2SLong Li }
1135517ed6e2SLong Li
1136517ed6e2SLong Li /*
1137517ed6e2SLong Li * Map the doorbell page for the secondary process through IB device handle.
1138517ed6e2SLong Li */
1139517ed6e2SLong Li static int
mana_map_doorbell_secondary(struct rte_eth_dev * eth_dev,int fd)1140517ed6e2SLong Li mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd)
1141517ed6e2SLong Li {
1142517ed6e2SLong Li struct mana_process_priv *priv = eth_dev->process_private;
1143517ed6e2SLong Li
1144517ed6e2SLong Li void *addr;
1145517ed6e2SLong Li
1146517ed6e2SLong Li addr = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0);
1147517ed6e2SLong Li if (addr == MAP_FAILED) {
1148517ed6e2SLong Li DRV_LOG(ERR, "Failed to map secondary doorbell port %u",
1149517ed6e2SLong Li eth_dev->data->port_id);
1150517ed6e2SLong Li return -ENOMEM;
1151517ed6e2SLong Li }
1152517ed6e2SLong Li
1153517ed6e2SLong Li DRV_LOG(INFO, "Secondary doorbell mapped to %p", addr);
1154517ed6e2SLong Li
1155517ed6e2SLong Li priv->db_page = addr;
1156517ed6e2SLong Li
1157517ed6e2SLong Li return 0;
1158517ed6e2SLong Li }
1159517ed6e2SLong Li
1160517ed6e2SLong Li /* Initialize shared data for the driver (all devices) */
1161517ed6e2SLong Li static int
mana_init_shared_data(void)1162517ed6e2SLong Li mana_init_shared_data(void)
1163517ed6e2SLong Li {
1164517ed6e2SLong Li int ret = 0;
1165517ed6e2SLong Li const struct rte_memzone *secondary_mz;
1166517ed6e2SLong Li
1167517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data_lock);
1168517ed6e2SLong Li
1169517ed6e2SLong Li /* Skip if shared data is already initialized */
1170517ed6e2SLong Li if (mana_shared_data)
1171517ed6e2SLong Li goto exit;
1172517ed6e2SLong Li
1173517ed6e2SLong Li if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1174517ed6e2SLong Li mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA,
1175517ed6e2SLong Li sizeof(*mana_shared_data),
1176517ed6e2SLong Li SOCKET_ID_ANY, 0);
1177517ed6e2SLong Li if (!mana_shared_mz) {
1178517ed6e2SLong Li DRV_LOG(ERR, "Cannot allocate mana shared data");
1179517ed6e2SLong Li ret = -rte_errno;
1180517ed6e2SLong Li goto exit;
1181517ed6e2SLong Li }
1182517ed6e2SLong Li
1183517ed6e2SLong Li mana_shared_data = mana_shared_mz->addr;
1184517ed6e2SLong Li memset(mana_shared_data, 0, sizeof(*mana_shared_data));
1185517ed6e2SLong Li rte_spinlock_init(&mana_shared_data->lock);
1186517ed6e2SLong Li } else {
1187517ed6e2SLong Li secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA);
1188517ed6e2SLong Li if (!secondary_mz) {
1189517ed6e2SLong Li DRV_LOG(ERR, "Cannot attach mana shared data");
1190517ed6e2SLong Li ret = -rte_errno;
1191517ed6e2SLong Li goto exit;
1192517ed6e2SLong Li }
1193517ed6e2SLong Li
1194517ed6e2SLong Li mana_shared_data = secondary_mz->addr;
1195517ed6e2SLong Li memset(&mana_local_data, 0, sizeof(mana_local_data));
1196517ed6e2SLong Li }
1197517ed6e2SLong Li
1198517ed6e2SLong Li exit:
1199517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data_lock);
1200517ed6e2SLong Li
1201517ed6e2SLong Li return ret;
1202517ed6e2SLong Li }
1203517ed6e2SLong Li
1204517ed6e2SLong Li /*
1205517ed6e2SLong Li * Init the data structures for use in primary and secondary processes.
1206517ed6e2SLong Li */
1207517ed6e2SLong Li static int
mana_init_once(void)1208517ed6e2SLong Li mana_init_once(void)
1209517ed6e2SLong Li {
1210517ed6e2SLong Li int ret;
1211517ed6e2SLong Li
1212517ed6e2SLong Li ret = mana_init_shared_data();
1213517ed6e2SLong Li if (ret)
1214517ed6e2SLong Li return ret;
1215517ed6e2SLong Li
1216517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock);
1217517ed6e2SLong Li
1218517ed6e2SLong Li switch (rte_eal_process_type()) {
1219517ed6e2SLong Li case RTE_PROC_PRIMARY:
1220517ed6e2SLong Li if (mana_shared_data->init_done)
1221517ed6e2SLong Li break;
1222517ed6e2SLong Li
1223517ed6e2SLong Li ret = mana_mp_init_primary();
1224517ed6e2SLong Li if (ret)
1225517ed6e2SLong Li break;
1226517ed6e2SLong Li DRV_LOG(ERR, "MP INIT PRIMARY");
1227517ed6e2SLong Li
1228517ed6e2SLong Li mana_shared_data->init_done = 1;
1229517ed6e2SLong Li break;
1230517ed6e2SLong Li
1231517ed6e2SLong Li case RTE_PROC_SECONDARY:
1232517ed6e2SLong Li
1233517ed6e2SLong Li if (mana_local_data.init_done)
1234517ed6e2SLong Li break;
1235517ed6e2SLong Li
1236517ed6e2SLong Li ret = mana_mp_init_secondary();
1237517ed6e2SLong Li if (ret)
1238517ed6e2SLong Li break;
1239517ed6e2SLong Li
1240517ed6e2SLong Li DRV_LOG(ERR, "MP INIT SECONDARY");
1241517ed6e2SLong Li
1242517ed6e2SLong Li mana_local_data.init_done = 1;
1243517ed6e2SLong Li break;
1244517ed6e2SLong Li
1245517ed6e2SLong Li default:
1246517ed6e2SLong Li /* Impossible, internal error */
1247517ed6e2SLong Li ret = -EPROTO;
1248517ed6e2SLong Li break;
1249517ed6e2SLong Li }
1250517ed6e2SLong Li
1251517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock);
1252517ed6e2SLong Li
1253517ed6e2SLong Li return ret;
1254517ed6e2SLong Li }
1255517ed6e2SLong Li
1256517ed6e2SLong Li /*
1257517ed6e2SLong Li * Probe an IB port
1258517ed6e2SLong Li * Return value:
1259517ed6e2SLong Li * positive value: successfully probed port
1260517ed6e2SLong Li * 0: port not matching specified MAC address
1261517ed6e2SLong Li * negative value: error code
1262517ed6e2SLong Li */
1263517ed6e2SLong Li static int
mana_probe_port(struct ibv_device * ibdev,struct ibv_device_attr_ex * dev_attr,uint8_t port,struct rte_pci_device * pci_dev,struct rte_ether_addr * addr)1264517ed6e2SLong Li mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr,
1265517ed6e2SLong Li uint8_t port, struct rte_pci_device *pci_dev, struct rte_ether_addr *addr)
1266517ed6e2SLong Li {
1267517ed6e2SLong Li struct mana_priv *priv = NULL;
1268517ed6e2SLong Li struct rte_eth_dev *eth_dev = NULL;
1269517ed6e2SLong Li struct ibv_parent_domain_init_attr attr = {0};
1270517ed6e2SLong Li char address[64];
1271517ed6e2SLong Li char name[RTE_ETH_NAME_MAX_LEN];
1272517ed6e2SLong Li int ret;
1273517ed6e2SLong Li struct ibv_context *ctx = NULL;
1274517ed6e2SLong Li
1275517ed6e2SLong Li rte_ether_format_addr(address, sizeof(address), addr);
1276517ed6e2SLong Li DRV_LOG(INFO, "device located port %u address %s", port, address);
1277517ed6e2SLong Li
1278517ed6e2SLong Li priv = rte_zmalloc_socket(NULL, sizeof(*priv), RTE_CACHE_LINE_SIZE,
1279517ed6e2SLong Li SOCKET_ID_ANY);
1280517ed6e2SLong Li if (!priv)
1281517ed6e2SLong Li return -ENOMEM;
1282517ed6e2SLong Li
1283517ed6e2SLong Li snprintf(name, sizeof(name), "%s_port%d", pci_dev->device.name, port);
1284517ed6e2SLong Li
1285517ed6e2SLong Li if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1286517ed6e2SLong Li int fd;
1287517ed6e2SLong Li
1288517ed6e2SLong Li eth_dev = rte_eth_dev_attach_secondary(name);
1289517ed6e2SLong Li if (!eth_dev) {
1290517ed6e2SLong Li DRV_LOG(ERR, "Can't attach to dev %s", name);
1291517ed6e2SLong Li ret = -ENOMEM;
1292517ed6e2SLong Li goto failed;
1293517ed6e2SLong Li }
1294517ed6e2SLong Li
1295517ed6e2SLong Li eth_dev->device = &pci_dev->device;
1296517ed6e2SLong Li eth_dev->dev_ops = &mana_dev_secondary_ops;
1297517ed6e2SLong Li ret = mana_proc_priv_init(eth_dev);
1298517ed6e2SLong Li if (ret)
1299517ed6e2SLong Li goto failed;
1300517ed6e2SLong Li priv->process_priv = eth_dev->process_private;
1301517ed6e2SLong Li
1302517ed6e2SLong Li /* Get the IB FD from the primary process */
1303517ed6e2SLong Li fd = mana_mp_req_verbs_cmd_fd(eth_dev);
1304517ed6e2SLong Li if (fd < 0) {
1305517ed6e2SLong Li DRV_LOG(ERR, "Failed to get FD %d", fd);
1306517ed6e2SLong Li ret = -ENODEV;
1307517ed6e2SLong Li goto failed;
1308517ed6e2SLong Li }
1309517ed6e2SLong Li
1310517ed6e2SLong Li ret = mana_map_doorbell_secondary(eth_dev, fd);
1311517ed6e2SLong Li if (ret) {
1312517ed6e2SLong Li DRV_LOG(ERR, "Failed secondary map %d", fd);
1313517ed6e2SLong Li goto failed;
1314517ed6e2SLong Li }
1315517ed6e2SLong Li
1316517ed6e2SLong Li /* fd is no not used after mapping doorbell */
1317517ed6e2SLong Li close(fd);
1318517ed6e2SLong Li
1319095939b6SLong Li eth_dev->tx_pkt_burst = mana_tx_burst;
1320095939b6SLong Li eth_dev->rx_pkt_burst = mana_rx_burst;
1321517ed6e2SLong Li
1322517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock);
1323517ed6e2SLong Li mana_shared_data->secondary_cnt++;
1324517ed6e2SLong Li mana_local_data.secondary_cnt++;
1325517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock);
1326517ed6e2SLong Li
1327517ed6e2SLong Li rte_eth_copy_pci_info(eth_dev, pci_dev);
1328517ed6e2SLong Li rte_eth_dev_probing_finish(eth_dev);
1329517ed6e2SLong Li
1330517ed6e2SLong Li return 0;
1331517ed6e2SLong Li }
1332517ed6e2SLong Li
1333517ed6e2SLong Li ctx = ibv_open_device(ibdev);
1334517ed6e2SLong Li if (!ctx) {
1335517ed6e2SLong Li DRV_LOG(ERR, "Failed to open IB device %s", ibdev->name);
1336517ed6e2SLong Li ret = -ENODEV;
1337517ed6e2SLong Li goto failed;
1338517ed6e2SLong Li }
1339517ed6e2SLong Li
1340517ed6e2SLong Li eth_dev = rte_eth_dev_allocate(name);
1341517ed6e2SLong Li if (!eth_dev) {
1342517ed6e2SLong Li ret = -ENOMEM;
1343517ed6e2SLong Li goto failed;
1344517ed6e2SLong Li }
1345517ed6e2SLong Li
1346517ed6e2SLong Li eth_dev->data->mac_addrs =
1347517ed6e2SLong Li rte_calloc("mana_mac", 1,
1348517ed6e2SLong Li sizeof(struct rte_ether_addr), 0);
1349517ed6e2SLong Li if (!eth_dev->data->mac_addrs) {
1350517ed6e2SLong Li ret = -ENOMEM;
1351517ed6e2SLong Li goto failed;
1352517ed6e2SLong Li }
1353517ed6e2SLong Li
1354517ed6e2SLong Li rte_ether_addr_copy(addr, eth_dev->data->mac_addrs);
1355517ed6e2SLong Li
1356517ed6e2SLong Li priv->ib_pd = ibv_alloc_pd(ctx);
1357517ed6e2SLong Li if (!priv->ib_pd) {
1358517ed6e2SLong Li DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port);
1359517ed6e2SLong Li ret = -ENOMEM;
1360517ed6e2SLong Li goto failed;
1361517ed6e2SLong Li }
1362517ed6e2SLong Li
1363517ed6e2SLong Li /* Create a parent domain with the port number */
1364517ed6e2SLong Li attr.pd = priv->ib_pd;
1365517ed6e2SLong Li attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT;
136674decf3bSWei Hu attr.pd_context = (void *)(uintptr_t)port;
1367517ed6e2SLong Li priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr);
1368517ed6e2SLong Li if (!priv->ib_parent_pd) {
1369517ed6e2SLong Li DRV_LOG(ERR, "ibv_alloc_parent_domain failed port %d", port);
1370517ed6e2SLong Li ret = -ENOMEM;
1371517ed6e2SLong Li goto failed;
1372517ed6e2SLong Li }
1373517ed6e2SLong Li
1374517ed6e2SLong Li priv->ib_ctx = ctx;
1375517ed6e2SLong Li priv->port_id = eth_dev->data->port_id;
1376517ed6e2SLong Li priv->dev_port = port;
1377517ed6e2SLong Li eth_dev->data->dev_private = priv;
1378517ed6e2SLong Li priv->dev_data = eth_dev->data;
1379517ed6e2SLong Li
1380517ed6e2SLong Li priv->max_rx_queues = dev_attr->orig_attr.max_qp;
1381517ed6e2SLong Li priv->max_tx_queues = dev_attr->orig_attr.max_qp;
1382517ed6e2SLong Li
1383517ed6e2SLong Li priv->max_rx_desc =
1384517ed6e2SLong Li RTE_MIN(dev_attr->orig_attr.max_qp_wr,
1385517ed6e2SLong Li dev_attr->orig_attr.max_cqe);
1386517ed6e2SLong Li priv->max_tx_desc =
1387517ed6e2SLong Li RTE_MIN(dev_attr->orig_attr.max_qp_wr,
1388517ed6e2SLong Li dev_attr->orig_attr.max_cqe);
1389517ed6e2SLong Li
1390517ed6e2SLong Li priv->max_send_sge = dev_attr->orig_attr.max_sge;
1391517ed6e2SLong Li priv->max_recv_sge = dev_attr->orig_attr.max_sge;
1392517ed6e2SLong Li
1393517ed6e2SLong Li priv->max_mr = dev_attr->orig_attr.max_mr;
1394517ed6e2SLong Li priv->max_mr_size = dev_attr->orig_attr.max_mr_size;
1395517ed6e2SLong Li
1396b7e79896SLong Li DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d mr %" PRIu64,
1397517ed6e2SLong Li name, priv->max_rx_queues, priv->max_rx_desc,
1398b7e79896SLong Li priv->max_send_sge, priv->max_mr_size);
1399517ed6e2SLong Li
1400517ed6e2SLong Li rte_eth_copy_pci_info(eth_dev, pci_dev);
1401517ed6e2SLong Li
1402bd15f237SLong Li /* Create async interrupt handler */
1403afd5d170SLong Li ret = mana_intr_install(eth_dev, priv);
1404bd15f237SLong Li if (ret) {
1405bd15f237SLong Li DRV_LOG(ERR, "Failed to install intr handler");
1406bd15f237SLong Li goto failed;
1407bd15f237SLong Li }
1408bd15f237SLong Li
1409517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock);
1410517ed6e2SLong Li mana_shared_data->primary_cnt++;
1411517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock);
1412517ed6e2SLong Li
1413517ed6e2SLong Li eth_dev->device = &pci_dev->device;
1414517ed6e2SLong Li
1415517ed6e2SLong Li DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id);
1416517ed6e2SLong Li
1417517ed6e2SLong Li eth_dev->rx_pkt_burst = mana_rx_burst_removed;
1418517ed6e2SLong Li eth_dev->tx_pkt_burst = mana_tx_burst_removed;
1419517ed6e2SLong Li eth_dev->dev_ops = &mana_dev_ops;
1420517ed6e2SLong Li
1421517ed6e2SLong Li rte_eth_dev_probing_finish(eth_dev);
1422517ed6e2SLong Li
1423517ed6e2SLong Li return 0;
1424517ed6e2SLong Li
1425517ed6e2SLong Li failed:
1426517ed6e2SLong Li /* Free the resource for the port failed */
1427517ed6e2SLong Li if (priv) {
1428517ed6e2SLong Li if (priv->ib_parent_pd)
1429517ed6e2SLong Li ibv_dealloc_pd(priv->ib_parent_pd);
1430517ed6e2SLong Li
1431517ed6e2SLong Li if (priv->ib_pd)
1432517ed6e2SLong Li ibv_dealloc_pd(priv->ib_pd);
1433517ed6e2SLong Li }
1434517ed6e2SLong Li
1435517ed6e2SLong Li if (eth_dev)
1436517ed6e2SLong Li rte_eth_dev_release_port(eth_dev);
1437517ed6e2SLong Li
1438517ed6e2SLong Li rte_free(priv);
1439517ed6e2SLong Li
1440517ed6e2SLong Li if (ctx)
1441517ed6e2SLong Li ibv_close_device(ctx);
1442517ed6e2SLong Li
1443517ed6e2SLong Li return ret;
1444517ed6e2SLong Li }
1445517ed6e2SLong Li
1446517ed6e2SLong Li /*
1447517ed6e2SLong Li * Goes through the IB device list to look for the IB port matching the
1448517ed6e2SLong Li * mac_addr. If found, create a rte_eth_dev for it.
14490595702eSLong Li * Return value: number of successfully probed devices
1450517ed6e2SLong Li */
1451517ed6e2SLong Li static int
mana_pci_probe_mac(struct rte_pci_device * pci_dev,struct rte_ether_addr * mac_addr)1452517ed6e2SLong Li mana_pci_probe_mac(struct rte_pci_device *pci_dev,
1453517ed6e2SLong Li struct rte_ether_addr *mac_addr)
1454517ed6e2SLong Li {
1455517ed6e2SLong Li struct ibv_device **ibv_list;
1456517ed6e2SLong Li int ibv_idx;
1457517ed6e2SLong Li struct ibv_context *ctx;
1458517ed6e2SLong Li int num_devices;
14590595702eSLong Li int ret;
1460517ed6e2SLong Li uint8_t port;
14610595702eSLong Li int count = 0;
1462517ed6e2SLong Li
1463517ed6e2SLong Li ibv_list = ibv_get_device_list(&num_devices);
1464517ed6e2SLong Li for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
1465517ed6e2SLong Li struct ibv_device *ibdev = ibv_list[ibv_idx];
1466517ed6e2SLong Li struct rte_pci_addr pci_addr;
1467517ed6e2SLong Li struct ibv_device_attr_ex dev_attr;
1468517ed6e2SLong Li
1469517ed6e2SLong Li DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
1470517ed6e2SLong Li ibdev->name, ibdev->dev_name, ibdev->ibdev_path);
1471517ed6e2SLong Li
1472517ed6e2SLong Li if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
1473517ed6e2SLong Li continue;
1474517ed6e2SLong Li
1475517ed6e2SLong Li /* Ignore if this IB device is not this PCI device */
14768fa22e1fSThomas Monjalon if (rte_pci_addr_cmp(&pci_dev->addr, &pci_addr) != 0)
1477517ed6e2SLong Li continue;
1478517ed6e2SLong Li
1479517ed6e2SLong Li ctx = ibv_open_device(ibdev);
1480517ed6e2SLong Li if (!ctx) {
1481517ed6e2SLong Li DRV_LOG(ERR, "Failed to open IB device %s",
1482517ed6e2SLong Li ibdev->name);
1483517ed6e2SLong Li continue;
1484517ed6e2SLong Li }
1485517ed6e2SLong Li ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
1486517ed6e2SLong Li ibv_close_device(ctx);
1487517ed6e2SLong Li
14880595702eSLong Li if (ret) {
14890595702eSLong Li DRV_LOG(ERR, "Failed to query IB device %s",
14900595702eSLong Li ibdev->name);
14910595702eSLong Li continue;
14920595702eSLong Li }
14930595702eSLong Li
1494517ed6e2SLong Li for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
1495517ed6e2SLong Li port++) {
1496517ed6e2SLong Li struct rte_ether_addr addr;
1497517ed6e2SLong Li ret = get_port_mac(ibdev, port, &addr);
1498517ed6e2SLong Li if (ret)
1499517ed6e2SLong Li continue;
1500517ed6e2SLong Li
1501517ed6e2SLong Li if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr))
1502517ed6e2SLong Li continue;
1503517ed6e2SLong Li
1504517ed6e2SLong Li ret = mana_probe_port(ibdev, &dev_attr, port, pci_dev, &addr);
15050595702eSLong Li if (ret) {
1506517ed6e2SLong Li DRV_LOG(ERR, "Probe on IB port %u failed %d", port, ret);
15070595702eSLong Li } else {
15080595702eSLong Li count++;
1509517ed6e2SLong Li DRV_LOG(INFO, "Successfully probed on IB port %u", port);
1510517ed6e2SLong Li }
1511517ed6e2SLong Li }
15120595702eSLong Li }
1513517ed6e2SLong Li
1514517ed6e2SLong Li ibv_free_device_list(ibv_list);
15150595702eSLong Li return count;
1516517ed6e2SLong Li }
1517517ed6e2SLong Li
1518517ed6e2SLong Li /*
1519517ed6e2SLong Li * Main callback function from PCI bus to probe a device.
1520517ed6e2SLong Li */
1521517ed6e2SLong Li static int
mana_pci_probe(struct rte_pci_driver * pci_drv __rte_unused,struct rte_pci_device * pci_dev)1522517ed6e2SLong Li mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1523517ed6e2SLong Li struct rte_pci_device *pci_dev)
1524517ed6e2SLong Li {
1525517ed6e2SLong Li struct rte_devargs *args = pci_dev->device.devargs;
1526517ed6e2SLong Li struct mana_conf conf = {0};
1527517ed6e2SLong Li unsigned int i;
1528517ed6e2SLong Li int ret;
15290595702eSLong Li int count = 0;
1530517ed6e2SLong Li
1531517ed6e2SLong Li if (args && args->drv_str) {
1532517ed6e2SLong Li ret = mana_parse_args(args, &conf);
1533517ed6e2SLong Li if (ret) {
1534517ed6e2SLong Li DRV_LOG(ERR, "Failed to parse parameters args = %s",
1535517ed6e2SLong Li args->drv_str);
1536517ed6e2SLong Li return ret;
1537517ed6e2SLong Li }
1538517ed6e2SLong Li }
1539517ed6e2SLong Li
1540517ed6e2SLong Li ret = mana_init_once();
1541517ed6e2SLong Li if (ret) {
1542517ed6e2SLong Li DRV_LOG(ERR, "Failed to init PMD global data %d", ret);
1543517ed6e2SLong Li return ret;
1544517ed6e2SLong Li }
1545517ed6e2SLong Li
1546517ed6e2SLong Li /* If there are no driver parameters, probe on all ports */
15470595702eSLong Li if (conf.index) {
15480595702eSLong Li for (i = 0; i < conf.index; i++)
15490595702eSLong Li count += mana_pci_probe_mac(pci_dev,
15500595702eSLong Li &conf.mac_array[i]);
15510595702eSLong Li } else {
15520595702eSLong Li count = mana_pci_probe_mac(pci_dev, NULL);
1553517ed6e2SLong Li }
1554517ed6e2SLong Li
15550595702eSLong Li if (!count) {
15560595702eSLong Li rte_memzone_free(mana_shared_mz);
15570595702eSLong Li mana_shared_mz = NULL;
15580595702eSLong Li ret = -ENODEV;
15590595702eSLong Li }
15600595702eSLong Li
15610595702eSLong Li return ret;
1562517ed6e2SLong Li }
1563517ed6e2SLong Li
/*
 * Uninit hook passed to rte_eth_dev_pci_generic_remove(); delegates to
 * mana_dev_close() and returns its result.
 */
static int
mana_dev_uninit(struct rte_eth_dev *dev)
{
	int rc = mana_dev_close(dev);

	return rc;
}
1569517ed6e2SLong Li
1570517ed6e2SLong Li /*
1571517ed6e2SLong Li * Callback from PCI to remove this device.
1572517ed6e2SLong Li */
1573517ed6e2SLong Li static int
mana_pci_remove(struct rte_pci_device * pci_dev)1574517ed6e2SLong Li mana_pci_remove(struct rte_pci_device *pci_dev)
1575517ed6e2SLong Li {
1576517ed6e2SLong Li if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1577517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data_lock);
1578517ed6e2SLong Li
1579517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock);
1580517ed6e2SLong Li
1581517ed6e2SLong Li RTE_VERIFY(mana_shared_data->primary_cnt > 0);
1582517ed6e2SLong Li mana_shared_data->primary_cnt--;
1583517ed6e2SLong Li if (!mana_shared_data->primary_cnt) {
1584517ed6e2SLong Li DRV_LOG(DEBUG, "mp uninit primary");
1585517ed6e2SLong Li mana_mp_uninit_primary();
1586517ed6e2SLong Li }
1587517ed6e2SLong Li
1588517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock);
1589517ed6e2SLong Li
1590517ed6e2SLong Li /* Also free the shared memory if this is the last */
1591517ed6e2SLong Li if (!mana_shared_data->primary_cnt) {
1592517ed6e2SLong Li DRV_LOG(DEBUG, "free shared memezone data");
1593517ed6e2SLong Li rte_memzone_free(mana_shared_mz);
15940595702eSLong Li mana_shared_mz = NULL;
1595517ed6e2SLong Li }
1596517ed6e2SLong Li
1597517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data_lock);
1598517ed6e2SLong Li } else {
1599517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data_lock);
1600517ed6e2SLong Li
1601517ed6e2SLong Li rte_spinlock_lock(&mana_shared_data->lock);
1602517ed6e2SLong Li RTE_VERIFY(mana_shared_data->secondary_cnt > 0);
1603517ed6e2SLong Li mana_shared_data->secondary_cnt--;
1604517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data->lock);
1605517ed6e2SLong Li
1606517ed6e2SLong Li RTE_VERIFY(mana_local_data.secondary_cnt > 0);
1607517ed6e2SLong Li mana_local_data.secondary_cnt--;
1608517ed6e2SLong Li if (!mana_local_data.secondary_cnt) {
1609517ed6e2SLong Li DRV_LOG(DEBUG, "mp uninit secondary");
1610517ed6e2SLong Li mana_mp_uninit_secondary();
1611517ed6e2SLong Li }
1612517ed6e2SLong Li
1613517ed6e2SLong Li rte_spinlock_unlock(&mana_shared_data_lock);
1614517ed6e2SLong Li }
1615517ed6e2SLong Li
1616517ed6e2SLong Li return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit);
1617517ed6e2SLong Li }
1618517ed6e2SLong Li
1619517ed6e2SLong Li static const struct rte_pci_id mana_pci_id_map[] = {
1620517ed6e2SLong Li {
1621517ed6e2SLong Li RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT,
1622517ed6e2SLong Li PCI_DEVICE_ID_MICROSOFT_MANA)
1623517ed6e2SLong Li },
1624517ed6e2SLong Li {
1625517ed6e2SLong Li .vendor_id = 0
1626517ed6e2SLong Li },
1627517ed6e2SLong Li };
1628517ed6e2SLong Li
1629517ed6e2SLong Li static struct rte_pci_driver mana_pci_driver = {
1630517ed6e2SLong Li .id_table = mana_pci_id_map,
1631517ed6e2SLong Li .probe = mana_pci_probe,
1632517ed6e2SLong Li .remove = mana_pci_remove,
1633517ed6e2SLong Li .drv_flags = RTE_PCI_DRV_INTR_RMV,
1634517ed6e2SLong Li };
1635517ed6e2SLong Li
1636517ed6e2SLong Li RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver);
1637517ed6e2SLong Li RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
1638517ed6e2SLong Li RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
1639517ed6e2SLong Li RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
1640517ed6e2SLong Li RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);
1641517ed6e2SLong Li RTE_PMD_REGISTER_PARAM_STRING(net_mana, ETH_MANA_MAC_ARG "=<mac_addr>");
1642