1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2022 Microsoft Corporation
3 */
4
5 #include <unistd.h>
6 #include <dirent.h>
7 #include <fcntl.h>
8 #include <sys/mman.h>
9 #include <sys/ioctl.h>
10 #include <net/if.h>
11
12 #include <ethdev_driver.h>
13 #include <ethdev_pci.h>
14 #include <rte_kvargs.h>
15 #include <rte_eal_paging.h>
16 #include <rte_pci.h>
17
18 #include <infiniband/verbs.h>
19 #include <infiniband/manadv.h>
20
21 #include <assert.h>
22
23 #include "mana.h"
24
25 /* Shared memory between primary/secondary processes, per driver */
26 /* Data to track primary/secondary usage */
27 struct mana_shared_data *mana_shared_data;
28 static struct mana_shared_data mana_local_data;
29
30 /* The memory region for the above data */
31 static const struct rte_memzone *mana_shared_mz;
32 static const char *MZ_MANA_SHARED_DATA = "mana_shared_data";
33
34 /* Spinlock for mana_shared_data */
35 static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
36
37 /* Allocate a buffer on the stack and fill it with a printf format string. */
38 #define MANA_MKSTR(name, ...) \
39 int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
40 char name[mkstr_size_##name + 1]; \
41 \
42 memset(name, 0, mkstr_size_##name + 1); \
43 snprintf(name, sizeof(name), "" __VA_ARGS__)
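/*
 * Illustrative usage sketch (not part of the driver): the macro expands in
 * place, so a caller can write, for example,
 *	MANA_MKSTR(path, "%s/device/net", ibdev_path);
 * and then use "path" as a stack-allocated, NUL-terminated string sized to
 * fit the formatted output.
 */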
44
45 int mana_logtype_driver;
46 int mana_logtype_init;
47
48 /*
49 * Callback from rdma-core to allocate a buffer for a queue.
50 */
51 void *
52 mana_alloc_verbs_buf(size_t size, void *data)
53 {
54 void *ret;
55 size_t alignment = rte_mem_page_size();
56 int socket = (int)(uintptr_t)data;
57
58 DRV_LOG(DEBUG, "size=%zu socket=%d", size, socket);
59
60 if (alignment == (size_t)-1) {
61 DRV_LOG(ERR, "Failed to get mem page size");
62 rte_errno = ENOMEM;
63 return NULL;
64 }
65
66 ret = rte_zmalloc_socket("mana_verb_buf", size, alignment, socket);
67 if (!ret && size)
68 rte_errno = ENOMEM;
69 return ret;
70 }
71
72 void
73 mana_free_verbs_buf(void *ptr, void *data __rte_unused)
74 {
75 rte_free(ptr);
76 }
77
78 static int
79 mana_dev_configure(struct rte_eth_dev *dev)
80 {
81 struct mana_priv *priv = dev->data->dev_private;
82 struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
83
84 if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
85 dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
86
87 if (dev->data->nb_rx_queues != dev->data->nb_tx_queues) {
88 DRV_LOG(ERR, "Only support equal number of rx/tx queues");
89 return -EINVAL;
90 }
91
92 if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
93 DRV_LOG(ERR, "number of TX/RX queues must be power of 2");
94 return -EINVAL;
95 }
96
97 priv->vlan_strip = !!(dev_conf->rxmode.offloads &
98 RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
99
100 priv->num_queues = dev->data->nb_rx_queues;
101
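/*
 * Register the DPDK buffer allocator callbacks with rdma-core so that queue
 * memory created through verbs is taken from DPDK-managed (hugepage) memory
 * via rte_zmalloc_socket() rather than from the C heap.
 */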
102 manadv_set_context_attr(priv->ib_ctx, MANADV_CTX_ATTR_BUF_ALLOCATORS,
103 (void *)((uintptr_t)&(struct manadv_ctx_allocators){
104 .alloc = &mana_alloc_verbs_buf,
105 .free = &mana_free_verbs_buf,
106 .data = 0,
107 }));
108
109 return 0;
110 }
111
112 static void
113 rx_intr_vec_disable(struct mana_priv *priv)
114 {
115 struct rte_intr_handle *intr_handle = priv->intr_handle;
116
117 rte_intr_free_epoll_fd(intr_handle);
118 rte_intr_vec_list_free(intr_handle);
119 rte_intr_nb_efd_set(intr_handle, 0);
120 }
121
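/*
 * Build the Rx interrupt vector list: each Rx queue's completion channel fd
 * is registered as an event fd so the ethdev Rx interrupt API can wait on it.
 */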
122 static int
123 rx_intr_vec_enable(struct mana_priv *priv)
124 {
125 unsigned int i;
126 unsigned int rxqs_n = priv->dev_data->nb_rx_queues;
127 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
128 struct rte_intr_handle *intr_handle = priv->intr_handle;
129 int ret;
130
131 rx_intr_vec_disable(priv);
132
133 if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) {
134 DRV_LOG(ERR, "Failed to allocate memory for interrupt vector");
135 return -ENOMEM;
136 }
137
138 for (i = 0; i < n; i++) {
139 struct mana_rxq *rxq = priv->dev_data->rx_queues[i];
140
141 ret = rte_intr_vec_list_index_set(intr_handle, i,
142 RTE_INTR_VEC_RXTX_OFFSET + i);
143 if (ret) {
144 DRV_LOG(ERR, "Failed to set intr vec %u", i);
145 return ret;
146 }
147
148 ret = rte_intr_efds_index_set(intr_handle, i, rxq->channel->fd);
149 if (ret) {
150 DRV_LOG(ERR, "Failed to set FD at intr %u", i);
151 return ret;
152 }
153 }
154
155 return rte_intr_nb_efd_set(intr_handle, n);
156 }
157
158 static void
159 rxq_intr_disable(struct mana_priv *priv)
160 {
161 int err = rte_errno;
162
163 rx_intr_vec_disable(priv);
164 rte_errno = err;
165 }
166
167 static int
168 rxq_intr_enable(struct mana_priv *priv)
169 {
170 const struct rte_eth_intr_conf *const intr_conf =
171 &priv->dev_data->dev_conf.intr_conf;
172
173 if (!intr_conf->rxq)
174 return 0;
175
176 return rx_intr_vec_enable(priv);
177 }
178
179 static int
180 mana_dev_start(struct rte_eth_dev *dev)
181 {
182 int ret;
183 struct mana_priv *priv = dev->data->dev_private;
184
185 rte_spinlock_init(&priv->mr_btree_lock);
186 ret = mana_mr_btree_init(&priv->mr_btree, MANA_MR_BTREE_CACHE_N,
187 dev->device->numa_node);
188 if (ret) {
189 DRV_LOG(ERR, "Failed to init device MR btree %d", ret);
190 return ret;
191 }
192
193 ret = mana_start_tx_queues(dev);
194 if (ret) {
195 DRV_LOG(ERR, "failed to start tx queues %d", ret);
196 goto failed_tx;
197 }
198
199 ret = mana_start_rx_queues(dev);
200 if (ret) {
201 DRV_LOG(ERR, "failed to start rx queues %d", ret);
202 goto failed_rx;
203 }
204
205 rte_wmb();
206
207 dev->tx_pkt_burst = mana_tx_burst;
208 dev->rx_pkt_burst = mana_rx_burst;
209
210 DRV_LOG(INFO, "TX/RX queues have started");
211
212 /* Enable datapath for secondary processes */
213 mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX);
214
215 ret = rxq_intr_enable(priv);
216 if (ret) {
217 DRV_LOG(ERR, "Failed to enable RX interrupts");
218 goto failed_intr;
219 }
220
221 return 0;
222
223 failed_intr:
224 mana_stop_rx_queues(dev);
225
226 failed_rx:
227 mana_stop_tx_queues(dev);
228
229 failed_tx:
230 mana_mr_btree_free(&priv->mr_btree);
231
232 return ret;
233 }
234
235 static int
236 mana_dev_stop(struct rte_eth_dev *dev)
237 {
238 int ret;
239 struct mana_priv *priv = dev->data->dev_private;
240
241 rxq_intr_disable(priv);
242
243 dev->tx_pkt_burst = mana_tx_burst_removed;
244 dev->rx_pkt_burst = mana_rx_burst_removed;
245
246 /* Stop datapath on secondary processes */
247 mana_mp_req_on_rxtx(dev, MANA_MP_REQ_STOP_RXTX);
248
249 rte_wmb();
250
251 ret = mana_stop_tx_queues(dev);
252 if (ret) {
253 DRV_LOG(ERR, "failed to stop tx queues");
254 return ret;
255 }
256
257 ret = mana_stop_rx_queues(dev);
258 if (ret) {
259 DRV_LOG(ERR, "failed to stop tx queues");
260 return ret;
261 }
262
263 return 0;
264 }
265
266 static int mana_intr_uninstall(struct mana_priv *priv);
267
268 static int
269 mana_dev_close(struct rte_eth_dev *dev)
270 {
271 struct mana_priv *priv = dev->data->dev_private;
272 int ret;
273
274 mana_remove_all_mr(priv);
275
276 ret = mana_intr_uninstall(priv);
277 if (ret)
278 return ret;
279
280 ret = ibv_close_device(priv->ib_ctx);
281 if (ret) {
282 ret = errno;
283 return ret;
284 }
285
286 return 0;
287 }
288
289 static int
290 mana_dev_info_get(struct rte_eth_dev *dev,
291 struct rte_eth_dev_info *dev_info)
292 {
293 struct mana_priv *priv = dev->data->dev_private;
294
295 dev_info->min_mtu = RTE_ETHER_MIN_MTU;
296 dev_info->max_mtu = MANA_MAX_MTU;
297
298 /* RX params */
299 dev_info->min_rx_bufsize = MIN_RX_BUF_SIZE;
300 dev_info->max_rx_pktlen = MANA_MAX_MTU + RTE_ETHER_HDR_LEN;
301
302 dev_info->max_rx_queues = RTE_MIN(priv->max_rx_queues, UINT16_MAX);
303 dev_info->max_tx_queues = RTE_MIN(priv->max_tx_queues, UINT16_MAX);
304
305 dev_info->max_mac_addrs = MANA_MAX_MAC_ADDR;
306 dev_info->max_hash_mac_addrs = 0;
307
308 dev_info->max_vfs = 1;
309
310 /* Offload params */
311 dev_info->rx_offload_capa = MANA_DEV_RX_OFFLOAD_SUPPORT;
312
313 dev_info->tx_offload_capa = MANA_DEV_TX_OFFLOAD_SUPPORT;
314
315 /* RSS */
316 dev_info->reta_size = INDIRECTION_TABLE_NUM_ELEMENTS;
317 dev_info->hash_key_size = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES;
318 dev_info->flow_type_rss_offloads = MANA_ETH_RSS_SUPPORT;
319
320 /* Thresholds */
321 dev_info->default_rxconf = (struct rte_eth_rxconf){
322 .rx_thresh = {
323 .pthresh = 8,
324 .hthresh = 8,
325 .wthresh = 0,
326 },
327 .rx_free_thresh = 32,
328 /* If no descriptors available, pkts are dropped by default */
329 .rx_drop_en = 1,
330 };
331
332 dev_info->default_txconf = (struct rte_eth_txconf){
333 .tx_thresh = {
334 .pthresh = 32,
335 .hthresh = 0,
336 .wthresh = 0,
337 },
338 .tx_rs_thresh = 32,
339 .tx_free_thresh = 32,
340 };
341
342 /* Buffer limits */
343 dev_info->rx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
344 dev_info->rx_desc_lim.nb_max = RTE_MIN(priv->max_rx_desc, UINT16_MAX);
345 dev_info->rx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
346 dev_info->rx_desc_lim.nb_seg_max =
347 RTE_MIN(priv->max_recv_sge, UINT16_MAX);
348 dev_info->rx_desc_lim.nb_mtu_seg_max =
349 RTE_MIN(priv->max_recv_sge, UINT16_MAX);
350
351 dev_info->tx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
352 dev_info->tx_desc_lim.nb_max = RTE_MIN(priv->max_tx_desc, UINT16_MAX);
353 dev_info->tx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
354 dev_info->tx_desc_lim.nb_seg_max =
355 RTE_MIN(priv->max_send_sge, UINT16_MAX);
356 dev_info->tx_desc_lim.nb_mtu_seg_max =
357 RTE_MIN(priv->max_send_sge, UINT16_MAX);
358
359 /* Speed */
360 dev_info->speed_capa = RTE_ETH_LINK_SPEED_100G;
361
362 /* RX params */
363 dev_info->default_rxportconf.burst_size = 1;
364 dev_info->default_rxportconf.ring_size = MAX_RECEIVE_BUFFERS_PER_QUEUE;
365 dev_info->default_rxportconf.nb_queues = 1;
366
367 /* TX params */
368 dev_info->default_txportconf.burst_size = 1;
369 dev_info->default_txportconf.ring_size = MAX_SEND_BUFFERS_PER_QUEUE;
370 dev_info->default_txportconf.nb_queues = 1;
371
372 return 0;
373 }
374
375 static void
376 mana_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
377 struct rte_eth_txq_info *qinfo)
378 {
379 struct mana_txq *txq = dev->data->tx_queues[queue_id];
380
381 qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
382 qinfo->nb_desc = txq->num_desc;
383 }
384
385 static void
386 mana_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
387 struct rte_eth_rxq_info *qinfo)
388 {
389 struct mana_rxq *rxq = dev->data->rx_queues[queue_id];
390
391 qinfo->mp = rxq->mp;
392 qinfo->nb_desc = rxq->num_desc;
393 qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
394 }
395
396 static const uint32_t *
397 mana_supported_ptypes(struct rte_eth_dev *dev __rte_unused,
398 size_t *no_of_elements)
399 {
400 static const uint32_t ptypes[] = {
401 RTE_PTYPE_L2_ETHER,
402 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
403 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
404 RTE_PTYPE_L4_FRAG,
405 RTE_PTYPE_L4_TCP,
406 RTE_PTYPE_L4_UDP,
407 };
408
409 *no_of_elements = RTE_DIM(ptypes);
410 return ptypes;
411 }
412
413 static int
414 mana_rss_hash_update(struct rte_eth_dev *dev,
415 struct rte_eth_rss_conf *rss_conf)
416 {
417 struct mana_priv *priv = dev->data->dev_private;
418
419 /* Currently can only update RSS hash when device is stopped */
420 if (dev->data->dev_started) {
421 DRV_LOG(ERR, "Can't update RSS after device has started");
422 return -ENODEV;
423 }
424
425 if (rss_conf->rss_hf & ~MANA_ETH_RSS_SUPPORT) {
426 DRV_LOG(ERR, "Port %u invalid RSS HF 0x%" PRIx64,
427 dev->data->port_id, rss_conf->rss_hf);
428 return -EINVAL;
429 }
430
431 if (rss_conf->rss_key && rss_conf->rss_key_len) {
432 if (rss_conf->rss_key_len != TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) {
433 DRV_LOG(ERR, "Port %u key len must be %u long",
434 dev->data->port_id,
435 TOEPLITZ_HASH_KEY_SIZE_IN_BYTES);
436 return -EINVAL;
437 }
438
439 priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
440 priv->rss_conf.rss_key =
441 rte_zmalloc("mana_rss", rss_conf->rss_key_len,
442 RTE_CACHE_LINE_SIZE);
443 if (!priv->rss_conf.rss_key)
444 return -ENOMEM;
445 memcpy(priv->rss_conf.rss_key, rss_conf->rss_key,
446 rss_conf->rss_key_len);
447 }
448 priv->rss_conf.rss_hf = rss_conf->rss_hf;
449
450 return 0;
451 }
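/*
 * Illustrative application-side sketch (assumption, not part of this file):
 * the key must be TOEPLITZ_HASH_KEY_SIZE_IN_BYTES long, rss_hf must be a
 * subset of MANA_ETH_RSS_SUPPORT, and the port must be stopped, e.g.
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = key,
 *		.rss_key_len = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES,
 *		.rss_hf = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_TCP,
 *	};
 *	rte_eth_dev_rss_hash_update(port_id, &conf);
 */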
452
453 static int
454 mana_rss_hash_conf_get(struct rte_eth_dev *dev,
455 struct rte_eth_rss_conf *rss_conf)
456 {
457 struct mana_priv *priv = dev->data->dev_private;
458
459 if (!rss_conf)
460 return -EINVAL;
461
462 if (rss_conf->rss_key &&
463 rss_conf->rss_key_len >= priv->rss_conf.rss_key_len) {
464 memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
465 priv->rss_conf.rss_key_len);
466 }
467
468 rss_conf->rss_key_len = priv->rss_conf.rss_key_len;
469 rss_conf->rss_hf = priv->rss_conf.rss_hf;
470
471 return 0;
472 }
473
474 static int
475 mana_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
476 uint16_t nb_desc, unsigned int socket_id,
477 const struct rte_eth_txconf *tx_conf __rte_unused)
478
479 {
480 struct mana_priv *priv = dev->data->dev_private;
481 struct mana_txq *txq;
482 int ret;
483
484 txq = rte_zmalloc_socket("mana_txq", sizeof(*txq), 0, socket_id);
485 if (!txq) {
486 DRV_LOG(ERR, "failed to allocate txq");
487 return -ENOMEM;
488 }
489
490 txq->socket = socket_id;
491
492 txq->desc_ring = rte_malloc_socket("mana_tx_desc_ring",
493 sizeof(struct mana_txq_desc) *
494 nb_desc,
495 RTE_CACHE_LINE_SIZE, socket_id);
496 if (!txq->desc_ring) {
497 DRV_LOG(ERR, "failed to allocate txq desc_ring");
498 ret = -ENOMEM;
499 goto fail;
500 }
501
502 txq->gdma_comp_buf = rte_malloc_socket("mana_txq_comp",
503 sizeof(*txq->gdma_comp_buf) * nb_desc,
504 RTE_CACHE_LINE_SIZE, socket_id);
505 if (!txq->gdma_comp_buf) {
506 DRV_LOG(ERR, "failed to allocate txq comp");
507 ret = -ENOMEM;
508 goto fail;
509 }
510
511 ret = mana_mr_btree_init(&txq->mr_btree,
512 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
513 if (ret) {
514 DRV_LOG(ERR, "Failed to init TXQ MR btree");
515 goto fail;
516 }
517
518 DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u txq->desc_ring %p",
519 queue_idx, nb_desc, socket_id, txq->desc_ring);
520
521 txq->desc_ring_head = 0;
522 txq->desc_ring_tail = 0;
523 txq->priv = priv;
524 txq->num_desc = nb_desc;
525 dev->data->tx_queues[queue_idx] = txq;
526
527 return 0;
528
529 fail:
530 rte_free(txq->gdma_comp_buf);
531 rte_free(txq->desc_ring);
532 rte_free(txq);
533 return ret;
534 }
535
536 static void
537 mana_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
538 {
539 struct mana_txq *txq = dev->data->tx_queues[qid];
540
541 mana_mr_btree_free(&txq->mr_btree);
542
543 rte_free(txq->gdma_comp_buf);
544 rte_free(txq->desc_ring);
545 rte_free(txq);
546 }
547
548 static int
549 mana_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
550 uint16_t nb_desc, unsigned int socket_id,
551 const struct rte_eth_rxconf *rx_conf __rte_unused,
552 struct rte_mempool *mp)
553 {
554 struct mana_priv *priv = dev->data->dev_private;
555 struct mana_rxq *rxq;
556 int ret;
557
558 rxq = rte_zmalloc_socket("mana_rxq", sizeof(*rxq), 0, socket_id);
559 if (!rxq) {
560 DRV_LOG(ERR, "failed to allocate rxq");
561 return -ENOMEM;
562 }
563
564 DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u",
565 queue_idx, nb_desc, socket_id);
566
567 rxq->socket = socket_id;
568
569 rxq->desc_ring = rte_zmalloc_socket("mana_rx_mbuf_ring",
570 sizeof(struct mana_rxq_desc) *
571 nb_desc,
572 RTE_CACHE_LINE_SIZE, socket_id);
573
574 if (!rxq->desc_ring) {
575 DRV_LOG(ERR, "failed to allocate rxq desc_ring");
576 ret = -ENOMEM;
577 goto fail;
578 }
579
580 rxq->desc_ring_head = 0;
581 rxq->desc_ring_tail = 0;
582
583 rxq->gdma_comp_buf = rte_malloc_socket("mana_rxq_comp",
584 sizeof(*rxq->gdma_comp_buf) * nb_desc,
585 RTE_CACHE_LINE_SIZE, socket_id);
586 if (!rxq->gdma_comp_buf) {
587 DRV_LOG(ERR, "failed to allocate rxq comp");
588 ret = -ENOMEM;
589 goto fail;
590 }
591
592 ret = mana_mr_btree_init(&rxq->mr_btree,
593 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
594 if (ret) {
595 DRV_LOG(ERR, "Failed to init RXQ MR btree");
596 goto fail;
597 }
598
599 rxq->priv = priv;
600 rxq->num_desc = nb_desc;
601 rxq->mp = mp;
602 dev->data->rx_queues[queue_idx] = rxq;
603
604 return 0;
605
606 fail:
607 rte_free(rxq->gdma_comp_buf);
608 rte_free(rxq->desc_ring);
609 rte_free(rxq);
610 return ret;
611 }
612
613 static void
614 mana_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
615 {
616 struct mana_rxq *rxq = dev->data->rx_queues[qid];
617
618 mana_mr_btree_free(&rxq->mr_btree);
619
620 rte_free(rxq->gdma_comp_buf);
621 rte_free(rxq->desc_ring);
622 rte_free(rxq);
623 }
624
625 static int
626 mana_dev_link_update(struct rte_eth_dev *dev,
627 int wait_to_complete __rte_unused)
628 {
629 struct rte_eth_link link;
630
631 /* MANA has no concept of carrier state, always reporting UP */
632 link = (struct rte_eth_link) {
633 .link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
634 .link_autoneg = RTE_ETH_LINK_FIXED,
635 .link_speed = RTE_ETH_SPEED_NUM_100G,
636 .link_status = RTE_ETH_LINK_UP,
637 };
638
639 return rte_eth_linkstatus_set(dev, &link);
640 }
641
642 static int
643 mana_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
644 {
645 unsigned int i;
646
647 for (i = 0; i < dev->data->nb_tx_queues; i++) {
648 struct mana_txq *txq = dev->data->tx_queues[i];
649
650 if (!txq)
651 continue;
652
653 stats->opackets += txq->stats.packets;
654 stats->obytes += txq->stats.bytes;
655 stats->oerrors += txq->stats.errors;
656
657 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
658 stats->q_opackets[i] = txq->stats.packets;
659 stats->q_obytes[i] = txq->stats.bytes;
660 }
661 }
662
663 stats->rx_nombuf = 0;
664 for (i = 0; i < dev->data->nb_rx_queues; i++) {
665 struct mana_rxq *rxq = dev->data->rx_queues[i];
666
667 if (!rxq)
668 continue;
669
670 stats->ipackets += rxq->stats.packets;
671 stats->ibytes += rxq->stats.bytes;
672 stats->ierrors += rxq->stats.errors;
673
674 /* There is no good way to get stats->imissed, not setting it */
675
676 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
677 stats->q_ipackets[i] = rxq->stats.packets;
678 stats->q_ibytes[i] = rxq->stats.bytes;
679 }
680
681 stats->rx_nombuf += rxq->stats.nombuf;
682 }
683
684 return 0;
685 }
686
687 static int
688 mana_dev_stats_reset(struct rte_eth_dev *dev)
689 {
690 unsigned int i;
691
692 PMD_INIT_FUNC_TRACE();
693
694 for (i = 0; i < dev->data->nb_tx_queues; i++) {
695 struct mana_txq *txq = dev->data->tx_queues[i];
696
697 if (!txq)
698 continue;
699
700 memset(&txq->stats, 0, sizeof(txq->stats));
701 }
702
703 for (i = 0; i < dev->data->nb_rx_queues; i++) {
704 struct mana_rxq *rxq = dev->data->rx_queues[i];
705
706 if (!rxq)
707 continue;
708
709 memset(&rxq->stats, 0, sizeof(rxq->stats));
710 }
711
712 return 0;
713 }
714
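/*
 * Find the kernel netdev name for this port by walking
 * <ibdev_path>/device/net/ and matching each interface's MAC address against
 * the port's MAC address.
 */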
715 static int
716 mana_get_ifname(const struct mana_priv *priv, char (*ifname)[IF_NAMESIZE])
717 {
718 int ret = -ENODEV;
719 DIR *dir;
720 struct dirent *dent;
721
722 MANA_MKSTR(dirpath, "%s/device/net", priv->ib_ctx->device->ibdev_path);
723
724 dir = opendir(dirpath);
725 if (dir == NULL)
726 return -ENODEV;
727
728 while ((dent = readdir(dir)) != NULL) {
729 char *name = dent->d_name;
730 FILE *file;
731 struct rte_ether_addr addr;
732 char *mac = NULL;
733
734 if ((name[0] == '.') &&
735 ((name[1] == '\0') ||
736 ((name[1] == '.') && (name[2] == '\0'))))
737 continue;
738
739 MANA_MKSTR(path, "%s/%s/address", dirpath, name);
740
741 file = fopen(path, "r");
742 if (!file) {
743 ret = -ENODEV;
744 break;
745 }
746
747 ret = fscanf(file, "%ms", &mac);
748 fclose(file);
749
750 if (ret <= 0) {
751 ret = -EINVAL;
752 break;
753 }
754
755 ret = rte_ether_unformat_addr(mac, &addr);
756 free(mac);
757 if (ret)
758 break;
759
760 if (rte_is_same_ether_addr(&addr, priv->dev_data->mac_addrs)) {
761 strlcpy(*ifname, name, sizeof(*ifname));
762 ret = 0;
763 break;
764 }
765 }
766
767 closedir(dir);
768 return ret;
769 }
770
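/*
 * Issue a netdevice ioctl (for example SIOCSIFMTU) against the kernel
 * interface that backs this port.
 */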
771 static int
772 mana_ifreq(const struct mana_priv *priv, int req, struct ifreq *ifr)
773 {
774 int sock, ret;
775
776 sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
777 if (sock == -1)
778 return -errno;
779
780 ret = mana_get_ifname(priv, &ifr->ifr_name);
781 if (ret) {
782 close(sock);
783 return ret;
784 }
785
786 if (ioctl(sock, req, ifr) == -1)
787 ret = -errno;
788
789 close(sock);
790
791 return ret;
792 }
793
794 static int
795 mana_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
796 {
797 struct mana_priv *priv = dev->data->dev_private;
798 struct ifreq request = { .ifr_mtu = mtu, };
799
800 return mana_ifreq(priv, SIOCSIFMTU, &request);
801 }
802
803 static const struct eth_dev_ops mana_dev_ops = {
804 .dev_configure = mana_dev_configure,
805 .dev_start = mana_dev_start,
806 .dev_stop = mana_dev_stop,
807 .dev_close = mana_dev_close,
808 .dev_infos_get = mana_dev_info_get,
809 .txq_info_get = mana_dev_tx_queue_info,
810 .rxq_info_get = mana_dev_rx_queue_info,
811 .dev_supported_ptypes_get = mana_supported_ptypes,
812 .rss_hash_update = mana_rss_hash_update,
813 .rss_hash_conf_get = mana_rss_hash_conf_get,
814 .tx_queue_setup = mana_dev_tx_queue_setup,
815 .tx_queue_release = mana_dev_tx_queue_release,
816 .rx_queue_setup = mana_dev_rx_queue_setup,
817 .rx_queue_release = mana_dev_rx_queue_release,
818 .rx_queue_intr_enable = mana_rx_intr_enable,
819 .rx_queue_intr_disable = mana_rx_intr_disable,
820 .link_update = mana_dev_link_update,
821 .stats_get = mana_dev_stats_get,
822 .stats_reset = mana_dev_stats_reset,
823 .mtu_set = mana_mtu_set,
824 };
825
826 static const struct eth_dev_ops mana_dev_secondary_ops = {
827 .stats_get = mana_dev_stats_get,
828 .stats_reset = mana_dev_stats_reset,
829 .dev_infos_get = mana_dev_info_get,
830 };
831
832 uint16_t
833 mana_rx_burst_removed(void *dpdk_rxq __rte_unused,
834 struct rte_mbuf **pkts __rte_unused,
835 uint16_t pkts_n __rte_unused)
836 {
837 rte_mb();
838 return 0;
839 }
840
841 uint16_t
842 mana_tx_burst_removed(void *dpdk_rxq __rte_unused,
843 struct rte_mbuf **pkts __rte_unused,
844 uint16_t pkts_n __rte_unused)
845 {
846 rte_mb();
847 return 0;
848 }
849
850 #define ETH_MANA_MAC_ARG "mac"
851 static const char * const mana_init_args[] = {
852 ETH_MANA_MAC_ARG,
853 NULL,
854 };
855
856 /* Support parsing up to 8 MAC addresses from the EAL command line */
857 #define MAX_NUM_ADDRESS 8
858 struct mana_conf {
859 struct rte_ether_addr mac_array[MAX_NUM_ADDRESS];
860 unsigned int index;
861 };
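/*
 * Illustrative EAL usage (assumption, not part of this file): MAC addresses
 * are passed as "mac" devargs on the PCI device, e.g.
 *	-a <pci_bdf>,mac=12:34:56:78:9a:bc,mac=12:34:56:78:9a:bd
 * Up to MAX_NUM_ADDRESS (8) addresses are accepted.
 */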
862
863 static int
864 mana_arg_parse_callback(const char *key, const char *val, void *private)
865 {
866 struct mana_conf *conf = (struct mana_conf *)private;
867 int ret;
868
869 DRV_LOG(INFO, "key=%s value=%s index=%d", key, val, conf->index);
870
871 if (conf->index >= MAX_NUM_ADDRESS) {
872 DRV_LOG(ERR, "Exceeding max MAC address");
873 return 1;
874 }
875
876 ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]);
877 if (ret) {
878 DRV_LOG(ERR, "Invalid MAC address %s", val);
879 return ret;
880 }
881
882 conf->index++;
883
884 return 0;
885 }
886
887 static int
888 mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf)
889 {
890 struct rte_kvargs *kvlist;
891 unsigned int arg_count;
892 int ret = 0;
893
894 kvlist = rte_kvargs_parse(devargs->drv_str, mana_init_args);
895 if (!kvlist) {
896 DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str);
897 return -EINVAL;
898 }
899
900 arg_count = rte_kvargs_count(kvlist, mana_init_args[0]);
901 if (arg_count > MAX_NUM_ADDRESS) {
902 ret = -EINVAL;
903 goto free_kvlist;
904 }
905 ret = rte_kvargs_process(kvlist, mana_init_args[0],
906 mana_arg_parse_callback, conf);
907 if (ret) {
908 DRV_LOG(ERR, "error parsing args");
909 goto free_kvlist;
910 }
911
912 free_kvlist:
913 rte_kvargs_free(kvlist);
914 return ret;
915 }
916
917 static int
918 get_port_mac(struct ibv_device *device, unsigned int port,
919 struct rte_ether_addr *addr)
920 {
921 FILE *file;
922 int ret = 0;
923 DIR *dir;
924 struct dirent *dent;
925 unsigned int dev_port;
926
927 MANA_MKSTR(path, "%s/device/net", device->ibdev_path);
928
929 dir = opendir(path);
930 if (!dir)
931 return -ENOENT;
932
933 while ((dent = readdir(dir))) {
934 char *name = dent->d_name;
935 char *mac = NULL;
936
937 MANA_MKSTR(port_path, "%s/%s/dev_port", path, name);
938
939 /* Ignore . and .. */
940 if ((name[0] == '.') &&
941 ((name[1] == '\0') ||
942 ((name[1] == '.') && (name[2] == '\0'))))
943 continue;
944
945 file = fopen(port_path, "r");
946 if (!file)
947 continue;
948
949 ret = fscanf(file, "%u", &dev_port);
950 fclose(file);
951
952 if (ret != 1)
953 continue;
954
955 /* Ethernet ports start at 0, IB ports start at 1 */
956 if (dev_port == port - 1) {
957 MANA_MKSTR(address_path, "%s/%s/address", path, name);
958
959 file = fopen(address_path, "r");
960 if (!file)
961 continue;
962
963 ret = fscanf(file, "%ms", &mac);
964 fclose(file);
965
966 if (ret < 0)
967 break;
968
969 ret = rte_ether_unformat_addr(mac, addr);
970 if (ret)
971 DRV_LOG(ERR, "unrecognized mac addr %s", mac);
972
973 free(mac);
974 break;
975 }
976 }
977
978 closedir(dir);
979 return ret;
980 }
981
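/*
 * Derive the PCI address of the IB device by parsing the PCI_SLOT_NAME line
 * (e.g. "PCI_SLOT_NAME=0002:00:02.0") from the device's sysfs uevent file.
 */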
982 static int
983 mana_ibv_device_to_pci_addr(const struct ibv_device *device,
984 struct rte_pci_addr *pci_addr)
985 {
986 FILE *file;
987 char *line = NULL;
988 size_t len = 0;
989
990 MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path);
991
992 file = fopen(path, "r");
993 if (!file)
994 return -errno;
995
996 while (getline(&line, &len, file) != -1) {
997 /* Extract information. */
998 if (sscanf(line,
999 "PCI_SLOT_NAME="
1000 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
1001 &pci_addr->domain,
1002 &pci_addr->bus,
1003 &pci_addr->devid,
1004 &pci_addr->function) == 4) {
1005 break;
1006 }
1007 }
1008
1009 free(line);
1010 fclose(file);
1011 return 0;
1012 }
1013
1014 /*
1015 * Interrupt handler from the IB layer to notify that this device is being removed.
1016 */
1017 static void
1018 mana_intr_handler(void *arg)
1019 {
1020 struct mana_priv *priv = arg;
1021 struct ibv_context *ctx = priv->ib_ctx;
1022 struct ibv_async_event event;
1023
1024 /* Read and ack all messages from IB device */
1025 while (true) {
1026 if (ibv_get_async_event(ctx, &event))
1027 break;
1028
1029 if (event.event_type == IBV_EVENT_DEVICE_FATAL) {
1030 struct rte_eth_dev *dev;
1031
1032 dev = &rte_eth_devices[priv->port_id];
1033 if (dev->data->dev_conf.intr_conf.rmv)
1034 rte_eth_dev_callback_process(dev,
1035 RTE_ETH_EVENT_INTR_RMV, NULL);
1036 }
1037
1038 ibv_ack_async_event(&event);
1039 }
1040 }
1041
1042 static int
1043 mana_intr_uninstall(struct mana_priv *priv)
1044 {
1045 int ret;
1046
1047 ret = rte_intr_callback_unregister(priv->intr_handle,
1048 mana_intr_handler, priv);
1049 if (ret <= 0) {
1050 DRV_LOG(ERR, "Failed to unregister intr callback ret %d", ret);
1051 return ret;
1052 }
1053
1054 rte_intr_instance_free(priv->intr_handle);
1055
1056 return 0;
1057 }
1058
1059 int
1060 mana_fd_set_non_blocking(int fd)
1061 {
1062 int ret = fcntl(fd, F_GETFL);
1063
1064 if (ret != -1 && !fcntl(fd, F_SETFL, ret | O_NONBLOCK))
1065 return 0;
1066
1067 rte_errno = errno;
1068 return -rte_errno;
1069 }
1070
1071 static int
1072 mana_intr_install(struct rte_eth_dev *eth_dev, struct mana_priv *priv)
1073 {
1074 int ret;
1075 struct ibv_context *ctx = priv->ib_ctx;
1076
1077 priv->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
1078 if (!priv->intr_handle) {
1079 DRV_LOG(ERR, "Failed to allocate intr_handle");
1080 rte_errno = ENOMEM;
1081 return -ENOMEM;
1082 }
1083
1084 ret = rte_intr_fd_set(priv->intr_handle, -1);
1085 if (ret)
1086 goto free_intr;
1087
1088 ret = mana_fd_set_non_blocking(ctx->async_fd);
1089 if (ret) {
1090 DRV_LOG(ERR, "Failed to change async_fd to NONBLOCK");
1091 goto free_intr;
1092 }
1093
1094 ret = rte_intr_fd_set(priv->intr_handle, ctx->async_fd);
1095 if (ret)
1096 goto free_intr;
1097
1098 ret = rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT);
1099 if (ret)
1100 goto free_intr;
1101
1102 ret = rte_intr_callback_register(priv->intr_handle,
1103 mana_intr_handler, priv);
1104 if (ret) {
1105 DRV_LOG(ERR, "Failed to register intr callback");
1106 rte_intr_fd_set(priv->intr_handle, -1);
1107 goto free_intr;
1108 }
1109
1110 eth_dev->intr_handle = priv->intr_handle;
1111 return 0;
1112
1113 free_intr:
1114 rte_intr_instance_free(priv->intr_handle);
1115 priv->intr_handle = NULL;
1116
1117 return ret;
1118 }
1119
1120 static int
1121 mana_proc_priv_init(struct rte_eth_dev *dev)
1122 {
1123 struct mana_process_priv *priv;
1124
1125 priv = rte_zmalloc_socket("mana_proc_priv",
1126 sizeof(struct mana_process_priv),
1127 RTE_CACHE_LINE_SIZE,
1128 dev->device->numa_node);
1129 if (!priv)
1130 return -ENOMEM;
1131
1132 dev->process_private = priv;
1133 return 0;
1134 }
1135
1136 /*
1137 * Map the doorbell page for the secondary process through the IB device handle.
1138 */
1139 static int
1140 mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd)
1141 {
1142 struct mana_process_priv *priv = eth_dev->process_private;
1143
1144 void *addr;
1145
1146 addr = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0);
1147 if (addr == MAP_FAILED) {
1148 DRV_LOG(ERR, "Failed to map secondary doorbell port %u",
1149 eth_dev->data->port_id);
1150 return -ENOMEM;
1151 }
1152
1153 DRV_LOG(INFO, "Secondary doorbell mapped to %p", addr);
1154
1155 priv->db_page = addr;
1156
1157 return 0;
1158 }
1159
1160 /* Initialize shared data for the driver (all devices) */
1161 static int
1162 mana_init_shared_data(void)
1163 {
1164 int ret = 0;
1165 const struct rte_memzone *secondary_mz;
1166
1167 rte_spinlock_lock(&mana_shared_data_lock);
1168
1169 /* Skip if shared data is already initialized */
1170 if (mana_shared_data)
1171 goto exit;
1172
1173 if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1174 mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA,
1175 sizeof(*mana_shared_data),
1176 SOCKET_ID_ANY, 0);
1177 if (!mana_shared_mz) {
1178 DRV_LOG(ERR, "Cannot allocate mana shared data");
1179 ret = -rte_errno;
1180 goto exit;
1181 }
1182
1183 mana_shared_data = mana_shared_mz->addr;
1184 memset(mana_shared_data, 0, sizeof(*mana_shared_data));
1185 rte_spinlock_init(&mana_shared_data->lock);
1186 } else {
1187 secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA);
1188 if (!secondary_mz) {
1189 DRV_LOG(ERR, "Cannot attach mana shared data");
1190 ret = -rte_errno;
1191 goto exit;
1192 }
1193
1194 mana_shared_data = secondary_mz->addr;
1195 memset(&mana_local_data, 0, sizeof(mana_local_data));
1196 }
1197
1198 exit:
1199 rte_spinlock_unlock(&mana_shared_data_lock);
1200
1201 return ret;
1202 }
1203
1204 /*
1205 * Init the data structures for use in primary and secondary processes.
1206 */
1207 static int
1208 mana_init_once(void)
1209 {
1210 int ret;
1211
1212 ret = mana_init_shared_data();
1213 if (ret)
1214 return ret;
1215
1216 rte_spinlock_lock(&mana_shared_data->lock);
1217
1218 switch (rte_eal_process_type()) {
1219 case RTE_PROC_PRIMARY:
1220 if (mana_shared_data->init_done)
1221 break;
1222
1223 ret = mana_mp_init_primary();
1224 if (ret)
1225 break;
1226 DRV_LOG(DEBUG, "MP INIT PRIMARY");
1227
1228 mana_shared_data->init_done = 1;
1229 break;
1230
1231 case RTE_PROC_SECONDARY:
1232
1233 if (mana_local_data.init_done)
1234 break;
1235
1236 ret = mana_mp_init_secondary();
1237 if (ret)
1238 break;
1239
1240 DRV_LOG(DEBUG, "MP INIT SECONDARY");
1241
1242 mana_local_data.init_done = 1;
1243 break;
1244
1245 default:
1246 /* Impossible, internal error */
1247 ret = -EPROTO;
1248 break;
1249 }
1250
1251 rte_spinlock_unlock(&mana_shared_data->lock);
1252
1253 return ret;
1254 }
1255
1256 /*
1257 * Probe an IB port
1258 * Return value:
1259 * 0: port successfully probed
1260 * (matching against a requested MAC address is done by the caller)
1261 * negative value: error code
1262 */
1263 static int
1264 mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr,
1265 uint8_t port, struct rte_pci_device *pci_dev, struct rte_ether_addr *addr)
1266 {
1267 struct mana_priv *priv = NULL;
1268 struct rte_eth_dev *eth_dev = NULL;
1269 struct ibv_parent_domain_init_attr attr = {0};
1270 char address[64];
1271 char name[RTE_ETH_NAME_MAX_LEN];
1272 int ret;
1273 struct ibv_context *ctx = NULL;
1274
1275 rte_ether_format_addr(address, sizeof(address), addr);
1276 DRV_LOG(INFO, "device located port %u address %s", port, address);
1277
1278 priv = rte_zmalloc_socket(NULL, sizeof(*priv), RTE_CACHE_LINE_SIZE,
1279 SOCKET_ID_ANY);
1280 if (!priv)
1281 return -ENOMEM;
1282
1283 snprintf(name, sizeof(name), "%s_port%d", pci_dev->device.name, port);
1284
1285 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1286 int fd;
1287
1288 eth_dev = rte_eth_dev_attach_secondary(name);
1289 if (!eth_dev) {
1290 DRV_LOG(ERR, "Can't attach to dev %s", name);
1291 ret = -ENOMEM;
1292 goto failed;
1293 }
1294
1295 eth_dev->device = &pci_dev->device;
1296 eth_dev->dev_ops = &mana_dev_secondary_ops;
1297 ret = mana_proc_priv_init(eth_dev);
1298 if (ret)
1299 goto failed;
1300 priv->process_priv = eth_dev->process_private;
1301
1302 /* Get the IB FD from the primary process */
1303 fd = mana_mp_req_verbs_cmd_fd(eth_dev);
1304 if (fd < 0) {
1305 DRV_LOG(ERR, "Failed to get FD %d", fd);
1306 ret = -ENODEV;
1307 goto failed;
1308 }
1309
1310 ret = mana_map_doorbell_secondary(eth_dev, fd);
1311 if (ret) {
1312 DRV_LOG(ERR, "Failed secondary map %d", fd);
1313 goto failed;
1314 }
1315
1316 /* fd is not used after mapping the doorbell */
1317 close(fd);
1318
1319 eth_dev->tx_pkt_burst = mana_tx_burst;
1320 eth_dev->rx_pkt_burst = mana_rx_burst;
1321
1322 rte_spinlock_lock(&mana_shared_data->lock);
1323 mana_shared_data->secondary_cnt++;
1324 mana_local_data.secondary_cnt++;
1325 rte_spinlock_unlock(&mana_shared_data->lock);
1326
1327 rte_eth_copy_pci_info(eth_dev, pci_dev);
1328 rte_eth_dev_probing_finish(eth_dev);
1329
1330 return 0;
1331 }
1332
1333 ctx = ibv_open_device(ibdev);
1334 if (!ctx) {
1335 DRV_LOG(ERR, "Failed to open IB device %s", ibdev->name);
1336 ret = -ENODEV;
1337 goto failed;
1338 }
1339
1340 eth_dev = rte_eth_dev_allocate(name);
1341 if (!eth_dev) {
1342 ret = -ENOMEM;
1343 goto failed;
1344 }
1345
1346 eth_dev->data->mac_addrs =
1347 rte_calloc("mana_mac", 1,
1348 sizeof(struct rte_ether_addr), 0);
1349 if (!eth_dev->data->mac_addrs) {
1350 ret = -ENOMEM;
1351 goto failed;
1352 }
1353
1354 rte_ether_addr_copy(addr, eth_dev->data->mac_addrs);
1355
1356 priv->ib_pd = ibv_alloc_pd(ctx);
1357 if (!priv->ib_pd) {
1358 DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port);
1359 ret = -ENOMEM;
1360 goto failed;
1361 }
1362
1363 /* Create a parent domain with the port number */
1364 attr.pd = priv->ib_pd;
1365 attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT;
1366 attr.pd_context = (void *)(uintptr_t)port;
1367 priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr);
1368 if (!priv->ib_parent_pd) {
1369 DRV_LOG(ERR, "ibv_alloc_parent_domain failed port %d", port);
1370 ret = -ENOMEM;
1371 goto failed;
1372 }
1373
1374 priv->ib_ctx = ctx;
1375 priv->port_id = eth_dev->data->port_id;
1376 priv->dev_port = port;
1377 eth_dev->data->dev_private = priv;
1378 priv->dev_data = eth_dev->data;
1379
1380 priv->max_rx_queues = dev_attr->orig_attr.max_qp;
1381 priv->max_tx_queues = dev_attr->orig_attr.max_qp;
1382
1383 priv->max_rx_desc =
1384 RTE_MIN(dev_attr->orig_attr.max_qp_wr,
1385 dev_attr->orig_attr.max_cqe);
1386 priv->max_tx_desc =
1387 RTE_MIN(dev_attr->orig_attr.max_qp_wr,
1388 dev_attr->orig_attr.max_cqe);
1389
1390 priv->max_send_sge = dev_attr->orig_attr.max_sge;
1391 priv->max_recv_sge = dev_attr->orig_attr.max_sge;
1392
1393 priv->max_mr = dev_attr->orig_attr.max_mr;
1394 priv->max_mr_size = dev_attr->orig_attr.max_mr_size;
1395
1396 DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d mr %" PRIu64,
1397 name, priv->max_rx_queues, priv->max_rx_desc,
1398 priv->max_send_sge, priv->max_mr_size);
1399
1400 rte_eth_copy_pci_info(eth_dev, pci_dev);
1401
1402 /* Create async interrupt handler */
1403 ret = mana_intr_install(eth_dev, priv);
1404 if (ret) {
1405 DRV_LOG(ERR, "Failed to install intr handler");
1406 goto failed;
1407 }
1408
1409 rte_spinlock_lock(&mana_shared_data->lock);
1410 mana_shared_data->primary_cnt++;
1411 rte_spinlock_unlock(&mana_shared_data->lock);
1412
1413 eth_dev->device = &pci_dev->device;
1414
1415 DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id);
1416
1417 eth_dev->rx_pkt_burst = mana_rx_burst_removed;
1418 eth_dev->tx_pkt_burst = mana_tx_burst_removed;
1419 eth_dev->dev_ops = &mana_dev_ops;
1420
1421 rte_eth_dev_probing_finish(eth_dev);
1422
1423 return 0;
1424
1425 failed:
1426 /* Free the resources for the failed port */
1427 if (priv) {
1428 if (priv->ib_parent_pd)
1429 ibv_dealloc_pd(priv->ib_parent_pd);
1430
1431 if (priv->ib_pd)
1432 ibv_dealloc_pd(priv->ib_pd);
1433 }
1434
1435 if (eth_dev)
1436 rte_eth_dev_release_port(eth_dev);
1437
1438 rte_free(priv);
1439
1440 if (ctx)
1441 ibv_close_device(ctx);
1442
1443 return ret;
1444 }
1445
1446 /*
1447 * Goes through the IB device list to look for the IB port matching the
1448 * mac_addr. If found, create a rte_eth_dev for it.
1449 * Return value: number of successfully probed devices
1450 */
1451 static int
1452 mana_pci_probe_mac(struct rte_pci_device *pci_dev,
1453 struct rte_ether_addr *mac_addr)
1454 {
1455 struct ibv_device **ibv_list;
1456 int ibv_idx;
1457 struct ibv_context *ctx;
1458 int num_devices;
1459 int ret;
1460 uint8_t port;
1461 int count = 0;
1462
1463 ibv_list = ibv_get_device_list(&num_devices);
1464 for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
1465 struct ibv_device *ibdev = ibv_list[ibv_idx];
1466 struct rte_pci_addr pci_addr;
1467 struct ibv_device_attr_ex dev_attr;
1468
1469 DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
1470 ibdev->name, ibdev->dev_name, ibdev->ibdev_path);
1471
1472 if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
1473 continue;
1474
1475 /* Ignore if this IB device is not this PCI device */
1476 if (rte_pci_addr_cmp(&pci_dev->addr, &pci_addr) != 0)
1477 continue;
1478
1479 ctx = ibv_open_device(ibdev);
1480 if (!ctx) {
1481 DRV_LOG(ERR, "Failed to open IB device %s",
1482 ibdev->name);
1483 continue;
1484 }
1485 ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
1486 ibv_close_device(ctx);
1487
1488 if (ret) {
1489 DRV_LOG(ERR, "Failed to query IB device %s",
1490 ibdev->name);
1491 continue;
1492 }
1493
1494 for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
1495 port++) {
1496 struct rte_ether_addr addr;
1497 ret = get_port_mac(ibdev, port, &addr);
1498 if (ret)
1499 continue;
1500
1501 if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr))
1502 continue;
1503
1504 ret = mana_probe_port(ibdev, &dev_attr, port, pci_dev, &addr);
1505 if (ret) {
1506 DRV_LOG(ERR, "Probe on IB port %u failed %d", port, ret);
1507 } else {
1508 count++;
1509 DRV_LOG(INFO, "Successfully probed on IB port %u", port);
1510 }
1511 }
1512 }
1513
1514 ibv_free_device_list(ibv_list);
1515 return count;
1516 }
1517
1518 /*
1519 * Main callback function from PCI bus to probe a device.
1520 */
1521 static int
1522 mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1523 struct rte_pci_device *pci_dev)
1524 {
1525 struct rte_devargs *args = pci_dev->device.devargs;
1526 struct mana_conf conf = {0};
1527 unsigned int i;
1528 int ret;
1529 int count = 0;
1530
1531 if (args && args->drv_str) {
1532 ret = mana_parse_args(args, &conf);
1533 if (ret) {
1534 DRV_LOG(ERR, "Failed to parse parameters args = %s",
1535 args->drv_str);
1536 return ret;
1537 }
1538 }
1539
1540 ret = mana_init_once();
1541 if (ret) {
1542 DRV_LOG(ERR, "Failed to init PMD global data %d", ret);
1543 return ret;
1544 }
1545
1546 /* Probe ports matching the given MAC addresses, or all ports if none were given */
1547 if (conf.index) {
1548 for (i = 0; i < conf.index; i++)
1549 count += mana_pci_probe_mac(pci_dev,
1550 &conf.mac_array[i]);
1551 } else {
1552 count = mana_pci_probe_mac(pci_dev, NULL);
1553 }
1554
1555 if (!count) {
1556 rte_memzone_free(mana_shared_mz);
1557 mana_shared_mz = NULL;
1558 ret = -ENODEV;
1559 }
1560
1561 return ret;
1562 }
1563
1564 static int
1565 mana_dev_uninit(struct rte_eth_dev *dev)
1566 {
1567 return mana_dev_close(dev);
1568 }
1569
1570 /*
1571 * Callback from PCI to remove this device.
1572 */
1573 static int
1574 mana_pci_remove(struct rte_pci_device *pci_dev)
1575 {
1576 if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1577 rte_spinlock_lock(&mana_shared_data_lock);
1578
1579 rte_spinlock_lock(&mana_shared_data->lock);
1580
1581 RTE_VERIFY(mana_shared_data->primary_cnt > 0);
1582 mana_shared_data->primary_cnt--;
1583 if (!mana_shared_data->primary_cnt) {
1584 DRV_LOG(DEBUG, "mp uninit primary");
1585 mana_mp_uninit_primary();
1586 }
1587
1588 rte_spinlock_unlock(&mana_shared_data->lock);
1589
1590 /* Also free the shared memory if this is the last */
1591 if (!mana_shared_data->primary_cnt) {
1592 DRV_LOG(DEBUG, "free shared memezone data");
1593 rte_memzone_free(mana_shared_mz);
1594 mana_shared_mz = NULL;
1595 }
1596
1597 rte_spinlock_unlock(&mana_shared_data_lock);
1598 } else {
1599 rte_spinlock_lock(&mana_shared_data_lock);
1600
1601 rte_spinlock_lock(&mana_shared_data->lock);
1602 RTE_VERIFY(mana_shared_data->secondary_cnt > 0);
1603 mana_shared_data->secondary_cnt--;
1604 rte_spinlock_unlock(&mana_shared_data->lock);
1605
1606 RTE_VERIFY(mana_local_data.secondary_cnt > 0);
1607 mana_local_data.secondary_cnt--;
1608 if (!mana_local_data.secondary_cnt) {
1609 DRV_LOG(DEBUG, "mp uninit secondary");
1610 mana_mp_uninit_secondary();
1611 }
1612
1613 rte_spinlock_unlock(&mana_shared_data_lock);
1614 }
1615
1616 return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit);
1617 }
1618
1619 static const struct rte_pci_id mana_pci_id_map[] = {
1620 {
1621 RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT,
1622 PCI_DEVICE_ID_MICROSOFT_MANA)
1623 },
1624 {
1625 .vendor_id = 0
1626 },
1627 };
1628
1629 static struct rte_pci_driver mana_pci_driver = {
1630 .id_table = mana_pci_id_map,
1631 .probe = mana_pci_probe,
1632 .remove = mana_pci_remove,
1633 .drv_flags = RTE_PCI_DRV_INTR_RMV,
1634 };
1635
1636 RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver);
1637 RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
1638 RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
1639 RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
1640 RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);
1641 RTE_PMD_REGISTER_PARAM_STRING(net_mana, ETH_MANA_MAC_ARG "=<mac_addr>");
1642