1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved. 3 */ 4 5 #include <stdlib.h> 6 #include <fcntl.h> 7 #include <unistd.h> 8 #include <sys/types.h> 9 #include <sys/socket.h> 10 #include <sys/un.h> 11 #include <sys/ioctl.h> 12 #include <sys/mman.h> 13 #include <linux/if_ether.h> 14 #include <errno.h> 15 #include <sys/eventfd.h> 16 17 #include <rte_version.h> 18 #include <rte_mbuf.h> 19 #include <rte_ether.h> 20 #include <rte_ethdev_driver.h> 21 #include <rte_ethdev_vdev.h> 22 #include <rte_malloc.h> 23 #include <rte_kvargs.h> 24 #include <rte_bus_vdev.h> 25 #include <rte_string_fns.h> 26 #include <rte_errno.h> 27 #include <rte_memory.h> 28 #include <rte_memzone.h> 29 #include <rte_eal_memconfig.h> 30 31 #include "rte_eth_memif.h" 32 #include "memif_socket.h" 33 34 #define ETH_MEMIF_ID_ARG "id" 35 #define ETH_MEMIF_ROLE_ARG "role" 36 #define ETH_MEMIF_PKT_BUFFER_SIZE_ARG "bsize" 37 #define ETH_MEMIF_RING_SIZE_ARG "rsize" 38 #define ETH_MEMIF_SOCKET_ARG "socket" 39 #define ETH_MEMIF_MAC_ARG "mac" 40 #define ETH_MEMIF_ZC_ARG "zero-copy" 41 #define ETH_MEMIF_SECRET_ARG "secret" 42 43 static const char * const valid_arguments[] = { 44 ETH_MEMIF_ID_ARG, 45 ETH_MEMIF_ROLE_ARG, 46 ETH_MEMIF_PKT_BUFFER_SIZE_ARG, 47 ETH_MEMIF_RING_SIZE_ARG, 48 ETH_MEMIF_SOCKET_ARG, 49 ETH_MEMIF_MAC_ARG, 50 ETH_MEMIF_ZC_ARG, 51 ETH_MEMIF_SECRET_ARG, 52 NULL 53 }; 54 55 static const struct rte_eth_link pmd_link = { 56 .link_speed = ETH_SPEED_NUM_10G, 57 .link_duplex = ETH_LINK_FULL_DUPLEX, 58 .link_status = ETH_LINK_DOWN, 59 .link_autoneg = ETH_LINK_AUTONEG 60 }; 61 62 #define MEMIF_MP_SEND_REGION "memif_mp_send_region" 63 64 65 static int memif_region_init_zc(const struct rte_memseg_list *msl, 66 const struct rte_memseg *ms, void *arg); 67 68 const char * 69 memif_version(void) 70 { 71 return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR)); 72 } 73 74 /* Message header to synchronize regions */ 75 struct mp_region_msg { 76 char port_name[RTE_DEV_NAME_MAX_LEN]; 77 memif_region_index_t idx; 78 memif_region_size_t size; 79 }; 80 81 static int 82 memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer) 83 { 84 struct rte_eth_dev *dev; 85 struct pmd_process_private *proc_private; 86 const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param; 87 struct rte_mp_msg reply; 88 struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param; 89 uint16_t port_id; 90 int ret; 91 92 /* Get requested port */ 93 ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id); 94 if (ret) { 95 MIF_LOG(ERR, "Failed to get port id for %s", 96 msg_param->port_name); 97 return -1; 98 } 99 dev = &rte_eth_devices[port_id]; 100 proc_private = dev->process_private; 101 102 memset(&reply, 0, sizeof(reply)); 103 strlcpy(reply.name, msg->name, sizeof(reply.name)); 104 reply_param->idx = msg_param->idx; 105 if (proc_private->regions[msg_param->idx] != NULL) { 106 reply_param->size = proc_private->regions[msg_param->idx]->region_size; 107 reply.fds[0] = proc_private->regions[msg_param->idx]->fd; 108 reply.num_fds = 1; 109 } 110 reply.len_param = sizeof(*reply_param); 111 if (rte_mp_reply(&reply, peer) < 0) { 112 MIF_LOG(ERR, "Failed to reply to an add region request"); 113 return -1; 114 } 115 116 return 0; 117 } 118 119 /* 120 * Request regions 121 * Called by secondary process, when ports link status goes up. 
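 * For each region index the secondary sends a MEMIF_MP_SEND_REGION request
 * to the primary, which replies with the region size and file descriptor
 * (see memif_mp_send_region()); the regions are mapped later in
 * memif_connect(). A zero-copy slave only requests region 0 (descriptors)
 * and rebuilds the packet-buffer regions from its local memseg lists.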
122 */ 123 static int 124 memif_mp_request_regions(struct rte_eth_dev *dev) 125 { 126 int ret, i; 127 struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0}; 128 struct rte_mp_msg msg, *reply; 129 struct rte_mp_reply replies; 130 struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param; 131 struct mp_region_msg *reply_param; 132 struct memif_region *r; 133 struct pmd_process_private *proc_private = dev->process_private; 134 struct pmd_internals *pmd = dev->data->dev_private; 135 /* in case of zero-copy slave, only request region 0 */ 136 uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ? 137 1 : ETH_MEMIF_MAX_REGION_NUM; 138 139 MIF_LOG(DEBUG, "Requesting memory regions"); 140 141 for (i = 0; i < max_region_num; i++) { 142 /* Prepare the message */ 143 memset(&msg, 0, sizeof(msg)); 144 strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name)); 145 strlcpy(msg_param->port_name, dev->data->name, 146 sizeof(msg_param->port_name)); 147 msg_param->idx = i; 148 msg.len_param = sizeof(*msg_param); 149 150 /* Send message */ 151 ret = rte_mp_request_sync(&msg, &replies, &timeout); 152 if (ret < 0 || replies.nb_received != 1) { 153 MIF_LOG(ERR, "Failed to send mp msg: %d", 154 rte_errno); 155 return -1; 156 } 157 158 reply = &replies.msgs[0]; 159 reply_param = (struct mp_region_msg *)reply->param; 160 161 if (reply_param->size > 0) { 162 r = rte_zmalloc("region", sizeof(struct memif_region), 0); 163 if (r == NULL) { 164 MIF_LOG(ERR, "Failed to alloc memif region."); 165 free(reply); 166 return -ENOMEM; 167 } 168 r->region_size = reply_param->size; 169 if (reply->num_fds < 1) { 170 MIF_LOG(ERR, "Missing file descriptor."); 171 free(reply); 172 return -1; 173 } 174 r->fd = reply->fds[0]; 175 r->addr = NULL; 176 177 proc_private->regions[reply_param->idx] = r; 178 proc_private->regions_num++; 179 } 180 free(reply); 181 } 182 183 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { 184 ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private); 185 if (ret < 0) 186 return ret; 187 } 188 189 return memif_connect(dev); 190 } 191 192 static int 193 memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info) 194 { 195 dev_info->max_mac_addrs = 1; 196 dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN; 197 dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS; 198 dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS; 199 dev_info->min_rx_bufsize = 0; 200 201 return 0; 202 } 203 204 static memif_ring_t * 205 memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private, 206 memif_ring_type_t type, uint16_t ring_num) 207 { 208 /* rings only in region 0 */ 209 void *p = proc_private->regions[0]->addr; 210 int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) * 211 (1 << pmd->run.log2_ring_size); 212 213 p = (uint8_t *)p + (ring_num + type * pmd->run.num_s2m_rings) * ring_size; 214 215 return (memif_ring_t *)p; 216 } 217 218 static memif_region_offset_t 219 memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq, 220 memif_ring_type_t type, uint16_t num) 221 { 222 struct pmd_internals *pmd = dev->data->dev_private; 223 struct pmd_process_private *proc_private = dev->process_private; 224 225 return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) - 226 (uint8_t *)proc_private->regions[mq->region]->addr); 227 } 228 229 static memif_ring_t * 230 memif_get_ring_from_queue(struct pmd_process_private *proc_private, 231 struct memif_queue *mq) 232 { 233 struct memif_region *r; 234 235 r = 
proc_private->regions[mq->region]; 236 if (r == NULL) 237 return NULL; 238 239 return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset); 240 } 241 242 static void * 243 memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d) 244 { 245 return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset); 246 } 247 248 /* Free mbufs received by master */ 249 static void 250 memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq) 251 { 252 uint16_t mask = (1 << mq->log2_ring_size) - 1; 253 memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq); 254 255 /* FIXME: improve performance */ 256 /* The ring->tail acts as a guard variable between Tx and Rx 257 * threads, so using load-acquire pairs with store-release 258 * to synchronize it between threads. 259 */ 260 while (mq->last_tail != __atomic_load_n(&ring->tail, 261 __ATOMIC_ACQUIRE)) { 262 RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]); 263 /* Decrement refcnt and free mbuf. (current segment) */ 264 rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1); 265 rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]); 266 mq->last_tail++; 267 } 268 } 269 270 static int 271 memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail, 272 struct rte_mbuf *tail) 273 { 274 /* Check for number-of-segments-overflow */ 275 if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS)) 276 return -EOVERFLOW; 277 278 /* Chain 'tail' onto the old tail */ 279 cur_tail->next = tail; 280 281 /* accumulate number of segments and total length. */ 282 head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs); 283 284 tail->pkt_len = tail->data_len; 285 head->pkt_len += tail->pkt_len; 286 287 return 0; 288 } 289 290 static uint16_t 291 eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 292 { 293 struct memif_queue *mq = queue; 294 struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private; 295 struct pmd_process_private *proc_private = 296 rte_eth_devices[mq->in_port].process_private; 297 memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq); 298 uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0; 299 uint16_t n_rx_pkts = 0; 300 uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) - 301 RTE_PKTMBUF_HEADROOM; 302 uint16_t src_len, src_off, dst_len, dst_off, cp_len; 303 memif_ring_type_t type = mq->type; 304 memif_desc_t *d0; 305 struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail; 306 uint64_t b; 307 ssize_t size __rte_unused; 308 uint16_t head; 309 int ret; 310 struct rte_eth_link link; 311 312 if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0)) 313 return 0; 314 if (unlikely(ring == NULL)) { 315 /* Secondary process will attempt to request regions. 
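 * Calling rte_eth_link_get() invokes memif_link_update(), which in a
 * secondary process requests the shared regions from the primary
 * (memif_mp_request_regions()) once the link is up, so the ring becomes
 * usable on a later poll.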
*/ 316 ret = rte_eth_link_get(mq->in_port, &link); 317 if (ret < 0) 318 MIF_LOG(ERR, "Failed to get port %u link info: %s", 319 mq->in_port, rte_strerror(-ret)); 320 return 0; 321 } 322 323 /* consume interrupt */ 324 if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) 325 size = read(mq->intr_handle.fd, &b, sizeof(b)); 326 327 ring_size = 1 << mq->log2_ring_size; 328 mask = ring_size - 1; 329 330 if (type == MEMIF_RING_S2M) { 331 cur_slot = mq->last_head; 332 last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE); 333 } else { 334 cur_slot = mq->last_tail; 335 last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE); 336 } 337 338 if (cur_slot == last_slot) 339 goto refill; 340 n_slots = last_slot - cur_slot; 341 342 while (n_slots && n_rx_pkts < nb_pkts) { 343 mbuf_head = rte_pktmbuf_alloc(mq->mempool); 344 if (unlikely(mbuf_head == NULL)) 345 goto no_free_bufs; 346 mbuf = mbuf_head; 347 mbuf->port = mq->in_port; 348 349 next_slot: 350 s0 = cur_slot & mask; 351 d0 = &ring->desc[s0]; 352 353 src_len = d0->length; 354 dst_off = 0; 355 src_off = 0; 356 357 do { 358 dst_len = mbuf_size - dst_off; 359 if (dst_len == 0) { 360 dst_off = 0; 361 dst_len = mbuf_size; 362 363 /* store pointer to tail */ 364 mbuf_tail = mbuf; 365 mbuf = rte_pktmbuf_alloc(mq->mempool); 366 if (unlikely(mbuf == NULL)) 367 goto no_free_bufs; 368 mbuf->port = mq->in_port; 369 ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf); 370 if (unlikely(ret < 0)) { 371 MIF_LOG(ERR, "number-of-segments-overflow"); 372 rte_pktmbuf_free(mbuf); 373 goto no_free_bufs; 374 } 375 } 376 cp_len = RTE_MIN(dst_len, src_len); 377 378 rte_pktmbuf_data_len(mbuf) += cp_len; 379 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf); 380 if (mbuf != mbuf_head) 381 rte_pktmbuf_pkt_len(mbuf_head) += cp_len; 382 383 memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off), 384 (uint8_t *)memif_get_buffer(proc_private, d0) + src_off, 385 cp_len); 386 387 src_off += cp_len; 388 dst_off += cp_len; 389 src_len -= cp_len; 390 } while (src_len); 391 392 cur_slot++; 393 n_slots--; 394 395 if (d0->flags & MEMIF_DESC_FLAG_NEXT) 396 goto next_slot; 397 398 mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head); 399 *bufs++ = mbuf_head; 400 n_rx_pkts++; 401 } 402 403 no_free_bufs: 404 if (type == MEMIF_RING_S2M) { 405 __atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE); 406 mq->last_head = cur_slot; 407 } else { 408 mq->last_tail = cur_slot; 409 } 410 411 refill: 412 if (type == MEMIF_RING_M2S) { 413 head = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE); 414 n_slots = ring_size - head + mq->last_tail; 415 416 while (n_slots--) { 417 s0 = head++ & mask; 418 d0 = &ring->desc[s0]; 419 d0->length = pmd->run.pkt_buffer_size; 420 } 421 __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE); 422 } 423 424 mq->n_pkts += n_rx_pkts; 425 return n_rx_pkts; 426 } 427 428 static uint16_t 429 eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 430 { 431 struct memif_queue *mq = queue; 432 struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private; 433 struct pmd_process_private *proc_private = 434 rte_eth_devices[mq->in_port].process_private; 435 memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq); 436 uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head; 437 uint16_t n_rx_pkts = 0; 438 memif_desc_t *d0; 439 struct rte_mbuf *mbuf, *mbuf_tail; 440 struct rte_mbuf *mbuf_head = NULL; 441 int ret; 442 struct rte_eth_link link; 443 444 if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0)) 445 return 0; 446 if 
(unlikely(ring == NULL)) { 447 /* Secondary process will attempt to request regions. */ 448 rte_eth_link_get(mq->in_port, &link); 449 return 0; 450 } 451 452 /* consume interrupt */ 453 if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) { 454 uint64_t b; 455 ssize_t size __rte_unused; 456 size = read(mq->intr_handle.fd, &b, sizeof(b)); 457 } 458 459 ring_size = 1 << mq->log2_ring_size; 460 mask = ring_size - 1; 461 462 cur_slot = mq->last_tail; 463 /* The ring->tail acts as a guard variable between Tx and Rx 464 * threads, so using load-acquire pairs with store-release 465 * to synchronize it between threads. 466 */ 467 last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE); 468 if (cur_slot == last_slot) 469 goto refill; 470 n_slots = last_slot - cur_slot; 471 472 while (n_slots && n_rx_pkts < nb_pkts) { 473 s0 = cur_slot & mask; 474 475 d0 = &ring->desc[s0]; 476 mbuf_head = mq->buffers[s0]; 477 mbuf = mbuf_head; 478 479 next_slot: 480 /* prefetch next descriptor */ 481 if (n_rx_pkts + 1 < nb_pkts) 482 rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]); 483 484 mbuf->port = mq->in_port; 485 rte_pktmbuf_data_len(mbuf) = d0->length; 486 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf); 487 488 mq->n_bytes += rte_pktmbuf_data_len(mbuf); 489 490 cur_slot++; 491 n_slots--; 492 if (d0->flags & MEMIF_DESC_FLAG_NEXT) { 493 s0 = cur_slot & mask; 494 d0 = &ring->desc[s0]; 495 mbuf_tail = mbuf; 496 mbuf = mq->buffers[s0]; 497 ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf); 498 if (unlikely(ret < 0)) { 499 MIF_LOG(ERR, "number-of-segments-overflow"); 500 goto refill; 501 } 502 goto next_slot; 503 } 504 505 *bufs++ = mbuf_head; 506 n_rx_pkts++; 507 } 508 509 mq->last_tail = cur_slot; 510 511 /* Supply master with new buffers */ 512 refill: 513 /* The ring->head acts as a guard variable between Tx and Rx 514 * threads, so using load-acquire pairs with store-release 515 * to synchronize it between threads. 
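 * The refill below hands fresh buffers to the master: it runs only when at
 * least 32 slots are free, bulk-allocates mbufs from the queue's mempool and
 * publishes each buffer as an offset within region 1 (the exposed memseg
 * region).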
516 */ 517 head = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE); 518 n_slots = ring_size - head + mq->last_tail; 519 520 if (n_slots < 32) 521 goto no_free_mbufs; 522 523 ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots); 524 if (unlikely(ret < 0)) 525 goto no_free_mbufs; 526 527 while (n_slots--) { 528 s0 = head++ & mask; 529 if (n_slots > 0) 530 rte_prefetch0(mq->buffers[head & mask]); 531 d0 = &ring->desc[s0]; 532 /* store buffer header */ 533 mbuf = mq->buffers[s0]; 534 /* populate descriptor */ 535 d0->length = rte_pktmbuf_data_room_size(mq->mempool) - 536 RTE_PKTMBUF_HEADROOM; 537 d0->region = 1; 538 d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) - 539 (uint8_t *)proc_private->regions[d0->region]->addr; 540 } 541 no_free_mbufs: 542 __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE); 543 544 mq->n_pkts += n_rx_pkts; 545 546 return n_rx_pkts; 547 } 548 549 static uint16_t 550 eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 551 { 552 struct memif_queue *mq = queue; 553 struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private; 554 struct pmd_process_private *proc_private = 555 rte_eth_devices[mq->in_port].process_private; 556 memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq); 557 uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0; 558 uint16_t src_len, src_off, dst_len, dst_off, cp_len; 559 memif_ring_type_t type = mq->type; 560 memif_desc_t *d0; 561 struct rte_mbuf *mbuf; 562 struct rte_mbuf *mbuf_head; 563 uint64_t a; 564 ssize_t size; 565 struct rte_eth_link link; 566 567 if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0)) 568 return 0; 569 if (unlikely(ring == NULL)) { 570 int ret; 571 572 /* Secondary process will attempt to request regions. */ 573 ret = rte_eth_link_get(mq->in_port, &link); 574 if (ret < 0) 575 MIF_LOG(ERR, "Failed to get port %u link info: %s", 576 mq->in_port, rte_strerror(-ret)); 577 return 0; 578 } 579 580 ring_size = 1 << mq->log2_ring_size; 581 mask = ring_size - 1; 582 583 n_free = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE) - mq->last_tail; 584 mq->last_tail += n_free; 585 586 if (type == MEMIF_RING_S2M) { 587 slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE); 588 n_free = ring_size - slot + mq->last_tail; 589 } else { 590 slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE); 591 n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot; 592 } 593 594 while (n_tx_pkts < nb_pkts && n_free) { 595 mbuf_head = *bufs++; 596 mbuf = mbuf_head; 597 598 saved_slot = slot; 599 d0 = &ring->desc[slot & mask]; 600 dst_off = 0; 601 dst_len = (type == MEMIF_RING_S2M) ? 602 pmd->run.pkt_buffer_size : d0->length; 603 604 next_in_chain: 605 src_off = 0; 606 src_len = rte_pktmbuf_data_len(mbuf); 607 608 while (src_len) { 609 if (dst_len == 0) { 610 if (n_free) { 611 slot++; 612 n_free--; 613 d0->flags |= MEMIF_DESC_FLAG_NEXT; 614 d0 = &ring->desc[slot & mask]; 615 dst_off = 0; 616 dst_len = (type == MEMIF_RING_S2M) ? 
617 pmd->run.pkt_buffer_size : d0->length; 618 d0->flags = 0; 619 } else { 620 slot = saved_slot; 621 goto no_free_slots; 622 } 623 } 624 cp_len = RTE_MIN(dst_len, src_len); 625 626 memcpy((uint8_t *)memif_get_buffer(proc_private, d0) + dst_off, 627 rte_pktmbuf_mtod_offset(mbuf, void *, src_off), 628 cp_len); 629 630 mq->n_bytes += cp_len; 631 src_off += cp_len; 632 dst_off += cp_len; 633 src_len -= cp_len; 634 dst_len -= cp_len; 635 636 d0->length = dst_off; 637 } 638 639 if (rte_pktmbuf_is_contiguous(mbuf) == 0) { 640 mbuf = mbuf->next; 641 goto next_in_chain; 642 } 643 644 n_tx_pkts++; 645 slot++; 646 n_free--; 647 rte_pktmbuf_free(mbuf_head); 648 } 649 650 no_free_slots: 651 if (type == MEMIF_RING_S2M) 652 __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE); 653 else 654 __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE); 655 656 if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) { 657 a = 1; 658 size = write(mq->intr_handle.fd, &a, sizeof(a)); 659 if (unlikely(size < 0)) { 660 MIF_LOG(WARNING, 661 "Failed to send interrupt. %s", strerror(errno)); 662 } 663 } 664 665 mq->n_pkts += n_tx_pkts; 666 return n_tx_pkts; 667 } 668 669 670 static int 671 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq, 672 memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask, 673 uint16_t slot, uint16_t n_free) 674 { 675 memif_desc_t *d0; 676 int used_slots = 1; 677 678 next_in_chain: 679 /* store pointer to mbuf to free it later */ 680 mq->buffers[slot & mask] = mbuf; 681 /* Increment refcnt to make sure the buffer is not freed before master 682 * receives it. (current segment) 683 */ 684 rte_mbuf_refcnt_update(mbuf, 1); 685 /* populate descriptor */ 686 d0 = &ring->desc[slot & mask]; 687 d0->length = rte_pktmbuf_data_len(mbuf); 688 /* FIXME: get region index */ 689 d0->region = 1; 690 d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) - 691 (uint8_t *)proc_private->regions[d0->region]->addr; 692 d0->flags = 0; 693 694 /* check if buffer is chained */ 695 if (rte_pktmbuf_is_contiguous(mbuf) == 0) { 696 if (n_free < 2) 697 return 0; 698 /* mark buffer as chained */ 699 d0->flags |= MEMIF_DESC_FLAG_NEXT; 700 /* advance mbuf */ 701 mbuf = mbuf->next; 702 /* update counters */ 703 used_slots++; 704 slot++; 705 n_free--; 706 goto next_in_chain; 707 } 708 return used_slots; 709 } 710 711 static uint16_t 712 eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) 713 { 714 struct memif_queue *mq = queue; 715 struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private; 716 struct pmd_process_private *proc_private = 717 rte_eth_devices[mq->in_port].process_private; 718 memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq); 719 uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0; 720 memif_ring_type_t type = mq->type; 721 struct rte_eth_link link; 722 723 if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0)) 724 return 0; 725 if (unlikely(ring == NULL)) { 726 /* Secondary process will attempt to request regions. */ 727 rte_eth_link_get(mq->in_port, &link); 728 return 0; 729 } 730 731 ring_size = 1 << mq->log2_ring_size; 732 mask = ring_size - 1; 733 734 /* free mbufs received by master */ 735 memif_free_stored_mbufs(proc_private, mq); 736 737 /* ring type always MEMIF_RING_S2M */ 738 /* The ring->head acts as a guard variable between Tx and Rx 739 * threads, so using load-acquire pairs with store-release 740 * to synchronize it between threads. 
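 * n_free is the ring capacity minus the slots still outstanding between
 * mq->last_tail (advanced just above by memif_free_stored_mbufs()) and the
 * current head.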
741 */ 742 slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE); 743 n_free = ring_size - slot + mq->last_tail; 744 745 int used_slots; 746 747 while (n_free && (n_tx_pkts < nb_pkts)) { 748 while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) { 749 if ((nb_pkts - n_tx_pkts) > 8) { 750 rte_prefetch0(*bufs + 4); 751 rte_prefetch0(*bufs + 5); 752 rte_prefetch0(*bufs + 6); 753 rte_prefetch0(*bufs + 7); 754 } 755 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, 756 mask, slot, n_free); 757 if (unlikely(used_slots < 1)) 758 goto no_free_slots; 759 n_tx_pkts++; 760 slot += used_slots; 761 n_free -= used_slots; 762 763 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, 764 mask, slot, n_free); 765 if (unlikely(used_slots < 1)) 766 goto no_free_slots; 767 n_tx_pkts++; 768 slot += used_slots; 769 n_free -= used_slots; 770 771 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, 772 mask, slot, n_free); 773 if (unlikely(used_slots < 1)) 774 goto no_free_slots; 775 n_tx_pkts++; 776 slot += used_slots; 777 n_free -= used_slots; 778 779 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, 780 mask, slot, n_free); 781 if (unlikely(used_slots < 1)) 782 goto no_free_slots; 783 n_tx_pkts++; 784 slot += used_slots; 785 n_free -= used_slots; 786 } 787 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, 788 mask, slot, n_free); 789 if (unlikely(used_slots < 1)) 790 goto no_free_slots; 791 n_tx_pkts++; 792 slot += used_slots; 793 n_free -= used_slots; 794 } 795 796 no_free_slots: 797 /* update ring pointers */ 798 if (type == MEMIF_RING_S2M) 799 __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE); 800 else 801 __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE); 802 803 /* Send interrupt, if enabled. */ 804 if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) { 805 uint64_t a = 1; 806 ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a)); 807 if (unlikely(size < 0)) { 808 MIF_LOG(WARNING, 809 "Failed to send interrupt. 
%s", strerror(errno)); 810 } 811 } 812 813 /* increment queue counters */ 814 mq->n_pkts += n_tx_pkts; 815 816 return n_tx_pkts; 817 } 818 819 void 820 memif_free_regions(struct rte_eth_dev *dev) 821 { 822 struct pmd_process_private *proc_private = dev->process_private; 823 struct pmd_internals *pmd = dev->data->dev_private; 824 int i; 825 struct memif_region *r; 826 827 /* regions are allocated contiguously, so it's 828 * enough to loop until 'proc_private->regions_num' 829 */ 830 for (i = 0; i < proc_private->regions_num; i++) { 831 r = proc_private->regions[i]; 832 if (r != NULL) { 833 /* This is memzone */ 834 if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) { 835 r->addr = NULL; 836 if (r->fd > 0) 837 close(r->fd); 838 } 839 if (r->addr != NULL) { 840 munmap(r->addr, r->region_size); 841 if (r->fd > 0) { 842 close(r->fd); 843 r->fd = -1; 844 } 845 } 846 rte_free(r); 847 proc_private->regions[i] = NULL; 848 } 849 } 850 proc_private->regions_num = 0; 851 } 852 853 static int 854 memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms, 855 void *arg) 856 { 857 struct pmd_process_private *proc_private = (struct pmd_process_private *)arg; 858 struct memif_region *r; 859 860 if (proc_private->regions_num < 1) { 861 MIF_LOG(ERR, "Missing descriptor region"); 862 return -1; 863 } 864 865 r = proc_private->regions[proc_private->regions_num - 1]; 866 867 if (r->addr != msl->base_va) 868 r = proc_private->regions[++proc_private->regions_num - 1]; 869 870 if (r == NULL) { 871 r = rte_zmalloc("region", sizeof(struct memif_region), 0); 872 if (r == NULL) { 873 MIF_LOG(ERR, "Failed to alloc memif region."); 874 return -ENOMEM; 875 } 876 877 r->addr = msl->base_va; 878 r->region_size = ms->len; 879 r->fd = rte_memseg_get_fd(ms); 880 if (r->fd < 0) 881 return -1; 882 r->pkt_buffer_offset = 0; 883 884 proc_private->regions[proc_private->regions_num - 1] = r; 885 } else { 886 r->region_size += ms->len; 887 } 888 889 return 0; 890 } 891 892 static int 893 memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers) 894 { 895 struct pmd_internals *pmd = dev->data->dev_private; 896 struct pmd_process_private *proc_private = dev->process_private; 897 char shm_name[ETH_MEMIF_SHM_NAME_SIZE]; 898 int ret = 0; 899 struct memif_region *r; 900 901 if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) { 902 MIF_LOG(ERR, "Too many regions."); 903 return -1; 904 } 905 906 r = rte_zmalloc("region", sizeof(struct memif_region), 0); 907 if (r == NULL) { 908 MIF_LOG(ERR, "Failed to alloc memif region."); 909 return -ENOMEM; 910 } 911 912 /* calculate buffer offset */ 913 r->pkt_buffer_offset = (pmd->run.num_s2m_rings + pmd->run.num_m2s_rings) * 914 (sizeof(memif_ring_t) + sizeof(memif_desc_t) * 915 (1 << pmd->run.log2_ring_size)); 916 917 r->region_size = r->pkt_buffer_offset; 918 /* if region has buffers, add buffers size to region_size */ 919 if (has_buffers == 1) 920 r->region_size += (uint32_t)(pmd->run.pkt_buffer_size * 921 (1 << pmd->run.log2_ring_size) * 922 (pmd->run.num_s2m_rings + 923 pmd->run.num_m2s_rings)); 924 925 memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE); 926 snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d", 927 proc_private->regions_num); 928 929 r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING); 930 if (r->fd < 0) { 931 MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno)); 932 ret = -1; 933 goto error; 934 } 935 936 ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK); 937 if (ret < 0) { 938 MIF_LOG(ERR, "Failed to add 
seals to shm file: %s.", strerror(errno)); 939 goto error; 940 } 941 942 ret = ftruncate(r->fd, r->region_size); 943 if (ret < 0) { 944 MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno)); 945 goto error; 946 } 947 948 r->addr = mmap(NULL, r->region_size, PROT_READ | 949 PROT_WRITE, MAP_SHARED, r->fd, 0); 950 if (r->addr == MAP_FAILED) { 951 MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(errno)); 952 ret = -1; 953 goto error; 954 } 955 956 proc_private->regions[proc_private->regions_num] = r; 957 proc_private->regions_num++; 958 959 return ret; 960 961 error: 962 if (r->fd > 0) 963 close(r->fd); 964 r->fd = -1; 965 966 return ret; 967 } 968 969 static int 970 memif_regions_init(struct rte_eth_dev *dev) 971 { 972 struct pmd_internals *pmd = dev->data->dev_private; 973 int ret; 974 975 /* 976 * Zero-copy exposes dpdk memory. 977 * Each memseg list will be represented by memif region. 978 * Zero-copy regions indexing: memseg list idx + 1, 979 * as we already have region 0 reserved for descriptors. 980 */ 981 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { 982 /* create region idx 0 containing descriptors */ 983 ret = memif_region_init_shm(dev, 0); 984 if (ret < 0) 985 return ret; 986 ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private); 987 if (ret < 0) 988 return ret; 989 } else { 990 /* create one memory region containing rings and buffers */ 991 ret = memif_region_init_shm(dev, /* has buffers */ 1); 992 if (ret < 0) 993 return ret; 994 } 995 996 return 0; 997 } 998 999 static void 1000 memif_init_rings(struct rte_eth_dev *dev) 1001 { 1002 struct pmd_internals *pmd = dev->data->dev_private; 1003 struct pmd_process_private *proc_private = dev->process_private; 1004 memif_ring_t *ring; 1005 int i, j; 1006 uint16_t slot; 1007 1008 for (i = 0; i < pmd->run.num_s2m_rings; i++) { 1009 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2M, i); 1010 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED); 1011 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED); 1012 ring->cookie = MEMIF_COOKIE; 1013 ring->flags = 0; 1014 1015 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) 1016 continue; 1017 1018 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) { 1019 slot = i * (1 << pmd->run.log2_ring_size) + j; 1020 ring->desc[j].region = 0; 1021 ring->desc[j].offset = 1022 proc_private->regions[0]->pkt_buffer_offset + 1023 (uint32_t)(slot * pmd->run.pkt_buffer_size); 1024 ring->desc[j].length = pmd->run.pkt_buffer_size; 1025 } 1026 } 1027 1028 for (i = 0; i < pmd->run.num_m2s_rings; i++) { 1029 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_M2S, i); 1030 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED); 1031 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED); 1032 ring->cookie = MEMIF_COOKIE; 1033 ring->flags = 0; 1034 1035 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) 1036 continue; 1037 1038 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) { 1039 slot = (i + pmd->run.num_s2m_rings) * 1040 (1 << pmd->run.log2_ring_size) + j; 1041 ring->desc[j].region = 0; 1042 ring->desc[j].offset = 1043 proc_private->regions[0]->pkt_buffer_offset + 1044 (uint32_t)(slot * pmd->run.pkt_buffer_size); 1045 ring->desc[j].length = pmd->run.pkt_buffer_size; 1046 } 1047 } 1048 } 1049 1050 /* called only by slave */ 1051 static int 1052 memif_init_queues(struct rte_eth_dev *dev) 1053 { 1054 struct pmd_internals *pmd = dev->data->dev_private; 1055 struct memif_queue *mq; 1056 int i; 1057 1058 for (i = 0; i < pmd->run.num_s2m_rings; i++) { 1059 mq = dev->data->tx_queues[i]; 1060
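/* S2M rings are the slave's TX direction: copy the negotiated run-time
 * parameters into each TX queue, record the queue's ring offset inside
 * region 0 and create a non-blocking eventfd used for interrupt signalling.
 */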
mq->log2_ring_size = pmd->run.log2_ring_size; 1061 /* queues located only in region 0 */ 1062 mq->region = 0; 1063 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2M, i); 1064 mq->last_head = 0; 1065 mq->last_tail = 0; 1066 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK); 1067 if (mq->intr_handle.fd < 0) { 1068 MIF_LOG(WARNING, 1069 "Failed to create eventfd for tx queue %d: %s.", i, 1070 strerror(errno)); 1071 } 1072 mq->buffers = NULL; 1073 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { 1074 mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) * 1075 (1 << mq->log2_ring_size), 0); 1076 if (mq->buffers == NULL) 1077 return -ENOMEM; 1078 } 1079 } 1080 1081 for (i = 0; i < pmd->run.num_m2s_rings; i++) { 1082 mq = dev->data->rx_queues[i]; 1083 mq->log2_ring_size = pmd->run.log2_ring_size; 1084 /* queues located only in region 0 */ 1085 mq->region = 0; 1086 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_M2S, i); 1087 mq->last_head = 0; 1088 mq->last_tail = 0; 1089 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK); 1090 if (mq->intr_handle.fd < 0) { 1091 MIF_LOG(WARNING, 1092 "Failed to create eventfd for rx queue %d: %s.", i, 1093 strerror(errno)); 1094 } 1095 mq->buffers = NULL; 1096 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { 1097 mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) * 1098 (1 << mq->log2_ring_size), 0); 1099 if (mq->buffers == NULL) 1100 return -ENOMEM; 1101 } 1102 } 1103 return 0; 1104 } 1105 1106 int 1107 memif_init_regions_and_queues(struct rte_eth_dev *dev) 1108 { 1109 int ret; 1110 1111 ret = memif_regions_init(dev); 1112 if (ret < 0) 1113 return ret; 1114 1115 memif_init_rings(dev); 1116 1117 ret = memif_init_queues(dev); 1118 if (ret < 0) 1119 return ret; 1120 1121 return 0; 1122 } 1123 1124 int 1125 memif_connect(struct rte_eth_dev *dev) 1126 { 1127 struct pmd_internals *pmd = dev->data->dev_private; 1128 struct pmd_process_private *proc_private = dev->process_private; 1129 struct memif_region *mr; 1130 struct memif_queue *mq; 1131 memif_ring_t *ring; 1132 int i; 1133 1134 for (i = 0; i < proc_private->regions_num; i++) { 1135 mr = proc_private->regions[i]; 1136 if (mr != NULL) { 1137 if (mr->addr == NULL) { 1138 if (mr->fd < 0) 1139 return -1; 1140 mr->addr = mmap(NULL, mr->region_size, 1141 PROT_READ | PROT_WRITE, 1142 MAP_SHARED, mr->fd, 0); 1143 if (mr->addr == MAP_FAILED) { 1144 MIF_LOG(ERR, "mmap failed: %s\n", 1145 strerror(errno)); 1146 return -1; 1147 } 1148 } 1149 if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) { 1150 /* close memseg file */ 1151 close(mr->fd); 1152 mr->fd = -1; 1153 } 1154 } 1155 } 1156 1157 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 1158 for (i = 0; i < pmd->run.num_s2m_rings; i++) { 1159 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? 1160 dev->data->tx_queues[i] : dev->data->rx_queues[i]; 1161 ring = memif_get_ring_from_queue(proc_private, mq); 1162 if (ring == NULL || ring->cookie != MEMIF_COOKIE) { 1163 MIF_LOG(ERR, "Wrong ring"); 1164 return -1; 1165 } 1166 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED); 1167 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED); 1168 mq->last_head = 0; 1169 mq->last_tail = 0; 1170 /* enable polling mode */ 1171 if (pmd->role == MEMIF_ROLE_MASTER) 1172 ring->flags = MEMIF_RING_FLAG_MASK_INT; 1173 } 1174 for (i = 0; i < pmd->run.num_m2s_rings; i++) { 1175 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? 
1176 dev->data->rx_queues[i] : dev->data->tx_queues[i]; 1177 ring = memif_get_ring_from_queue(proc_private, mq); 1178 if (ring == NULL || ring->cookie != MEMIF_COOKIE) { 1179 MIF_LOG(ERR, "Wrong ring"); 1180 return -1; 1181 } 1182 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED); 1183 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED); 1184 mq->last_head = 0; 1185 mq->last_tail = 0; 1186 /* enable polling mode */ 1187 if (pmd->role == MEMIF_ROLE_SLAVE) 1188 ring->flags = MEMIF_RING_FLAG_MASK_INT; 1189 } 1190 1191 pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING; 1192 pmd->flags |= ETH_MEMIF_FLAG_CONNECTED; 1193 dev->data->dev_link.link_status = ETH_LINK_UP; 1194 } 1195 MIF_LOG(INFO, "Connected."); 1196 return 0; 1197 } 1198 1199 static int 1200 memif_dev_start(struct rte_eth_dev *dev) 1201 { 1202 struct pmd_internals *pmd = dev->data->dev_private; 1203 int ret = 0; 1204 1205 switch (pmd->role) { 1206 case MEMIF_ROLE_SLAVE: 1207 ret = memif_connect_slave(dev); 1208 break; 1209 case MEMIF_ROLE_MASTER: 1210 ret = memif_connect_master(dev); 1211 break; 1212 default: 1213 MIF_LOG(ERR, "Unknown role: %d.", pmd->role); 1214 ret = -1; 1215 break; 1216 } 1217 1218 return ret; 1219 } 1220 1221 static int 1222 memif_dev_close(struct rte_eth_dev *dev) 1223 { 1224 struct pmd_internals *pmd = dev->data->dev_private; 1225 int i; 1226 1227 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 1228 memif_msg_enq_disconnect(pmd->cc, "Device closed", 0); 1229 memif_disconnect(dev); 1230 1231 for (i = 0; i < dev->data->nb_rx_queues; i++) 1232 (*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]); 1233 for (i = 0; i < dev->data->nb_tx_queues; i++) 1234 (*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]); 1235 1236 memif_socket_remove_device(dev); 1237 } else { 1238 memif_disconnect(dev); 1239 } 1240 1241 rte_free(dev->process_private); 1242 1243 return 0; 1244 } 1245 1246 static int 1247 memif_dev_configure(struct rte_eth_dev *dev) 1248 { 1249 struct pmd_internals *pmd = dev->data->dev_private; 1250 1251 /* 1252 * SLAVE - TXQ 1253 * MASTER - RXQ 1254 */ 1255 pmd->cfg.num_s2m_rings = (pmd->role == MEMIF_ROLE_SLAVE) ? 1256 dev->data->nb_tx_queues : dev->data->nb_rx_queues; 1257 1258 /* 1259 * SLAVE - RXQ 1260 * MASTER - TXQ 1261 */ 1262 pmd->cfg.num_m2s_rings = (pmd->role == MEMIF_ROLE_SLAVE) ? 1263 dev->data->nb_rx_queues : dev->data->nb_tx_queues; 1264 1265 return 0; 1266 } 1267 1268 static int 1269 memif_tx_queue_setup(struct rte_eth_dev *dev, 1270 uint16_t qid, 1271 uint16_t nb_tx_desc __rte_unused, 1272 unsigned int socket_id __rte_unused, 1273 const struct rte_eth_txconf *tx_conf __rte_unused) 1274 { 1275 struct pmd_internals *pmd = dev->data->dev_private; 1276 struct memif_queue *mq; 1277 1278 mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0); 1279 if (mq == NULL) { 1280 MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid); 1281 return -ENOMEM; 1282 } 1283 1284 mq->type = 1285 (pmd->role == MEMIF_ROLE_SLAVE) ? 
MEMIF_RING_S2M : MEMIF_RING_M2S; 1286 mq->n_pkts = 0; 1287 mq->n_bytes = 0; 1288 mq->intr_handle.fd = -1; 1289 mq->intr_handle.type = RTE_INTR_HANDLE_EXT; 1290 mq->in_port = dev->data->port_id; 1291 dev->data->tx_queues[qid] = mq; 1292 1293 return 0; 1294 } 1295 1296 static int 1297 memif_rx_queue_setup(struct rte_eth_dev *dev, 1298 uint16_t qid, 1299 uint16_t nb_rx_desc __rte_unused, 1300 unsigned int socket_id __rte_unused, 1301 const struct rte_eth_rxconf *rx_conf __rte_unused, 1302 struct rte_mempool *mb_pool) 1303 { 1304 struct pmd_internals *pmd = dev->data->dev_private; 1305 struct memif_queue *mq; 1306 1307 mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0); 1308 if (mq == NULL) { 1309 MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid); 1310 return -ENOMEM; 1311 } 1312 1313 mq->type = (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M; 1314 mq->n_pkts = 0; 1315 mq->n_bytes = 0; 1316 mq->intr_handle.fd = -1; 1317 mq->intr_handle.type = RTE_INTR_HANDLE_EXT; 1318 mq->mempool = mb_pool; 1319 mq->in_port = dev->data->port_id; 1320 dev->data->rx_queues[qid] = mq; 1321 1322 return 0; 1323 } 1324 1325 static void 1326 memif_queue_release(void *queue) 1327 { 1328 struct memif_queue *mq = (struct memif_queue *)queue; 1329 1330 if (!mq) 1331 return; 1332 1333 rte_free(mq); 1334 } 1335 1336 static int 1337 memif_link_update(struct rte_eth_dev *dev, 1338 int wait_to_complete __rte_unused) 1339 { 1340 struct pmd_process_private *proc_private; 1341 1342 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1343 proc_private = dev->process_private; 1344 if (dev->data->dev_link.link_status == ETH_LINK_UP && 1345 proc_private->regions_num == 0) { 1346 memif_mp_request_regions(dev); 1347 } else if (dev->data->dev_link.link_status == ETH_LINK_DOWN && 1348 proc_private->regions_num > 0) { 1349 memif_free_regions(dev); 1350 } 1351 } 1352 return 0; 1353 } 1354 1355 static int 1356 memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 1357 { 1358 struct pmd_internals *pmd = dev->data->dev_private; 1359 struct memif_queue *mq; 1360 int i; 1361 uint8_t tmp, nq; 1362 1363 stats->ipackets = 0; 1364 stats->ibytes = 0; 1365 stats->opackets = 0; 1366 stats->obytes = 0; 1367 1368 tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings : 1369 pmd->run.num_m2s_rings; 1370 nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp : 1371 RTE_ETHDEV_QUEUE_STAT_CNTRS; 1372 1373 /* RX stats */ 1374 for (i = 0; i < nq; i++) { 1375 mq = dev->data->rx_queues[i]; 1376 stats->q_ipackets[i] = mq->n_pkts; 1377 stats->q_ibytes[i] = mq->n_bytes; 1378 stats->ipackets += mq->n_pkts; 1379 stats->ibytes += mq->n_bytes; 1380 } 1381 1382 tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings : 1383 pmd->run.num_s2m_rings; 1384 nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp : 1385 RTE_ETHDEV_QUEUE_STAT_CNTRS; 1386 1387 /* TX stats */ 1388 for (i = 0; i < nq; i++) { 1389 mq = dev->data->tx_queues[i]; 1390 stats->q_opackets[i] = mq->n_pkts; 1391 stats->q_obytes[i] = mq->n_bytes; 1392 stats->opackets += mq->n_pkts; 1393 stats->obytes += mq->n_bytes; 1394 } 1395 return 0; 1396 } 1397 1398 static int 1399 memif_stats_reset(struct rte_eth_dev *dev) 1400 { 1401 struct pmd_internals *pmd = dev->data->dev_private; 1402 int i; 1403 struct memif_queue *mq; 1404 1405 for (i = 0; i < pmd->run.num_s2m_rings; i++) { 1406 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? 
dev->data->tx_queues[i] : 1407 dev->data->rx_queues[i]; 1408 mq->n_pkts = 0; 1409 mq->n_bytes = 0; 1410 } 1411 for (i = 0; i < pmd->run.num_m2s_rings; i++) { 1412 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->rx_queues[i] : 1413 dev->data->tx_queues[i]; 1414 mq->n_pkts = 0; 1415 mq->n_bytes = 0; 1416 } 1417 1418 return 0; 1419 } 1420 1421 static int 1422 memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused, 1423 uint16_t qid __rte_unused) 1424 { 1425 MIF_LOG(WARNING, "Interrupt mode not supported."); 1426 1427 return -1; 1428 } 1429 1430 static int 1431 memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused) 1432 { 1433 struct pmd_internals *pmd __rte_unused = dev->data->dev_private; 1434 1435 return 0; 1436 } 1437 1438 static const struct eth_dev_ops ops = { 1439 .dev_start = memif_dev_start, 1440 .dev_close = memif_dev_close, 1441 .dev_infos_get = memif_dev_info, 1442 .dev_configure = memif_dev_configure, 1443 .tx_queue_setup = memif_tx_queue_setup, 1444 .rx_queue_setup = memif_rx_queue_setup, 1445 .rx_queue_release = memif_queue_release, 1446 .tx_queue_release = memif_queue_release, 1447 .rx_queue_intr_enable = memif_rx_queue_intr_enable, 1448 .rx_queue_intr_disable = memif_rx_queue_intr_disable, 1449 .link_update = memif_link_update, 1450 .stats_get = memif_stats_get, 1451 .stats_reset = memif_stats_reset, 1452 }; 1453 1454 static int 1455 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role, 1456 memif_interface_id_t id, uint32_t flags, 1457 const char *socket_filename, 1458 memif_log2_ring_size_t log2_ring_size, 1459 uint16_t pkt_buffer_size, const char *secret, 1460 struct rte_ether_addr *ether_addr) 1461 { 1462 int ret = 0; 1463 struct rte_eth_dev *eth_dev; 1464 struct rte_eth_dev_data *data; 1465 struct pmd_internals *pmd; 1466 struct pmd_process_private *process_private; 1467 const unsigned int numa_node = vdev->device.numa_node; 1468 const char *name = rte_vdev_device_name(vdev); 1469 1470 eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd)); 1471 if (eth_dev == NULL) { 1472 MIF_LOG(ERR, "%s: Unable to allocate device struct.", name); 1473 return -1; 1474 } 1475 1476 process_private = (struct pmd_process_private *) 1477 rte_zmalloc(name, sizeof(struct pmd_process_private), 1478 RTE_CACHE_LINE_SIZE); 1479 1480 if (process_private == NULL) { 1481 MIF_LOG(ERR, "Failed to alloc memory for process private"); 1482 return -1; 1483 } 1484 eth_dev->process_private = process_private; 1485 1486 pmd = eth_dev->data->dev_private; 1487 memset(pmd, 0, sizeof(*pmd)); 1488 1489 pmd->id = id; 1490 pmd->flags = flags; 1491 pmd->flags |= ETH_MEMIF_FLAG_DISABLED; 1492 pmd->role = role; 1493 /* Zero-copy flag irrelevant to master.
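 * The master simply maps whatever regions its peer exposes, so the copy vs.
 * zero-copy decision is made on the slave side only and the flag is cleared
 * here for the master role.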
*/ 1494 if (pmd->role == MEMIF_ROLE_MASTER) 1495 pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY; 1496 1497 ret = memif_socket_init(eth_dev, socket_filename); 1498 if (ret < 0) 1499 return ret; 1500 1501 memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE); 1502 if (secret != NULL) 1503 strlcpy(pmd->secret, secret, sizeof(pmd->secret)); 1504 1505 pmd->cfg.log2_ring_size = log2_ring_size; 1506 /* set in .dev_configure() */ 1507 pmd->cfg.num_s2m_rings = 0; 1508 pmd->cfg.num_m2s_rings = 0; 1509 1510 pmd->cfg.pkt_buffer_size = pkt_buffer_size; 1511 rte_spinlock_init(&pmd->cc_lock); 1512 1513 data = eth_dev->data; 1514 data->dev_private = pmd; 1515 data->numa_node = numa_node; 1516 data->dev_link = pmd_link; 1517 data->mac_addrs = ether_addr; 1518 data->promiscuous = 1; 1519 1520 eth_dev->dev_ops = &ops; 1521 eth_dev->device = &vdev->device; 1522 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { 1523 eth_dev->rx_pkt_burst = eth_memif_rx_zc; 1524 eth_dev->tx_pkt_burst = eth_memif_tx_zc; 1525 } else { 1526 eth_dev->rx_pkt_burst = eth_memif_rx; 1527 eth_dev->tx_pkt_burst = eth_memif_tx; 1528 } 1529 1530 rte_eth_dev_probing_finish(eth_dev); 1531 1532 return 0; 1533 } 1534 1535 static int 1536 memif_set_role(const char *key __rte_unused, const char *value, 1537 void *extra_args) 1538 { 1539 enum memif_role_t *role = (enum memif_role_t *)extra_args; 1540 1541 if (strstr(value, "master") != NULL) { 1542 *role = MEMIF_ROLE_MASTER; 1543 } else if (strstr(value, "slave") != NULL) { 1544 *role = MEMIF_ROLE_SLAVE; 1545 } else { 1546 MIF_LOG(ERR, "Unknown role: %s.", value); 1547 return -EINVAL; 1548 } 1549 return 0; 1550 } 1551 1552 static int 1553 memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args) 1554 { 1555 uint32_t *flags = (uint32_t *)extra_args; 1556 1557 if (strstr(value, "yes") != NULL) { 1558 if (!rte_mcfg_get_single_file_segments()) { 1559 MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments."); 1560 return -ENOTSUP; 1561 } 1562 *flags |= ETH_MEMIF_FLAG_ZERO_COPY; 1563 } else if (strstr(value, "no") != NULL) { 1564 *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY; 1565 } else { 1566 MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value); 1567 return -EINVAL; 1568 } 1569 return 0; 1570 } 1571 1572 static int 1573 memif_set_id(const char *key __rte_unused, const char *value, void *extra_args) 1574 { 1575 memif_interface_id_t *id = (memif_interface_id_t *)extra_args; 1576 1577 /* even if parsing fails, 0 is a valid id */ 1578 *id = strtoul(value, NULL, 10); 1579 return 0; 1580 } 1581 1582 static int 1583 memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args) 1584 { 1585 unsigned long tmp; 1586 uint16_t *pkt_buffer_size = (uint16_t *)extra_args; 1587 1588 tmp = strtoul(value, NULL, 10); 1589 if (tmp == 0 || tmp > 0xFFFF) { 1590 MIF_LOG(ERR, "Invalid buffer size: %s.", value); 1591 return -EINVAL; 1592 } 1593 *pkt_buffer_size = tmp; 1594 return 0; 1595 } 1596 1597 static int 1598 memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args) 1599 { 1600 unsigned long tmp; 1601 memif_log2_ring_size_t *log2_ring_size = 1602 (memif_log2_ring_size_t *)extra_args; 1603 1604 tmp = strtoul(value, NULL, 10); 1605 if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) { 1606 MIF_LOG(ERR, "Invalid ring size: %s (max %u).", 1607 value, ETH_MEMIF_MAX_LOG2_RING_SIZE); 1608 return -EINVAL; 1609 } 1610 *log2_ring_size = tmp; 1611 return 0; 1612 } 1613 1614 /* check if directory exists and if we have permission to read/write */ 1615 static int 1616 
memif_check_socket_filename(const char *filename) 1617 { 1618 char *dir = NULL, *tmp; 1619 uint32_t idx; 1620 int ret = 0; 1621 1622 if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) { 1623 MIF_LOG(ERR, "Unix socket address too long (max 108)."); 1624 return -1; 1625 } 1626 1627 tmp = strrchr(filename, '/'); 1628 if (tmp != NULL) { 1629 idx = tmp - filename; 1630 dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0); 1631 if (dir == NULL) { 1632 MIF_LOG(ERR, "Failed to allocate memory."); 1633 return -1; 1634 } 1635 strlcpy(dir, filename, sizeof(char) * (idx + 1)); 1636 } 1637 1638 if (dir == NULL || (faccessat(-1, dir, F_OK | R_OK | 1639 W_OK, AT_EACCESS) < 0)) { 1640 MIF_LOG(ERR, "Invalid socket directory."); 1641 ret = -EINVAL; 1642 } 1643 1644 if (dir != NULL) 1645 rte_free(dir); 1646 1647 return ret; 1648 } 1649 1650 static int 1651 memif_set_socket_filename(const char *key __rte_unused, const char *value, 1652 void *extra_args) 1653 { 1654 const char **socket_filename = (const char **)extra_args; 1655 1656 *socket_filename = value; 1657 return memif_check_socket_filename(*socket_filename); 1658 } 1659 1660 static int 1661 memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args) 1662 { 1663 struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args; 1664 1665 if (rte_ether_unformat_addr(value, ether_addr) < 0) 1666 MIF_LOG(WARNING, "Failed to parse mac '%s'.", value); 1667 return 0; 1668 } 1669 1670 static int 1671 memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args) 1672 { 1673 const char **secret = (const char **)extra_args; 1674 1675 *secret = value; 1676 return 0; 1677 } 1678 1679 static int 1680 rte_pmd_memif_probe(struct rte_vdev_device *vdev) 1681 { 1682 RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128); 1683 RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16); 1684 int ret = 0; 1685 struct rte_kvargs *kvlist; 1686 const char *name = rte_vdev_device_name(vdev); 1687 enum memif_role_t role = MEMIF_ROLE_SLAVE; 1688 memif_interface_id_t id = 0; 1689 uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE; 1690 memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE; 1691 const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME; 1692 uint32_t flags = 0; 1693 const char *secret = NULL; 1694 struct rte_ether_addr *ether_addr = rte_zmalloc("", 1695 sizeof(struct rte_ether_addr), 0); 1696 struct rte_eth_dev *eth_dev; 1697 1698 rte_eth_random_addr(ether_addr->addr_bytes); 1699 1700 MIF_LOG(INFO, "Initialize MEMIF: %s.", name); 1701 1702 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1703 eth_dev = rte_eth_dev_attach_secondary(name); 1704 if (!eth_dev) { 1705 MIF_LOG(ERR, "Failed to probe %s", name); 1706 return -1; 1707 } 1708 1709 eth_dev->dev_ops = &ops; 1710 eth_dev->device = &vdev->device; 1711 eth_dev->rx_pkt_burst = eth_memif_rx; 1712 eth_dev->tx_pkt_burst = eth_memif_tx; 1713 1714 if (!rte_eal_primary_proc_alive(NULL)) { 1715 MIF_LOG(ERR, "Primary process is missing"); 1716 return -1; 1717 } 1718 1719 eth_dev->process_private = (struct pmd_process_private *) 1720 rte_zmalloc(name, 1721 sizeof(struct pmd_process_private), 1722 RTE_CACHE_LINE_SIZE); 1723 if (eth_dev->process_private == NULL) { 1724 MIF_LOG(ERR, 1725 "Failed to alloc memory for process private"); 1726 return -1; 1727 } 1728 1729 rte_eth_dev_probing_finish(eth_dev); 1730 1731 return 0; 1732 } 1733 1734 ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region); 1735 /* 1736 * Primary process can continue probing, but 
secondary process won't 1737 * be able to get memory regions information 1738 */ 1739 if (ret < 0 && rte_errno != EEXIST) 1740 MIF_LOG(WARNING, "Failed to register mp action callback: %s", 1741 strerror(rte_errno)); 1742 1743 kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments); 1744 1745 /* parse parameters */ 1746 if (kvlist != NULL) { 1747 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG, 1748 &memif_set_role, &role); 1749 if (ret < 0) 1750 goto exit; 1751 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG, 1752 &memif_set_id, &id); 1753 if (ret < 0) 1754 goto exit; 1755 ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG, 1756 &memif_set_bs, &pkt_buffer_size); 1757 if (ret < 0) 1758 goto exit; 1759 ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG, 1760 &memif_set_rs, &log2_ring_size); 1761 if (ret < 0) 1762 goto exit; 1763 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG, 1764 &memif_set_socket_filename, 1765 (void *)(&socket_filename)); 1766 if (ret < 0) 1767 goto exit; 1768 ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG, 1769 &memif_set_mac, ether_addr); 1770 if (ret < 0) 1771 goto exit; 1772 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG, 1773 &memif_set_zc, &flags); 1774 if (ret < 0) 1775 goto exit; 1776 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG, 1777 &memif_set_secret, (void *)(&secret)); 1778 if (ret < 0) 1779 goto exit; 1780 } 1781 1782 /* create interface */ 1783 ret = memif_create(vdev, role, id, flags, socket_filename, 1784 log2_ring_size, pkt_buffer_size, secret, ether_addr); 1785 1786 exit: 1787 if (kvlist != NULL) 1788 rte_kvargs_free(kvlist); 1789 return ret; 1790 } 1791 1792 static int 1793 rte_pmd_memif_remove(struct rte_vdev_device *vdev) 1794 { 1795 struct rte_eth_dev *eth_dev; 1796 1797 eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev)); 1798 if (eth_dev == NULL) 1799 return 0; 1800 1801 rte_eth_dev_close(eth_dev->data->port_id); 1802 1803 return 0; 1804 } 1805 1806 static struct rte_vdev_driver pmd_memif_drv = { 1807 .probe = rte_pmd_memif_probe, 1808 .remove = rte_pmd_memif_remove, 1809 }; 1810 1811 RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv); 1812 1813 RTE_PMD_REGISTER_PARAM_STRING(net_memif, 1814 ETH_MEMIF_ID_ARG "=<int>" 1815 ETH_MEMIF_ROLE_ARG "=master|slave" 1816 ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>" 1817 ETH_MEMIF_RING_SIZE_ARG "=<int>" 1818 ETH_MEMIF_SOCKET_ARG "=<string>" 1819 ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx" 1820 ETH_MEMIF_ZC_ARG "=yes|no" 1821 ETH_MEMIF_SECRET_ARG "=<string>"); 1822 1823 RTE_LOG_REGISTER(memif_logtype, pmd.net.memif, NOTICE); 1824
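/*
 * Illustrative usage (example values, not part of the driver): the devargs
 * registered above are supplied on the EAL command line, e.g. to connect two
 * testpmd instances over a shared control socket:
 *
 *   dpdk-testpmd --vdev=net_memif0,role=master,id=0,socket=/tmp/memif.sock -- -i
 *   dpdk-testpmd --file-prefix=peer --single-file-segments \
 *       --vdev=net_memif0,role=slave,id=0,socket=/tmp/memif.sock,zero-copy=yes -- -i
 *
 * The socket path and id are placeholders; zero-copy=yes is honoured only in
 * the slave role and requires single-file-segments mode (see memif_set_zc()).
 */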