/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2018 Microsoft Corporation
 * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
 * All rights reserved.
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <strings.h>
#include <malloc.h>

#include <rte_ethdev.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_dev.h>
#include <rte_net.h>
#include <rte_bus_vmbus.h>
#include <rte_spinlock.h>

#include "hn_logs.h"
#include "hn_var.h"
#include "hn_rndis.h"
#include "hn_nvs.h"
#include "ndis.h"

#define HN_NVS_SEND_MSG_SIZE \
	(sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis))

#define HN_TXD_CACHE_SIZE	32 /* per cpu tx_descriptor pool cache */
#define HN_TXCOPY_THRESHOLD	512

#define HN_RXCOPY_THRESHOLD	256
#define HN_RXQ_EVENT_DEFAULT	2048

struct hn_rxinfo {
	uint32_t	vlan_info;
	uint32_t	csum_info;
	uint32_t	hash_info;
	uint32_t	hash_value;
};

#define HN_RXINFO_VLAN		0x0001
#define HN_RXINFO_CSUM		0x0002
#define HN_RXINFO_HASHINF	0x0004
#define HN_RXINFO_HASHVAL	0x0008
#define HN_RXINFO_ALL		\
	(HN_RXINFO_VLAN |	\
	 HN_RXINFO_CSUM |	\
	 HN_RXINFO_HASHINF |	\
	 HN_RXINFO_HASHVAL)
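
/*
 * Default values used when a given per-packet info field is absent from
 * the RNDIS metadata; hn_rxpkt() compares against these before setting
 * the corresponding mbuf offload fields.
 */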
#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0

/*
 * Per-transmit bookkeeping.
 * A slot in the transmit ring (chim_index) is reserved for each transmit.
 *
 * There are two types of transmit:
 *   - buffered transmit where the chimney buffer is used and the RNDIS
 *     header is placed in that buffer. mbuf == NULL for this case.
 *
 *   - direct transmit where the RNDIS header is in rndis_pkt and the
 *     mbuf is freed after transmit.
 *
 * Descriptors come from a per-port pool which is used
 * to limit the number of outstanding requests per device.
 */
struct hn_txdesc {
	struct rte_mbuf *m;

	uint16_t	queue_id;
	uint16_t	chim_index;
	uint32_t	chim_size;
	uint32_t	data_size;
	uint32_t	packets;

	struct rndis_packet_msg *rndis_pkt;
};

#define HN_RNDIS_PKT_LEN				\
	(sizeof(struct rndis_packet_msg) +		\
	 RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))

/* Minimum space required for a packet */
#define HN_PKTSIZE_MIN(align) \
	RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)

#define DEFAULT_TX_FREE_THRESH 32U

static void
hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
{
	uint32_t s = m->pkt_len;
	const struct rte_ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(m, const struct rte_ether_addr *);
	if (rte_is_multicast_ether_addr(ea)) {
		if (rte_is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt)
{
	return pkt->pktinfooffset + pkt->pktinfolen;
}

static inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{
	return ofs - offsetof(struct rndis_packet_msg, dataoffset);
}

static void hn_txd_init(struct rte_mempool *mp __rte_unused,
			void *opaque, void *obj, unsigned int idx)
{
	struct hn_txdesc *txd = obj;
	struct rte_eth_dev *dev = opaque;
	struct rndis_packet_msg *pkt;

	memset(txd, 0, sizeof(*txd));
	txd->chim_index = idx;

	pkt = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN,
				rte_align32pow2(HN_RNDIS_PKT_LEN),
				dev->device->numa_node);
	if (!pkt)
		rte_exit(EXIT_FAILURE, "can not allocate RNDIS header");

	txd->rndis_pkt = pkt;
}

/*
 * Unlike Linux and FreeBSD, this driver uses a mempool
 * to limit outstanding transmits and reserve buffers.
 */
int
hn_tx_pool_init(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;
	char name[RTE_MEMPOOL_NAMESIZE];
	struct rte_mempool *mp;

	snprintf(name, sizeof(name),
		 "hn_txd_%u", dev->data->port_id);

	PMD_INIT_LOG(DEBUG, "create a TX send pool %s n=%u size=%zu socket=%d",
		     name, hv->chim_cnt, sizeof(struct hn_txdesc),
		     dev->device->numa_node);

	mp = rte_mempool_create(name, hv->chim_cnt, sizeof(struct hn_txdesc),
				HN_TXD_CACHE_SIZE, 0,
				NULL, NULL,
				hn_txd_init, dev,
				dev->device->numa_node, 0);
	if (!mp) {
		PMD_DRV_LOG(ERR,
			    "mempool %s create failed: %d", name, rte_errno);
		return -rte_errno;
	}

	hv->tx_pool = mp;
	return 0;
}

void
hn_tx_pool_uninit(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;

	if (hv->tx_pool) {
		rte_mempool_free(hv->tx_pool);
		hv->tx_pool = NULL;
	}
}
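
/*
 * Reset the per-queue aggregation state; the next aggregated send will
 * reserve a fresh transmit descriptor and chimney buffer slot.
 */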
static void hn_reset_txagg(struct hn_tx_queue *txq)
{
	txq->agg_szleft = txq->agg_szmax;
	txq->agg_pktleft = txq->agg_pktmax;
	txq->agg_txd = NULL;
	txq->agg_prevpkt = NULL;
}

int
hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_txconf *tx_conf)
{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_tx_queue *txq;
	uint32_t tx_free_thresh;
	int err;

	PMD_INIT_FUNC_TRACE();

	txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE,
				 socket_id);
	if (!txq)
		return -ENOMEM;

	txq->hv = hv;
	txq->chan = hv->channels[queue_idx];
	txq->port_id = dev->data->port_id;
	txq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh = RTE_MIN(hv->chim_cnt / 4,
					 DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= hv->chim_cnt - 3)
		tx_free_thresh = hv->chim_cnt - 3;

	txq->free_thresh = tx_free_thresh;

	txq->agg_szmax  = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
	txq->agg_pktmax = hv->rndis_agg_pkts;
	txq->agg_align  = hv->rndis_agg_align;

	hn_reset_txagg(txq);

	err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
				   socket_id, tx_conf);
	if (err) {
		rte_free(txq);
		return err;
	}

	dev->data->tx_queues[queue_idx] = txq;
	return 0;
}

void
hn_dev_tx_queue_release(void *arg)
{
	struct hn_tx_queue *txq = arg;
	struct hn_txdesc *txd;

	PMD_INIT_FUNC_TRACE();

	if (!txq)
		return;

	/* If any pending data is still present just drop it */
	txd = txq->agg_txd;
	if (txd)
		rte_mempool_put(txq->hv->tx_pool, txd);

	rte_free(txq);
}

static void
hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
{
	struct hn_txdesc *txd = (struct hn_txdesc *)xactid;
	struct hn_tx_queue *txq;

	/* Control packets are sent with xactid == 0 */
	if (!txd)
		return;

	txq = dev->data->tx_queues[queue_id];
	if (likely(ack->status == NVS_STATUS_OK)) {
		PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u",
			   txq->port_id, txq->queue_id, txd->chim_index,
			   txd->packets, txd->data_size);
		txq->stats.bytes += txd->data_size;
		txq->stats.packets += txd->packets;
	} else {
		PMD_TX_LOG(NOTICE, "port %u:%u complete tx %u failed status %u",
			   txq->port_id, txq->queue_id, txd->chim_index, ack->status);
		++txq->stats.errors;
	}

	rte_pktmbuf_free(txd->m);

	rte_mempool_put(txq->hv->tx_pool, txd);
}

/* Handle transmit completion events */
static void
hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id,
		   const struct vmbus_chanpkt_hdr *pkt,
		   const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_RNDIS_ACK:
		hn_nvs_send_completed(dev, queue_id, pkt->xactid, data);
		break;

	default:
		PMD_TX_LOG(NOTICE,
			   "unexpected send completion type %u",
			   hdr->type);
	}
}
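
/*
 * RNDIS carries receive metadata as a chain of pktinfo elements, each a
 * {size, type, offset} header followed by data.  The parser below walks
 * the chain until the VLAN, checksum and hash fields have all been seen
 * or the metadata area is exhausted.
 */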
/* Parse per-packet info (metadata) */
static int
hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen,
		struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (unlikely(info_dlen < sizeof(*pi)))
			return -EINVAL;

		if (unlikely(info_dlen < pi->size))
			return -EINVAL;
		info_dlen -= pi->size;

		if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return -EINVAL;

		if (unlikely(pi->size < pi->offset))
			return -EINVAL;

		dlen = pi->size - pi->offset;
		data = pi->data;

		switch (pi->type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (unlikely(dlen < NDIS_VLAN_INFO_SIZE))
				return -EINVAL;
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE))
				return -EINVAL;
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case NDIS_PKTINFO_TYPE_HASHVAL:
			if (unlikely(dlen < NDIS_HASH_VALUE_SIZE))
				return -EINVAL;
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case NDIS_PKTINFO_TYPE_HASHINF:
			if (unlikely(dlen < NDIS_HASH_INFO_SIZE))
				return -EINVAL;
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL)
			break; /* All found; done */
next:
		pi = (const struct rndis_pktinfo *)
			((const uint8_t *)pi + pi->size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if (!(mask & HN_RXINFO_HASHVAL))
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return 0;
}

/*
 * Ack the consumed RXBUF associated w/ this channel packet,
 * so that this RXBUF can be recycled by the hypervisor.
 */
static void hn_rx_buf_release(struct hn_rx_bufinfo *rxb)
{
	struct rte_mbuf_ext_shared_info *shinfo = &rxb->shinfo;
	struct hn_data *hv = rxb->hv;

	if (rte_mbuf_ext_refcnt_update(shinfo, -1) == 0) {
		hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
		--hv->rxbuf_outstanding;
	}
}

static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque)
{
	hn_rx_buf_release(opaque);
}

static struct hn_rx_bufinfo *hn_rx_buf_init(const struct hn_rx_queue *rxq,
					    const struct vmbus_chanpkt_rxbuf *pkt)
{
	struct hn_rx_bufinfo *rxb;

	rxb = rxq->hv->rxbuf_info + pkt->hdr.xactid;
	rxb->chan = rxq->chan;
	rxb->xactid = pkt->hdr.xactid;
	rxb->hv = rxq->hv;

	rxb->shinfo.free_cb = hn_rx_buf_free_cb;
	rxb->shinfo.fcb_opaque = rxb;
	rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1);
	return rxb;
}
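
/*
 * Deliver one received frame: for large frames attach the data in the
 * host receive area to the mbuf as an external buffer (zero copy),
 * otherwise copy it into the mbuf; then fill in offload metadata and
 * enqueue the mbuf on the per-queue staging ring.
 */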
static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
		     uint8_t *data, unsigned int headroom, unsigned int dlen,
		     const struct hn_rxinfo *info)
{
	struct hn_data *hv = rxq->hv;
	struct rte_mbuf *m;

	m = rte_pktmbuf_alloc(rxq->mb_pool);
	if (unlikely(!m)) {
		struct rte_eth_dev *dev =
			&rte_eth_devices[rxq->port_id];

		dev->data->rx_mbuf_alloc_failed++;
		return;
	}

	/*
	 * For large packets, avoid copying if possible but keep some
	 * space available in the receive area for later packets.
	 */
	if (dlen >= HN_RXCOPY_THRESHOLD &&
	    hv->rxbuf_outstanding < hv->rxbuf_section_cnt / 2) {
		struct rte_mbuf_ext_shared_info *shinfo;
		const void *rxbuf;
		rte_iova_t iova;

		/*
		 * Build an external mbuf that points to the receive area.
		 * Use refcount to handle multiple packets in the same
		 * receive buffer section.
		 */
		rxbuf = hv->rxbuf_res->addr;
		iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf);
		shinfo = &rxb->shinfo;

		if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 1)
			++hv->rxbuf_outstanding;

		rte_pktmbuf_attach_extbuf(m, data, iova,
					  dlen + headroom, shinfo);
		m->data_off = headroom;
	} else {
		/* Mbufs in the pool must be large enough to hold small packets */
		if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) {
			rte_pktmbuf_free_seg(m);
			++rxq->stats.errors;
			return;
		}
		rte_memcpy(rte_pktmbuf_mtod(m, void *),
			   data + headroom, dlen);
	}

	m->port = rxq->port_id;
	m->pkt_len = dlen;
	m->data_len = dlen;
	m->packet_type = rte_net_get_ptype(m, NULL,
					   RTE_PTYPE_L2_MASK |
					   RTE_PTYPE_L3_MASK |
					   RTE_PTYPE_L4_MASK);

	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
		m->vlan_tci = info->vlan_info;
		m->ol_flags |= PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;

		/* NDIS always strips the tag, put it back if necessary */
		if (!hv->vlan_strip && rte_vlan_insert(&m)) {
			PMD_DRV_LOG(DEBUG, "vlan insert failed");
			++rxq->stats.errors;
			rte_pktmbuf_free(m);
			return;
		}
	}

	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
		if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK)
			m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;

		if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK
				       | NDIS_RXCSUM_INFO_TCPCS_OK))
			m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
		else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED
					    | NDIS_RXCSUM_INFO_UDPCS_FAILED))
			m->ol_flags |= PKT_RX_L4_CKSUM_BAD;
	}

	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
		m->ol_flags |= PKT_RX_RSS_HASH;
		m->hash.rss = info->hash_value;
	}

	PMD_RX_LOG(DEBUG,
		   "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64,
		   rxq->port_id, rxq->queue_id, rxb->xactid,
		   m->pkt_len, m->packet_type, m->ol_flags);

	++rxq->stats.packets;
	rxq->stats.bytes += m->pkt_len;
	hn_update_packet_stats(&rxq->stats, m);

	if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
		++rxq->stats.ring_full;
		rte_pktmbuf_free(m);
	}
}
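
/*
 * Validate one RNDIS_PACKET_MSG from the host: check the overall
 * length, data offset/length and per-packet-info area before parsing
 * the metadata and handing the frame to hn_rxpkt().
 */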
static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
			     struct hn_rx_bufinfo *rxb,
			     void *data, uint32_t dlen)
{
	unsigned int data_off, data_len, pktinfo_off, pktinfo_len;
	const struct rndis_packet_msg *pkt = data;
	struct hn_rxinfo info = {
		.vlan_info = HN_NDIS_VLAN_INFO_INVALID,
		.csum_info = HN_NDIS_RXCSUM_INFO_INVALID,
		.hash_info = HN_NDIS_HASH_INFO_INVALID,
	};
	int err;

	hn_rndis_dump(pkt);

	if (unlikely(dlen < sizeof(*pkt)))
		goto error;

	if (unlikely(dlen < pkt->len))
		goto error; /* truncated RNDIS from host */

	if (unlikely(pkt->len < pkt->datalen
		     + pkt->oobdatalen + pkt->pktinfolen))
		goto error;

	if (unlikely(pkt->datalen == 0))
		goto error;

	/* Check offsets. */
	if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN))
		goto error;

	if (likely(pkt->pktinfooffset > 0) &&
	    unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN ||
		     (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)))
		goto error;

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	data_len = pkt->datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset);
	pktinfo_len = pkt->pktinfolen;

	if (likely(pktinfo_len > 0)) {
		err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off,
				      pktinfo_len, &info);
		if (err)
			goto error;
	}

	if (unlikely(data_off + data_len > pkt->len))
		goto error;

	if (unlikely(data_len < RTE_ETHER_HDR_LEN))
		goto error;

	hn_rxpkt(rxq, rxb, data, data_off, data_len, &info);
	return;
error:
	++rxq->stats.errors;
}

static void
hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
		 struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
{
	const struct rndis_msghdr *hdr = buf;

	switch (hdr->type) {
	case RNDIS_PACKET_MSG:
		if (dev->data->dev_started)
			hn_rndis_rx_data(rxq, rxb, buf, len);
		break;

	case RNDIS_INDICATE_STATUS_MSG:
		hn_rndis_link_status(dev, buf);
		break;

	case RNDIS_INITIALIZE_CMPLT:
	case RNDIS_QUERY_CMPLT:
	case RNDIS_SET_CMPLT:
		hn_rndis_receive_response(rxq->hv, buf, len);
		break;

	default:
		PMD_DRV_LOG(NOTICE,
			    "unexpected RNDIS message (type %#x len %u)",
			    hdr->type, len);
		break;
	}
}
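
/*
 * Process one VMBUS_CHANPKT_TYPE_RXBUF channel packet.  The packet
 * describes a set of ranges inside the shared receive buffer; each
 * range holds one RNDIS message which is handed to hn_rndis_receive().
 */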
static void
hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
		    struct hn_data *hv,
		    struct hn_rx_queue *rxq,
		    const struct vmbus_chanpkt_hdr *hdr,
		    const void *buf)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr = buf;
	uint32_t rxbuf_sz = hv->rxbuf_res->len;
	char *rxbuf = hv->rxbuf_res->addr;
	unsigned int i, hlen, count;
	struct hn_rx_bufinfo *rxb;

	/* At minimum we need type header */
	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) {
		PMD_RX_LOG(ERR, "invalid receive nvs RNDIS");
		return;
	}

	/* Make sure that this is a RNDIS message. */
	if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) {
		PMD_RX_LOG(ERR, "nvs type %u, not RNDIS",
			   nvs_hdr->type);
		return;
	}

	hlen = vmbus_chanpkt_getlen(hdr->hlen);
	if (unlikely(hlen < sizeof(*pkt))) {
		PMD_RX_LOG(ERR, "invalid rxbuf chanpkt");
		return;
	}

	pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr);
	if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) {
		PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x",
			   pkt->rxbuf_id);
		return;
	}

	count = pkt->rxbuf_cnt;
	if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf,
				     rxbuf[count]))) {
		PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count);
		return;
	}

	if (pkt->hdr.xactid > hv->rxbuf_section_cnt) {
		PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64,
			   pkt->hdr.xactid);
		return;
	}

	/* Setup receive buffer info to allow for callback */
	rxb = hn_rx_buf_init(rxq, pkt);

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		unsigned int ofs, len;

		ofs = pkt->rxbuf[i].ofs;
		len = pkt->rxbuf[i].len;

		if (unlikely(ofs + len > rxbuf_sz)) {
			PMD_RX_LOG(ERR,
				   "%uth RNDIS msg overflow ofs %u, len %u",
				   i, ofs, len);
			continue;
		}

		if (unlikely(len == 0)) {
			PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len);
			continue;
		}

		hn_rndis_receive(dev, rxq, rxb,
				 rxbuf + ofs, len);
	}

	/* Send ACK now if the external mbuf path was not used */
	hn_rx_buf_release(rxb);
}

/*
 * Called when NVS inband events are received.
 * Send a two-part message (port_id plus the NVS message) over the pipe
 * to the netvsc-vf-event control thread.
 */
static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
				 const struct vmbus_chanpkt_hdr *pkt,
				 const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_TXTBL_NOTE:
		/* Transmit indirection table has locking problems
		 * in DPDK and is therefore not implemented
		 */
		PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
		break;

	case NVS_TYPE_VFASSOC_NOTE:
		hn_nvs_handle_vfassoc(dev, pkt, data);
		break;

	default:
		PMD_DRV_LOG(INFO,
			    "got notify, nvs type %u", hdr->type);
	}
}

struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
				      uint16_t queue_id,
				      unsigned int socket_id)
{
	struct hn_rx_queue *rxq;

	rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq)
		return NULL;

	rxq->hv = hv;
	rxq->chan = hv->channels[queue_id];
	rte_spinlock_init(&rxq->ring_lock);
	rxq->port_id = hv->port_id;
	rxq->queue_id = queue_id;
	rxq->event_sz = HN_RXQ_EVENT_DEFAULT;
	rxq->event_buf = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT,
					   RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq->event_buf) {
		rte_free(rxq);
		return NULL;
	}

	return rxq;
}
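
/*
 * Set up a receive queue.  Queue 0 reuses the pre-allocated primary
 * queue; other queues are allocated here.
 */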
int
hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_rxconf *rx_conf,
		      struct rte_mempool *mp)
{
	struct hn_data *hv = dev->data->dev_private;
	char ring_name[RTE_RING_NAMESIZE];
	struct hn_rx_queue *rxq;
	unsigned int count;
	int error = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	if (queue_idx == 0) {
		rxq = hv->primary;
	} else {
		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
		if (!rxq)
			return -ENOMEM;
	}

	rxq->mb_pool = mp;
	count = rte_mempool_avail_count(mp) / dev->data->nb_rx_queues;
	if (nb_desc == 0 || nb_desc > count)
		nb_desc = count;

	/*
	 * Staging ring from receive event logic to rx_pkts.
	 * rx_pkts assumes the caller handles multi-thread issues;
	 * the event logic has its own locking.
	 */
	snprintf(ring_name, sizeof(ring_name),
		 "hn_rx_%u_%u", dev->data->port_id, queue_idx);
	rxq->rx_ring = rte_ring_create(ring_name,
				       rte_align32pow2(nb_desc),
				       socket_id, 0);
	if (!rxq->rx_ring)
		goto fail;

	error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
				     socket_id, rx_conf, mp);
	if (error)
		goto fail;

	dev->data->rx_queues[queue_idx] = rxq;
	return 0;

fail:
	rte_ring_free(rxq->rx_ring);
	rte_free(rxq->event_buf);
	rte_free(rxq);
	return error;
}

static void
hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary)
{
	if (!rxq)
		return;

	rte_ring_free(rxq->rx_ring);
	rxq->rx_ring = NULL;
	rxq->mb_pool = NULL;

	hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);

	/* Keep primary queue to allow for control operations */
	if (keep_primary && rxq == rxq->hv->primary)
		return;

	rte_free(rxq->event_buf);
	rte_free(rxq);
}

void
hn_dev_rx_queue_release(void *arg)
{
	struct hn_rx_queue *rxq = arg;

	PMD_INIT_FUNC_TRACE();

	hn_rx_queue_free(rxq, true);
}
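
/*
 * ethdev tx_done_cleanup hook: drain completion events from the channel
 * so that finished transmit descriptors are returned to the per-port pool.
 */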
int
hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
{
	struct hn_tx_queue *txq = arg;

	return hn_process_events(txq->hv, txq->queue_id, free_cnt);
}

/*
 * Process pending events on the channel.
 * Called from both Rx queue poll and Tx cleanup.
 */
uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
			   uint32_t tx_limit)
{
	struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
	struct hn_rx_queue *rxq;
	uint32_t bytes_read = 0;
	uint32_t tx_done = 0;
	int ret = 0;

	rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];

	/* If no pending data then nothing to do */
	if (rte_vmbus_chan_rx_empty(rxq->chan))
		return 0;

	/*
	 * The channel is shared between the Rx and Tx queues, so a lock
	 * is needed because DPDK does not guarantee that Rx and Tx run
	 * on the same CPU.
	 */
	if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
		return 0;

	for (;;) {
		const struct vmbus_chanpkt_hdr *pkt;
		uint32_t len = rxq->event_sz;
		const void *data;

retry:
		ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
		if (ret == -EAGAIN)
			break;	/* ring is empty */

		if (unlikely(ret == -ENOBUFS)) {
			/* event buffer not large enough to read ring */

			PMD_DRV_LOG(DEBUG,
				    "event buffer expansion (need %u)", len);
			rxq->event_sz = len + len / 4;
			rxq->event_buf = rte_realloc(rxq->event_buf, rxq->event_sz,
						     RTE_CACHE_LINE_SIZE);
			if (rxq->event_buf)
				goto retry;
			/* out of memory, no more events now */
			rxq->event_sz = 0;
			break;
		}

		if (unlikely(ret <= 0)) {
			/* This indicates a failure to communicate (or worse) */
			rte_exit(EXIT_FAILURE,
				 "vmbus ring buffer error: %d", ret);
		}

		bytes_read += ret;
		pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
		data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen);

		switch (pkt->type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			++tx_done;
			hn_nvs_handle_comp(dev, queue_id, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(dev, pkt, data);
			break;

		default:
			PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type);
			break;
		}

		if (tx_limit && tx_done >= tx_limit)
			break;

		if (rxq->rx_ring && rte_ring_full(rxq->rx_ring))
			break;
	}

	if (bytes_read > 0)
		rte_vmbus_chan_signal_read(rxq->chan, bytes_read);

	rte_spinlock_unlock(&rxq->ring_lock);

	return tx_done;
}
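
/*
 * Copy the mbuf data of an aggregated packet into the chimney send
 * buffer, right after the RNDIS header that hn_encap() already wrote
 * there, and update the per-descriptor byte and packet counts.
 */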
static void hn_append_to_chim(struct hn_tx_queue *txq,
			      struct rndis_packet_msg *pkt,
			      const struct rte_mbuf *m)
{
	struct hn_txdesc *txd = txq->agg_txd;
	uint8_t *buf = (uint8_t *)pkt;
	unsigned int data_offs;

	hn_rndis_dump(pkt);

	data_offs = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	txd->chim_size += pkt->len;
	txd->data_size += m->pkt_len;
	++txd->packets;
	hn_update_packet_stats(&txq->stats, m);

	for (; m; m = m->next) {
		uint16_t len = rte_pktmbuf_data_len(m);

		rte_memcpy(buf + data_offs,
			   rte_pktmbuf_mtod(m, const char *), len);
		data_offs += len;
	}
}

/*
 * Send pending aggregated data in the chimney buffer (if any).
 * Returns an error if the send was unsuccessful because the channel
 * ring buffer was full.
 */
static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
{
	struct hn_txdesc *txd = txq->agg_txd;
	struct hn_nvs_rndis rndis;
	int ret;

	if (!txd)
		return 0;

	rndis = (struct hn_nvs_rndis) {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_idx = txd->chim_index,
		.chim_sz = txd->chim_size,
	};

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index, txd->chim_size);

	ret = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC,
			  &rndis, sizeof(rndis), (uintptr_t)txd, need_sig);

	if (likely(ret == 0))
		hn_reset_txagg(txq);
	else
		PMD_TX_LOG(NOTICE, "port %u:%u send failed: %d",
			   txq->port_id, txq->queue_id, ret);

	return ret;
}

static struct hn_txdesc *hn_new_txd(struct hn_data *hv,
				    struct hn_tx_queue *txq)
{
	struct hn_txdesc *txd;

	if (rte_mempool_get(hv->tx_pool, (void **)&txd)) {
		++txq->stats.ring_full;
		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
		return NULL;
	}

	txd->m = NULL;
	txd->queue_id = txq->queue_id;
	txd->packets = 0;
	txd->data_size = 0;
	txd->chim_size = 0;

	return txd;
}

static void *
hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
{
	struct hn_txdesc *agg_txd = txq->agg_txd;
	struct rndis_packet_msg *pkt;
	void *chim;

	if (agg_txd) {
		unsigned int padding, olen;

		/*
		 * Update the previous RNDIS packet's total length;
		 * it can be increased due to the mandatory alignment
		 * padding for this RNDIS packet.  And update the
		 * aggregating txdesc's chimney sending buffer size
		 * accordingly.
		 *
		 * Zero out the padding, as required by the RNDIS spec.
		 */
		pkt = txq->agg_prevpkt;
		olen = pkt->len;
		padding = RTE_ALIGN(olen, txq->agg_align) - olen;
		if (padding > 0) {
			agg_txd->chim_size += padding;
			pkt->len += padding;
			memset((uint8_t *)pkt + olen, 0, padding);
		}

		chim = (uint8_t *)pkt + pkt->len;

		txq->agg_pktleft--;
		txq->agg_szleft -= pktsize;
		if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
			/*
			 * Probably can't aggregate more packets,
			 * flush this aggregating txdesc proactively.
			 */
			txq->agg_pktleft = 0;
		}
	} else {
		agg_txd = hn_new_txd(hv, txq);
		if (!agg_txd)
			return NULL;

		chim = (uint8_t *)hv->chim_res->addr
			+ agg_txd->chim_index * hv->chim_szmax;

		txq->agg_txd = agg_txd;
		txq->agg_pktleft = txq->agg_pktmax - 1;
		txq->agg_szleft = txq->agg_szmax - pktsize;
	}
	txq->agg_prevpkt = chim;

	return chim;
}
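
/*
 * Reserve one per-packet-info element at the tail of the RNDIS packet
 * message and return a pointer to its data area for the caller to fill.
 */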
static inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt,
			uint32_t pi_dlen, uint32_t pi_type)
{
	const uint32_t pi_size = RNDIS_PKTINFO_SIZE(pi_dlen);
	struct rndis_pktinfo *pi;

	/*
	 * Per-packet-info does not move; it only grows.
	 *
	 * NOTE:
	 * pktinfooffset in this phase counts from the beginning
	 * of rndis_packet_msg.
	 */
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + hn_rndis_pktlen(pkt));

	pkt->pktinfolen += pi_size;

	pi->size = pi_size;
	pi->type = pi_type;
	pi->offset = RNDIS_PKTINFO_OFFSET;

	return pi->data;
}

/* Put RNDIS header and packet info on packet */
static void hn_encap(struct rndis_packet_msg *pkt,
		     uint16_t queue_id,
		     const struct rte_mbuf *m)
{
	unsigned int hlen = m->l2_len + m->l3_len;
	uint32_t *pi_data;
	uint32_t pkt_hlen;

	pkt->type = RNDIS_PACKET_MSG;
	pkt->len = m->pkt_len;
	pkt->dataoffset = 0;
	pkt->datalen = m->pkt_len;
	pkt->oobdataoffset = 0;
	pkt->oobdatalen = 0;
	pkt->oobdataelements = 0;
	pkt->pktinfooffset = sizeof(*pkt);
	pkt->pktinfolen = 0;
	pkt->vchandle = 0;
	pkt->reserved = 0;

	/*
	 * Set the hash value for this packet to the queue_id so that
	 * the TX-done event for this packet arrives on the right channel.
	 */
	pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE,
					  NDIS_PKTINFO_TYPE_HASHVAL);
	*pi_data = queue_id;

	if (m->ol_flags & PKT_TX_VLAN_PKT) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_VLAN);
		*pi_data = m->vlan_tci;
	}

	if (m->ol_flags & PKT_TX_TCP_SEG) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_LSO);

		if (m->ol_flags & PKT_TX_IPV6) {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen,
							   m->tso_segsz);
		} else {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen,
							   m->tso_segsz);
		}
	} else if (m->ol_flags &
		   (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM)) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_CSUM);
		*pi_data = 0;

		if (m->ol_flags & PKT_TX_IPV6)
			*pi_data |= NDIS_TXCSUM_INFO_IPV6;
		if (m->ol_flags & PKT_TX_IPV4) {
			*pi_data |= NDIS_TXCSUM_INFO_IPV4;

			if (m->ol_flags & PKT_TX_IP_CKSUM)
				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
		}

		if (m->ol_flags & PKT_TX_TCP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen);
		else if (m->ol_flags & PKT_TX_UDP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen);
	}

	pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen;
	/* Fixup RNDIS packet message total length */
	pkt->len += pkt_hlen;

	/* Convert RNDIS packet message offsets */
	pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
	pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset);
}

/* How many scatter-gather list elements are needed */
static unsigned int hn_get_slots(const struct rte_mbuf *m)
{
	unsigned int slots = 1; /* for RNDIS header */

	while (m) {
		unsigned int size = rte_pktmbuf_data_len(m);
		unsigned int offs = rte_mbuf_data_iova(m) & PAGE_MASK;

		slots += (offs + size + PAGE_SIZE - 1) / PAGE_SIZE;
		m = m->next;
	}

	return slots;
}
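
/*
 * Example (assuming PAGE_SIZE is 4096): a 6000 byte segment whose data
 * starts 1000 bytes into a page needs (1000 + 6000 + 4095) / 4096 = 2
 * slots; hn_fill_sg() below splits that same segment into the same
 * number of page-sized scatter-gather entries.
 */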
/* Build scatter-gather list from chained mbuf */
static unsigned int hn_fill_sg(struct vmbus_gpa *sg,
			       const struct rte_mbuf *m)
{
	unsigned int segs = 0;

	while (m) {
		rte_iova_t addr = rte_mbuf_data_iova(m);
		unsigned int page = addr / PAGE_SIZE;
		unsigned int offset = addr & PAGE_MASK;
		unsigned int len = rte_pktmbuf_data_len(m);

		while (len > 0) {
			unsigned int bytes = RTE_MIN(len, PAGE_SIZE - offset);

			sg[segs].page = page;
			sg[segs].ofs = offset;
			sg[segs].len = bytes;
			segs++;

			++page;
			offset = 0;
			len -= bytes;
		}
		m = m->next;
	}

	return segs;
}

/* Transmit directly from mbuf */
static int hn_xmit_sg(struct hn_tx_queue *txq,
		      const struct hn_txdesc *txd, const struct rte_mbuf *m,
		      bool *need_sig)
{
	struct vmbus_gpa sg[hn_get_slots(m)];
	struct hn_nvs_rndis nvs_rndis = {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_sz = txd->chim_size,
	};
	rte_iova_t addr;
	unsigned int segs;

	/* attach aggregation data if present */
	if (txd->chim_size > 0)
		nvs_rndis.chim_idx = txd->chim_index;
	else
		nvs_rndis.chim_idx = NVS_CHIM_IDX_INVALID;

	hn_rndis_dump(txd->rndis_pkt);

	/* pass IOVA of rndis header in first segment */
	addr = rte_malloc_virt2iova(txd->rndis_pkt);
	if (unlikely(addr == RTE_BAD_IOVA)) {
		PMD_DRV_LOG(ERR, "RNDIS transmit can not get iova");
		return -EINVAL;
	}

	sg[0].page = addr / PAGE_SIZE;
	sg[0].ofs = addr & PAGE_MASK;
	sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt));
	segs = 1;

	hn_update_packet_stats(&txq->stats, m);

	segs += hn_fill_sg(sg + 1, m);

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index,
		   segs, nvs_rndis.chim_sz);

	return hn_nvs_send_sglist(txq->chan, sg, segs,
				  &nvs_rndis, sizeof(nvs_rndis),
				  (uintptr_t)txd, need_sig);
}
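
/*
 * Transmit burst handler.  Small packets are encapsulated and copied
 * into the chimney send buffer for aggregation; larger packets are sent
 * directly from the mbuf via a scatter-gather list.  If a VF device is
 * attached and started, the burst is forwarded to it instead.
 */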
uint16_t
hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct hn_tx_queue *txq = ptxq;
	uint16_t queue_id = txq->queue_id;
	struct hn_data *hv = txq->hv;
	struct rte_eth_dev *vf_dev;
	bool need_sig = false;
	uint16_t nb_tx;
	int ret;

	if (unlikely(hv->closed))
		return 0;

	/* Transmit over VF if present and up */
	vf_dev = hn_get_vf_dev(hv);

	if (vf_dev && vf_dev->data->dev_started) {
		void *sub_q = vf_dev->data->tx_queues[queue_id];

		return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
	}

	if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
		hn_process_events(hv, txq->queue_id, 0);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *m = tx_pkts[nb_tx];
		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
		struct rndis_packet_msg *pkt;

		/* For small packets aggregate them in chimney buffer */
		if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) {
			/* If this packet will not fit, then flush */
			if (txq->agg_pktleft == 0 ||
			    RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) {
				if (hn_flush_txagg(txq, &need_sig))
					goto fail;
			}

			pkt = hn_try_txagg(hv, txq, pkt_size);
			if (unlikely(!pkt))
				break;

			hn_encap(pkt, queue_id, m);
			hn_append_to_chim(txq, pkt, m);

			rte_pktmbuf_free(m);

			/* if buffer is full, flush */
			if (txq->agg_pktleft == 0 &&
			    hn_flush_txagg(txq, &need_sig))
				goto fail;
		} else {
			struct hn_txdesc *txd;

			/* can send chimney data and large packet at once */
			txd = txq->agg_txd;
			if (txd) {
				hn_reset_txagg(txq);
			} else {
				txd = hn_new_txd(hv, txq);
				if (unlikely(!txd))
					break;
			}

			pkt = txd->rndis_pkt;
			txd->m = m;
			txd->data_size += m->pkt_len;
			++txd->packets;

			hn_encap(pkt, queue_id, m);

			ret = hn_xmit_sg(txq, txd, m, &need_sig);
			if (unlikely(ret != 0)) {
				PMD_TX_LOG(NOTICE, "sg send failed: %d", ret);
				++txq->stats.errors;
				rte_mempool_put(hv->tx_pool, txd);
				goto fail;
			}
		}
	}

	/* If a partial buffer is left, then try to send it.
	 * If that fails, it will be reused on the next send.
	 */
	hn_flush_txagg(txq, &need_sig);

fail:
	if (need_sig)
		rte_vmbus_chan_signal_tx(txq->chan);

	return nb_tx;
}

static uint16_t
hn_recv_vf(uint16_t vf_port, const struct hn_rx_queue *rxq,
	   struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t i, n;

	if (unlikely(nb_pkts == 0))
		return 0;

	n = rte_eth_rx_burst(vf_port, rxq->queue_id, rx_pkts, nb_pkts);

	/* relabel the received mbufs */
	for (i = 0; i < n; i++)
		rx_pkts[i]->port = rxq->port_id;

	return n;
}

uint16_t
hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct hn_rx_queue *rxq = prxq;
	struct hn_data *hv = rxq->hv;
	struct rte_eth_dev *vf_dev;
	uint16_t nb_rcv;

	if (unlikely(hv->closed))
		return 0;

	/* Receive from VF if present and up */
	vf_dev = hn_get_vf_dev(hv);

	/* Check for new completions */
	if (likely(rte_ring_count(rxq->rx_ring) < nb_pkts))
		hn_process_events(hv, rxq->queue_id, 0);

	/* Always check the vmbus path for multicast and new flows */
	nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
					   (void **)rx_pkts, nb_pkts, NULL);

	/* If VF is available, check that as well */
	if (vf_dev && vf_dev->data->dev_started)
		nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq,
				     rx_pkts + nb_rcv, nb_pkts - nb_rcv);

	return nb_rcv;
}

void
hn_dev_free_queues(struct rte_eth_dev *dev)
{
	unsigned int i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct hn_rx_queue *rxq = dev->data->rx_queues[i];

		hn_rx_queue_free(rxq, false);
		dev->data->rx_queues[i] = NULL;
	}
	dev->data->nb_rx_queues = 0;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		hn_dev_tx_queue_release(dev->data->tx_queues[i]);
		dev->data->tx_queues[i] = NULL;
	}
	dev->data->nb_tx_queues = 0;
}