/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2018 Microsoft Corporation
 * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
 * All rights reserved.
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <strings.h>
#include <malloc.h>

#include <rte_ethdev.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_bitmap.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_dev.h>
#include <rte_net.h>
#include <rte_bus_vmbus.h>
#include <rte_spinlock.h>

#include "hn_logs.h"
#include "hn_var.h"
#include "hn_rndis.h"
#include "hn_nvs.h"
#include "ndis.h"

#define HN_NVS_SEND_MSG_SIZE \
	(sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis))

#define HN_TXD_CACHE_SIZE	32 /* per cpu tx_descriptor pool cache */
#define HN_TXCOPY_THRESHOLD	512

#define HN_RXCOPY_THRESHOLD	256
#define HN_RXQ_EVENT_DEFAULT	2048

struct hn_rxinfo {
	uint32_t	vlan_info;
	uint32_t	csum_info;
	uint32_t	hash_info;
	uint32_t	hash_value;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
	(HN_RXINFO_VLAN |		\
	 HN_RXINFO_CSUM |		\
	 HN_RXINFO_HASHINF |		\
	 HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0

/*
 * Per-transmit bookkeeping.
 * A slot in the transmit ring (chim_index) is reserved for each transmit.
 *
 * There are two types of transmit:
 *   - buffered transmit where the chimney buffer is used and the RNDIS
 *     header is in the buffer. mbuf == NULL for this case.
 *
 *   - direct transmit where the RNDIS header is in rndis_pkt and
 *     the mbuf is freed after transmit.
 *
 * Descriptors come from the per-queue pool which is used
 * to limit the number of outstanding requests per device.
 */
struct hn_txdesc {
	struct rte_mbuf *m;

	uint16_t	queue_id;
	uint32_t	chim_index;
	uint32_t	chim_size;
	uint32_t	data_size;
	uint32_t	packets;

	struct rndis_packet_msg *rndis_pkt;
};

#define HN_RNDIS_PKT_LEN				\
	(sizeof(struct rndis_packet_msg) +		\
	 RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))

#define HN_RNDIS_PKT_ALIGNED	RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE)

/* Minimum space required for a packet */
#define HN_PKTSIZE_MIN(align) \
	RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)

#define DEFAULT_TX_FREE_THRESH 32
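/*
 * Packet size histogram used by hn_update_packet_stats() below:
 *   size_bins[0]: runts (< 64 bytes)    size_bins[4]: 256-511 bytes
 *   size_bins[1]: exactly 64 bytes      size_bins[5]: 512-1023 bytes
 *   size_bins[2]: 65-127 bytes          size_bins[6]: 1024-1518 bytes
 *   size_bins[3]: 128-255 bytes         size_bins[7]: 1519 bytes and up
 * For the 65-1023 byte range the bin index is derived from the position
 * of the most significant bit: (32 - clz(size)) - 5.
 */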
static void
hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
{
	uint32_t s = m->pkt_len;
	const struct rte_ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count leading zeros and offset into the correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(m, const struct rte_ether_addr *);
	if (rte_is_multicast_ether_addr(ea)) {
		if (rte_is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt)
{
	return pkt->pktinfooffset + pkt->pktinfolen;
}

static inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{
	return ofs - offsetof(struct rndis_packet_msg, dataoffset);
}

static void hn_txd_init(struct rte_mempool *mp __rte_unused,
			void *opaque, void *obj, unsigned int idx)
{
	struct hn_tx_queue *txq = opaque;
	struct hn_txdesc *txd = obj;

	memset(txd, 0, sizeof(*txd));

	txd->queue_id = txq->queue_id;
	txd->chim_index = NVS_CHIM_IDX_INVALID;
	txd->rndis_pkt = (struct rndis_packet_msg *)((char *)txq->tx_rndis
		+ idx * HN_RNDIS_PKT_ALIGNED);
}

int
hn_chim_init(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;
	uint32_t i, chim_bmp_size;

	rte_spinlock_init(&hv->chim_lock);
	chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt);
	hv->chim_bmem = rte_zmalloc("hn_chim_bitmap", chim_bmp_size,
				    RTE_CACHE_LINE_SIZE);
	if (hv->chim_bmem == NULL) {
		PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u",
			     chim_bmp_size);
		return -1;
	}

	hv->chim_bmap = rte_bitmap_init(hv->chim_cnt,
					hv->chim_bmem, chim_bmp_size);
	if (hv->chim_bmap == NULL) {
		PMD_INIT_LOG(ERR, "failed to init chim bitmap");
		return -1;
	}

	for (i = 0; i < hv->chim_cnt; i++)
		rte_bitmap_set(hv->chim_bmap, i);

	return 0;
}

void
hn_chim_uninit(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;

	rte_bitmap_free(hv->chim_bmap);
	rte_free(hv->chim_bmem);
	hv->chim_bmem = NULL;
}
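/*
 * Chimney (copy) send buffer management.  The host-provided send buffer
 * is divided into hv->chim_cnt sections whose free/used state is tracked
 * by the bitmap initialized above.  hn_chim_alloc() claims one section
 * under chim_lock and returns NVS_CHIM_IDX_INVALID when none are free;
 * hn_chim_free() releases a section once the host acknowledges the send
 * (see hn_nvs_send_completed).
 */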
static uint32_t hn_chim_alloc(struct hn_data *hv)
{
	uint32_t index = NVS_CHIM_IDX_INVALID;
	uint64_t slab = 0;

	rte_spinlock_lock(&hv->chim_lock);
	if (rte_bitmap_scan(hv->chim_bmap, &index, &slab)) {
		index += rte_bsf64(slab);
		rte_bitmap_clear(hv->chim_bmap, index);
	}
	rte_spinlock_unlock(&hv->chim_lock);

	return index;
}

static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx)
{
	if (chim_idx >= hv->chim_cnt) {
		PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx);
	} else {
		rte_spinlock_lock(&hv->chim_lock);
		rte_bitmap_set(hv->chim_bmap, chim_idx);
		rte_spinlock_unlock(&hv->chim_lock);
	}
}

static void hn_reset_txagg(struct hn_tx_queue *txq)
{
	txq->agg_szleft = txq->agg_szmax;
	txq->agg_pktleft = txq->agg_pktmax;
	txq->agg_txd = NULL;
	txq->agg_prevpkt = NULL;
}

int
hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_txconf *tx_conf)
{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_tx_queue *txq;
	char name[RTE_MEMPOOL_NAMESIZE];
	uint32_t tx_free_thresh;
	int err = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh = RTE_MIN(nb_desc / 4,
					 DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh + 3 >= nb_desc) {
		PMD_INIT_LOG(ERR,
			     "tx_free_thresh must be less than the number of TX entries minus 3(%u)."
			     " (tx_free_thresh=%u port=%u queue=%u)\n",
			     nb_desc - 3,
			     tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE,
				 socket_id);
	if (!txq)
		return -ENOMEM;

	txq->hv = hv;
	txq->chan = hv->channels[queue_idx];
	txq->port_id = dev->data->port_id;
	txq->queue_id = queue_idx;
	txq->free_thresh = tx_free_thresh;

	snprintf(name, sizeof(name),
		 "hn_txd_%u_%u", dev->data->port_id, queue_idx);

	PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu",
		     name, nb_desc, sizeof(struct hn_txdesc));

	txq->tx_rndis_mz = rte_memzone_reserve_aligned(name,
			nb_desc * HN_RNDIS_PKT_ALIGNED, rte_socket_id(),
			RTE_MEMZONE_IOVA_CONTIG, HN_RNDIS_PKT_ALIGNED);
	if (!txq->tx_rndis_mz) {
		err = -rte_errno;
		goto error;
	}
	txq->tx_rndis = txq->tx_rndis_mz->addr;
	txq->tx_rndis_iova = txq->tx_rndis_mz->iova;

	txq->txdesc_pool = rte_mempool_create(name, nb_desc,
					      sizeof(struct hn_txdesc),
					      0, 0, NULL, NULL,
					      hn_txd_init, txq,
					      dev->device->numa_node, 0);
	if (txq->txdesc_pool == NULL) {
		PMD_DRV_LOG(ERR,
			    "mempool %s create failed: %d", name, rte_errno);
		goto error;
	}

	txq->agg_szmax  = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
	txq->agg_pktmax = hv->rndis_agg_pkts;
	txq->agg_align  = hv->rndis_agg_align;

	hn_reset_txagg(txq);

	err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
				   socket_id, tx_conf);
	if (err == 0) {
		dev->data->tx_queues[queue_idx] = txq;
		return 0;
	}

error:
	if (txq->txdesc_pool)
		rte_mempool_free(txq->txdesc_pool);
	rte_memzone_free(txq->tx_rndis_mz);
	rte_free(txq);
	return err;
}

void
hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_txq_info *qinfo)
{
	struct hn_tx_queue *txq = dev->data->tx_queues[queue_id];

	qinfo->nb_desc = txq->txdesc_pool->size;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}
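/*
 * Transmit descriptors are drawn from the per-queue mempool created in
 * hn_dev_tx_queue_setup(); running out of them is accounted as
 * stats.ring_full and simply ends the current transmit burst.
 */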
static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq)
{
	struct hn_txdesc *txd;

	if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) {
		++txq->stats.ring_full;
		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
		return NULL;
	}

	txd->m = NULL;
	txd->packets = 0;
	txd->data_size = 0;
	txd->chim_size = 0;

	return txd;
}

static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd)
{
	rte_mempool_put(txq->txdesc_pool, txd);
}

void
hn_dev_tx_queue_release(void *arg)
{
	struct hn_tx_queue *txq = arg;

	PMD_INIT_FUNC_TRACE();

	if (!txq)
		return;

	if (txq->txdesc_pool)
		rte_mempool_free(txq->txdesc_pool);

	rte_memzone_free(txq->tx_rndis_mz);
	rte_free(txq);
}

/*
 * Check the status of a Tx descriptor in the queue.
 *
 * returns:
 *  - -EINVAL              - offset outside of tx_descriptor pool.
 *  - RTE_ETH_TX_DESC_FULL - descriptor is not acknowledged by host.
 *  - RTE_ETH_TX_DESC_DONE - descriptor is available.
 */
int hn_dev_tx_descriptor_status(void *arg, uint16_t offset)
{
	const struct hn_tx_queue *txq = arg;

	hn_process_events(txq->hv, txq->queue_id, 0);

	if (offset >= rte_mempool_avail_count(txq->txdesc_pool))
		return -EINVAL;

	if (offset < rte_mempool_in_use_count(txq->txdesc_pool))
		return RTE_ETH_TX_DESC_FULL;
	else
		return RTE_ETH_TX_DESC_DONE;
}

static void
hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_txdesc *txd = (struct hn_txdesc *)xactid;
	struct hn_tx_queue *txq;

	/* Control packets are sent with xactid == 0 */
	if (!txd)
		return;

	txq = dev->data->tx_queues[queue_id];
	if (likely(ack->status == NVS_STATUS_OK)) {
		PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u",
			   txq->port_id, txq->queue_id, txd->chim_index,
			   txd->packets, txd->data_size);
		txq->stats.bytes += txd->data_size;
		txq->stats.packets += txd->packets;
	} else {
		PMD_DRV_LOG(NOTICE, "port %u:%u complete tx %u failed status %u",
			    txq->port_id, txq->queue_id, txd->chim_index, ack->status);
		++txq->stats.errors;
	}

	if (txd->chim_index != NVS_CHIM_IDX_INVALID) {
		hn_chim_free(hv, txd->chim_index);
		txd->chim_index = NVS_CHIM_IDX_INVALID;
	}

	rte_pktmbuf_free(txd->m);
	hn_txd_put(txq, txd);
}

/* Handle transmit completion events */
static void
hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id,
		   const struct vmbus_chanpkt_hdr *pkt,
		   const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_RNDIS_ACK:
		hn_nvs_send_completed(dev, queue_id, pkt->xactid, data);
		break;

	default:
		PMD_DRV_LOG(NOTICE, "unexpected send completion type %u",
			    hdr->type);
	}
}
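/*
 * Received RNDIS packets may be followed by a list of per-packet metadata
 * elements (struct rndis_pktinfo).  Each element records its total size,
 * a type, and the offset of its payload within the element; the elements
 * are walked back to back until VLAN, checksum and hash information have
 * all been found or the metadata area is exhausted.
 */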
/* Parse per-packet info (metadata) */
static int
hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen,
		struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (unlikely(info_dlen < sizeof(*pi)))
			return -EINVAL;

		if (unlikely(info_dlen < pi->size))
			return -EINVAL;
		info_dlen -= pi->size;

		if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return -EINVAL;
		if (unlikely(pi->size < pi->offset))
			return -EINVAL;

		dlen = pi->size - pi->offset;
		data = pi->data;

		switch (pi->type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (unlikely(dlen < NDIS_VLAN_INFO_SIZE))
				return -EINVAL;
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE))
				return -EINVAL;
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case NDIS_PKTINFO_TYPE_HASHVAL:
			if (unlikely(dlen < NDIS_HASH_VALUE_SIZE))
				return -EINVAL;
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case NDIS_PKTINFO_TYPE_HASHINF:
			if (unlikely(dlen < NDIS_HASH_INFO_SIZE))
				return -EINVAL;
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL)
			break; /* All found; done */
next:
		pi = (const struct rndis_pktinfo *)
			((const uint8_t *)pi + pi->size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if (!(mask & HN_RXINFO_HASHVAL))
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return 0;
}

static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque)
{
	struct hn_rx_bufinfo *rxb = opaque;
	struct hn_rx_queue *rxq = rxb->rxq;

	rte_atomic32_dec(&rxq->rxbuf_outstanding);
	hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

static struct hn_rx_bufinfo *hn_rx_buf_init(struct hn_rx_queue *rxq,
					    const struct vmbus_chanpkt_rxbuf *pkt)
{
	struct hn_rx_bufinfo *rxb;

	rxb = rxq->rxbuf_info + pkt->hdr.xactid;
	rxb->chan = rxq->chan;
	rxb->xactid = pkt->hdr.xactid;
	rxb->rxq = rxq;

	rxb->shinfo.free_cb = hn_rx_buf_free_cb;
	rxb->shinfo.fcb_opaque = rxb;
	rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1);
	return rxb;
}

static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
		     uint8_t *data, unsigned int headroom, unsigned int dlen,
		     const struct hn_rxinfo *info)
{
	struct hn_data *hv = rxq->hv;
	struct rte_mbuf *m;
	bool use_extbuf = false;

	m = rte_pktmbuf_alloc(rxq->mb_pool);
	if (unlikely(!m)) {
		struct rte_eth_dev *dev =
			&rte_eth_devices[rxq->port_id];

		dev->data->rx_mbuf_alloc_failed++;
		return;
	}

	/*
	 * For large packets, avoid copy if possible but need to keep
	 * some space available in receive area for later packets.
	 */
	if (dlen >= HN_RXCOPY_THRESHOLD &&
	    (uint32_t)rte_atomic32_read(&rxq->rxbuf_outstanding) <
			hv->rxbuf_section_cnt / 2) {
		struct rte_mbuf_ext_shared_info *shinfo;
		const void *rxbuf;
		rte_iova_t iova;

		/*
		 * Build an external mbuf that points to receive area.
		 * Use refcount to handle multiple packets in same
		 * receive buffer section.
		 */
		rxbuf = hv->rxbuf_res->addr;
		iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf);
		shinfo = &rxb->shinfo;

		/* shinfo is already set to 1 by the caller */
		if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 2)
			rte_atomic32_inc(&rxq->rxbuf_outstanding);

		rte_pktmbuf_attach_extbuf(m, data, iova,
					  dlen + headroom, shinfo);
		m->data_off = headroom;
		use_extbuf = true;
	} else {
		/* Mbufs in pool must be large enough to hold small packets */
		if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) {
			rte_pktmbuf_free_seg(m);
			++rxq->stats.errors;
			return;
		}
		rte_memcpy(rte_pktmbuf_mtod(m, void *),
			   data + headroom, dlen);
	}

	m->port = rxq->port_id;
	m->pkt_len = dlen;
	m->data_len = dlen;
	m->packet_type = rte_net_get_ptype(m, NULL,
					   RTE_PTYPE_L2_MASK |
					   RTE_PTYPE_L3_MASK |
					   RTE_PTYPE_L4_MASK);

	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
		m->vlan_tci = info->vlan_info;
		m->ol_flags |= PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;

		/* NDIS always strips tag, put it back if necessary */
		if (!hv->vlan_strip && rte_vlan_insert(&m)) {
			PMD_DRV_LOG(DEBUG, "vlan insert failed");
			++rxq->stats.errors;
			if (use_extbuf)
				rte_pktmbuf_detach_extbuf(m);
			rte_pktmbuf_free(m);
			return;
		}
	}

	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
		if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK)
			m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;

		if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK
				       | NDIS_RXCSUM_INFO_TCPCS_OK))
			m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
		else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED
					    | NDIS_RXCSUM_INFO_UDPCS_FAILED))
			m->ol_flags |= PKT_RX_L4_CKSUM_BAD;
	}

	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
		m->ol_flags |= PKT_RX_RSS_HASH;
		m->hash.rss = info->hash_value;
	}

	PMD_RX_LOG(DEBUG,
		   "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64,
		   rxq->port_id, rxq->queue_id, rxb->xactid,
		   m->pkt_len, m->packet_type, m->ol_flags);

	++rxq->stats.packets;
	rxq->stats.bytes += m->pkt_len;
	hn_update_packet_stats(&rxq->stats, m);

	if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
		++rxq->stats.ring_full;
		PMD_RX_LOG(DEBUG, "rx ring full");
		if (use_extbuf)
			rte_pktmbuf_detach_extbuf(m);
		rte_pktmbuf_free(m);
	}
}

static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
			     struct hn_rx_bufinfo *rxb,
			     void *data, uint32_t dlen)
{
	unsigned int data_off, data_len;
	unsigned int pktinfo_off, pktinfo_len;
	const struct rndis_packet_msg *pkt = data;
	struct hn_rxinfo info = {
		.vlan_info = HN_NDIS_VLAN_INFO_INVALID,
		.csum_info = HN_NDIS_RXCSUM_INFO_INVALID,
		.hash_info = HN_NDIS_HASH_INFO_INVALID,
	};
	int err;

	hn_rndis_dump(pkt);

	if (unlikely(dlen < sizeof(*pkt)))
		goto error;

	if (unlikely(dlen < pkt->len))
		goto error; /* truncated RNDIS from host */

	if (unlikely(pkt->len < pkt->datalen
		     + pkt->oobdatalen + pkt->pktinfolen))
		goto error;

	if (unlikely(pkt->datalen == 0))
		goto error;

	/* Check offsets. */
	if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN))
		goto error;

	if (likely(pkt->pktinfooffset > 0) &&
	    unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN ||
		     (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)))
		goto error;

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	data_len = pkt->datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset);
	pktinfo_len = pkt->pktinfolen;

	if (likely(pktinfo_len > 0)) {
		err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off,
				      pktinfo_len, &info);
		if (err)
			goto error;
	}

	/* overflow check */
	if (data_len > data_len + data_off || data_len + data_off > pkt->len)
		goto error;

	if (unlikely(data_len < RTE_ETHER_HDR_LEN))
		goto error;

	hn_rxpkt(rxq, rxb, data, data_off, data_len, &info);
	return;
error:
	++rxq->stats.errors;
}

static void
hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
		 struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
{
	const struct rndis_msghdr *hdr = buf;

	switch (hdr->type) {
	case RNDIS_PACKET_MSG:
		if (dev->data->dev_started)
			hn_rndis_rx_data(rxq, rxb, buf, len);
		break;

	case RNDIS_INDICATE_STATUS_MSG:
		hn_rndis_link_status(dev, buf);
		break;

	case RNDIS_INITIALIZE_CMPLT:
	case RNDIS_QUERY_CMPLT:
	case RNDIS_SET_CMPLT:
		hn_rndis_receive_response(rxq->hv, buf, len);
		break;

	default:
		PMD_DRV_LOG(NOTICE,
			    "unexpected RNDIS message (type %#x len %u)",
			    hdr->type, len);
		break;
	}
}

static void
hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
		    struct hn_data *hv,
		    struct hn_rx_queue *rxq,
		    const struct vmbus_chanpkt_hdr *hdr,
		    const void *buf)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr = buf;
	uint32_t rxbuf_sz = hv->rxbuf_res->len;
	char *rxbuf = hv->rxbuf_res->addr;
	unsigned int i, hlen, count;
	struct hn_rx_bufinfo *rxb;

	/* At minimum we need type header */
	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) {
		PMD_RX_LOG(ERR, "invalid receive nvs RNDIS");
		return;
	}

	/* Make sure that this is a RNDIS message. */
	if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) {
		PMD_RX_LOG(ERR, "nvs type %u, not RNDIS",
			   nvs_hdr->type);
		return;
	}

	hlen = vmbus_chanpkt_getlen(hdr->hlen);
	if (unlikely(hlen < sizeof(*pkt))) {
		PMD_RX_LOG(ERR, "invalid rxbuf chanpkt");
		return;
	}

	pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr);
	if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) {
		PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x",
			   pkt->rxbuf_id);
		return;
	}

	count = pkt->rxbuf_cnt;
	if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf,
				     rxbuf[count]))) {
		PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count);
		return;
	}

	if (pkt->hdr.xactid > hv->rxbuf_section_cnt) {
		PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64,
			   pkt->hdr.xactid);
		return;
	}

	/* Setup receive buffer info to allow for callback */
	rxb = hn_rx_buf_init(rxq, pkt);

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		unsigned int ofs, len;

		ofs = pkt->rxbuf[i].ofs;
		len = pkt->rxbuf[i].len;

		if (unlikely(ofs + len > rxbuf_sz)) {
			PMD_RX_LOG(ERR,
				   "%uth RNDIS msg overflow ofs %u, len %u",
				   i, ofs, len);
			continue;
		}

		if (unlikely(len == 0)) {
			PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len);
			continue;
		}

		hn_rndis_receive(dev, rxq, rxb,
				 rxbuf + ofs, len);
	}

	/* Send ACK now if external mbuf not used */
	if (rte_mbuf_ext_refcnt_update(&rxb->shinfo, -1) == 0)
		hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

/*
 * Called when NVS inband events are received.
 * Send up a two part message with port_id and the NVS message
 * to the pipe to the netvsc-vf-event control thread.
 */
static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
				 const struct vmbus_chanpkt_hdr *pkt,
				 const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_TXTBL_NOTE:
		/* Transmit indirection table has locking problems
		 * in DPDK and therefore not implemented
		 */
		PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
		break;

	case NVS_TYPE_VFASSOC_NOTE:
		hn_nvs_handle_vfassoc(dev, pkt, data);
		break;

	default:
		PMD_DRV_LOG(INFO,
			    "got notify, nvs type %u", hdr->type);
	}
}

struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
				      uint16_t queue_id,
				      unsigned int socket_id)
{
	struct hn_rx_queue *rxq;

	rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq)
		return NULL;

	rxq->hv = hv;
	rxq->chan = hv->channels[queue_id];
	rte_spinlock_init(&rxq->ring_lock);
	rxq->port_id = hv->port_id;
	rxq->queue_id = queue_id;
	rxq->event_sz = HN_RXQ_EVENT_DEFAULT;
	rxq->event_buf = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT,
					   RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq->event_buf) {
		rte_free(rxq);
		return NULL;
	}

	/* setup rxbuf_info for non-primary queue */
	if (queue_id) {
		rxq->rxbuf_info = rte_calloc("HN_RXBUF_INFO",
					     hv->rxbuf_section_cnt,
					     sizeof(*rxq->rxbuf_info),
					     RTE_CACHE_LINE_SIZE);

		if (!rxq->rxbuf_info) {
			PMD_DRV_LOG(ERR,
				    "Could not allocate rxbuf info for queue %d\n",
				    queue_id);
			rte_free(rxq->event_buf);
			rte_free(rxq);
			return NULL;
		}
	}

	return rxq;
}

void
hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_rxq_info *qinfo)
{
	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_id];

	qinfo->mp = rxq->mb_pool;
	qinfo->nb_desc = rxq->rx_ring->size;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
}

int
hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_rxconf *rx_conf,
		      struct rte_mempool *mp)
{
	struct hn_data *hv = dev->data->dev_private;
	char ring_name[RTE_RING_NAMESIZE];
	struct hn_rx_queue *rxq;
	unsigned int count;
	int error = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	if (queue_idx == 0) {
		rxq = hv->primary;
	} else {
		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
		if (!rxq)
			return -ENOMEM;
	}

	rxq->mb_pool = mp;
	count = rte_mempool_avail_count(mp) / dev->data->nb_rx_queues;
	if (nb_desc == 0 || nb_desc > count)
		nb_desc = count;

	/*
	 * Staging ring from receive event logic to rx_pkts.
	 * rx_pkts assumes caller is handling multi-thread issue.
	 * event logic has locking.
	 */
	snprintf(ring_name, sizeof(ring_name),
		 "hn_rx_%u_%u", dev->data->port_id, queue_idx);
	rxq->rx_ring = rte_ring_create(ring_name,
				       rte_align32pow2(nb_desc),
				       socket_id, 0);
	if (!rxq->rx_ring)
		goto fail;

	error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
				     socket_id, rx_conf, mp);
	if (error)
		goto fail;

	dev->data->rx_queues[queue_idx] = rxq;
	return 0;

fail:
	rte_ring_free(rxq->rx_ring);
	rte_free(rxq->rxbuf_info);
	rte_free(rxq->event_buf);
	rte_free(rxq);
	return error;
}

static void
hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary)
{

	if (!rxq)
		return;

	rte_ring_free(rxq->rx_ring);
	rxq->rx_ring = NULL;
	rxq->mb_pool = NULL;

	hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);

	/* Keep primary queue to allow for control operations */
	if (keep_primary && rxq == rxq->hv->primary)
		return;

	rte_free(rxq->rxbuf_info);
	rte_free(rxq->event_buf);
	rte_free(rxq);
}

void
hn_dev_rx_queue_release(void *arg)
{
	struct hn_rx_queue *rxq = arg;

	PMD_INIT_FUNC_TRACE();

	hn_rx_queue_free(rxq, true);
}

/*
 * Get the number of used descriptors in a rx queue.
 * For this device that means how many packets are pending in the ring.
 */
uint32_t
hn_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_id)
{
	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_id];

	return rte_ring_count(rxq->rx_ring);
}

/*
 * Check the status of a Rx descriptor in the queue
 *
 * returns:
 *  - -EINVAL               - offset outside of ring
 *  - RTE_ETH_RX_DESC_AVAIL - no data available yet
 *  - RTE_ETH_RX_DESC_DONE  - data is waiting in staging ring
 */
int hn_dev_rx_queue_status(void *arg, uint16_t offset)
{
	const struct hn_rx_queue *rxq = arg;

	hn_process_events(rxq->hv, rxq->queue_id, 0);
	if (offset >= rxq->rx_ring->capacity)
		return -EINVAL;

	if (offset < rte_ring_count(rxq->rx_ring))
		return RTE_ETH_RX_DESC_DONE;
	else
		return RTE_ETH_RX_DESC_AVAIL;
}

int
hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
{
	struct hn_tx_queue *txq = arg;

	return hn_process_events(txq->hv, txq->queue_id, free_cnt);
}

/*
 * Process pending events on the channel.
 * Called from both Rx queue poll and Tx cleanup
 */
uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
			   uint32_t tx_limit)
{
	struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
	struct hn_rx_queue *rxq;
	uint32_t bytes_read = 0;
	uint32_t tx_done = 0;
	int ret = 0;

	rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];

	/*
	 * Since the channel is shared between the Rx and Tx queues, a lock
	 * is needed because DPDK does not force the same CPU to be used
	 * for Rx and Tx processing.
	 */
	if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
		return 0;

	for (;;) {
		const struct vmbus_chanpkt_hdr *pkt;
		uint32_t len = rxq->event_sz;
		const void *data;

retry:
		ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
		if (ret == -EAGAIN)
			break;	/* ring is empty */

		if (unlikely(ret == -ENOBUFS)) {
			/* event buffer not large enough to read ring */

			PMD_DRV_LOG(DEBUG,
				    "event buffer expansion (need %u)", len);
			rxq->event_sz = len + len / 4;
			rxq->event_buf = rte_realloc(rxq->event_buf, rxq->event_sz,
						     RTE_CACHE_LINE_SIZE);
			if (rxq->event_buf)
				goto retry;
			/* out of memory, no more events now */
			rxq->event_sz = 0;
			break;
		}

		if (unlikely(ret <= 0)) {
			/* This indicates a failure to communicate (or worse) */
			rte_exit(EXIT_FAILURE,
				 "vmbus ring buffer error: %d", ret);
		}

		bytes_read += ret;
		pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
		data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen);

		switch (pkt->type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			++tx_done;
			hn_nvs_handle_comp(dev, queue_id, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(dev, pkt, data);
			break;

		default:
			PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type);
			break;
		}

		if (tx_limit && tx_done >= tx_limit)
			break;
	}

	if (bytes_read > 0)
		rte_vmbus_chan_signal_read(rxq->chan, bytes_read);

	rte_spinlock_unlock(&rxq->ring_lock);

	return tx_done;
}

static void hn_append_to_chim(struct hn_tx_queue *txq,
			      struct rndis_packet_msg *pkt,
			      const struct rte_mbuf *m)
{
	struct hn_txdesc *txd = txq->agg_txd;
	uint8_t *buf = (uint8_t *)pkt;
	unsigned int data_offs;

	hn_rndis_dump(pkt);

	data_offs = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	txd->chim_size += pkt->len;
	txd->data_size += m->pkt_len;
	++txd->packets;
	hn_update_packet_stats(&txq->stats, m);

	for (; m; m = m->next) {
		uint16_t len = rte_pktmbuf_data_len(m);

		rte_memcpy(buf + data_offs,
			   rte_pktmbuf_mtod(m, const char *), len);
		data_offs += len;
	}
}

/*
 * Send pending aggregated data in chimney buffer (if any).
 * Returns error if send was unsuccessful because channel ring buffer
 * was full.
 */
static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
{
	struct hn_txdesc *txd = txq->agg_txd;
	struct hn_nvs_rndis rndis;
	int ret;

	if (!txd)
		return 0;

	rndis = (struct hn_nvs_rndis) {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_idx = txd->chim_index,
		.chim_sz = txd->chim_size,
	};

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index, txd->chim_size);

	ret = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC,
			  &rndis, sizeof(rndis), (uintptr_t)txd, need_sig);

	if (likely(ret == 0))
		hn_reset_txagg(txq);
	else if (ret == -EAGAIN) {
		PMD_TX_LOG(DEBUG, "port %u:%u channel full",
			   txq->port_id, txq->queue_id);
		++txq->stats.channel_full;
	} else {
		++txq->stats.errors;

		PMD_DRV_LOG(NOTICE, "port %u:%u send failed: %d",
			    txq->port_id, txq->queue_id, ret);
	}
	return ret;
}

/*
 * Try and find a place in a send chimney buffer to put
 * the small packet. If space is available, this routine
 * returns a pointer of where to place the data.
 * If no space, caller should try direct transmit.
 */
static void *
hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq,
	     struct hn_txdesc *txd, uint32_t pktsize)
{
	struct hn_txdesc *agg_txd = txq->agg_txd;
	struct rndis_packet_msg *pkt;
	void *chim;

	if (agg_txd) {
		unsigned int padding, olen;

		/*
		 * Update the previous RNDIS packet's total length,
		 * it can be increased due to the mandatory alignment
		 * padding for this RNDIS packet. And update the
		 * aggregating txdesc's chimney sending buffer size
		 * accordingly.
		 *
		 * Zero-out the padding, as required by the RNDIS spec.
		 */
		pkt = txq->agg_prevpkt;
		olen = pkt->len;
		padding = RTE_ALIGN(olen, txq->agg_align) - olen;
		if (padding > 0) {
			agg_txd->chim_size += padding;
			pkt->len += padding;
			memset((uint8_t *)pkt + olen, 0, padding);
		}

		chim = (uint8_t *)pkt + pkt->len;
		txq->agg_prevpkt = chim;
		txq->agg_pktleft--;
		txq->agg_szleft -= pktsize;
		if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
			/*
			 * Probably can't aggregate more packets,
			 * flush this aggregating txdesc proactively.
			 */
			txq->agg_pktleft = 0;
		}

		hn_txd_put(txq, txd);
		return chim;
	}

	txd->chim_index = hn_chim_alloc(hv);
	if (txd->chim_index == NVS_CHIM_IDX_INVALID)
		return NULL;

	chim = (uint8_t *)hv->chim_res->addr
			+ txd->chim_index * hv->chim_szmax;

	txq->agg_txd = txd;
	txq->agg_pktleft = txq->agg_pktmax - 1;
	txq->agg_szleft = txq->agg_szmax - pktsize;
	txq->agg_prevpkt = chim;

	return chim;
}

static inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt,
			uint32_t pi_dlen, uint32_t pi_type)
{
	const uint32_t pi_size = RNDIS_PKTINFO_SIZE(pi_dlen);
	struct rndis_pktinfo *pi;

	/*
	 * Per-packet-info does not move; it only grows.
	 *
	 * NOTE:
	 * pktinfooffset in this phase counts from the beginning
	 * of rndis_packet_msg.
	 */
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + hn_rndis_pktlen(pkt));

	pkt->pktinfolen += pi_size;

	pi->size = pi_size;
	pi->type = pi_type;
	pi->offset = RNDIS_PKTINFO_OFFSET;

	return pi->data;
}

/* Put RNDIS header and packet info on packet */
static void hn_encap(struct rndis_packet_msg *pkt,
		     uint16_t queue_id,
		     const struct rte_mbuf *m)
{
	unsigned int hlen = m->l2_len + m->l3_len;
	uint32_t *pi_data;
	uint32_t pkt_hlen;

	pkt->type = RNDIS_PACKET_MSG;
	pkt->len = m->pkt_len;
	pkt->dataoffset = 0;
	pkt->datalen = m->pkt_len;
	pkt->oobdataoffset = 0;
	pkt->oobdatalen = 0;
	pkt->oobdataelements = 0;
	pkt->pktinfooffset = sizeof(*pkt);
	pkt->pktinfolen = 0;
	pkt->vchandle = 0;
	pkt->reserved = 0;

	/*
	 * Set the hash value for this packet, to the queue_id to cause
	 * TX done event for this packet on the right channel.
	 */
	pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE,
					  NDIS_PKTINFO_TYPE_HASHVAL);
	*pi_data = queue_id;

	if (m->ol_flags & PKT_TX_VLAN_PKT) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_VLAN);
		*pi_data = m->vlan_tci;
	}

	if (m->ol_flags & PKT_TX_TCP_SEG) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_LSO);

		if (m->ol_flags & PKT_TX_IPV6) {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen,
							   m->tso_segsz);
		} else {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen,
							   m->tso_segsz);
		}
	} else if (m->ol_flags &
		   (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM)) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_CSUM);
		*pi_data = 0;

		if (m->ol_flags & PKT_TX_IPV6)
			*pi_data |= NDIS_TXCSUM_INFO_IPV6;
		if (m->ol_flags & PKT_TX_IPV4) {
			*pi_data |= NDIS_TXCSUM_INFO_IPV4;

			if (m->ol_flags & PKT_TX_IP_CKSUM)
				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
		}

		if (m->ol_flags & PKT_TX_TCP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen);
		else if (m->ol_flags & PKT_TX_UDP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen);
	}

	pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen;
	/* Fixup RNDIS packet message total length */
	pkt->len += pkt_hlen;

	/* Convert RNDIS packet message offsets */
	pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
	pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset);
}

/* How many scatter gather list elements are needed */
static unsigned int hn_get_slots(const struct rte_mbuf *m)
{
	unsigned int slots = 1; /* for RNDIS header */

	while (m) {
		unsigned int size = rte_pktmbuf_data_len(m);
		unsigned int offs = rte_mbuf_data_iova(m) & PAGE_MASK;

		slots += (offs + size + PAGE_SIZE - 1) / PAGE_SIZE;
		m = m->next;
	}

	return slots;
}
/* Build scatter gather list from chained mbuf */
static unsigned int hn_fill_sg(struct vmbus_gpa *sg,
			       const struct rte_mbuf *m)
{
	unsigned int segs = 0;

	while (m) {
		rte_iova_t addr = rte_mbuf_data_iova(m);
		unsigned int page = addr / PAGE_SIZE;
		unsigned int offset = addr & PAGE_MASK;
		unsigned int len = rte_pktmbuf_data_len(m);

		while (len > 0) {
			unsigned int bytes = RTE_MIN(len, PAGE_SIZE - offset);

			sg[segs].page = page;
			sg[segs].ofs = offset;
			sg[segs].len = bytes;
			segs++;

			++page;
			offset = 0;
			len -= bytes;
		}
		m = m->next;
	}

	return segs;
}

/* Transmit directly from mbuf */
static int hn_xmit_sg(struct hn_tx_queue *txq,
		      const struct hn_txdesc *txd, const struct rte_mbuf *m,
		      bool *need_sig)
{
	struct vmbus_gpa sg[hn_get_slots(m)];
	struct hn_nvs_rndis nvs_rndis = {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_sz = txd->chim_size,
	};
	rte_iova_t addr;
	unsigned int segs;

	/* attach aggregation data if present */
	if (txd->chim_size > 0)
		nvs_rndis.chim_idx = txd->chim_index;
	else
		nvs_rndis.chim_idx = NVS_CHIM_IDX_INVALID;

	hn_rndis_dump(txd->rndis_pkt);

	/* pass IOVA of rndis header in first segment */
	addr = txq->tx_rndis_iova +
		((char *)txd->rndis_pkt - (char *)txq->tx_rndis);

	sg[0].page = addr / PAGE_SIZE;
	sg[0].ofs = addr & PAGE_MASK;
	sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt));
	segs = 1;

	hn_update_packet_stats(&txq->stats, m);

	segs += hn_fill_sg(sg + 1, m);

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index,
		   segs, nvs_rndis.chim_sz);

	return hn_nvs_send_sglist(txq->chan, sg, segs,
				  &nvs_rndis, sizeof(nvs_rndis),
				  (uintptr_t)txd, need_sig);
}

uint16_t
hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct hn_tx_queue *txq = ptxq;
	uint16_t queue_id = txq->queue_id;
	struct hn_data *hv = txq->hv;
	struct rte_eth_dev *vf_dev;
	bool need_sig = false;
	uint16_t nb_tx, tx_thresh;
	int ret;

	if (unlikely(hv->closed))
		return 0;

	/*
	 * Always check for events on the primary channel
	 * because that is where hotplug notifications occur.
	 */
	tx_thresh = RTE_MAX(txq->free_thresh, nb_pkts);
	if (txq->queue_id == 0 ||
	    rte_mempool_avail_count(txq->txdesc_pool) < tx_thresh)
		hn_process_events(hv, txq->queue_id, 0);

	/* Transmit over VF if present and up */
	rte_rwlock_read_lock(&hv->vf_lock);
	vf_dev = hn_get_vf_dev(hv);
	if (vf_dev && vf_dev->data->dev_started) {
		void *sub_q = vf_dev->data->tx_queues[queue_id];

		nb_tx = (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
		rte_rwlock_read_unlock(&hv->vf_lock);
		return nb_tx;
	}
	rte_rwlock_read_unlock(&hv->vf_lock);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *m = tx_pkts[nb_tx];
		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
		struct rndis_packet_msg *pkt;
		struct hn_txdesc *txd;

		txd = hn_txd_get(txq);
		if (txd == NULL)
			break;

		/* For small packets aggregate them in chimney buffer */
		if (m->pkt_len < HN_TXCOPY_THRESHOLD &&
		    pkt_size <= txq->agg_szmax) {
			/* If this packet will not fit, then flush */
			if (txq->agg_pktleft == 0 ||
			    RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) {
				if (hn_flush_txagg(txq, &need_sig))
					goto fail;
			}

			pkt = hn_try_txagg(hv, txq, txd, pkt_size);
			if (unlikely(!pkt))
				break;

			hn_encap(pkt, queue_id, m);
			hn_append_to_chim(txq, pkt, m);

			rte_pktmbuf_free(m);

			/* if buffer is full, flush */
			if (txq->agg_pktleft == 0 &&
			    hn_flush_txagg(txq, &need_sig))
				goto fail;
		} else {
			/* Send any outstanding packets in buffer */
			if (txq->agg_txd && hn_flush_txagg(txq, &need_sig))
				goto fail;

			pkt = txd->rndis_pkt;
			txd->m = m;
			txd->data_size = m->pkt_len;
			++txd->packets;

			hn_encap(pkt, queue_id, m);

			ret = hn_xmit_sg(txq, txd, m, &need_sig);
			if (unlikely(ret != 0)) {
				if (ret == -EAGAIN) {
					PMD_TX_LOG(DEBUG, "sg channel full");
					++txq->stats.channel_full;
				} else {
					PMD_DRV_LOG(NOTICE, "sg send failed: %d", ret);
					++txq->stats.errors;
				}
				hn_txd_put(txq, txd);
				goto fail;
			}
		}
	}

	/* If partial buffer left, then try and send it.
	 * If that fails, then reuse it on next send.
	 */
	hn_flush_txagg(txq, &need_sig);

fail:
	if (need_sig)
		rte_vmbus_chan_signal_tx(txq->chan);

	return nb_tx;
}

static uint16_t
hn_recv_vf(uint16_t vf_port, const struct hn_rx_queue *rxq,
	   struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t i, n;

	if (unlikely(nb_pkts == 0))
		return 0;

	n = rte_eth_rx_burst(vf_port, rxq->queue_id, rx_pkts, nb_pkts);

	/* relabel the received mbufs */
	for (i = 0; i < n; i++)
		rx_pkts[i]->port = rxq->port_id;

	return n;
}

uint16_t
hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct hn_rx_queue *rxq = prxq;
	struct hn_data *hv = rxq->hv;
	struct rte_eth_dev *vf_dev;
	uint16_t nb_rcv;

	if (unlikely(hv->closed))
		return 0;

	/* Check for new completions (and hotplug) */
	if (likely(rte_ring_count(rxq->rx_ring) < nb_pkts))
		hn_process_events(hv, rxq->queue_id, 0);

	/* Always check the vmbus path for multicast and new flows */
	nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
					   (void **)rx_pkts, nb_pkts, NULL);

	/* If VF is available, check that as well */
	rte_rwlock_read_lock(&hv->vf_lock);
	vf_dev = hn_get_vf_dev(hv);
	if (vf_dev && vf_dev->data->dev_started)
		nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq,
				     rx_pkts + nb_rcv, nb_pkts - nb_rcv);

	rte_rwlock_read_unlock(&hv->vf_lock);
	return nb_rcv;
}

void
hn_dev_free_queues(struct rte_eth_dev *dev)
{
	unsigned int i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct hn_rx_queue *rxq = dev->data->rx_queues[i];

		hn_rx_queue_free(rxq, false);
		dev->data->rx_queues[i] = NULL;
	}
	dev->data->nb_rx_queues = 0;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		hn_dev_tx_queue_release(dev->data->tx_queues[i]);
		dev->data->tx_queues[i] = NULL;
	}
	dev->data->nb_tx_queues = 0;
}