/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2018 Microsoft Corporation
 * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
 * All rights reserved.
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <strings.h>
#include <malloc.h>

#include <rte_ethdev.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_bitmap.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <dev_driver.h>
#include <rte_net.h>
#include <bus_vmbus_driver.h>
#include <rte_spinlock.h>

#include "hn_logs.h"
#include "hn_var.h"
#include "hn_rndis.h"
#include "hn_nvs.h"
#include "ndis.h"

#define HN_NVS_SEND_MSG_SIZE \
	(sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis))

#define HN_TXD_CACHE_SIZE	32 /* per cpu tx_descriptor pool cache */
#define HN_RXQ_EVENT_DEFAULT	2048

struct hn_rxinfo {
	uint32_t	vlan_info;
	uint32_t	csum_info;
	uint32_t	hash_info;
	uint32_t	hash_value;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
	(HN_RXINFO_VLAN |		\
	 HN_RXINFO_CSUM |		\
	 HN_RXINFO_HASHINF |		\
	 HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0

/*
 * Per-transmit bookkeeping.
 * A slot in the transmit ring (chim_index) is reserved for each transmit.
 *
 * There are two types of transmit:
 *   - buffered transmit where the chimney buffer is used and the RNDIS
 *     header is in the buffer. mbuf == NULL for this case.
 *
 *   - direct transmit where the RNDIS header is in rndis_pkt and
 *     the mbuf is freed after transmit.
 *
 * Descriptors come from the per-port pool which is used
 * to limit the number of outstanding requests per device.
 */
struct hn_txdesc {
	struct rte_mbuf *m;

	uint16_t	queue_id;
	uint32_t	chim_index;
	uint32_t	chim_size;
	uint32_t	data_size;
	uint32_t	packets;

	struct rndis_packet_msg *rndis_pkt;
};

#define HN_RNDIS_PKT_LEN				\
	(sizeof(struct rndis_packet_msg) +		\
	 RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))

#define HN_RNDIS_PKT_ALIGNED	RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE)

/* Minimum space required for a packet */
#define HN_PKTSIZE_MIN(align) \
	RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)

#define DEFAULT_TX_FREE_THRESH 32
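/*
 * Update per-queue packet statistics.
 * size_bins[] buckets: [0] < 64, [1] == 64, [2..5] 65-1023 split at powers
 * of two, [6] 1024-1518, [7] > 1518 bytes. The multicast and broadcast
 * counters are adjacent in hn_stats so they can be indexed from one base.
 */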
static void
hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
{
	uint32_t s = m->pkt_len;
	const struct rte_ether_addr *ea;

	if (s >= 1024)
		stats->size_bins[6 + (s > 1518)]++;
	else if (s <= 64)
		stats->size_bins[s >> 6]++;
	else
		stats->size_bins[32UL - rte_clz32(s) - 5]++;

	ea = rte_pktmbuf_mtod(m, const struct rte_ether_addr *);
	RTE_BUILD_BUG_ON(offsetof(struct hn_stats, broadcast) !=
			 offsetof(struct hn_stats, multicast) + sizeof(uint64_t));
	if (unlikely(rte_is_multicast_ether_addr(ea)))
		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
}

static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt)
{
	return pkt->pktinfooffset + pkt->pktinfolen;
}

static inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{
	return ofs - offsetof(struct rndis_packet_msg, dataoffset);
}

static void hn_txd_init(struct rte_mempool *mp __rte_unused,
			void *opaque, void *obj, unsigned int idx)
{
	struct hn_tx_queue *txq = opaque;
	struct hn_txdesc *txd = obj;

	memset(txd, 0, sizeof(*txd));

	txd->queue_id = txq->queue_id;
	txd->chim_index = NVS_CHIM_IDX_INVALID;
	txd->rndis_pkt = (struct rndis_packet_msg *)((char *)txq->tx_rndis
		+ idx * HN_RNDIS_PKT_ALIGNED);
}

int
hn_chim_init(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;
	uint32_t i, chim_bmp_size;

	rte_spinlock_init(&hv->chim_lock);
	chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt);
	hv->chim_bmem = rte_zmalloc("hn_chim_bitmap", chim_bmp_size,
				    RTE_CACHE_LINE_SIZE);
	if (hv->chim_bmem == NULL) {
		PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u",
			     chim_bmp_size);
		return -1;
	}

	hv->chim_bmap = rte_bitmap_init(hv->chim_cnt,
					hv->chim_bmem, chim_bmp_size);
	if (hv->chim_bmap == NULL) {
		PMD_INIT_LOG(ERR, "failed to init chim bitmap");
		return -1;
	}

	for (i = 0; i < hv->chim_cnt; i++)
		rte_bitmap_set(hv->chim_bmap, i);

	return 0;
}

void
hn_chim_uninit(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;

	rte_bitmap_free(hv->chim_bmap);
	rte_free(hv->chim_bmem);
	hv->chim_bmem = NULL;
}
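/*
 * Chimney (copy) buffer slot management.
 * The bitmap initialized in hn_chim_init() tracks which transmit copy
 * buffer sections are free; a set bit means the slot is available.
 * hn_chim_alloc() returns NVS_CHIM_IDX_INVALID when no slot is free.
 */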
static uint32_t hn_chim_alloc(struct hn_data *hv)
{
	uint32_t index = NVS_CHIM_IDX_INVALID;
	uint64_t slab = 0;

	rte_spinlock_lock(&hv->chim_lock);
	if (rte_bitmap_scan(hv->chim_bmap, &index, &slab)) {
		index += rte_bsf64(slab);
		rte_bitmap_clear(hv->chim_bmap, index);
	}
	rte_spinlock_unlock(&hv->chim_lock);

	return index;
}

static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx)
{
	if (chim_idx >= hv->chim_cnt) {
		PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx);
	} else {
		rte_spinlock_lock(&hv->chim_lock);
		rte_bitmap_set(hv->chim_bmap, chim_idx);
		rte_spinlock_unlock(&hv->chim_lock);
	}
}

static void hn_reset_txagg(struct hn_tx_queue *txq)
{
	txq->agg_szleft = txq->agg_szmax;
	txq->agg_pktleft = txq->agg_pktmax;
	txq->agg_txd = NULL;
	txq->agg_prevpkt = NULL;
}

static void
hn_rx_queue_free_common(struct hn_rx_queue *rxq)
{
	if (!rxq)
		return;

	rte_free(rxq->rxbuf_info);
	rte_free(rxq->event_buf);
	rte_free(rxq);
}

int
hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_txconf *tx_conf)

{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_tx_queue *txq;
	struct hn_rx_queue *rxq = NULL;
	char name[RTE_MEMPOOL_NAMESIZE];
	uint32_t tx_free_thresh;
	int err = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh = RTE_MIN(nb_desc / 4,
					 DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh + 3 >= nb_desc) {
		PMD_INIT_LOG(ERR,
			     "tx_free_thresh must be less than the number of TX entries minus 3(%u)."
			     " (tx_free_thresh=%u port=%u queue=%u)",
			     nb_desc - 3,
			     tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE,
				 socket_id);
	if (!txq)
		return -ENOMEM;

	txq->hv = hv;
	txq->chan = hv->channels[queue_idx];
	txq->port_id = dev->data->port_id;
	txq->queue_id = queue_idx;
	txq->free_thresh = tx_free_thresh;

	snprintf(name, sizeof(name),
		 "hn_txd_%u_%u", dev->data->port_id, queue_idx);

	PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu",
		     name, nb_desc, sizeof(struct hn_txdesc));

	txq->tx_rndis_mz = rte_memzone_reserve_aligned(name,
			nb_desc * HN_RNDIS_PKT_ALIGNED, rte_socket_id(),
			RTE_MEMZONE_IOVA_CONTIG, HN_RNDIS_PKT_ALIGNED);
	if (!txq->tx_rndis_mz) {
		err = -rte_errno;
		goto error;
	}
	txq->tx_rndis = txq->tx_rndis_mz->addr;
	txq->tx_rndis_iova = txq->tx_rndis_mz->iova;

	txq->txdesc_pool = rte_mempool_create(name, nb_desc,
					      sizeof(struct hn_txdesc),
					      0, 0, NULL, NULL,
					      hn_txd_init, txq,
					      dev->device->numa_node, 0);
	if (txq->txdesc_pool == NULL) {
		PMD_DRV_LOG(ERR,
			    "mempool %s create failed: %d", name, rte_errno);
		goto error;
	}

	/*
	 * If there are more Tx queues than Rx queues, allocate rx_queues
	 * with event buffer so that Tx completion messages can still be
	 * received
	 */
	if (queue_idx >= dev->data->nb_rx_queues) {
		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);

		if (!rxq) {
			err = -ENOMEM;
			goto error;
		}

		/*
		 * Don't allocate mbuf pool or rx ring. RSS is always configured
		 * to ensure packets aren't received by this Rx queue.
		 */
		rxq->mb_pool = NULL;
		rxq->rx_ring = NULL;
	}

	txq->agg_szmax = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
	txq->agg_pktmax = hv->rndis_agg_pkts;
	txq->agg_align = hv->rndis_agg_align;

	hn_reset_txagg(txq);

	err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
				   socket_id, tx_conf);
	if (err == 0) {
		dev->data->tx_queues[queue_idx] = txq;
		if (rxq != NULL)
			dev->data->rx_queues[queue_idx] = rxq;
		return 0;
	}

error:
	rte_mempool_free(txq->txdesc_pool);
	rte_memzone_free(txq->tx_rndis_mz);
	hn_rx_queue_free_common(rxq);
	rte_free(txq);
	return err;
}

void
hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_txq_info *qinfo)
{
	struct hn_tx_queue *txq = dev->data->tx_queues[queue_id];

	qinfo->nb_desc = txq->txdesc_pool->size;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}

static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq)
{
	struct hn_txdesc *txd;

	if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) {
		++txq->stats.ring_full;
		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
		return NULL;
	}

	txd->m = NULL;
	txd->packets = 0;
	txd->data_size = 0;
	txd->chim_size = 0;

	return txd;
}

static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd)
{
	rte_mempool_put(txq->txdesc_pool, txd);
}

void
hn_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct hn_tx_queue *txq = dev->data->tx_queues[qid];

	PMD_INIT_FUNC_TRACE();

	if (!txq)
		return;
	/*
	 * Free any Rx queues allocated for a Tx queue without a corresponding
	 * Rx queue
	 */
	if (qid >= dev->data->nb_rx_queues)
		hn_rx_queue_free_common(dev->data->rx_queues[qid]);

	rte_mempool_free(txq->txdesc_pool);

	rte_memzone_free(txq->tx_rndis_mz);
	rte_free(txq);
}

/*
 * Check the status of a Tx descriptor in the queue.
 *
 * returns:
 *  - -EINVAL               - offset outside of tx_descriptor pool.
 *  - RTE_ETH_TX_DESC_FULL  - descriptor is not acknowledged by host.
 *  - RTE_ETH_TX_DESC_DONE  - descriptor is available.
 */
int hn_dev_tx_descriptor_status(void *arg, uint16_t offset)
{
	const struct hn_tx_queue *txq = arg;

	hn_process_events(txq->hv, txq->queue_id, 0);

	if (offset >= rte_mempool_avail_count(txq->txdesc_pool))
		return -EINVAL;

	if (offset < rte_mempool_in_use_count(txq->txdesc_pool))
		return RTE_ETH_TX_DESC_FULL;
	else
		return RTE_ETH_TX_DESC_DONE;
}
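/*
 * Handle a transmit completion from the host: update the queue stats,
 * release the chimney slot (if one was used) and return the descriptor
 * to the pool. Completions for control packets carry xactid == 0 and
 * are skipped.
 */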
static void
hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_txdesc *txd = (struct hn_txdesc *)xactid;
	struct hn_tx_queue *txq;

	/* Control packets are sent with xactid == 0 */
	if (!txd)
		return;

	txq = dev->data->tx_queues[queue_id];
	if (likely(ack->status == NVS_STATUS_OK)) {
		PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u",
			   txq->port_id, txq->queue_id, txd->chim_index,
			   txd->packets, txd->data_size);
		txq->stats.bytes += txd->data_size;
		txq->stats.packets += txd->packets;
	} else {
		PMD_DRV_LOG(NOTICE, "port %u:%u complete tx %u failed status %u",
			    txq->port_id, txq->queue_id, txd->chim_index, ack->status);
		++txq->stats.errors;
	}

	if (txd->chim_index != NVS_CHIM_IDX_INVALID) {
		hn_chim_free(hv, txd->chim_index);
		txd->chim_index = NVS_CHIM_IDX_INVALID;
	}

	rte_pktmbuf_free(txd->m);
	hn_txd_put(txq, txd);
}

/* Handle transmit completion events */
static void
hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id,
		   const struct vmbus_chanpkt_hdr *pkt,
		   const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_RNDIS_ACK:
		hn_nvs_send_completed(dev, queue_id, pkt->xactid, data);
		break;

	default:
		PMD_DRV_LOG(NOTICE, "unexpected send completion type %u",
			    hdr->type);
	}
}

/* Parse per-packet info (meta data) */
static int
hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen,
		struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (unlikely(info_dlen < sizeof(*pi)))
			return -EINVAL;

		if (unlikely(info_dlen < pi->size))
			return -EINVAL;
		info_dlen -= pi->size;

		if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return -EINVAL;
		if (unlikely(pi->size < pi->offset))
			return -EINVAL;

		dlen = pi->size - pi->offset;
		data = pi->data;

		switch (pi->type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (unlikely(dlen < NDIS_VLAN_INFO_SIZE))
				return -EINVAL;
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE))
				return -EINVAL;
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case NDIS_PKTINFO_TYPE_HASHVAL:
			if (unlikely(dlen < NDIS_HASH_VALUE_SIZE))
				return -EINVAL;
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case NDIS_PKTINFO_TYPE_HASHINF:
			if (unlikely(dlen < NDIS_HASH_INFO_SIZE))
				return -EINVAL;
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL)
			break; /* All found; done */
next:
		pi = (const struct rndis_pktinfo *)
			((const uint8_t *)pi + pi->size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if (!(mask & HN_RXINFO_HASHVAL))
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return 0;
}
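/*
 * Free callback for external mbufs attached to the host receive buffer.
 * When the last mbuf referencing a receive buffer section is freed, the
 * outstanding count is dropped and the section is acked back to the host.
 */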
static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque)
{
	struct hn_rx_bufinfo *rxb = opaque;
	struct hn_rx_queue *rxq = rxb->rxq;

	rte_atomic32_dec(&rxq->rxbuf_outstanding);
	hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

static struct hn_rx_bufinfo *hn_rx_buf_init(struct hn_rx_queue *rxq,
					    const struct vmbus_chanpkt_rxbuf *pkt)
{
	struct hn_rx_bufinfo *rxb;

	rxb = rxq->rxbuf_info + pkt->hdr.xactid;
	rxb->chan = rxq->chan;
	rxb->xactid = pkt->hdr.xactid;
	rxb->rxq = rxq;

	rxb->shinfo.free_cb = hn_rx_buf_free_cb;
	rxb->shinfo.fcb_opaque = rxb;
	rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1);
	return rxb;
}

static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
		     uint8_t *data, unsigned int headroom, unsigned int dlen,
		     const struct hn_rxinfo *info)
{
	struct hn_data *hv = rxq->hv;
	struct rte_mbuf *m = NULL;
	bool use_extbuf = false;

	if (likely(rxq->mb_pool != NULL))
		m = rte_pktmbuf_alloc(rxq->mb_pool);

	if (unlikely(!m)) {
		struct rte_eth_dev *dev =
			&rte_eth_devices[rxq->port_id];

		dev->data->rx_mbuf_alloc_failed++;
		return;
	}

	/*
	 * For large packets, avoid a copy if possible, but keep some space
	 * available in the receive area for later packets.
	 */
	if (hv->rx_extmbuf_enable && dlen > hv->rx_copybreak &&
	    (uint32_t)rte_atomic32_read(&rxq->rxbuf_outstanding) <
			hv->rxbuf_section_cnt / 2) {
		struct rte_mbuf_ext_shared_info *shinfo;
		const void *rxbuf;
		rte_iova_t iova;

		/*
		 * Build an external mbuf that points to receive area.
		 * Use refcount to handle multiple packets in same
		 * receive buffer section.
		 */
		rxbuf = hv->rxbuf_res.addr;
		iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf);
		shinfo = &rxb->shinfo;

		/* shinfo is already set to 1 by the caller */
		if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 2)
			rte_atomic32_inc(&rxq->rxbuf_outstanding);

		rte_pktmbuf_attach_extbuf(m, data, iova,
					  dlen + headroom, shinfo);
		m->data_off = headroom;
		use_extbuf = true;
	} else {
		/* Mbufs in the pool must be large enough to hold small packets */
		if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) {
			rte_pktmbuf_free_seg(m);
			++rxq->stats.errors;
			return;
		}
		rte_memcpy(rte_pktmbuf_mtod(m, void *),
			   data + headroom, dlen);
	}

	m->port = rxq->port_id;
	m->pkt_len = dlen;
	m->data_len = dlen;
	m->packet_type = rte_net_get_ptype(m, NULL,
					   RTE_PTYPE_L2_MASK |
					   RTE_PTYPE_L3_MASK |
					   RTE_PTYPE_L4_MASK);

	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
		m->vlan_tci = RTE_VLAN_TCI_MAKE(NDIS_VLAN_INFO_ID(info->vlan_info),
						NDIS_VLAN_INFO_PRI(info->vlan_info),
						NDIS_VLAN_INFO_CFI(info->vlan_info));
		m->ol_flags |= RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN;

		/* NDIS always strips tag, put it back if necessary */
		if (!hv->vlan_strip && rte_vlan_insert(&m)) {
			PMD_DRV_LOG(DEBUG, "vlan insert failed");
			++rxq->stats.errors;
			if (use_extbuf)
				rte_pktmbuf_detach_extbuf(m);
			rte_pktmbuf_free(m);
			return;
		}
	}

	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
		if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK)
			m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;

		if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK
				       | NDIS_RXCSUM_INFO_TCPCS_OK))
			m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
		else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED
					    | NDIS_RXCSUM_INFO_UDPCS_FAILED))
			m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
	}

	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
		m->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
		m->hash.rss = info->hash_value;
	}

	PMD_RX_LOG(DEBUG,
		   "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64,
		   rxq->port_id, rxq->queue_id, rxb->xactid,
		   m->pkt_len, m->packet_type, m->ol_flags);

	++rxq->stats.packets;
	rxq->stats.bytes += m->pkt_len;
	hn_update_packet_stats(&rxq->stats, m);

	if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
		++rxq->stats.ring_full;
		PMD_RX_LOG(DEBUG, "rx ring full");
		if (use_extbuf)
			rte_pktmbuf_detach_extbuf(m);
		rte_pktmbuf_free(m);
	}
}
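/*
 * Validate one RNDIS data message that was received in the host receive
 * buffer: check the length and offset fields, parse the per-packet info,
 * then hand the Ethernet frame to hn_rxpkt().
 */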
static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
			     struct hn_rx_bufinfo *rxb,
			     void *data, uint32_t dlen)
{
	unsigned int data_off, data_len;
	unsigned int pktinfo_off, pktinfo_len;
	const struct rndis_packet_msg *pkt = data;
	struct hn_rxinfo info = {
		.vlan_info = HN_NDIS_VLAN_INFO_INVALID,
		.csum_info = HN_NDIS_RXCSUM_INFO_INVALID,
		.hash_info = HN_NDIS_HASH_INFO_INVALID,
	};
	int err;

	hn_rndis_dump(pkt);

	if (unlikely(dlen < sizeof(*pkt)))
		goto error;

	if (unlikely(dlen < pkt->len))
		goto error; /* truncated RNDIS from host */

	if (unlikely(pkt->len < pkt->datalen
		     + pkt->oobdatalen + pkt->pktinfolen))
		goto error;

	if (unlikely(pkt->datalen == 0))
		goto error;

	/* Check offsets. */
	if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN))
		goto error;

	if (likely(pkt->pktinfooffset > 0) &&
	    unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN ||
		     (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)))
		goto error;

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	data_len = pkt->datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset);
	pktinfo_len = pkt->pktinfolen;

	if (likely(pktinfo_len > 0)) {
		err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off,
				      pktinfo_len, &info);
		if (err)
			goto error;
	}

	/* overflow check */
	if (data_len > data_len + data_off || data_len + data_off > pkt->len)
		goto error;

	if (unlikely(data_len < RTE_ETHER_HDR_LEN))
		goto error;

	hn_rxpkt(rxq, rxb, data, data_off, data_len, &info);
	return;
error:
	++rxq->stats.errors;
}

static void
hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
		 struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
{
	const struct rndis_msghdr *hdr = buf;

	switch (hdr->type) {
	case RNDIS_PACKET_MSG:
		if (dev->data->dev_started)
			hn_rndis_rx_data(rxq, rxb, buf, len);
		break;

	case RNDIS_INDICATE_STATUS_MSG:
		hn_rndis_link_status(dev, buf);
		break;

	case RNDIS_INITIALIZE_CMPLT:
	case RNDIS_QUERY_CMPLT:
	case RNDIS_SET_CMPLT:
		hn_rndis_receive_response(rxq->hv, buf, len);
		break;

	default:
		PMD_DRV_LOG(NOTICE,
			    "unexpected RNDIS message (type %#x len %u)",
			    hdr->type, len);
		break;
	}
}
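/*
 * Handle one VMBUS receive-buffer packet from the host. The packet
 * describes a set of ranges inside the shared receive buffer, each
 * holding one RNDIS message; the buffer section is acked back to the
 * host once no external mbuf still references it.
 */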
static void
hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
		    struct hn_data *hv,
		    struct hn_rx_queue *rxq,
		    const struct vmbus_chanpkt_hdr *hdr,
		    const void *buf)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr = buf;
	uint32_t rxbuf_sz = hv->rxbuf_res.len;
	char *rxbuf = hv->rxbuf_res.addr;
	unsigned int i, hlen, count;
	struct hn_rx_bufinfo *rxb;

	/* At minimum we need type header */
	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) {
		PMD_RX_LOG(ERR, "invalid receive nvs RNDIS");
		return;
	}

	/* Make sure that this is a RNDIS message. */
	if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) {
		PMD_RX_LOG(ERR, "nvs type %u, not RNDIS",
			   nvs_hdr->type);
		return;
	}

	hlen = vmbus_chanpkt_getlen(hdr->hlen);
	if (unlikely(hlen < sizeof(*pkt))) {
		PMD_RX_LOG(ERR, "invalid rxbuf chanpkt");
		return;
	}

	pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr);
	if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) {
		PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x",
			   pkt->rxbuf_id);
		return;
	}

	count = pkt->rxbuf_cnt;
	if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf,
				     rxbuf[count]))) {
		PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count);
		return;
	}

	if (pkt->hdr.xactid > hv->rxbuf_section_cnt) {
		PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64,
			   pkt->hdr.xactid);
		return;
	}

	/* Setup receive buffer info to allow for callback */
	rxb = hn_rx_buf_init(rxq, pkt);

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		unsigned int ofs, len;

		ofs = pkt->rxbuf[i].ofs;
		len = pkt->rxbuf[i].len;

		if (unlikely(ofs + len > rxbuf_sz)) {
			PMD_RX_LOG(ERR,
				   "%uth RNDIS msg overflow ofs %u, len %u",
				   i, ofs, len);
			continue;
		}

		if (unlikely(len == 0)) {
			PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len);
			continue;
		}

		hn_rndis_receive(dev, rxq, rxb,
				 rxbuf + ofs, len);
	}

	/* Send ACK now if external mbuf not used */
	if (rte_mbuf_ext_refcnt_update(&rxb->shinfo, -1) == 0)
		hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

/*
 * Called when NVS inband events are received.
 * Send up a two part message with port_id and the NVS message
 * to the pipe to the netvsc-vf-event control thread.
 */
static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
				 const struct vmbus_chanpkt_hdr *pkt,
				 const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_TXTBL_NOTE:
		/* Transmit indirection table has locking problems
		 * in DPDK and is therefore not implemented
		 */
		PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
		break;

	case NVS_TYPE_VFASSOC_NOTE:
		hn_nvs_handle_vfassoc(dev, pkt, data);
		break;

	default:
		PMD_DRV_LOG(INFO,
			    "got notify, nvs type %u", hdr->type);
	}
}
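/*
 * Allocate a receive queue structure and its channel event buffer.
 * Non-primary queues also get their own rxbuf_info array, which tracks
 * external mbuf references into the shared receive buffer.
 */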
struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
				      uint16_t queue_id,
				      unsigned int socket_id)
{
	struct hn_rx_queue *rxq;

	rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq)
		return NULL;

	rxq->hv = hv;
	rxq->chan = hv->channels[queue_id];
	rte_spinlock_init(&rxq->ring_lock);
	rxq->port_id = hv->port_id;
	rxq->queue_id = queue_id;
	rxq->event_sz = HN_RXQ_EVENT_DEFAULT;
	rxq->event_buf = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT,
					   RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq->event_buf) {
		rte_free(rxq);
		return NULL;
	}

	/* setup rxbuf_info for non-primary queue */
	if (queue_id) {
		rxq->rxbuf_info = rte_calloc("HN_RXBUF_INFO",
					     hv->rxbuf_section_cnt,
					     sizeof(*rxq->rxbuf_info),
					     RTE_CACHE_LINE_SIZE);

		if (!rxq->rxbuf_info) {
			PMD_DRV_LOG(ERR,
				    "Could not allocate rxbuf info for queue %d",
				    queue_id);
			rte_free(rxq->event_buf);
			rte_free(rxq);
			return NULL;
		}
	}

	return rxq;
}

void
hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_rxq_info *qinfo)
{
	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_id];

	qinfo->mp = rxq->mb_pool;
	qinfo->nb_desc = rxq->rx_ring->size;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
}

int
hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_rxconf *rx_conf,
		      struct rte_mempool *mp)
{
	struct hn_data *hv = dev->data->dev_private;
	char ring_name[RTE_RING_NAMESIZE];
	struct hn_rx_queue *rxq;
	unsigned int count;
	int error = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	if (queue_idx == 0) {
		rxq = hv->primary;
	} else {
		/*
		 * If the number of Tx queues was previously greater than the
		 * number of Rx queues, we may already have allocated an rxq.
		 */
		if (!dev->data->rx_queues[queue_idx])
			rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
		else
			rxq = dev->data->rx_queues[queue_idx];

		if (!rxq)
			return -ENOMEM;
	}

	rxq->mb_pool = mp;
	count = rte_mempool_avail_count(mp) / dev->data->nb_rx_queues;
	if (nb_desc == 0 || nb_desc > count)
		nb_desc = count;

	/*
	 * Staging ring from receive event logic to rx_pkts.
	 * rx_pkts assumes caller is handling multi-thread issue.
	 * event logic has locking.
	 */
	snprintf(ring_name, sizeof(ring_name),
		 "hn_rx_%u_%u", dev->data->port_id, queue_idx);
	rxq->rx_ring = rte_ring_create(ring_name,
				       rte_align32pow2(nb_desc),
				       socket_id, 0);
	if (!rxq->rx_ring)
		goto fail;

	error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
				     socket_id, rx_conf, mp);
	if (error)
		goto fail;

	dev->data->rx_queues[queue_idx] = rxq;
	return 0;

fail:
	rte_ring_free(rxq->rx_ring);
	/* Only free rxq if it was created in this function. */
	if (!dev->data->rx_queues[queue_idx])
		hn_rx_queue_free_common(rxq);

	return error;
}

static void
hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary)
{

	if (!rxq)
		return;

	rte_ring_free(rxq->rx_ring);
	rxq->rx_ring = NULL;
	rxq->mb_pool = NULL;

	hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);

	/* Keep primary queue to allow for control operations */
	if (keep_primary && rxq == rxq->hv->primary)
		return;

	hn_rx_queue_free_common(rxq);
}

void
hn_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct hn_rx_queue *rxq = dev->data->rx_queues[qid];

	PMD_INIT_FUNC_TRACE();

	hn_rx_queue_free(rxq, true);
}

/*
 * Get the number of used descriptors in an rx queue.
 * For this device that means how many packets are pending in the ring.
 */
uint32_t
hn_dev_rx_queue_count(void *rx_queue)
{
	struct hn_rx_queue *rxq = rx_queue;

	return rte_ring_count(rxq->rx_ring);
}

/*
 * Check the status of a Rx descriptor in the queue
 *
 * returns:
 *  - -EINVAL                - offset outside of ring
 *  - RTE_ETH_RX_DESC_AVAIL  - no data available yet
 *  - RTE_ETH_RX_DESC_DONE   - data is waiting in staging ring
 */
int hn_dev_rx_queue_status(void *arg, uint16_t offset)
{
	const struct hn_rx_queue *rxq = arg;

	hn_process_events(rxq->hv, rxq->queue_id, 0);
	if (offset >= rxq->rx_ring->capacity)
		return -EINVAL;

	if (offset < rte_ring_count(rxq->rx_ring))
		return RTE_ETH_RX_DESC_DONE;
	else
		return RTE_ETH_RX_DESC_AVAIL;
}

int
hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
{
	struct hn_tx_queue *txq = arg;

	return hn_process_events(txq->hv, txq->queue_id, free_cnt);
}

/*
 * Process pending events on the channel.
 * Called from both Rx queue poll and Tx cleanup
 */
uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
			   uint32_t tx_limit)
{
	struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
	struct hn_rx_queue *rxq;
	uint32_t bytes_read = 0;
	uint32_t tx_done = 0;
	int ret = 0;

	rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];

	/*
	 * The channel is shared between the Rx and Tx queues, so a lock is
	 * needed because DPDK does not guarantee that Rx and Tx run on the
	 * same CPU.
	 */
	if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
		return 0;

	for (;;) {
		const struct vmbus_chanpkt_hdr *pkt;
		uint32_t len = rxq->event_sz;
		const void *data;

retry:
		ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
		if (ret == -EAGAIN)
			break;	/* ring is empty */

		if (unlikely(ret == -ENOBUFS)) {
			/* event buffer not large enough to read ring */

			PMD_DRV_LOG(DEBUG,
				    "event buffer expansion (need %u)", len);
			rxq->event_sz = len + len / 4;
			rxq->event_buf = rte_realloc(rxq->event_buf, rxq->event_sz,
						     RTE_CACHE_LINE_SIZE);
			if (rxq->event_buf)
				goto retry;
			/* out of memory, no more events now */
			rxq->event_sz = 0;
			break;
		}

		if (unlikely(ret <= 0)) {
			/* This indicates a failure to communicate (or worse) */
			rte_exit(EXIT_FAILURE,
				 "vmbus ring buffer error: %d", ret);
		}

		bytes_read += ret;
		pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
		data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen);

		switch (pkt->type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			++tx_done;
			hn_nvs_handle_comp(dev, queue_id, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(dev, pkt, data);
			break;

		default:
			PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type);
			break;
		}

		if (tx_limit && tx_done >= tx_limit)
			break;
	}

	if (bytes_read > 0)
		rte_vmbus_chan_signal_read(rxq->chan, bytes_read);

	rte_spinlock_unlock(&rxq->ring_lock);

	return tx_done;
}
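/*
 * Copy a packet into the current chimney (copy) buffer slot.
 * The RNDIS header has already been built by hn_encap(); this appends
 * the mbuf data after it and accounts for the packet on the aggregating
 * transmit descriptor.
 */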
static void hn_append_to_chim(struct hn_tx_queue *txq,
			      struct rndis_packet_msg *pkt,
			      const struct rte_mbuf *m)
{
	struct hn_txdesc *txd = txq->agg_txd;
	uint8_t *buf = (uint8_t *)pkt;
	unsigned int data_offs;

	hn_rndis_dump(pkt);

	data_offs = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	txd->chim_size += pkt->len;
	txd->data_size += m->pkt_len;
	++txd->packets;
	hn_update_packet_stats(&txq->stats, m);

	for (; m; m = m->next) {
		uint16_t len = rte_pktmbuf_data_len(m);

		rte_memcpy(buf + data_offs,
			   rte_pktmbuf_mtod(m, const char *), len);
		data_offs += len;
	}
}

/*
 * Send pending aggregated data in chimney buffer (if any).
 * Returns error if send was unsuccessful because channel ring buffer
 * was full.
 */
static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)

{
	struct hn_txdesc *txd = txq->agg_txd;
	struct hn_nvs_rndis rndis;
	int ret;

	if (!txd)
		return 0;

	rndis = (struct hn_nvs_rndis) {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_idx = txd->chim_index,
		.chim_sz = txd->chim_size,
	};

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index, txd->chim_size);

	ret = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC,
			  &rndis, sizeof(rndis), (uintptr_t)txd, need_sig);

	if (likely(ret == 0))
		hn_reset_txagg(txq);
	else if (ret == -EAGAIN) {
		PMD_TX_LOG(DEBUG, "port %u:%u channel full",
			   txq->port_id, txq->queue_id);
		++txq->stats.channel_full;
	} else {
		++txq->stats.errors;

		PMD_DRV_LOG(NOTICE, "port %u:%u send failed: %d",
			    txq->port_id, txq->queue_id, ret);
	}
	return ret;
}

/*
 * Try and find a place in a send chimney buffer to put
 * the small packet. If space is available, this routine
 * returns a pointer of where to place the data.
 * If no space, caller should try direct transmit.
 */
static void *
hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq,
	     struct hn_txdesc *txd, uint32_t pktsize)
{
	struct hn_txdesc *agg_txd = txq->agg_txd;
	struct rndis_packet_msg *pkt;
	void *chim;

	if (agg_txd) {
		unsigned int padding, olen;

		/*
		 * Update the previous RNDIS packet's total length,
		 * it can be increased due to the mandatory alignment
		 * padding for this RNDIS packet. And update the
		 * aggregating txdesc's chimney sending buffer size
		 * accordingly.
		 *
		 * Zero-out the padding, as required by the RNDIS spec.
		 */
		pkt = txq->agg_prevpkt;
		olen = pkt->len;
		padding = RTE_ALIGN(olen, txq->agg_align) - olen;
		if (padding > 0) {
			agg_txd->chim_size += padding;
			pkt->len += padding;
			memset((uint8_t *)pkt + olen, 0, padding);
		}

		chim = (uint8_t *)pkt + pkt->len;
		txq->agg_prevpkt = chim;
		txq->agg_pktleft--;
		txq->agg_szleft -= pktsize;
		if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
			/*
			 * Probably can't aggregate more packets,
			 * flush this aggregating txdesc proactively.
			 */
			txq->agg_pktleft = 0;
		}

		hn_txd_put(txq, txd);
		return chim;
	}

	txd->chim_index = hn_chim_alloc(hv);
	if (txd->chim_index == NVS_CHIM_IDX_INVALID)
		return NULL;

	chim = (uint8_t *)hv->chim_res.addr
			+ txd->chim_index * hv->chim_szmax;

	txq->agg_txd = txd;
	txq->agg_pktleft = txq->agg_pktmax - 1;
	txq->agg_szleft = txq->agg_szmax - pktsize;
	txq->agg_prevpkt = chim;

	return chim;
}
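/*
 * Reserve space for one per-packet-info element at the tail of the
 * RNDIS packet message and return a pointer to its data area for the
 * caller to fill in.
 */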
static inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt,
			uint32_t pi_dlen, uint32_t pi_type)
{
	const uint32_t pi_size = RNDIS_PKTINFO_SIZE(pi_dlen);
	struct rndis_pktinfo *pi;

	/*
	 * Per-packet-info does not move; it only grows.
	 *
	 * NOTE:
	 * pktinfooffset in this phase counts from the beginning
	 * of rndis_packet_msg.
	 */
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + hn_rndis_pktlen(pkt));

	pkt->pktinfolen += pi_size;

	pi->size = pi_size;
	pi->type = pi_type;
	pi->offset = RNDIS_PKTINFO_OFFSET;

	return pi->data;
}

/* Put RNDIS header and packet info on packet */
static void hn_encap(struct rndis_packet_msg *pkt,
		     uint16_t queue_id,
		     const struct rte_mbuf *m)
{
	unsigned int hlen = m->l2_len + m->l3_len;
	uint32_t *pi_data;
	uint32_t pkt_hlen;

	pkt->type = RNDIS_PACKET_MSG;
	pkt->len = m->pkt_len;
	pkt->dataoffset = 0;
	pkt->datalen = m->pkt_len;
	pkt->oobdataoffset = 0;
	pkt->oobdatalen = 0;
	pkt->oobdataelements = 0;
	pkt->pktinfooffset = sizeof(*pkt);
	pkt->pktinfolen = 0;
	pkt->vchandle = 0;
	pkt->reserved = 0;

	/*
	 * Set the hash value for this packet, to the queue_id to cause
	 * TX done event for this packet on the right channel.
	 */
	pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE,
					  NDIS_PKTINFO_TYPE_HASHVAL);
	*pi_data = queue_id;

	if (m->ol_flags & RTE_MBUF_F_TX_VLAN) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_VLAN);
		*pi_data = NDIS_VLAN_INFO_MAKE(RTE_VLAN_TCI_ID(m->vlan_tci),
					       RTE_VLAN_TCI_PRI(m->vlan_tci),
					       RTE_VLAN_TCI_DEI(m->vlan_tci));
	}

	if (m->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_LSO);

		if (m->ol_flags & RTE_MBUF_F_TX_IPV6) {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen,
							   m->tso_segsz);
		} else {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen,
							   m->tso_segsz);
		}
	} else if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
		   RTE_MBUF_F_TX_TCP_CKSUM ||
		   (m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
		   RTE_MBUF_F_TX_UDP_CKSUM ||
		   (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_CSUM);
		*pi_data = 0;

		if (m->ol_flags & RTE_MBUF_F_TX_IPV6)
			*pi_data |= NDIS_TXCSUM_INFO_IPV6;
		if (m->ol_flags & RTE_MBUF_F_TX_IPV4) {
			*pi_data |= NDIS_TXCSUM_INFO_IPV4;

			if (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
		}

		if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
		    RTE_MBUF_F_TX_TCP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen);
		else if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
			 RTE_MBUF_F_TX_UDP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen);
	}

	pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen;
	/* Fixup RNDIS packet message total length */
	pkt->len += pkt_hlen;

	/* Convert RNDIS packet message offsets */
	pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
	pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset);
}

/* How many scatter gather list elements are needed */
static unsigned int hn_get_slots(const struct rte_mbuf *m)
{
	unsigned int slots = 1; /* for RNDIS header */

	while (m) {
		unsigned int size = rte_pktmbuf_data_len(m);
		unsigned int offs = rte_mbuf_data_iova(m) & PAGE_MASK;

		slots += (offs + size + rte_mem_page_size() - 1) /
				rte_mem_page_size();
		m = m->next;
	}

	return slots;
}
/* Build scatter gather list from chained mbuf */
static unsigned int hn_fill_sg(struct vmbus_gpa *sg,
			       const struct rte_mbuf *m)
{
	unsigned int segs = 0;

	while (m) {
		rte_iova_t addr = rte_mbuf_data_iova(m);
		unsigned int page = addr / rte_mem_page_size();
		unsigned int offset = addr & PAGE_MASK;
		unsigned int len = rte_pktmbuf_data_len(m);

		while (len > 0) {
			unsigned int bytes = RTE_MIN(len,
					rte_mem_page_size() - offset);

			sg[segs].page = page;
			sg[segs].ofs = offset;
			sg[segs].len = bytes;
			segs++;

			++page;
			offset = 0;
			len -= bytes;
		}
		m = m->next;
	}

	return segs;
}

/* Transmit directly from mbuf */
static int hn_xmit_sg(struct hn_tx_queue *txq,
		      const struct hn_txdesc *txd, const struct rte_mbuf *m,
		      bool *need_sig)
{
	struct vmbus_gpa sg[hn_get_slots(m)];
	struct hn_nvs_rndis nvs_rndis = {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_sz = txd->chim_size,
	};
	rte_iova_t addr;
	unsigned int segs;

	/* attach aggregation data if present */
	if (txd->chim_size > 0)
		nvs_rndis.chim_idx = txd->chim_index;
	else
		nvs_rndis.chim_idx = NVS_CHIM_IDX_INVALID;

	hn_rndis_dump(txd->rndis_pkt);

	/* pass IOVA of rndis header in first segment */
	addr = txq->tx_rndis_iova +
		((char *)txd->rndis_pkt - (char *)txq->tx_rndis);

	sg[0].page = addr / rte_mem_page_size();
	sg[0].ofs = addr & PAGE_MASK;
	sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt));
	segs = 1;

	hn_update_packet_stats(&txq->stats, m);

	segs += hn_fill_sg(sg + 1, m);

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index,
		   segs, nvs_rndis.chim_sz);

	return hn_nvs_send_sglist(txq->chan, sg, segs,
				  &nvs_rndis, sizeof(nvs_rndis),
				  (uintptr_t)txd, need_sig);
}
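/*
 * Transmit a burst of packets.
 * Small packets (up to tx_copybreak) are aggregated into the chimney
 * (copy) buffer; larger packets are sent by scatter-gather directly
 * from the mbuf. If a VF has been switched in, the burst is handed to
 * the VF transmit queue instead.
 */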
uint16_t
hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct hn_tx_queue *txq = ptxq;
	uint16_t queue_id = txq->queue_id;
	struct hn_data *hv = txq->hv;
	struct rte_eth_dev *vf_dev;
	bool need_sig = false;
	uint16_t nb_tx, tx_thresh;
	int ret;

	if (unlikely(hv->closed))
		return 0;

	/*
	 * Always check for events on the primary channel
	 * because that is where hotplug notifications occur.
	 */
	tx_thresh = RTE_MAX(txq->free_thresh, nb_pkts);
	if (txq->queue_id == 0 ||
	    rte_mempool_avail_count(txq->txdesc_pool) < tx_thresh)
		hn_process_events(hv, txq->queue_id, 0);

	/* Transmit over VF if present and up */
	if (hv->vf_ctx.vf_vsc_switched) {
		rte_rwlock_read_lock(&hv->vf_lock);
		vf_dev = hn_get_vf_dev(hv);
		if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
		    vf_dev->data->dev_started) {
			void *sub_q = vf_dev->data->tx_queues[queue_id];

			nb_tx = (*vf_dev->tx_pkt_burst)
					(sub_q, tx_pkts, nb_pkts);
			rte_rwlock_read_unlock(&hv->vf_lock);
			return nb_tx;
		}
		rte_rwlock_read_unlock(&hv->vf_lock);
	}

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *m = tx_pkts[nb_tx];
		struct rndis_packet_msg *pkt;
		struct hn_txdesc *txd;
		uint32_t pkt_size;

		txd = hn_txd_get(txq);
		if (txd == NULL)
			break;

		if (!(m->ol_flags & RTE_MBUF_F_TX_VLAN)) {
			struct rte_ether_hdr *eh =
				rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
			struct rte_vlan_hdr *vh;

			/* Force TX vlan offloading for 802.1Q packet */
			if (eh->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN)) {
				vh = (struct rte_vlan_hdr *)(eh + 1);
				m->ol_flags |= RTE_MBUF_F_TX_VLAN;
				m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci);

				/* Copy ether header over */
				memmove(rte_pktmbuf_adj(m, sizeof(struct rte_vlan_hdr)),
					eh, 2 * RTE_ETHER_ADDR_LEN);
			}
		}
		pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;

		/* For small packets aggregate them in chimney buffer */
		if (m->pkt_len <= hv->tx_copybreak &&
		    pkt_size <= txq->agg_szmax) {
			/* If this packet will not fit, then flush */
			if (txq->agg_pktleft == 0 ||
			    RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) {
				if (hn_flush_txagg(txq, &need_sig))
					goto fail;
			}

			pkt = hn_try_txagg(hv, txq, txd, pkt_size);
			if (unlikely(!pkt))
				break;

			hn_encap(pkt, queue_id, m);
			hn_append_to_chim(txq, pkt, m);

			rte_pktmbuf_free(m);

			/* if buffer is full, flush */
			if (txq->agg_pktleft == 0 &&
			    hn_flush_txagg(txq, &need_sig))
				goto fail;
		} else {
			/* Send any outstanding packets in buffer */
			if (txq->agg_txd && hn_flush_txagg(txq, &need_sig))
				goto fail;

			pkt = txd->rndis_pkt;
			txd->m = m;
			txd->data_size = m->pkt_len;
			++txd->packets;

			hn_encap(pkt, queue_id, m);

			ret = hn_xmit_sg(txq, txd, m, &need_sig);
			if (unlikely(ret != 0)) {
				if (ret == -EAGAIN) {
					PMD_TX_LOG(DEBUG, "sg channel full");
					++txq->stats.channel_full;
				} else {
					PMD_DRV_LOG(NOTICE, "sg send failed: %d", ret);
					++txq->stats.errors;
				}
				hn_txd_put(txq, txd);
				goto fail;
			}
		}
	}

	/*
	 * If a partial buffer is left, then try to send it.
	 * If that fails, it will be reused on the next send.
	 */
	hn_flush_txagg(txq, &need_sig);

fail:
	if (need_sig)
		rte_vmbus_chan_signal_tx(txq->chan);

	return nb_tx;
}
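/*
 * Pull a burst of packets from the VF device on the same queue and
 * relabel them with the synthetic port id so the application sees a
 * single port.
 */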
static uint16_t
hn_recv_vf(uint16_t vf_port, const struct hn_rx_queue *rxq,
	   struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t i, n;

	if (unlikely(nb_pkts == 0))
		return 0;

	n = rte_eth_rx_burst(vf_port, rxq->queue_id, rx_pkts, nb_pkts);

	/* relabel the received mbufs */
	for (i = 0; i < n; i++)
		rx_pkts[i]->port = rxq->port_id;

	return n;
}

uint16_t
hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct hn_rx_queue *rxq = prxq;
	struct hn_data *hv = rxq->hv;
	struct rte_eth_dev *vf_dev;
	uint16_t nb_rcv;

	if (unlikely(hv->closed))
		return 0;

	/* Check for new completions (and hotplug) */
	if (likely(rte_ring_count(rxq->rx_ring) < nb_pkts))
		hn_process_events(hv, rxq->queue_id, 0);

	/* Always check the vmbus path for multicast and new flows */
	nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
					   (void **)rx_pkts, nb_pkts, NULL);

	/* If VF is available, check that as well */
	if (hv->vf_ctx.vf_vsc_switched) {
		rte_rwlock_read_lock(&hv->vf_lock);
		vf_dev = hn_get_vf_dev(hv);
		if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
		    vf_dev->data->dev_started)
			nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq,
					     rx_pkts + nb_rcv,
					     nb_pkts - nb_rcv);

		rte_rwlock_read_unlock(&hv->vf_lock);
	}
	return nb_rcv;
}

void
hn_dev_free_queues(struct rte_eth_dev *dev)
{
	unsigned int i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct hn_rx_queue *rxq = dev->data->rx_queues[i];

		hn_rx_queue_free(rxq, false);
		dev->data->rx_queues[i] = NULL;
	}
	dev->data->nb_rx_queues = 0;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		hn_dev_tx_queue_release(dev, i);
		dev->data->tx_queues[i] = NULL;
	}
	dev->data->nb_tx_queues = 0;
}