/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2018 Microsoft Corporation
 * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
 * All rights reserved.
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <strings.h>
#include <malloc.h>

#include <rte_ethdev.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_bitmap.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <dev_driver.h>
#include <rte_net.h>
#include <bus_vmbus_driver.h>
#include <rte_spinlock.h>

#include "hn_logs.h"
#include "hn_var.h"
#include "hn_rndis.h"
#include "hn_nvs.h"
#include "ndis.h"

#define HN_NVS_SEND_MSG_SIZE \
	(sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis))

#define HN_TXD_CACHE_SIZE	32 /* per cpu tx_descriptor pool cache */
#define HN_RXQ_EVENT_DEFAULT	2048

struct hn_rxinfo {
	uint32_t	vlan_info;
	uint32_t	csum_info;
	uint32_t	hash_info;
	uint32_t	hash_value;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
	(HN_RXINFO_VLAN |		\
	 HN_RXINFO_CSUM |		\
	 HN_RXINFO_HASHINF |		\
	 HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0

/*
 * Per-transmit bookkeeping.
 * A slot in the transmit ring (chim_index) is reserved for each transmit.
 *
 * There are two types of transmit:
 *   - buffered transmit, where the chimney buffer is used and the RNDIS
 *     header is placed in the buffer. mbuf == NULL for this case.
 *
 *   - direct transmit, where the RNDIS header is in rndis_pkt and the
 *     mbuf is freed after transmit.
 *
 * Descriptors come from a per-port pool which is used
 * to limit the number of outstanding requests per device.
 */
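/*
 * Illustration (not used by the driver): the intended life cycle of a
 * transmit descriptor on the direct path, using the helpers defined later
 * in this file.
 *
 *	struct hn_txdesc *txd = hn_txd_get(txq);	// reserve a descriptor
 *	hn_encap(txd->rndis_pkt, txq->queue_id, m);	// build the RNDIS header
 *	if (hn_xmit_sg(txq, txd, m, &need_sig) != 0)	// scatter-gather send
 *		hn_txd_put(txq, txd);			// on failure, return it to the pool
 *	// on success the descriptor is released in hn_nvs_send_completed()
 */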
struct hn_txdesc {
	struct rte_mbuf *m;

	uint16_t	queue_id;
	uint32_t	chim_index;
	uint32_t	chim_size;
	uint32_t	data_size;
	uint32_t	packets;

	struct rndis_packet_msg *rndis_pkt;
};

#define HN_RNDIS_PKT_LEN				\
	(sizeof(struct rndis_packet_msg) +		\
	 RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))

#define HN_RNDIS_PKT_ALIGNED	RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE)

/* Minimum space required for a packet */
#define HN_PKTSIZE_MIN(align) \
	RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)

#define DEFAULT_TX_FREE_THRESH 32

static void
hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
{
	uint32_t s = m->pkt_len;
	const struct rte_ether_addr *ea;

	if (s >= 1024)
		stats->size_bins[6 + (s > 1518)]++;
	else if (s <= 64)
		stats->size_bins[s >> 6]++;
	else
		stats->size_bins[32UL - rte_clz32(s) - 5]++;

	ea = rte_pktmbuf_mtod(m, const struct rte_ether_addr *);
	RTE_BUILD_BUG_ON(offsetof(struct hn_stats, broadcast) !=
			 offsetof(struct hn_stats, multicast) + sizeof(uint64_t));
	if (unlikely(rte_is_multicast_ether_addr(ea)))
		(&stats->multicast)[rte_is_broadcast_ether_addr(ea)]++;
}

static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt)
{
	return pkt->pktinfooffset + pkt->pktinfolen;
}

static inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{
	return ofs - offsetof(struct rndis_packet_msg, dataoffset);
}

static void hn_txd_init(struct rte_mempool *mp __rte_unused,
			void *opaque, void *obj, unsigned int idx)
{
	struct hn_tx_queue *txq = opaque;
	struct hn_txdesc *txd = obj;

	memset(txd, 0, sizeof(*txd));

	txd->queue_id = txq->queue_id;
	txd->chim_index = NVS_CHIM_IDX_INVALID;
	txd->rndis_pkt = (struct rndis_packet_msg *)((char *)txq->tx_rndis
		+ idx * HN_RNDIS_PKT_ALIGNED);
}

int
hn_chim_init(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;
	uint32_t i, chim_bmp_size;

	rte_spinlock_init(&hv->chim_lock);
	chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt);
	hv->chim_bmem = rte_zmalloc("hn_chim_bitmap", chim_bmp_size,
				    RTE_CACHE_LINE_SIZE);
	if (hv->chim_bmem == NULL) {
		PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u",
			     chim_bmp_size);
		return -1;
	}

	hv->chim_bmap = rte_bitmap_init(hv->chim_cnt,
					hv->chim_bmem, chim_bmp_size);
	if (hv->chim_bmap == NULL) {
		PMD_INIT_LOG(ERR, "failed to init chim bitmap");
		return -1;
	}

	for (i = 0; i < hv->chim_cnt; i++)
		rte_bitmap_set(hv->chim_bmap, i);

	return 0;
}

void
hn_chim_uninit(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;

	rte_bitmap_free(hv->chim_bmap);
	rte_free(hv->chim_bmem);
	hv->chim_bmem = NULL;
}

static uint32_t hn_chim_alloc(struct hn_data *hv)
{
	uint32_t index = NVS_CHIM_IDX_INVALID;
	uint64_t slab = 0;

	rte_spinlock_lock(&hv->chim_lock);
	if (rte_bitmap_scan(hv->chim_bmap, &index, &slab)) {
		index += rte_bsf64(slab);
		rte_bitmap_clear(hv->chim_bmap, index);
	}
	rte_spinlock_unlock(&hv->chim_lock);

	return index;
}
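/*
 * Illustration (not used by the driver): how the chimney slots tracked by
 * the bitmap above are consumed by the transmit path later in this file.
 * A slot is claimed per aggregated send and returned once the host
 * acknowledges it.
 *
 *	uint32_t idx = hn_chim_alloc(hv);
 *	if (idx != NVS_CHIM_IDX_INVALID) {
 *		void *chim = (uint8_t *)hv->chim_res.addr + idx * hv->chim_szmax;
 *		// ... copy RNDIS packets into chim and send via NVS ...
 *		hn_chim_free(hv, idx);	// done from hn_nvs_send_completed()
 *	}
 */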
static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx)
{
	if (chim_idx >= hv->chim_cnt) {
		PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx);
	} else {
		rte_spinlock_lock(&hv->chim_lock);
		rte_bitmap_set(hv->chim_bmap, chim_idx);
		rte_spinlock_unlock(&hv->chim_lock);
	}
}

static void hn_reset_txagg(struct hn_tx_queue *txq)
{
	txq->agg_szleft = txq->agg_szmax;
	txq->agg_pktleft = txq->agg_pktmax;
	txq->agg_txd = NULL;
	txq->agg_prevpkt = NULL;
}

int
hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_txconf *tx_conf)
{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_tx_queue *txq;
	char name[RTE_MEMPOOL_NAMESIZE];
	uint32_t tx_free_thresh;
	int err = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh = RTE_MIN(nb_desc / 4,
					 DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh + 3 >= nb_desc) {
		PMD_INIT_LOG(ERR,
			     "tx_free_thresh must be less than the number of TX entries minus 3(%u)."
			     " (tx_free_thresh=%u port=%u queue=%u)",
			     nb_desc - 3,
			     tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE,
				 socket_id);
	if (!txq)
		return -ENOMEM;

	txq->hv = hv;
	txq->chan = hv->channels[queue_idx];
	txq->port_id = dev->data->port_id;
	txq->queue_id = queue_idx;
	txq->free_thresh = tx_free_thresh;

	snprintf(name, sizeof(name),
		 "hn_txd_%u_%u", dev->data->port_id, queue_idx);

	PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu",
		     name, nb_desc, sizeof(struct hn_txdesc));

	txq->tx_rndis_mz = rte_memzone_reserve_aligned(name,
			nb_desc * HN_RNDIS_PKT_ALIGNED, rte_socket_id(),
			RTE_MEMZONE_IOVA_CONTIG, HN_RNDIS_PKT_ALIGNED);
	if (!txq->tx_rndis_mz) {
		err = -rte_errno;
		goto error;
	}
	txq->tx_rndis = txq->tx_rndis_mz->addr;
	txq->tx_rndis_iova = txq->tx_rndis_mz->iova;

	txq->txdesc_pool = rte_mempool_create(name, nb_desc,
					      sizeof(struct hn_txdesc),
					      0, 0, NULL, NULL,
					      hn_txd_init, txq,
					      dev->device->numa_node, 0);
	if (txq->txdesc_pool == NULL) {
		PMD_DRV_LOG(ERR,
			    "mempool %s create failed: %d", name, rte_errno);
		goto error;
	}

	txq->agg_szmax  = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
	txq->agg_pktmax = hv->rndis_agg_pkts;
	txq->agg_align  = hv->rndis_agg_align;

	hn_reset_txagg(txq);

	err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
				   socket_id, tx_conf);
	if (err == 0) {
		dev->data->tx_queues[queue_idx] = txq;
		return 0;
	}

error:
	rte_mempool_free(txq->txdesc_pool);
	rte_memzone_free(txq->tx_rndis_mz);
	rte_free(txq);
	return err;
}

void
hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_txq_info *qinfo)
{
	struct hn_tx_queue *txq = dev->data->tx_queues[queue_id];

	qinfo->nb_desc = txq->txdesc_pool->size;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}

static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq)
{
	struct hn_txdesc *txd;

	if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) {
		++txq->stats.ring_full;
		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
		return NULL;
	}

	txd->m = NULL;
	txd->packets = 0;
	txd->data_size = 0;
	txd->chim_size = 0;

	return txd;
}

static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd)
{
	rte_mempool_put(txq->txdesc_pool, txd);
}

void
hn_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct hn_tx_queue *txq = dev->data->tx_queues[qid];

	PMD_INIT_FUNC_TRACE();

	if (!txq)
		return;

	rte_mempool_free(txq->txdesc_pool);

	rte_memzone_free(txq->tx_rndis_mz);
	rte_free(txq);
}

/*
 * Check the status of a Tx descriptor in the queue.
 *
 * returns:
 *  - -EINVAL              - offset outside of tx_descriptor pool.
 *  - RTE_ETH_TX_DESC_FULL - descriptor is not acknowledged by host.
 *  - RTE_ETH_TX_DESC_DONE - descriptor is available.
 */
int hn_dev_tx_descriptor_status(void *arg, uint16_t offset)
{
	const struct hn_tx_queue *txq = arg;

	hn_process_events(txq->hv, txq->queue_id, 0);

	if (offset >= rte_mempool_avail_count(txq->txdesc_pool))
		return -EINVAL;

	if (offset < rte_mempool_in_use_count(txq->txdesc_pool))
		return RTE_ETH_TX_DESC_FULL;
	else
		return RTE_ETH_TX_DESC_DONE;
}

static void
hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_txdesc *txd = (struct hn_txdesc *)xactid;
	struct hn_tx_queue *txq;

	/* Control packets are sent with xactid == 0 */
	if (!txd)
		return;

	txq = dev->data->tx_queues[queue_id];
	if (likely(ack->status == NVS_STATUS_OK)) {
		PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u",
			   txq->port_id, txq->queue_id, txd->chim_index,
			   txd->packets, txd->data_size);
		txq->stats.bytes += txd->data_size;
		txq->stats.packets += txd->packets;
	} else {
		PMD_DRV_LOG(NOTICE, "port %u:%u complete tx %u failed status %u",
			    txq->port_id, txq->queue_id, txd->chim_index, ack->status);
		++txq->stats.errors;
	}

	if (txd->chim_index != NVS_CHIM_IDX_INVALID) {
		hn_chim_free(hv, txd->chim_index);
		txd->chim_index = NVS_CHIM_IDX_INVALID;
	}

	rte_pktmbuf_free(txd->m);
	hn_txd_put(txq, txd);
}

/* Handle transmit completion events */
static void
hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id,
		   const struct vmbus_chanpkt_hdr *pkt,
		   const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_RNDIS_ACK:
		hn_nvs_send_completed(dev, queue_id, pkt->xactid, data);
		break;

	default:
		PMD_DRV_LOG(NOTICE, "unexpected send completion type %u",
			    hdr->type);
	}
}
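/*
 * Illustration (not driver code): the metadata parsed by hn_rndis_rxinfo()
 * below is a chain of RNDIS per-packet-info records, each laid out roughly as
 *
 *	struct rndis_pktinfo {
 *		uint32_t size;		// total record size, 4-byte aligned
 *		uint32_t type;		// NDIS_PKTINFO_TYPE_* selector
 *		uint32_t offset;	// offset of the value from the record start
 *		uint8_t  data[];	// value: VLAN info, csum bits, RSS hash
 *	};
 *
 * Records are walked until all of HN_RXINFO_ALL has been seen or the
 * buffer is exhausted.
 */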
/* Parse per-packet info (meta data) */
static int
hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen,
		struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (unlikely(info_dlen < sizeof(*pi)))
			return -EINVAL;

		if (unlikely(info_dlen < pi->size))
			return -EINVAL;
		info_dlen -= pi->size;

		if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return -EINVAL;
		if (unlikely(pi->size < pi->offset))
			return -EINVAL;

		dlen = pi->size - pi->offset;
		data = pi->data;

		switch (pi->type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (unlikely(dlen < NDIS_VLAN_INFO_SIZE))
				return -EINVAL;
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE))
				return -EINVAL;
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case NDIS_PKTINFO_TYPE_HASHVAL:
			if (unlikely(dlen < NDIS_HASH_VALUE_SIZE))
				return -EINVAL;
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case NDIS_PKTINFO_TYPE_HASHINF:
			if (unlikely(dlen < NDIS_HASH_INFO_SIZE))
				return -EINVAL;
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL)
			break; /* All found; done */
next:
		pi = (const struct rndis_pktinfo *)
			((const uint8_t *)pi + pi->size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if (!(mask & HN_RXINFO_HASHVAL))
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return 0;
}

static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque)
{
	struct hn_rx_bufinfo *rxb = opaque;
	struct hn_rx_queue *rxq = rxb->rxq;

	rte_atomic32_dec(&rxq->rxbuf_outstanding);
	hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

static struct hn_rx_bufinfo *hn_rx_buf_init(struct hn_rx_queue *rxq,
					    const struct vmbus_chanpkt_rxbuf *pkt)
{
	struct hn_rx_bufinfo *rxb;

	rxb = rxq->rxbuf_info + pkt->hdr.xactid;
	rxb->chan = rxq->chan;
	rxb->xactid = pkt->hdr.xactid;
	rxb->rxq = rxq;

	rxb->shinfo.free_cb = hn_rx_buf_free_cb;
	rxb->shinfo.fcb_opaque = rxb;
	rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1);
	return rxb;
}
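/*
 * Illustration (not driver code): hn_rxpkt() below can avoid a copy for
 * large packets by attaching the mbuf directly to the host receive area.
 * The shared-info refcount prepared in hn_rx_buf_init() keeps the receive
 * buffer section alive until every such mbuf has been freed:
 *
 *	rte_mbuf_ext_refcnt_update(&rxb->shinfo, 1);	// one more mbuf in flight
 *	rte_pktmbuf_attach_extbuf(m, data, iova, dlen + headroom, &rxb->shinfo);
 *	...
 *	// rte_pktmbuf_free(m) drops the refcount; when it reaches zero,
 *	// hn_rx_buf_free_cb() acks the section back to the host.
 */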
static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
		     uint8_t *data, unsigned int headroom, unsigned int dlen,
		     const struct hn_rxinfo *info)
{
	struct hn_data *hv = rxq->hv;
	struct rte_mbuf *m;
	bool use_extbuf = false;

	m = rte_pktmbuf_alloc(rxq->mb_pool);
	if (unlikely(!m)) {
		struct rte_eth_dev *dev =
			&rte_eth_devices[rxq->port_id];

		dev->data->rx_mbuf_alloc_failed++;
		return;
	}

	/*
	 * For large packets, avoid copying if possible, but keep some space
	 * available in the receive area for later packets.
	 */
	if (hv->rx_extmbuf_enable && dlen > hv->rx_copybreak &&
	    (uint32_t)rte_atomic32_read(&rxq->rxbuf_outstanding) <
			hv->rxbuf_section_cnt / 2) {
		struct rte_mbuf_ext_shared_info *shinfo;
		const void *rxbuf;
		rte_iova_t iova;

		/*
		 * Build an external mbuf that points to the receive area.
		 * Use the refcount to handle multiple packets in the same
		 * receive buffer section.
		 */
		rxbuf = hv->rxbuf_res.addr;
		iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf);
		shinfo = &rxb->shinfo;

		/* shinfo is already set to 1 by the caller */
		if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 2)
			rte_atomic32_inc(&rxq->rxbuf_outstanding);

		rte_pktmbuf_attach_extbuf(m, data, iova,
					  dlen + headroom, shinfo);
		m->data_off = headroom;
		use_extbuf = true;
	} else {
		/* Mbufs in the pool must be large enough to hold small packets */
		if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) {
			rte_pktmbuf_free_seg(m);
			++rxq->stats.errors;
			return;
		}
		rte_memcpy(rte_pktmbuf_mtod(m, void *),
			   data + headroom, dlen);
	}

	m->port = rxq->port_id;
	m->pkt_len = dlen;
	m->data_len = dlen;
	m->packet_type = rte_net_get_ptype(m, NULL,
					   RTE_PTYPE_L2_MASK |
					   RTE_PTYPE_L3_MASK |
					   RTE_PTYPE_L4_MASK);

	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
		m->vlan_tci = RTE_VLAN_TCI_MAKE(NDIS_VLAN_INFO_ID(info->vlan_info),
						NDIS_VLAN_INFO_PRI(info->vlan_info),
						NDIS_VLAN_INFO_CFI(info->vlan_info));
		m->ol_flags |= RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN;

		/* NDIS always strips the tag, put it back if necessary */
		if (!hv->vlan_strip && rte_vlan_insert(&m)) {
			PMD_DRV_LOG(DEBUG, "vlan insert failed");
			++rxq->stats.errors;
			if (use_extbuf)
				rte_pktmbuf_detach_extbuf(m);
			rte_pktmbuf_free(m);
			return;
		}
	}

	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
		if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK)
			m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;

		if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK
				       | NDIS_RXCSUM_INFO_TCPCS_OK))
			m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
		else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED
					    | NDIS_RXCSUM_INFO_UDPCS_FAILED))
			m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
	}

	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
		m->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
		m->hash.rss = info->hash_value;
	}

	PMD_RX_LOG(DEBUG,
		   "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64,
		   rxq->port_id, rxq->queue_id, rxb->xactid,
		   m->pkt_len, m->packet_type, m->ol_flags);

	++rxq->stats.packets;
	rxq->stats.bytes += m->pkt_len;
	hn_update_packet_stats(&rxq->stats, m);

	if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
		++rxq->stats.ring_full;
		PMD_RX_LOG(DEBUG, "rx ring full");
		if (use_extbuf)
			rte_pktmbuf_detach_extbuf(m);
		rte_pktmbuf_free(m);
	}
}

static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
			     struct hn_rx_bufinfo *rxb,
			     void *data, uint32_t dlen)
{
	unsigned int data_off, data_len;
	unsigned int pktinfo_off, pktinfo_len;
	const struct rndis_packet_msg *pkt = data;
	struct hn_rxinfo info = {
		.vlan_info = HN_NDIS_VLAN_INFO_INVALID,
		.csum_info = HN_NDIS_RXCSUM_INFO_INVALID,
		.hash_info = HN_NDIS_HASH_INFO_INVALID,
	};
	int err;

	hn_rndis_dump(pkt);

	if (unlikely(dlen < sizeof(*pkt)))
		goto error;

	if (unlikely(dlen < pkt->len))
		goto error; /* truncated RNDIS from host */

	if (unlikely(pkt->len < pkt->datalen
		     + pkt->oobdatalen + pkt->pktinfolen))
		goto error;

	if (unlikely(pkt->datalen == 0))
		goto error;

	/* Check offsets. */
	if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN))
		goto error;

	if (likely(pkt->pktinfooffset > 0) &&
	    unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN ||
		     (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)))
		goto error;

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	data_len = pkt->datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset);
	pktinfo_len = pkt->pktinfolen;

	if (likely(pktinfo_len > 0)) {
		err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off,
				      pktinfo_len, &info);
		if (err)
			goto error;
	}

	/* overflow check */
	if (data_len > data_len + data_off || data_len + data_off > pkt->len)
		goto error;

	if (unlikely(data_len < RTE_ETHER_HDR_LEN))
		goto error;

	hn_rxpkt(rxq, rxb, data, data_off, data_len, &info);
	return;
error:
	++rxq->stats.errors;
}

static void
hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
		 struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
{
	const struct rndis_msghdr *hdr = buf;

	switch (hdr->type) {
	case RNDIS_PACKET_MSG:
		if (dev->data->dev_started)
			hn_rndis_rx_data(rxq, rxb, buf, len);
		break;

	case RNDIS_INDICATE_STATUS_MSG:
		hn_rndis_link_status(dev, buf);
		break;

	case RNDIS_INITIALIZE_CMPLT:
	case RNDIS_QUERY_CMPLT:
	case RNDIS_SET_CMPLT:
		hn_rndis_receive_response(rxq->hv, buf, len);
		break;

	default:
		PMD_DRV_LOG(NOTICE,
			    "unexpected RNDIS message (type %#x len %u)",
			    hdr->type, len);
		break;
	}
}
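/*
 * Illustration (not driver code): one VMBus RXBUF channel packet handled
 * below describes several ranges inside the shared receive buffer, and each
 * range carries one RNDIS message (one Ethernet frame). Conceptually:
 *
 *	for (i = 0; i < pkt->rxbuf_cnt; i++)
 *		hn_rndis_receive(dev, rxq, rxb,
 *				 rxbuf + pkt->rxbuf[i].ofs, pkt->rxbuf[i].len);
 *	// the buffer section is acked once all references are dropped
 */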
static void
hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
		    struct hn_data *hv,
		    struct hn_rx_queue *rxq,
		    const struct vmbus_chanpkt_hdr *hdr,
		    const void *buf)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr = buf;
	uint32_t rxbuf_sz = hv->rxbuf_res.len;
	char *rxbuf = hv->rxbuf_res.addr;
	unsigned int i, hlen, count;
	struct hn_rx_bufinfo *rxb;

	/* At a minimum we need the type header */
	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) {
		PMD_RX_LOG(ERR, "invalid receive nvs RNDIS");
		return;
	}

	/* Make sure that this is an RNDIS message. */
	if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) {
		PMD_RX_LOG(ERR, "nvs type %u, not RNDIS",
			   nvs_hdr->type);
		return;
	}

	hlen = vmbus_chanpkt_getlen(hdr->hlen);
	if (unlikely(hlen < sizeof(*pkt))) {
		PMD_RX_LOG(ERR, "invalid rxbuf chanpkt");
		return;
	}

	pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr);
	if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) {
		PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x",
			   pkt->rxbuf_id);
		return;
	}

	count = pkt->rxbuf_cnt;
	if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf,
				     rxbuf[count]))) {
		PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count);
		return;
	}

	if (pkt->hdr.xactid > hv->rxbuf_section_cnt) {
		PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64,
			   pkt->hdr.xactid);
		return;
	}

	/* Setup receive buffer info to allow for callback */
	rxb = hn_rx_buf_init(rxq, pkt);

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		unsigned int ofs, len;

		ofs = pkt->rxbuf[i].ofs;
		len = pkt->rxbuf[i].len;

		if (unlikely(ofs + len > rxbuf_sz)) {
			PMD_RX_LOG(ERR,
				   "%uth RNDIS msg overflow ofs %u, len %u",
				   i, ofs, len);
			continue;
		}

		if (unlikely(len == 0)) {
			PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len);
			continue;
		}

		hn_rndis_receive(dev, rxq, rxb,
				 rxbuf + ofs, len);
	}

	/* Send ACK now if external mbuf not used */
	if (rte_mbuf_ext_refcnt_update(&rxb->shinfo, -1) == 0)
		hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

/*
 * Called when NVS inband events are received.
 * Sends a two-part message with the port_id and the NVS message
 * over the pipe to the netvsc-vf-event control thread.
 */
static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
				 const struct vmbus_chanpkt_hdr *pkt,
				 const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_TXTBL_NOTE:
		/* Transmit indirection table has locking problems
		 * in DPDK and is therefore not implemented
		 */
		PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
		break;

	case NVS_TYPE_VFASSOC_NOTE:
		hn_nvs_handle_vfassoc(dev, pkt, data);
		break;

	default:
		PMD_DRV_LOG(INFO,
			    "got notify, nvs type %u", hdr->type);
	}
}

struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
				      uint16_t queue_id,
				      unsigned int socket_id)
{
	struct hn_rx_queue *rxq;

	rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq)
		return NULL;

	rxq->hv = hv;
	rxq->chan = hv->channels[queue_id];
	rte_spinlock_init(&rxq->ring_lock);
	rxq->port_id = hv->port_id;
	rxq->queue_id = queue_id;
	rxq->event_sz = HN_RXQ_EVENT_DEFAULT;
	rxq->event_buf = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT,
					   RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq->event_buf) {
		rte_free(rxq);
		return NULL;
	}

	/* setup rxbuf_info for non-primary queue */
	if (queue_id) {
		rxq->rxbuf_info = rte_calloc("HN_RXBUF_INFO",
					     hv->rxbuf_section_cnt,
					     sizeof(*rxq->rxbuf_info),
					     RTE_CACHE_LINE_SIZE);

		if (!rxq->rxbuf_info) {
			PMD_DRV_LOG(ERR,
				    "Could not allocate rxbuf info for queue %d",
				    queue_id);
			rte_free(rxq->event_buf);
			rte_free(rxq);
			return NULL;
		}
	}

	return rxq;
}

void
hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_rxq_info *qinfo)
{
	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_id];

	qinfo->mp = rxq->mb_pool;
	qinfo->nb_desc = rxq->rx_ring->size;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
}

int
hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_rxconf *rx_conf,
		      struct rte_mempool *mp)
{
	struct hn_data *hv = dev->data->dev_private;
	char ring_name[RTE_RING_NAMESIZE];
	struct hn_rx_queue *rxq;
	unsigned int count;
	int error = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	if (queue_idx == 0) {
		rxq = hv->primary;
	} else {
		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
		if (!rxq)
			return -ENOMEM;
	}

	rxq->mb_pool = mp;
	count = rte_mempool_avail_count(mp) / dev->data->nb_rx_queues;
	if (nb_desc == 0 || nb_desc > count)
		nb_desc = count;

	/*
	 * Staging ring from the receive event logic to rx_pkts.
	 * rx_pkts assumes the caller handles multi-threading;
	 * the event logic has its own locking.
	 */
	snprintf(ring_name, sizeof(ring_name),
		 "hn_rx_%u_%u", dev->data->port_id, queue_idx);
	rxq->rx_ring = rte_ring_create(ring_name,
				       rte_align32pow2(nb_desc),
				       socket_id, 0);
	if (!rxq->rx_ring)
		goto fail;

	error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
				     socket_id, rx_conf, mp);
	if (error)
		goto fail;

	dev->data->rx_queues[queue_idx] = rxq;
	return 0;

fail:
	rte_ring_free(rxq->rx_ring);
	rte_free(rxq->rxbuf_info);
	rte_free(rxq->event_buf);
	rte_free(rxq);
	return error;
}

static void
hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary)
{
	if (!rxq)
		return;

	rte_ring_free(rxq->rx_ring);
	rxq->rx_ring = NULL;
	rxq->mb_pool = NULL;

	hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);

	/* Keep primary queue to allow for control operations */
	if (keep_primary && rxq == rxq->hv->primary)
		return;

	rte_free(rxq->rxbuf_info);
	rte_free(rxq->event_buf);
	rte_free(rxq);
}

void
hn_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct hn_rx_queue *rxq = dev->data->rx_queues[qid];

	PMD_INIT_FUNC_TRACE();

	hn_rx_queue_free(rxq, true);
}

/*
 * Get the number of used descriptors in an rx queue.
 * For this device that means how many packets are pending in the ring.
 */
uint32_t
hn_dev_rx_queue_count(void *rx_queue)
{
	struct hn_rx_queue *rxq = rx_queue;

	return rte_ring_count(rxq->rx_ring);
}

/*
 * Check the status of a Rx descriptor in the queue
 *
 * returns:
 *  - -EINVAL               - offset outside of ring
 *  - RTE_ETH_RX_DESC_AVAIL - no data available yet
 *  - RTE_ETH_RX_DESC_DONE  - data is waiting in staging ring
 */
int hn_dev_rx_queue_status(void *arg, uint16_t offset)
{
	const struct hn_rx_queue *rxq = arg;

	hn_process_events(rxq->hv, rxq->queue_id, 0);
	if (offset >= rxq->rx_ring->capacity)
		return -EINVAL;

	if (offset < rte_ring_count(rxq->rx_ring))
		return RTE_ETH_RX_DESC_DONE;
	else
		return RTE_ETH_RX_DESC_AVAIL;
}

int
hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
{
	struct hn_tx_queue *txq = arg;

	return hn_process_events(txq->hv, txq->queue_id, free_cnt);
}
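/*
 * Illustration (not driver code): hn_dev_tx_done_cleanup() above is reached
 * through the generic ethdev API, e.g. an application draining completed
 * transmits from its polling loop:
 *
 *	int n = rte_eth_tx_done_cleanup(port_id, queue_id, 0);
 *	// n < 0 on error; for this driver a free_cnt of 0 means "no limit"
 *
 * which lands in hn_process_events() below. That routine also services
 * receive and inband events arriving on the same VMBus channel.
 */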
/*
 * Process pending events on the channel.
 * Called from both Rx queue poll and Tx cleanup
 */
uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
			   uint32_t tx_limit)
{
	struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
	struct hn_rx_queue *rxq;
	uint32_t bytes_read = 0;
	uint32_t tx_done = 0;
	int ret = 0;

	rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];

	/*
	 * The channel is shared between the Rx and Tx queues, so a lock is
	 * needed because DPDK does not force the same CPU to be used for
	 * Rx and Tx.
	 */
	if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
		return 0;

	for (;;) {
		const struct vmbus_chanpkt_hdr *pkt;
		uint32_t len = rxq->event_sz;
		const void *data;

retry:
		ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
		if (ret == -EAGAIN)
			break;	/* ring is empty */

		if (unlikely(ret == -ENOBUFS)) {
			/* event buffer not large enough to read ring */

			PMD_DRV_LOG(DEBUG,
				    "event buffer expansion (need %u)", len);
			rxq->event_sz = len + len / 4;
			rxq->event_buf = rte_realloc(rxq->event_buf, rxq->event_sz,
						     RTE_CACHE_LINE_SIZE);
			if (rxq->event_buf)
				goto retry;
			/* out of memory, no more events now */
			rxq->event_sz = 0;
			break;
		}

		if (unlikely(ret <= 0)) {
			/* This indicates a failure to communicate (or worse) */
			rte_exit(EXIT_FAILURE,
				 "vmbus ring buffer error: %d", ret);
		}

		bytes_read += ret;
		pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
		data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen);

		switch (pkt->type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			++tx_done;
			hn_nvs_handle_comp(dev, queue_id, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(dev, pkt, data);
			break;

		default:
			PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type);
			break;
		}

		if (tx_limit && tx_done >= tx_limit)
			break;
	}

	if (bytes_read > 0)
		rte_vmbus_chan_signal_read(rxq->chan, bytes_read);

	rte_spinlock_unlock(&rxq->ring_lock);

	return tx_done;
}

static void hn_append_to_chim(struct hn_tx_queue *txq,
			      struct rndis_packet_msg *pkt,
			      const struct rte_mbuf *m)
{
	struct hn_txdesc *txd = txq->agg_txd;
	uint8_t *buf = (uint8_t *)pkt;
	unsigned int data_offs;

	hn_rndis_dump(pkt);

	data_offs = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	txd->chim_size += pkt->len;
	txd->data_size += m->pkt_len;
	++txd->packets;
	hn_update_packet_stats(&txq->stats, m);

	for (; m; m = m->next) {
		uint16_t len = rte_pktmbuf_data_len(m);

		rte_memcpy(buf + data_offs,
			   rte_pktmbuf_mtod(m, const char *), len);
		data_offs += len;
	}
}
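/*
 * Illustration (not driver code): RNDIS packets aggregated into one chimney
 * slot are packed back to back, each padded up to txq->agg_align. For
 * example, with an alignment of 8 a 107-byte RNDIS packet occupies
 * RTE_ALIGN(107, 8) == 112 bytes, so the next packet starts 112 bytes after
 * the previous one; hn_try_txagg() below adds that (zeroed) padding to the
 * previous packet's length and to the descriptor's chim_size.
 */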
/*
 * Send pending aggregated data in the chimney buffer (if any).
 * Returns an error if the send was unsuccessful because the channel
 * ring buffer was full.
 */
static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
{
	struct hn_txdesc *txd = txq->agg_txd;
	struct hn_nvs_rndis rndis;
	int ret;

	if (!txd)
		return 0;

	rndis = (struct hn_nvs_rndis) {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_idx = txd->chim_index,
		.chim_sz = txd->chim_size,
	};

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index, txd->chim_size);

	ret = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC,
			  &rndis, sizeof(rndis), (uintptr_t)txd, need_sig);

	if (likely(ret == 0))
		hn_reset_txagg(txq);
	else if (ret == -EAGAIN) {
		PMD_TX_LOG(DEBUG, "port %u:%u channel full",
			   txq->port_id, txq->queue_id);
		++txq->stats.channel_full;
	} else {
		++txq->stats.errors;

		PMD_DRV_LOG(NOTICE, "port %u:%u send failed: %d",
			    txq->port_id, txq->queue_id, ret);
	}
	return ret;
}

/*
 * Try to find a place in a send chimney buffer to put
 * the small packet. If space is available, this routine
 * returns a pointer to where to place the data.
 * If there is no space, the caller should try direct transmit.
 */
static void *
hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq,
	     struct hn_txdesc *txd, uint32_t pktsize)
{
	struct hn_txdesc *agg_txd = txq->agg_txd;
	struct rndis_packet_msg *pkt;
	void *chim;

	if (agg_txd) {
		unsigned int padding, olen;

		/*
		 * Update the previous RNDIS packet's total length,
		 * it can be increased due to the mandatory alignment
		 * padding for this RNDIS packet. And update the
		 * aggregating txdesc's chimney sending buffer size
		 * accordingly.
		 *
		 * Zero-out the padding, as required by the RNDIS spec.
		 */
		pkt = txq->agg_prevpkt;
		olen = pkt->len;
		padding = RTE_ALIGN(olen, txq->agg_align) - olen;
		if (padding > 0) {
			agg_txd->chim_size += padding;
			pkt->len += padding;
			memset((uint8_t *)pkt + olen, 0, padding);
		}

		chim = (uint8_t *)pkt + pkt->len;
		txq->agg_prevpkt = chim;
		txq->agg_pktleft--;
		txq->agg_szleft -= pktsize;
		if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
			/*
			 * Probably can't aggregate more packets,
			 * flush this aggregating txdesc proactively.
			 */
			txq->agg_pktleft = 0;
		}

		hn_txd_put(txq, txd);
		return chim;
	}

	txd->chim_index = hn_chim_alloc(hv);
	if (txd->chim_index == NVS_CHIM_IDX_INVALID)
		return NULL;

	chim = (uint8_t *)hv->chim_res.addr
			+ txd->chim_index * hv->chim_szmax;

	txq->agg_txd = txd;
	txq->agg_pktleft = txq->agg_pktmax - 1;
	txq->agg_szleft = txq->agg_szmax - pktsize;
	txq->agg_prevpkt = chim;

	return chim;
}

static inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt,
			uint32_t pi_dlen, uint32_t pi_type)
{
	const uint32_t pi_size = RNDIS_PKTINFO_SIZE(pi_dlen);
	struct rndis_pktinfo *pi;

	/*
	 * Per-packet-info does not move; it only grows.
	 *
	 * NOTE:
	 * pktinfooffset in this phase counts from the beginning
	 * of rndis_packet_msg.
	 */
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + hn_rndis_pktlen(pkt));

	pkt->pktinfolen += pi_size;

	pi->size = pi_size;
	pi->type = pi_type;
	pi->offset = RNDIS_PKTINFO_OFFSET;

	return pi->data;
}

/* Put RNDIS header and packet info on packet */
static void hn_encap(struct rndis_packet_msg *pkt,
		     uint16_t queue_id,
		     const struct rte_mbuf *m)
{
	unsigned int hlen = m->l2_len + m->l3_len;
	uint32_t *pi_data;
	uint32_t pkt_hlen;

	pkt->type = RNDIS_PACKET_MSG;
	pkt->len = m->pkt_len;
	pkt->dataoffset = 0;
	pkt->datalen = m->pkt_len;
	pkt->oobdataoffset = 0;
	pkt->oobdatalen = 0;
	pkt->oobdataelements = 0;
	pkt->pktinfooffset = sizeof(*pkt);
	pkt->pktinfolen = 0;
	pkt->vchandle = 0;
	pkt->reserved = 0;

	/*
	 * Set the hash value for this packet to the queue_id so that the
	 * TX done event for this packet arrives on the right channel.
	 */
	pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE,
					  NDIS_PKTINFO_TYPE_HASHVAL);
	*pi_data = queue_id;

	if (m->ol_flags & RTE_MBUF_F_TX_VLAN) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_VLAN);
		*pi_data = NDIS_VLAN_INFO_MAKE(RTE_VLAN_TCI_ID(m->vlan_tci),
					       RTE_VLAN_TCI_PRI(m->vlan_tci),
					       RTE_VLAN_TCI_DEI(m->vlan_tci));
	}

	if (m->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_LSO);

		if (m->ol_flags & RTE_MBUF_F_TX_IPV6) {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen,
							   m->tso_segsz);
		} else {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen,
							   m->tso_segsz);
		}
	} else if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
			RTE_MBUF_F_TX_TCP_CKSUM ||
		   (m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
			RTE_MBUF_F_TX_UDP_CKSUM ||
		   (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_CSUM);
		*pi_data = 0;

		if (m->ol_flags & RTE_MBUF_F_TX_IPV6)
			*pi_data |= NDIS_TXCSUM_INFO_IPV6;
		if (m->ol_flags & RTE_MBUF_F_TX_IPV4) {
			*pi_data |= NDIS_TXCSUM_INFO_IPV4;

			if (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
		}

		if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
		    RTE_MBUF_F_TX_TCP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen);
		else if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
			 RTE_MBUF_F_TX_UDP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen);
	}

	pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen;
	/* Fixup RNDIS packet message total length */
	pkt->len += pkt_hlen;

	/* Convert RNDIS packet message offsets */
	pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
	pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset);
}

/* How many scatter-gather list elements are needed */
static unsigned int hn_get_slots(const struct rte_mbuf *m)
{
	unsigned int slots = 1; /* for RNDIS header */

	while (m) {
		unsigned int size = rte_pktmbuf_data_len(m);
		unsigned int offs = rte_mbuf_data_iova(m) & PAGE_MASK;

		slots += (offs + size + rte_mem_page_size() - 1) /
				rte_mem_page_size();
		m = m->next;
	}

	return slots;
}
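/*
 * Illustration (not driver code): a worked example of the slot count above,
 * assuming a 4096-byte page. A single 3000-byte mbuf whose data starts at
 * page offset 2000 spans
 *
 *	(2000 + 3000 + 4095) / 4096 = 2 pages
 *
 * so hn_get_slots() returns 1 (RNDIS header) + 2 = 3, and hn_fill_sg()
 * below emits one vmbus_gpa element per page touched.
 */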
/* Build scatter gather list from chained mbuf */
static unsigned int hn_fill_sg(struct vmbus_gpa *sg,
			       const struct rte_mbuf *m)
{
	unsigned int segs = 0;

	while (m) {
		rte_iova_t addr = rte_mbuf_data_iova(m);
		unsigned int page = addr / rte_mem_page_size();
		unsigned int offset = addr & PAGE_MASK;
		unsigned int len = rte_pktmbuf_data_len(m);

		while (len > 0) {
			unsigned int bytes = RTE_MIN(len,
					rte_mem_page_size() - offset);

			sg[segs].page = page;
			sg[segs].ofs = offset;
			sg[segs].len = bytes;
			segs++;

			++page;
			offset = 0;
			len -= bytes;
		}
		m = m->next;
	}

	return segs;
}

/* Transmit directly from mbuf */
static int hn_xmit_sg(struct hn_tx_queue *txq,
		      const struct hn_txdesc *txd, const struct rte_mbuf *m,
		      bool *need_sig)
{
	struct vmbus_gpa sg[hn_get_slots(m)];
	struct hn_nvs_rndis nvs_rndis = {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_sz = txd->chim_size,
	};
	rte_iova_t addr;
	unsigned int segs;

	/* attach aggregation data if present */
	if (txd->chim_size > 0)
		nvs_rndis.chim_idx = txd->chim_index;
	else
		nvs_rndis.chim_idx = NVS_CHIM_IDX_INVALID;

	hn_rndis_dump(txd->rndis_pkt);

	/* pass IOVA of rndis header in first segment */
	addr = txq->tx_rndis_iova +
		((char *)txd->rndis_pkt - (char *)txq->tx_rndis);

	sg[0].page = addr / rte_mem_page_size();
	sg[0].ofs = addr & PAGE_MASK;
	sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt));
	segs = 1;

	hn_update_packet_stats(&txq->stats, m);

	segs += hn_fill_sg(sg + 1, m);

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index,
		   segs, nvs_rndis.chim_sz);

	return hn_nvs_send_sglist(txq->chan, sg, segs,
				  &nvs_rndis, sizeof(nvs_rndis),
				  (uintptr_t)txd, need_sig);
}
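/*
 * Illustration (not driver code): hn_xmit_pkts() below chooses one of two
 * transmit paths per mbuf:
 *
 *	if (m->pkt_len <= hv->tx_copybreak &&
 *	    m->pkt_len + HN_RNDIS_PKT_LEN <= txq->agg_szmax)
 *		// copy into the chimney buffer and aggregate (hn_try_txagg)
 *	else
 *		// scatter-gather send directly from the mbuf (hn_xmit_sg)
 *
 * Aggregated sends are flushed when the chimney slot runs out of packet or
 * byte budget, or at the end of the burst.
 */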
uint16_t
hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct hn_tx_queue *txq = ptxq;
	uint16_t queue_id = txq->queue_id;
	struct hn_data *hv = txq->hv;
	struct rte_eth_dev *vf_dev;
	bool need_sig = false;
	uint16_t nb_tx, tx_thresh;
	int ret;

	if (unlikely(hv->closed))
		return 0;

	/*
	 * Always check for events on the primary channel
	 * because that is where hotplug notifications occur.
	 */
	tx_thresh = RTE_MAX(txq->free_thresh, nb_pkts);
	if (txq->queue_id == 0 ||
	    rte_mempool_avail_count(txq->txdesc_pool) < tx_thresh)
		hn_process_events(hv, txq->queue_id, 0);

	/* Transmit over VF if present and up */
	if (hv->vf_ctx.vf_vsc_switched) {
		rte_rwlock_read_lock(&hv->vf_lock);
		vf_dev = hn_get_vf_dev(hv);
		if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
		    vf_dev->data->dev_started) {
			void *sub_q = vf_dev->data->tx_queues[queue_id];

			nb_tx = (*vf_dev->tx_pkt_burst)
					(sub_q, tx_pkts, nb_pkts);
			rte_rwlock_read_unlock(&hv->vf_lock);
			return nb_tx;
		}
		rte_rwlock_read_unlock(&hv->vf_lock);
	}

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *m = tx_pkts[nb_tx];
		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
		struct rndis_packet_msg *pkt;
		struct hn_txdesc *txd;

		txd = hn_txd_get(txq);
		if (txd == NULL)
			break;

		/* For small packets aggregate them in chimney buffer */
		if (m->pkt_len <= hv->tx_copybreak &&
		    pkt_size <= txq->agg_szmax) {
			/* If this packet will not fit, then flush */
			if (txq->agg_pktleft == 0 ||
			    RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) {
				if (hn_flush_txagg(txq, &need_sig))
					goto fail;
			}

			pkt = hn_try_txagg(hv, txq, txd, pkt_size);
			if (unlikely(!pkt))
				break;

			hn_encap(pkt, queue_id, m);
			hn_append_to_chim(txq, pkt, m);

			rte_pktmbuf_free(m);

			/* if buffer is full, flush */
			if (txq->agg_pktleft == 0 &&
			    hn_flush_txagg(txq, &need_sig))
				goto fail;
		} else {
			/* Send any outstanding packets in buffer */
			if (txq->agg_txd && hn_flush_txagg(txq, &need_sig))
				goto fail;

			pkt = txd->rndis_pkt;
			txd->m = m;
			txd->data_size = m->pkt_len;
			++txd->packets;

			hn_encap(pkt, queue_id, m);

			ret = hn_xmit_sg(txq, txd, m, &need_sig);
			if (unlikely(ret != 0)) {
				if (ret == -EAGAIN) {
					PMD_TX_LOG(DEBUG, "sg channel full");
					++txq->stats.channel_full;
				} else {
					PMD_DRV_LOG(NOTICE, "sg send failed: %d", ret);
					++txq->stats.errors;
				}
				hn_txd_put(txq, txd);
				goto fail;
			}
		}
	}

	/* If a partial buffer is left, then try to send it;
	 * if that fails, it is reused on the next send.
	 */
	hn_flush_txagg(txq, &need_sig);

fail:
	if (need_sig)
		rte_vmbus_chan_signal_tx(txq->chan);

	return nb_tx;
}

static uint16_t
hn_recv_vf(uint16_t vf_port, const struct hn_rx_queue *rxq,
	   struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t i, n;

	if (unlikely(nb_pkts == 0))
		return 0;

	n = rte_eth_rx_burst(vf_port, rxq->queue_id, rx_pkts, nb_pkts);

	/* relabel the received mbufs */
	for (i = 0; i < n; i++)
		rx_pkts[i]->port = rxq->port_id;

	return n;
}

uint16_t
hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct hn_rx_queue *rxq = prxq;
	struct hn_data *hv = rxq->hv;
	struct rte_eth_dev *vf_dev;
	uint16_t nb_rcv;

	if (unlikely(hv->closed))
		return 0;

	/* Check for new completions (and hotplug) */
	if (likely(rte_ring_count(rxq->rx_ring) < nb_pkts))
		hn_process_events(hv, rxq->queue_id, 0);

	/* Always check the vmbus path for multicast and new flows */
	nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
					   (void **)rx_pkts, nb_pkts, NULL);

	/* If VF is available, check that as well */
	if (hv->vf_ctx.vf_vsc_switched) {
		rte_rwlock_read_lock(&hv->vf_lock);
		vf_dev = hn_get_vf_dev(hv);
		if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
		    vf_dev->data->dev_started)
			nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq,
					     rx_pkts + nb_rcv,
					     nb_pkts - nb_rcv);

		rte_rwlock_read_unlock(&hv->vf_lock);
	}
	return nb_rcv;
}

void
hn_dev_free_queues(struct rte_eth_dev *dev)
{
	unsigned int i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct hn_rx_queue *rxq = dev->data->rx_queues[i];

		hn_rx_queue_free(rxq, false);
		dev->data->rx_queues[i] = NULL;
	}
	dev->data->nb_rx_queues = 0;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		hn_dev_tx_queue_release(dev, i);
		dev->data->tx_queues[i] = NULL;
	}
	dev->data->nb_tx_queues = 0;
}