/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2018 Microsoft Corporation
 * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
 * All rights reserved.
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <strings.h>
#include <malloc.h>

#include <rte_ethdev.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_bitmap.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <dev_driver.h>
#include <rte_net.h>
#include <bus_vmbus_driver.h>
#include <rte_spinlock.h>

#include "hn_logs.h"
#include "hn_var.h"
#include "hn_rndis.h"
#include "hn_nvs.h"
#include "ndis.h"

#define HN_NVS_SEND_MSG_SIZE \
	(sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis))

#define HN_TXD_CACHE_SIZE	32 /* per cpu tx_descriptor pool cache */
#define HN_RXQ_EVENT_DEFAULT	2048

struct hn_rxinfo {
	uint32_t	vlan_info;
	uint32_t	csum_info;
	uint32_t	hash_info;
	uint32_t	hash_value;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
	(HN_RXINFO_VLAN |		\
	 HN_RXINFO_CSUM |		\
	 HN_RXINFO_HASHINF |		\
	 HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0

/*
 * Per-transmit bookkeeping.
 * A slot in the transmit ring (chim_index) is reserved for each transmit.
 *
 * There are two types of transmit:
 *   - buffered transmit where the chimney buffer is used and the RNDIS
 *     header is placed in the buffer. mbuf == NULL for this case.
 *
 *   - direct transmit where the RNDIS header is in rndis_pkt;
 *     the mbuf is freed after transmit.
 *
 * Descriptors come from a per-port pool which is used
 * to limit the number of outstanding requests per device.
 */
struct hn_txdesc {
	struct rte_mbuf *m;

	uint16_t	queue_id;
	uint32_t	chim_index;
	uint32_t	chim_size;
	uint32_t	data_size;
	uint32_t	packets;

	struct rndis_packet_msg *rndis_pkt;
};

#define HN_RNDIS_PKT_LEN				\
	(sizeof(struct rndis_packet_msg) +		\
	 RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))

#define HN_RNDIS_PKT_ALIGNED	RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE)

/* Minimum space required for a packet */
#define HN_PKTSIZE_MIN(align) \
	RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)

#define DEFAULT_TX_FREE_THRESH 32

static void
hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
{
	uint32_t s = m->pkt_len;
	const struct rte_ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(m, const struct rte_ether_addr *);
	if (rte_is_multicast_ether_addr(ea)) {
		if (rte_is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt)
{
	return pkt->pktinfooffset + pkt->pktinfolen;
}

static inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{
	return ofs - offsetof(struct rndis_packet_msg, dataoffset);
}

static void hn_txd_init(struct rte_mempool *mp __rte_unused,
			void *opaque, void *obj, unsigned int idx)
{
	struct hn_tx_queue *txq = opaque;
	struct hn_txdesc *txd = obj;

	memset(txd, 0, sizeof(*txd));

	txd->queue_id = txq->queue_id;
	txd->chim_index = NVS_CHIM_IDX_INVALID;
	txd->rndis_pkt = (struct rndis_packet_msg *)((char *)txq->tx_rndis
		+ idx * HN_RNDIS_PKT_ALIGNED);
}

int
hn_chim_init(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;
	uint32_t i, chim_bmp_size;

	rte_spinlock_init(&hv->chim_lock);
	chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt);
	hv->chim_bmem = rte_zmalloc("hn_chim_bitmap", chim_bmp_size,
				    RTE_CACHE_LINE_SIZE);
	if (hv->chim_bmem == NULL) {
		PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u",
			     chim_bmp_size);
		return -1;
	}

	hv->chim_bmap = rte_bitmap_init(hv->chim_cnt,
					hv->chim_bmem, chim_bmp_size);
	if (hv->chim_bmap == NULL) {
		PMD_INIT_LOG(ERR, "failed to init chim bitmap");
		return -1;
	}

	for (i = 0; i < hv->chim_cnt; i++)
		rte_bitmap_set(hv->chim_bmap, i);

	return 0;
}

void
hn_chim_uninit(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;

	rte_bitmap_free(hv->chim_bmap);
	rte_free(hv->chim_bmem);
	hv->chim_bmem = NULL;
}

static uint32_t hn_chim_alloc(struct hn_data *hv)
{
	uint32_t index = NVS_CHIM_IDX_INVALID;
	uint64_t slab = 0;

	rte_spinlock_lock(&hv->chim_lock);
	if (rte_bitmap_scan(hv->chim_bmap, &index, &slab)) {
		index += rte_bsf64(slab);
		rte_bitmap_clear(hv->chim_bmap, index);
	}
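	/* index stays NVS_CHIM_IDX_INVALID if no chimney slot was free */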
	rte_spinlock_unlock(&hv->chim_lock);

	return index;
}

static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx)
{
	if (chim_idx >= hv->chim_cnt) {
		PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx);
	} else {
		rte_spinlock_lock(&hv->chim_lock);
		rte_bitmap_set(hv->chim_bmap, chim_idx);
		rte_spinlock_unlock(&hv->chim_lock);
	}
}

static void hn_reset_txagg(struct hn_tx_queue *txq)
{
	txq->agg_szleft = txq->agg_szmax;
	txq->agg_pktleft = txq->agg_pktmax;
	txq->agg_txd = NULL;
	txq->agg_prevpkt = NULL;
}

int
hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_txconf *tx_conf)
{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_tx_queue *txq;
	char name[RTE_MEMPOOL_NAMESIZE];
	uint32_t tx_free_thresh;
	int err = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh = RTE_MIN(nb_desc / 4,
					 DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh + 3 >= nb_desc) {
		PMD_INIT_LOG(ERR,
			     "tx_free_thresh must be less than the number of TX entries minus 3 (%u)."
			     " (tx_free_thresh=%u port=%u queue=%u)\n",
			     nb_desc - 3,
			     tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE,
				 socket_id);
	if (!txq)
		return -ENOMEM;

	txq->hv = hv;
	txq->chan = hv->channels[queue_idx];
	txq->port_id = dev->data->port_id;
	txq->queue_id = queue_idx;
	txq->free_thresh = tx_free_thresh;

	snprintf(name, sizeof(name),
		 "hn_txd_%u_%u", dev->data->port_id, queue_idx);

	PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu",
		     name, nb_desc, sizeof(struct hn_txdesc));

	txq->tx_rndis_mz = rte_memzone_reserve_aligned(name,
			nb_desc * HN_RNDIS_PKT_ALIGNED, rte_socket_id(),
			RTE_MEMZONE_IOVA_CONTIG, HN_RNDIS_PKT_ALIGNED);
	if (!txq->tx_rndis_mz) {
		err = -rte_errno;
		goto error;
	}
	txq->tx_rndis = txq->tx_rndis_mz->addr;
	txq->tx_rndis_iova = txq->tx_rndis_mz->iova;

	txq->txdesc_pool = rte_mempool_create(name, nb_desc,
					      sizeof(struct hn_txdesc),
					      0, 0, NULL, NULL,
					      hn_txd_init, txq,
					      dev->device->numa_node, 0);
	if (txq->txdesc_pool == NULL) {
		PMD_DRV_LOG(ERR,
			    "mempool %s create failed: %d", name, rte_errno);
		goto error;
	}

	txq->agg_szmax = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
	txq->agg_pktmax = hv->rndis_agg_pkts;
	txq->agg_align = hv->rndis_agg_align;

	hn_reset_txagg(txq);

	err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
				   socket_id, tx_conf);
	if (err == 0) {
		dev->data->tx_queues[queue_idx] = txq;
		return 0;
	}

error:
	rte_mempool_free(txq->txdesc_pool);
	rte_memzone_free(txq->tx_rndis_mz);
	rte_free(txq);
	return err;
}

void
hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_txq_info *qinfo)
{
	struct hn_tx_queue *txq = dev->data->tx_queues[queue_id];

	qinfo->nb_desc = txq->txdesc_pool->size;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}

static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq)
{
	struct hn_txdesc *txd;

	if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) {
		++txq->stats.ring_full;
		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
"tx pool exhausted!"); 341 return NULL; 342 } 343 344 txd->m = NULL; 345 txd->packets = 0; 346 txd->data_size = 0; 347 txd->chim_size = 0; 348 349 return txd; 350 } 351 352 static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd) 353 { 354 rte_mempool_put(txq->txdesc_pool, txd); 355 } 356 357 void 358 hn_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 359 { 360 struct hn_tx_queue *txq = dev->data->tx_queues[qid]; 361 362 PMD_INIT_FUNC_TRACE(); 363 364 if (!txq) 365 return; 366 367 rte_mempool_free(txq->txdesc_pool); 368 369 rte_memzone_free(txq->tx_rndis_mz); 370 rte_free(txq); 371 } 372 373 /* 374 * Check the status of a Tx descriptor in the queue. 375 * 376 * returns: 377 * - -EINVAL - offset outside of tx_descriptor pool. 378 * - RTE_ETH_TX_DESC_FULL - descriptor is not acknowledged by host. 379 * - RTE_ETH_TX_DESC_DONE - descriptor is available. 380 */ 381 int hn_dev_tx_descriptor_status(void *arg, uint16_t offset) 382 { 383 const struct hn_tx_queue *txq = arg; 384 385 hn_process_events(txq->hv, txq->queue_id, 0); 386 387 if (offset >= rte_mempool_avail_count(txq->txdesc_pool)) 388 return -EINVAL; 389 390 if (offset < rte_mempool_in_use_count(txq->txdesc_pool)) 391 return RTE_ETH_TX_DESC_FULL; 392 else 393 return RTE_ETH_TX_DESC_DONE; 394 } 395 396 static void 397 hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id, 398 unsigned long xactid, const struct hn_nvs_rndis_ack *ack) 399 { 400 struct hn_data *hv = dev->data->dev_private; 401 struct hn_txdesc *txd = (struct hn_txdesc *)xactid; 402 struct hn_tx_queue *txq; 403 404 /* Control packets are sent with xacid == 0 */ 405 if (!txd) 406 return; 407 408 txq = dev->data->tx_queues[queue_id]; 409 if (likely(ack->status == NVS_STATUS_OK)) { 410 PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u", 411 txq->port_id, txq->queue_id, txd->chim_index, 412 txd->packets, txd->data_size); 413 txq->stats.bytes += txd->data_size; 414 txq->stats.packets += txd->packets; 415 } else { 416 PMD_DRV_LOG(NOTICE, "port %u:%u complete tx %u failed status %u", 417 txq->port_id, txq->queue_id, txd->chim_index, ack->status); 418 ++txq->stats.errors; 419 } 420 421 if (txd->chim_index != NVS_CHIM_IDX_INVALID) { 422 hn_chim_free(hv, txd->chim_index); 423 txd->chim_index = NVS_CHIM_IDX_INVALID; 424 } 425 426 rte_pktmbuf_free(txd->m); 427 hn_txd_put(txq, txd); 428 } 429 430 /* Handle transmit completion events */ 431 static void 432 hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id, 433 const struct vmbus_chanpkt_hdr *pkt, 434 const void *data) 435 { 436 const struct hn_nvs_hdr *hdr = data; 437 438 switch (hdr->type) { 439 case NVS_TYPE_RNDIS_ACK: 440 hn_nvs_send_completed(dev, queue_id, pkt->xactid, data); 441 break; 442 443 default: 444 PMD_DRV_LOG(NOTICE, "unexpected send completion type %u", 445 hdr->type); 446 } 447 } 448 449 /* Parse per-packet info (meta data) */ 450 static int 451 hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen, 452 struct hn_rxinfo *info) 453 { 454 const struct rndis_pktinfo *pi = info_data; 455 uint32_t mask = 0; 456 457 while (info_dlen != 0) { 458 const void *data; 459 uint32_t dlen; 460 461 if (unlikely(info_dlen < sizeof(*pi))) 462 return -EINVAL; 463 464 if (unlikely(info_dlen < pi->size)) 465 return -EINVAL; 466 info_dlen -= pi->size; 467 468 if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) 469 return -EINVAL; 470 if (unlikely(pi->size < pi->offset)) 471 return -EINVAL; 472 473 dlen = pi->size - pi->offset; 474 data = pi->data; 475 476 switch 
		case NDIS_PKTINFO_TYPE_VLAN:
			if (unlikely(dlen < NDIS_VLAN_INFO_SIZE))
				return -EINVAL;
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE))
				return -EINVAL;
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case NDIS_PKTINFO_TYPE_HASHVAL:
			if (unlikely(dlen < NDIS_HASH_VALUE_SIZE))
				return -EINVAL;
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case NDIS_PKTINFO_TYPE_HASHINF:
			if (unlikely(dlen < NDIS_HASH_INFO_SIZE))
				return -EINVAL;
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL)
			break; /* All found; done */
next:
		pi = (const struct rndis_pktinfo *)
			((const uint8_t *)pi + pi->size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if (!(mask & HN_RXINFO_HASHVAL))
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return 0;
}

static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque)
{
	struct hn_rx_bufinfo *rxb = opaque;
	struct hn_rx_queue *rxq = rxb->rxq;

	rte_atomic32_dec(&rxq->rxbuf_outstanding);
	hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

static struct hn_rx_bufinfo *hn_rx_buf_init(struct hn_rx_queue *rxq,
					    const struct vmbus_chanpkt_rxbuf *pkt)
{
	struct hn_rx_bufinfo *rxb;

	rxb = rxq->rxbuf_info + pkt->hdr.xactid;
	rxb->chan = rxq->chan;
	rxb->xactid = pkt->hdr.xactid;
	rxb->rxq = rxq;

	rxb->shinfo.free_cb = hn_rx_buf_free_cb;
	rxb->shinfo.fcb_opaque = rxb;
	rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1);
	return rxb;
}

static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
		     uint8_t *data, unsigned int headroom, unsigned int dlen,
		     const struct hn_rxinfo *info)
{
	struct hn_data *hv = rxq->hv;
	struct rte_mbuf *m;
	bool use_extbuf = false;

	m = rte_pktmbuf_alloc(rxq->mb_pool);
	if (unlikely(!m)) {
		struct rte_eth_dev *dev =
			&rte_eth_devices[rxq->port_id];

		dev->data->rx_mbuf_alloc_failed++;
		return;
	}

	/*
	 * For large packets, avoid copy if possible but need to keep
	 * some space available in receive area for later packets.
	 */
	if (hv->rx_extmbuf_enable && dlen > hv->rx_copybreak &&
	    (uint32_t)rte_atomic32_read(&rxq->rxbuf_outstanding) <
			hv->rxbuf_section_cnt / 2) {
		struct rte_mbuf_ext_shared_info *shinfo;
		const void *rxbuf;
		rte_iova_t iova;

		/*
		 * Build an external mbuf that points to receive area.
		 * Use refcount to handle multiple packets in same
		 * receive buffer section.
		 */
		rxbuf = hv->rxbuf_res.addr;
		iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf);
		shinfo = &rxb->shinfo;

		/* shinfo is already set to 1 by the caller */
		if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 2)
			rte_atomic32_inc(&rxq->rxbuf_outstanding);

		rte_pktmbuf_attach_extbuf(m, data, iova,
					  dlen + headroom, shinfo);
		m->data_off = headroom;
		use_extbuf = true;
	} else {
		/* Mbufs in the pool must be large enough to hold small packets */
		if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) {
			rte_pktmbuf_free_seg(m);
			++rxq->stats.errors;
			return;
		}
		rte_memcpy(rte_pktmbuf_mtod(m, void *),
			   data + headroom, dlen);
	}

	m->port = rxq->port_id;
	m->pkt_len = dlen;
	m->data_len = dlen;
	m->packet_type = rte_net_get_ptype(m, NULL,
					   RTE_PTYPE_L2_MASK |
					   RTE_PTYPE_L3_MASK |
					   RTE_PTYPE_L4_MASK);

	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
		m->vlan_tci = RTE_VLAN_TCI_MAKE(NDIS_VLAN_INFO_ID(info->vlan_info),
						NDIS_VLAN_INFO_PRI(info->vlan_info),
						NDIS_VLAN_INFO_CFI(info->vlan_info));
		m->ol_flags |= RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_VLAN;

		/* NDIS always strips tag, put it back if necessary */
		if (!hv->vlan_strip && rte_vlan_insert(&m)) {
			PMD_DRV_LOG(DEBUG, "vlan insert failed");
			++rxq->stats.errors;
			if (use_extbuf)
				rte_pktmbuf_detach_extbuf(m);
			rte_pktmbuf_free(m);
			return;
		}
	}

	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
		if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK)
			m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;

		if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK
				       | NDIS_RXCSUM_INFO_TCPCS_OK))
			m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
		else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED
					    | NDIS_RXCSUM_INFO_UDPCS_FAILED))
			m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
	}

	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
		m->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
		m->hash.rss = info->hash_value;
	}

	PMD_RX_LOG(DEBUG,
		   "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64,
		   rxq->port_id, rxq->queue_id, rxb->xactid,
		   m->pkt_len, m->packet_type, m->ol_flags);

	++rxq->stats.packets;
	rxq->stats.bytes += m->pkt_len;
	hn_update_packet_stats(&rxq->stats, m);

	if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
		++rxq->stats.ring_full;
		PMD_RX_LOG(DEBUG, "rx ring full");
		if (use_extbuf)
			rte_pktmbuf_detach_extbuf(m);
		rte_pktmbuf_free(m);
	}
}

static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
			     struct hn_rx_bufinfo *rxb,
			     void *data, uint32_t dlen)
{
	unsigned int data_off, data_len;
	unsigned int pktinfo_off, pktinfo_len;
	const struct rndis_packet_msg *pkt = data;
	struct hn_rxinfo info = {
		.vlan_info = HN_NDIS_VLAN_INFO_INVALID,
		.csum_info = HN_NDIS_RXCSUM_INFO_INVALID,
		.hash_info = HN_NDIS_HASH_INFO_INVALID,
	};
	int err;

	hn_rndis_dump(pkt);

	if (unlikely(dlen < sizeof(*pkt)))
		goto error;

	if (unlikely(dlen < pkt->len))
		goto error; /* truncated RNDIS from host */

	if (unlikely(pkt->len < pkt->datalen
		     + pkt->oobdatalen + pkt->pktinfolen))
		goto error;

	if (unlikely(pkt->datalen == 0))
		goto error;

	/* Check offsets. */
	if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN))
		goto error;

	if (likely(pkt->pktinfooffset > 0) &&
	    unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN ||
		     (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)))
		goto error;

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	data_len = pkt->datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset);
	pktinfo_len = pkt->pktinfolen;

	if (likely(pktinfo_len > 0)) {
		err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off,
				      pktinfo_len, &info);
		if (err)
			goto error;
	}

	/* overflow check */
	if (data_len > data_len + data_off || data_len + data_off > pkt->len)
		goto error;

	if (unlikely(data_len < RTE_ETHER_HDR_LEN))
		goto error;

	hn_rxpkt(rxq, rxb, data, data_off, data_len, &info);
	return;
error:
	++rxq->stats.errors;
}

static void
hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
		 struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
{
	const struct rndis_msghdr *hdr = buf;

	switch (hdr->type) {
	case RNDIS_PACKET_MSG:
		if (dev->data->dev_started)
			hn_rndis_rx_data(rxq, rxb, buf, len);
		break;

	case RNDIS_INDICATE_STATUS_MSG:
		hn_rndis_link_status(dev, buf);
		break;

	case RNDIS_INITIALIZE_CMPLT:
	case RNDIS_QUERY_CMPLT:
	case RNDIS_SET_CMPLT:
		hn_rndis_receive_response(rxq->hv, buf, len);
		break;

	default:
		PMD_DRV_LOG(NOTICE,
			    "unexpected RNDIS message (type %#x len %u)",
			    hdr->type, len);
		break;
	}
}

static void
hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
		    struct hn_data *hv,
		    struct hn_rx_queue *rxq,
		    const struct vmbus_chanpkt_hdr *hdr,
		    const void *buf)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr = buf;
	uint32_t rxbuf_sz = hv->rxbuf_res.len;
	char *rxbuf = hv->rxbuf_res.addr;
	unsigned int i, hlen, count;
	struct hn_rx_bufinfo *rxb;

	/* At minimum we need type header */
	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) {
		PMD_RX_LOG(ERR, "invalid receive nvs RNDIS");
		return;
	}

	/* Make sure that this is an RNDIS message. */
	if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) {
		PMD_RX_LOG(ERR, "nvs type %u, not RNDIS",
			   nvs_hdr->type);
		return;
	}

	hlen = vmbus_chanpkt_getlen(hdr->hlen);
	if (unlikely(hlen < sizeof(*pkt))) {
		PMD_RX_LOG(ERR, "invalid rxbuf chanpkt");
		return;
	}

	pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr);
	if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) {
		PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x",
			   pkt->rxbuf_id);
		return;
	}

	count = pkt->rxbuf_cnt;
	if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf,
				     rxbuf[count]))) {
		PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count);
		return;
	}

	if (pkt->hdr.xactid > hv->rxbuf_section_cnt) {
		PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64,
			   pkt->hdr.xactid);
		return;
	}

	/* Setup receive buffer info to allow for callback */
	rxb = hn_rx_buf_init(rxq, pkt);

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		unsigned int ofs, len;

		ofs = pkt->rxbuf[i].ofs;
		len = pkt->rxbuf[i].len;

		if (unlikely(ofs + len > rxbuf_sz)) {
			PMD_RX_LOG(ERR,
				   "%uth RNDIS msg overflow ofs %u, len %u",
				   i, ofs, len);
			continue;
		}

		if (unlikely(len == 0)) {
			PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len);
			continue;
		}

		hn_rndis_receive(dev, rxq, rxb,
				 rxbuf + ofs, len);
	}

	/* Send ACK now if external mbuf not used */
	if (rte_mbuf_ext_refcnt_update(&rxb->shinfo, -1) == 0)
		hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

/*
 * Called when NVS inband events are received.
 * Send up a two-part message with the port_id and the NVS message
 * over the pipe to the netvsc-vf-event control thread.
 */
static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
				 const struct vmbus_chanpkt_hdr *pkt,
				 const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_TXTBL_NOTE:
		/* Transmit indirection table has locking problems
		 * in DPDK and therefore is not implemented
		 */
		PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
		break;

	case NVS_TYPE_VFASSOC_NOTE:
		hn_nvs_handle_vfassoc(dev, pkt, data);
		break;

	default:
		PMD_DRV_LOG(INFO,
			    "got notify, nvs type %u", hdr->type);
	}
}

struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
				      uint16_t queue_id,
				      unsigned int socket_id)
{
	struct hn_rx_queue *rxq;

	rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq)
		return NULL;

	rxq->hv = hv;
	rxq->chan = hv->channels[queue_id];
	rte_spinlock_init(&rxq->ring_lock);
	rxq->port_id = hv->port_id;
	rxq->queue_id = queue_id;
	rxq->event_sz = HN_RXQ_EVENT_DEFAULT;
	rxq->event_buf = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT,
					   RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq->event_buf) {
		rte_free(rxq);
		return NULL;
	}

	/* setup rxbuf_info for non-primary queue */
	if (queue_id) {
		rxq->rxbuf_info = rte_calloc("HN_RXBUF_INFO",
					     hv->rxbuf_section_cnt,
					     sizeof(*rxq->rxbuf_info),
					     RTE_CACHE_LINE_SIZE);

		if (!rxq->rxbuf_info) {
			PMD_DRV_LOG(ERR,
				    "Could not allocate rxbuf info for queue %d\n",
				    queue_id);
			rte_free(rxq->event_buf);
			rte_free(rxq);
			return NULL;
		}
	}

	return rxq;
}

void
hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_rxq_info *qinfo)
{
	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_id];

	qinfo->mp = rxq->mb_pool;
	qinfo->nb_desc = rxq->rx_ring->size;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
}

int
hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_rxconf *rx_conf,
		      struct rte_mempool *mp)
{
	struct hn_data *hv = dev->data->dev_private;
	char ring_name[RTE_RING_NAMESIZE];
	struct hn_rx_queue *rxq;
	unsigned int count;
	int error = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	if (queue_idx == 0) {
		rxq = hv->primary;
	} else {
		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
		if (!rxq)
			return -ENOMEM;
	}

	rxq->mb_pool = mp;
	count = rte_mempool_avail_count(mp) / dev->data->nb_rx_queues;
	if (nb_desc == 0 || nb_desc > count)
		nb_desc = count;

	/*
	 * Staging ring from receive event logic to rx_pkts.
	 * The rx_pkts path assumes the caller handles any multi-threading;
	 * the event logic has its own locking.
	 */
	snprintf(ring_name, sizeof(ring_name),
		 "hn_rx_%u_%u", dev->data->port_id, queue_idx);
	rxq->rx_ring = rte_ring_create(ring_name,
				       rte_align32pow2(nb_desc),
				       socket_id, 0);
	if (!rxq->rx_ring)
		goto fail;

	error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
				     socket_id, rx_conf, mp);
	if (error)
		goto fail;

	dev->data->rx_queues[queue_idx] = rxq;
	return 0;

fail:
	rte_ring_free(rxq->rx_ring);
	rte_free(rxq->rxbuf_info);
	rte_free(rxq->event_buf);
	rte_free(rxq);
	return error;
}

static void
hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary)
{
	if (!rxq)
		return;

	rte_ring_free(rxq->rx_ring);
	rxq->rx_ring = NULL;
	rxq->mb_pool = NULL;

	hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);

	/* Keep primary queue to allow for control operations */
	if (keep_primary && rxq == rxq->hv->primary)
		return;

	rte_free(rxq->rxbuf_info);
	rte_free(rxq->event_buf);
	rte_free(rxq);
}

void
hn_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct hn_rx_queue *rxq = dev->data->rx_queues[qid];

	PMD_INIT_FUNC_TRACE();

	hn_rx_queue_free(rxq, true);
}

/*
 * Get the number of used descriptors in an rx queue.
 * For this device that means how many packets are pending in the ring.
 */
uint32_t
hn_dev_rx_queue_count(void *rx_queue)
{
	struct hn_rx_queue *rxq = rx_queue;

	return rte_ring_count(rxq->rx_ring);
}

/*
 * Check the status of a Rx descriptor in the queue
 *
 * returns:
 *  - -EINVAL               - offset outside of ring
 *  - RTE_ETH_RX_DESC_AVAIL - no data available yet
 *  - RTE_ETH_RX_DESC_DONE  - data is waiting in staging ring
 */
int hn_dev_rx_queue_status(void *arg, uint16_t offset)
{
	const struct hn_rx_queue *rxq = arg;

	hn_process_events(rxq->hv, rxq->queue_id, 0);
	if (offset >= rxq->rx_ring->capacity)
		return -EINVAL;

	if (offset < rte_ring_count(rxq->rx_ring))
		return RTE_ETH_RX_DESC_DONE;
	else
		return RTE_ETH_RX_DESC_AVAIL;
}

int
hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
{
	struct hn_tx_queue *txq = arg;

	return hn_process_events(txq->hv, txq->queue_id, free_cnt);
}

/*
 * Process pending events on the channel.
 * Called from both Rx queue poll and Tx cleanup
 */
uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
			   uint32_t tx_limit)
{
	struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
	struct hn_rx_queue *rxq;
	uint32_t bytes_read = 0;
	uint32_t tx_done = 0;
	int ret = 0;

	rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];

	/*
	 * The channel is shared between the Rx and Tx queues, so a lock is
	 * needed because DPDK does not force the same CPU to be used for Rx/Tx.
	 */
	if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
		return 0;

	for (;;) {
		const struct vmbus_chanpkt_hdr *pkt;
		uint32_t len = rxq->event_sz;
		const void *data;

retry:
		ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
		if (ret == -EAGAIN)
			break;	/* ring is empty */

		if (unlikely(ret == -ENOBUFS)) {
			/* event buffer not large enough to read ring */
			PMD_DRV_LOG(DEBUG,
				    "event buffer expansion (need %u)", len);
			rxq->event_sz = len + len / 4;
			rxq->event_buf = rte_realloc(rxq->event_buf, rxq->event_sz,
						     RTE_CACHE_LINE_SIZE);
			if (rxq->event_buf)
				goto retry;
			/* out of memory, no more events now */
			rxq->event_sz = 0;
			break;
		}

		if (unlikely(ret <= 0)) {
			/* This indicates a failure to communicate (or worse) */
			rte_exit(EXIT_FAILURE,
				 "vmbus ring buffer error: %d", ret);
		}

		bytes_read += ret;
		pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
		data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen);

		switch (pkt->type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			++tx_done;
			hn_nvs_handle_comp(dev, queue_id, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(dev, pkt, data);
			break;

		default:
			PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type);
			break;
		}

		if (tx_limit && tx_done >= tx_limit)
			break;
	}

	if (bytes_read > 0)
		rte_vmbus_chan_signal_read(rxq->chan, bytes_read);

	rte_spinlock_unlock(&rxq->ring_lock);

	return tx_done;
}

static void hn_append_to_chim(struct hn_tx_queue *txq,
			      struct rndis_packet_msg *pkt,
			      const struct rte_mbuf *m)
{
	struct hn_txdesc *txd = txq->agg_txd;
	uint8_t *buf = (uint8_t *)pkt;
	unsigned int data_offs;

	hn_rndis_dump(pkt);

	data_offs = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	txd->chim_size += pkt->len;
	txd->data_size += m->pkt_len;
	++txd->packets;
	hn_update_packet_stats(&txq->stats, m);

	for (; m; m = m->next) {
		uint16_t len = rte_pktmbuf_data_len(m);

		rte_memcpy(buf + data_offs,
			   rte_pktmbuf_mtod(m, const char *), len);
		data_offs += len;
	}
}

/*
 * Send pending aggregated data in chimney buffer (if any).
 * Returns error if send was unsuccessful because channel ring buffer
 * was full.
 */
static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
{
	struct hn_txdesc *txd = txq->agg_txd;
	struct hn_nvs_rndis rndis;
	int ret;

	if (!txd)
		return 0;

	rndis = (struct hn_nvs_rndis) {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_idx = txd->chim_index,
		.chim_sz = txd->chim_size,
	};

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index, txd->chim_size);

	ret = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC,
			  &rndis, sizeof(rndis), (uintptr_t)txd, need_sig);

	if (likely(ret == 0))
		hn_reset_txagg(txq);
	else if (ret == -EAGAIN) {
		PMD_TX_LOG(DEBUG, "port %u:%u channel full",
			   txq->port_id, txq->queue_id);
		++txq->stats.channel_full;
	} else {
		++txq->stats.errors;

		PMD_DRV_LOG(NOTICE, "port %u:%u send failed: %d",
			    txq->port_id, txq->queue_id, ret);
	}
	return ret;
}

/*
 * Try to find a place in the send chimney buffer to put
 * the small packet. If space is available, this routine
 * returns a pointer to where the data should be placed.
 * If there is no space, the caller should try direct transmit.
 */
static void *
hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq,
	     struct hn_txdesc *txd, uint32_t pktsize)
{
	struct hn_txdesc *agg_txd = txq->agg_txd;
	struct rndis_packet_msg *pkt;
	void *chim;

	if (agg_txd) {
		unsigned int padding, olen;

		/*
		 * Update the previous RNDIS packet's total length;
		 * it can be increased due to the mandatory alignment
		 * padding for this RNDIS packet. Also update the
		 * aggregating txdesc's chimney sending buffer size
		 * accordingly.
		 *
		 * Zero-out the padding, as required by the RNDIS spec.
		 */
		pkt = txq->agg_prevpkt;
		olen = pkt->len;
		padding = RTE_ALIGN(olen, txq->agg_align) - olen;
		if (padding > 0) {
			agg_txd->chim_size += padding;
			pkt->len += padding;
			memset((uint8_t *)pkt + olen, 0, padding);
		}

		chim = (uint8_t *)pkt + pkt->len;
		txq->agg_prevpkt = chim;
		txq->agg_pktleft--;
		txq->agg_szleft -= pktsize;
		if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
			/*
			 * Probably can't aggregate more packets,
			 * flush this aggregating txdesc proactively.
			 */
			txq->agg_pktleft = 0;
		}

		hn_txd_put(txq, txd);
		return chim;
	}

	txd->chim_index = hn_chim_alloc(hv);
	if (txd->chim_index == NVS_CHIM_IDX_INVALID)
		return NULL;

	chim = (uint8_t *)hv->chim_res.addr
			+ txd->chim_index * hv->chim_szmax;

	txq->agg_txd = txd;
	txq->agg_pktleft = txq->agg_pktmax - 1;
	txq->agg_szleft = txq->agg_szmax - pktsize;
	txq->agg_prevpkt = chim;

	return chim;
}

static inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt,
			uint32_t pi_dlen, uint32_t pi_type)
{
	const uint32_t pi_size = RNDIS_PKTINFO_SIZE(pi_dlen);
	struct rndis_pktinfo *pi;

	/*
	 * Per-packet-info does not move; it only grows.
	 *
	 * NOTE:
	 * pktinfooffset in this phase counts from the beginning
	 * of rndis_packet_msg.
	 */
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + hn_rndis_pktlen(pkt));

	pkt->pktinfolen += pi_size;

	pi->size = pi_size;
	pi->type = pi_type;
	pi->offset = RNDIS_PKTINFO_OFFSET;

	return pi->data;
}

/* Put RNDIS header and packet info on packet */
static void hn_encap(struct rndis_packet_msg *pkt,
		     uint16_t queue_id,
		     const struct rte_mbuf *m)
{
	unsigned int hlen = m->l2_len + m->l3_len;
	uint32_t *pi_data;
	uint32_t pkt_hlen;

	pkt->type = RNDIS_PACKET_MSG;
	pkt->len = m->pkt_len;
	pkt->dataoffset = 0;
	pkt->datalen = m->pkt_len;
	pkt->oobdataoffset = 0;
	pkt->oobdatalen = 0;
	pkt->oobdataelements = 0;
	pkt->pktinfooffset = sizeof(*pkt);
	pkt->pktinfolen = 0;
	pkt->vchandle = 0;
	pkt->reserved = 0;

	/*
	 * Set the hash value for this packet to the queue_id so that
	 * the TX done event for this packet arrives on the right channel.
	 */
	pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE,
					  NDIS_PKTINFO_TYPE_HASHVAL);
	*pi_data = queue_id;

	if (m->ol_flags & RTE_MBUF_F_TX_VLAN) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_VLAN);
		*pi_data = NDIS_VLAN_INFO_MAKE(RTE_VLAN_TCI_ID(m->vlan_tci),
					       RTE_VLAN_TCI_PRI(m->vlan_tci),
					       RTE_VLAN_TCI_DEI(m->vlan_tci));
	}

	if (m->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_LSO);

		if (m->ol_flags & RTE_MBUF_F_TX_IPV6) {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen,
							   m->tso_segsz);
		} else {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen,
							   m->tso_segsz);
		}
	} else if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
			RTE_MBUF_F_TX_TCP_CKSUM ||
		   (m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
			RTE_MBUF_F_TX_UDP_CKSUM ||
		   (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_CSUM);
		*pi_data = 0;

		if (m->ol_flags & RTE_MBUF_F_TX_IPV6)
			*pi_data |= NDIS_TXCSUM_INFO_IPV6;
		if (m->ol_flags & RTE_MBUF_F_TX_IPV4) {
			*pi_data |= NDIS_TXCSUM_INFO_IPV4;

			if (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
		}

		if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
				RTE_MBUF_F_TX_TCP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen);
		else if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
				RTE_MBUF_F_TX_UDP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen);
	}

	pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen;
	/* Fixup RNDIS packet message total length */
	pkt->len += pkt_hlen;

	/* Convert RNDIS packet message offsets */
	pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
	pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset);
}

/* How many scatter gather list elements are needed */
static unsigned int hn_get_slots(const struct rte_mbuf *m)
{
	unsigned int slots = 1; /* for RNDIS header */

	while (m) {
		unsigned int size = rte_pktmbuf_data_len(m);
		unsigned int offs = rte_mbuf_data_iova(m) & PAGE_MASK;

		slots += (offs + size + rte_mem_page_size() - 1) /
				rte_mem_page_size();
		m = m->next;
	}

	return slots;
}

/* Build scatter gather list from chained mbuf */
static unsigned int hn_fill_sg(struct vmbus_gpa *sg,
			       const struct rte_mbuf *m)
{
	unsigned int segs = 0;

	while (m) {
		rte_iova_t addr = rte_mbuf_data_iova(m);
		unsigned int page = addr / rte_mem_page_size();
		unsigned int offset = addr & PAGE_MASK;
		unsigned int len = rte_pktmbuf_data_len(m);

		while (len > 0) {
			unsigned int bytes = RTE_MIN(len,
						     rte_mem_page_size() - offset);

			sg[segs].page = page;
			sg[segs].ofs = offset;
			sg[segs].len = bytes;
			segs++;

			++page;
			offset = 0;
			len -= bytes;
		}
		m = m->next;
	}

	return segs;
}

/* Transmit directly from mbuf */
static int hn_xmit_sg(struct hn_tx_queue *txq,
		      const struct hn_txdesc *txd, const struct rte_mbuf *m,
		      bool *need_sig)
{
	struct vmbus_gpa sg[hn_get_slots(m)];
	struct hn_nvs_rndis nvs_rndis = {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_sz = txd->chim_size,
	};
	rte_iova_t addr;
	unsigned int segs;

	/* attach aggregation data if present */
	if (txd->chim_size > 0)
		nvs_rndis.chim_idx = txd->chim_index;
	else
		nvs_rndis.chim_idx = NVS_CHIM_IDX_INVALID;

	hn_rndis_dump(txd->rndis_pkt);

	/* pass IOVA of rndis header in first segment */
	addr = txq->tx_rndis_iova +
		((char *)txd->rndis_pkt - (char *)txq->tx_rndis);

	sg[0].page = addr / rte_mem_page_size();
	sg[0].ofs = addr & PAGE_MASK;
	sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt));
	segs = 1;

	hn_update_packet_stats(&txq->stats, m);

	segs += hn_fill_sg(sg + 1, m);

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index,
		   segs, nvs_rndis.chim_sz);

	return hn_nvs_send_sglist(txq->chan, sg, segs,
				  &nvs_rndis, sizeof(nvs_rndis),
				  (uintptr_t)txd, need_sig);
}

uint16_t
hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct hn_tx_queue *txq = ptxq;
	uint16_t queue_id = txq->queue_id;
	struct hn_data *hv = txq->hv;
	struct rte_eth_dev *vf_dev;
	bool need_sig = false;
	uint16_t nb_tx, tx_thresh;
	int ret;

	if (unlikely(hv->closed))
		return 0;

	/*
	 * Always check for events on the primary channel
	 * because that is where hotplug notifications occur.
	 */
	tx_thresh = RTE_MAX(txq->free_thresh, nb_pkts);
	if (txq->queue_id == 0 ||
	    rte_mempool_avail_count(txq->txdesc_pool) < tx_thresh)
		hn_process_events(hv, txq->queue_id, 0);

	/* Transmit over VF if present and up */
	if (hv->vf_ctx.vf_vsc_switched) {
		rte_rwlock_read_lock(&hv->vf_lock);
		vf_dev = hn_get_vf_dev(hv);
		if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
		    vf_dev->data->dev_started) {
			void *sub_q = vf_dev->data->tx_queues[queue_id];

			nb_tx = (*vf_dev->tx_pkt_burst)
					(sub_q, tx_pkts, nb_pkts);
			rte_rwlock_read_unlock(&hv->vf_lock);
			return nb_tx;
		}
		rte_rwlock_read_unlock(&hv->vf_lock);
	}

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *m = tx_pkts[nb_tx];
		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
		struct rndis_packet_msg *pkt;
		struct hn_txdesc *txd;

		txd = hn_txd_get(txq);
		if (txd == NULL)
			break;

		/* For small packets aggregate them in chimney buffer */
		if (m->pkt_len <= hv->tx_copybreak &&
		    pkt_size <= txq->agg_szmax) {
			/* If this packet will not fit, then flush */
			if (txq->agg_pktleft == 0 ||
			    RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) {
				if (hn_flush_txagg(txq, &need_sig))
					goto fail;
			}

			pkt = hn_try_txagg(hv, txq, txd, pkt_size);
			if (unlikely(!pkt))
				break;

			hn_encap(pkt, queue_id, m);
			hn_append_to_chim(txq, pkt, m);

			rte_pktmbuf_free(m);

			/* if buffer is full, flush */
			if (txq->agg_pktleft == 0 &&
			    hn_flush_txagg(txq, &need_sig))
				goto fail;
		} else {
			/* Send any outstanding packets in buffer */
			if (txq->agg_txd && hn_flush_txagg(txq, &need_sig))
				goto fail;

			pkt = txd->rndis_pkt;
			txd->m = m;
			txd->data_size = m->pkt_len;
			++txd->packets;

			hn_encap(pkt, queue_id, m);

			ret = hn_xmit_sg(txq, txd, m, &need_sig);
			if (unlikely(ret != 0)) {
				if (ret == -EAGAIN) {
					PMD_TX_LOG(DEBUG, "sg channel full");
					++txq->stats.channel_full;
				} else {
					PMD_DRV_LOG(NOTICE, "sg send failed: %d", ret);
					++txq->stats.errors;
				}
				hn_txd_put(txq, txd);
				goto fail;
			}
		}
	}

	/* If a partial buffer is left, then try to send it.
	 * If that fails, it is reused on the next send.
	 */
	hn_flush_txagg(txq, &need_sig);

fail:
	if (need_sig)
		rte_vmbus_chan_signal_tx(txq->chan);

	return nb_tx;
}

static uint16_t
hn_recv_vf(uint16_t vf_port, const struct hn_rx_queue *rxq,
	   struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t i, n;

	if (unlikely(nb_pkts == 0))
		return 0;

	n = rte_eth_rx_burst(vf_port, rxq->queue_id, rx_pkts, nb_pkts);

	/* relabel the received mbufs */
	for (i = 0; i < n; i++)
		rx_pkts[i]->port = rxq->port_id;

	return n;
}

uint16_t
hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct hn_rx_queue *rxq = prxq;
	struct hn_data *hv = rxq->hv;
	struct rte_eth_dev *vf_dev;
	uint16_t nb_rcv;

	if (unlikely(hv->closed))
		return 0;

	/* Check for new completions (and hotplug) */
	if (likely(rte_ring_count(rxq->rx_ring) < nb_pkts))
		hn_process_events(hv, rxq->queue_id, 0);

	/* Always check the vmbus path for multicast and new flows */
	nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
					   (void **)rx_pkts, nb_pkts, NULL);

	/* If VF is available, check that as well */
	if (hv->vf_ctx.vf_vsc_switched) {
		rte_rwlock_read_lock(&hv->vf_lock);
		vf_dev = hn_get_vf_dev(hv);
		if (hv->vf_ctx.vf_vsc_switched && vf_dev &&
		    vf_dev->data->dev_started)
			nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq,
					     rx_pkts + nb_rcv,
					     nb_pkts - nb_rcv);

		rte_rwlock_read_unlock(&hv->vf_lock);
	}
	return nb_rcv;
}

void
hn_dev_free_queues(struct rte_eth_dev *dev)
{
	unsigned int i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct hn_rx_queue *rxq = dev->data->rx_queues[i];

		hn_rx_queue_free(rxq, false);
		dev->data->rx_queues[i] = NULL;
	}
	dev->data->nb_rx_queues = 0;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		hn_dev_tx_queue_release(dev, i);
		dev->data->tx_queues[i] = NULL;
	}
	dev->data->nb_tx_queues = 0;
}