/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2018 Microsoft Corporation
 * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
 * All rights reserved.
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <strings.h>
#include <malloc.h>

#include <rte_ethdev.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_bitmap.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_dev.h>
#include <rte_net.h>
#include <rte_bus_vmbus.h>
#include <rte_spinlock.h>

#include "hn_logs.h"
#include "hn_var.h"
#include "hn_rndis.h"
#include "hn_nvs.h"
#include "ndis.h"

#define HN_NVS_SEND_MSG_SIZE \
	(sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis))

#define HN_TXD_CACHE_SIZE	32 /* per cpu tx_descriptor pool cache */
#define HN_TXCOPY_THRESHOLD	512

#define HN_RXCOPY_THRESHOLD	256
#define HN_RXQ_EVENT_DEFAULT	2048

struct hn_rxinfo {
	uint32_t	vlan_info;
	uint32_t	csum_info;
	uint32_t	hash_info;
	uint32_t	hash_value;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
	(HN_RXINFO_VLAN |		\
	 HN_RXINFO_CSUM |		\
	 HN_RXINFO_HASHINF |		\
	 HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0

/*
 * Per-transmit bookkeeping.
 * A slot in the transmit ring (chim_index) is reserved for each transmit.
 *
 * There are two types of transmit:
 *   - buffered transmit where the chimney buffer is used and the RNDIS
 *     header is in the buffer. mbuf == NULL for this case.
 *
 *   - direct transmit where the RNDIS header is in the rndis_pkt and
 *     the mbuf is freed after transmit.
 *
 * Descriptors come from the per-port pool which is used
 * to limit the number of outstanding requests per device.
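 *
 * chim_index is NVS_CHIM_IDX_INVALID while the descriptor does not own
 * a chimney buffer slot.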
 */
struct hn_txdesc {
	struct rte_mbuf *m;

	uint16_t	queue_id;
	uint32_t	chim_index;
	uint32_t	chim_size;
	uint32_t	data_size;
	uint32_t	packets;

	struct rndis_packet_msg *rndis_pkt;
};

#define HN_RNDIS_PKT_LEN				\
	(sizeof(struct rndis_packet_msg) +		\
	 RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))

#define HN_RNDIS_PKT_ALIGNED	RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE)

/* Minimum space required for a packet */
#define HN_PKTSIZE_MIN(align) \
	RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)

#define DEFAULT_TX_FREE_THRESH 32

static void
hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
{
	uint32_t s = m->pkt_len;
	const struct rte_ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(m, const struct rte_ether_addr *);
	if (rte_is_multicast_ether_addr(ea)) {
		if (rte_is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt)
{
	return pkt->pktinfooffset + pkt->pktinfolen;
}

static inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{
	return ofs - offsetof(struct rndis_packet_msg, dataoffset);
}

static void hn_txd_init(struct rte_mempool *mp __rte_unused,
			void *opaque, void *obj, unsigned int idx)
{
	struct hn_tx_queue *txq = opaque;
	struct hn_txdesc *txd = obj;

	memset(txd, 0, sizeof(*txd));

	txd->queue_id = txq->queue_id;
	txd->chim_index = NVS_CHIM_IDX_INVALID;
	txd->rndis_pkt = (struct rndis_packet_msg *)((char *)txq->tx_rndis
		+ idx * HN_RNDIS_PKT_ALIGNED);
}

int
hn_chim_init(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;
	uint32_t i, chim_bmp_size;

	rte_spinlock_init(&hv->chim_lock);
	chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt);
	hv->chim_bmem = rte_zmalloc("hn_chim_bitmap", chim_bmp_size,
				    RTE_CACHE_LINE_SIZE);
	if (hv->chim_bmem == NULL) {
		PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u",
			     chim_bmp_size);
		return -1;
	}

	hv->chim_bmap = rte_bitmap_init(hv->chim_cnt,
					hv->chim_bmem, chim_bmp_size);
	if (hv->chim_bmap == NULL) {
		PMD_INIT_LOG(ERR, "failed to init chim bitmap");
		return -1;
	}

	for (i = 0; i < hv->chim_cnt; i++)
		rte_bitmap_set(hv->chim_bmap, i);

	return 0;
}

void
hn_chim_uninit(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;

	rte_bitmap_free(hv->chim_bmap);
	rte_free(hv->chim_bmem);
	hv->chim_bmem = NULL;
}

static uint32_t hn_chim_alloc(struct hn_data *hv)
{
	uint32_t index = NVS_CHIM_IDX_INVALID;
	uint64_t slab;

	rte_spinlock_lock(&hv->chim_lock);
	if (rte_bitmap_scan(hv->chim_bmap, &index, &slab))
		rte_bitmap_clear(hv->chim_bmap, index);
	rte_spinlock_unlock(&hv->chim_lock);
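	/* index stays NVS_CHIM_IDX_INVALID if no free chimney slot was found */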
	return index;
}

static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx)
{
	if (chim_idx >= hv->chim_cnt) {
		PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx);
	} else {
		rte_spinlock_lock(&hv->chim_lock);
		rte_bitmap_set(hv->chim_bmap, chim_idx);
		rte_spinlock_unlock(&hv->chim_lock);
	}
}

static void hn_reset_txagg(struct hn_tx_queue *txq)
{
	txq->agg_szleft = txq->agg_szmax;
	txq->agg_pktleft = txq->agg_pktmax;
	txq->agg_txd = NULL;
	txq->agg_prevpkt = NULL;
}

int
hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_txconf *tx_conf)
{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_tx_queue *txq;
	char name[RTE_MEMPOOL_NAMESIZE];
	uint32_t tx_free_thresh;
	int err = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE,
				 socket_id);
	if (!txq)
		return -ENOMEM;

	txq->hv = hv;
	txq->chan = hv->channels[queue_idx];
	txq->port_id = dev->data->port_id;
	txq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh = RTE_MIN(nb_desc / 4,
					 DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh + 3 >= nb_desc) {
		PMD_INIT_LOG(ERR,
			     "tx_free_thresh must be less than the number of TX entries minus 3(%u)."
			     " (tx_free_thresh=%u port=%u queue=%u)\n",
			     nb_desc - 3,
			     tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	txq->free_thresh = tx_free_thresh;

	snprintf(name, sizeof(name),
		 "hn_txd_%u_%u", dev->data->port_id, queue_idx);

	PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu",
		     name, nb_desc, sizeof(struct hn_txdesc));

	txq->tx_rndis = rte_calloc("hn_txq_rndis", nb_desc,
				   HN_RNDIS_PKT_ALIGNED, RTE_CACHE_LINE_SIZE);
	if (txq->tx_rndis == NULL)
		goto error;

	txq->txdesc_pool = rte_mempool_create(name, nb_desc,
					      sizeof(struct hn_txdesc),
					      0, 0, NULL, NULL,
					      hn_txd_init, txq,
					      dev->device->numa_node, 0);
	if (txq->txdesc_pool == NULL) {
		PMD_DRV_LOG(ERR,
			    "mempool %s create failed: %d", name, rte_errno);
		goto error;
	}

	txq->agg_szmax  = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
	txq->agg_pktmax = hv->rndis_agg_pkts;
	txq->agg_align  = hv->rndis_agg_align;

	hn_reset_txagg(txq);

	err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
				   socket_id, tx_conf);
	if (err == 0) {
		dev->data->tx_queues[queue_idx] = txq;
		return 0;
	}

error:
	if (txq->txdesc_pool)
		rte_mempool_free(txq->txdesc_pool);
	rte_free(txq->tx_rndis);
	rte_free(txq);
	return err;
}

void
hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_txq_info *qinfo)
{
	struct hn_tx_queue *txq = dev->data->tx_queues[queue_id];

	qinfo->nb_desc = txq->txdesc_pool->size;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}

static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq)
{
	struct hn_txdesc *txd;

	if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) {
		++txq->stats.ring_full;
		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
		return NULL;
	}

	txd->m = NULL;
	txd->packets = 0;
	txd->data_size = 0;
	txd->chim_size = 0;

	return txd;
}

static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd)
{
	rte_mempool_put(txq->txdesc_pool, txd);
}

void
hn_dev_tx_queue_release(void *arg)
{
	struct hn_tx_queue *txq = arg;

	PMD_INIT_FUNC_TRACE();

	if (!txq)
		return;

	if (txq->txdesc_pool)
		rte_mempool_free(txq->txdesc_pool);

	rte_free(txq->tx_rndis);
	rte_free(txq);
}

/*
 * Check the status of a Tx descriptor in the queue.
 *
 * returns:
 *  - -EINVAL              - offset outside of the tx descriptor pool.
 *  - RTE_ETH_TX_DESC_FULL - descriptor is not acknowledged by host.
 *  - RTE_ETH_TX_DESC_DONE - descriptor is available.
 */
int hn_dev_tx_descriptor_status(void *arg, uint16_t offset)
{
	const struct hn_tx_queue *txq = arg;

	hn_process_events(txq->hv, txq->queue_id, 0);

	if (offset >= rte_mempool_avail_count(txq->txdesc_pool))
		return -EINVAL;

	if (offset < rte_mempool_in_use_count(txq->txdesc_pool))
		return RTE_ETH_TX_DESC_FULL;
	else
		return RTE_ETH_TX_DESC_DONE;
}

static void
hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_txdesc *txd = (struct hn_txdesc *)xactid;
	struct hn_tx_queue *txq;

	/* Control packets are sent with xactid == 0 */
	if (!txd)
		return;

	txq = dev->data->tx_queues[queue_id];
	if (likely(ack->status == NVS_STATUS_OK)) {
		PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u",
			   txq->port_id, txq->queue_id, txd->chim_index,
			   txd->packets, txd->data_size);
		txq->stats.bytes += txd->data_size;
		txq->stats.packets += txd->packets;
	} else {
		PMD_DRV_LOG(NOTICE, "port %u:%u complete tx %u failed status %u",
			    txq->port_id, txq->queue_id, txd->chim_index,
			    ack->status);
		++txq->stats.errors;
	}

	if (txd->chim_index != NVS_CHIM_IDX_INVALID)
		hn_chim_free(hv, txd->chim_index);

	rte_pktmbuf_free(txd->m);
	hn_txd_put(txq, txd);
}

/* Handle transmit completion events */
static void
hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id,
		   const struct vmbus_chanpkt_hdr *pkt,
		   const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_RNDIS_ACK:
		hn_nvs_send_completed(dev, queue_id, pkt->xactid, data);
		break;

	default:
		PMD_DRV_LOG(NOTICE, "unexpected send completion type %u",
			    hdr->type);
	}
}

/* Parse per-packet info (meta data) */
static int
hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen,
		struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (unlikely(info_dlen < sizeof(*pi)))
			return -EINVAL;

		if (unlikely(info_dlen < pi->size))
			return -EINVAL;
		info_dlen -= pi->size;

		if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return -EINVAL;
		if (unlikely(pi->size < pi->offset))
			return -EINVAL;

		dlen = pi->size - pi->offset;
		data = pi->data;

		switch (pi->type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (unlikely(dlen < NDIS_VLAN_INFO_SIZE))
				return -EINVAL;
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE))
				return -EINVAL;
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case NDIS_PKTINFO_TYPE_HASHVAL:
			if (unlikely(dlen < NDIS_HASH_VALUE_SIZE))
				return -EINVAL;
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case NDIS_PKTINFO_TYPE_HASHINF:
			if (unlikely(dlen < NDIS_HASH_INFO_SIZE))
				return -EINVAL;
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL)
			break; /* All found; done */
next:
		pi = (const struct rndis_pktinfo *)
			((const uint8_t *)pi + pi->size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if (!(mask & HN_RXINFO_HASHVAL))
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return 0;
}

static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque)
{
	struct hn_rx_bufinfo *rxb = opaque;
	struct hn_data *hv = rxb->hv;

	rte_atomic32_dec(&hv->rxbuf_outstanding);
	hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

static struct hn_rx_bufinfo *hn_rx_buf_init(const struct hn_rx_queue *rxq,
					    const struct vmbus_chanpkt_rxbuf *pkt)
{
	struct hn_rx_bufinfo *rxb;

	rxb = rxq->hv->rxbuf_info + pkt->hdr.xactid;
	rxb->chan = rxq->chan;
	rxb->xactid = pkt->hdr.xactid;
	rxb->hv = rxq->hv;

	rxb->shinfo.free_cb = hn_rx_buf_free_cb;
	rxb->shinfo.fcb_opaque = rxb;
	rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1);
	return rxb;
}

static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
		     uint8_t *data, unsigned int headroom, unsigned int dlen,
		     const struct hn_rxinfo *info)
{
	struct hn_data *hv = rxq->hv;
	struct rte_mbuf *m;
	bool use_extbuf = false;

	m = rte_pktmbuf_alloc(rxq->mb_pool);
	if (unlikely(!m)) {
		struct rte_eth_dev *dev =
			&rte_eth_devices[rxq->port_id];

		dev->data->rx_mbuf_alloc_failed++;
		return;
	}

	/*
	 * For large packets, avoid copy if possible but need to keep
	 * some space available in receive area for later packets.
	 */
	if (dlen >= HN_RXCOPY_THRESHOLD &&
	    (uint32_t)rte_atomic32_read(&hv->rxbuf_outstanding) <
			hv->rxbuf_section_cnt / 2) {
		struct rte_mbuf_ext_shared_info *shinfo;
		const void *rxbuf;
		rte_iova_t iova;

		/*
		 * Build an external mbuf that points to the receive area.
		 * Use refcount to handle multiple packets in the same
		 * receive buffer section.
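		 * The refcount starts at 1 (the ack owed to the host); the
		 * first attached mbuf raises it to 2 and marks the section
		 * as outstanding.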
		 */
		rxbuf = hv->rxbuf_res->addr;
		iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf);
		shinfo = &rxb->shinfo;

		/* shinfo is already set to 1 by the caller */
		if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 2)
			rte_atomic32_inc(&hv->rxbuf_outstanding);

		rte_pktmbuf_attach_extbuf(m, data, iova,
					  dlen + headroom, shinfo);
		m->data_off = headroom;
		use_extbuf = true;
	} else {
		/* Mbufs in the pool must be large enough to hold small packets */
		if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) {
			rte_pktmbuf_free_seg(m);
			++rxq->stats.errors;
			return;
		}
		rte_memcpy(rte_pktmbuf_mtod(m, void *),
			   data + headroom, dlen);
	}

	m->port = rxq->port_id;
	m->pkt_len = dlen;
	m->data_len = dlen;
	m->packet_type = rte_net_get_ptype(m, NULL,
					   RTE_PTYPE_L2_MASK |
					   RTE_PTYPE_L3_MASK |
					   RTE_PTYPE_L4_MASK);

	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
		m->vlan_tci = info->vlan_info;
		m->ol_flags |= PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;

		/* NDIS always strips tag, put it back if necessary */
		if (!hv->vlan_strip && rte_vlan_insert(&m)) {
			PMD_DRV_LOG(DEBUG, "vlan insert failed");
			++rxq->stats.errors;
			if (use_extbuf)
				rte_pktmbuf_detach_extbuf(m);
			rte_pktmbuf_free(m);
			return;
		}
	}

	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
		if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK)
			m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;

		if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK
				       | NDIS_RXCSUM_INFO_TCPCS_OK))
			m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
		else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED
					    | NDIS_RXCSUM_INFO_UDPCS_FAILED))
			m->ol_flags |= PKT_RX_L4_CKSUM_BAD;
	}

	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
		m->ol_flags |= PKT_RX_RSS_HASH;
		m->hash.rss = info->hash_value;
	}

	PMD_RX_LOG(DEBUG,
		   "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64,
		   rxq->port_id, rxq->queue_id, rxb->xactid,
		   m->pkt_len, m->packet_type, m->ol_flags);

	++rxq->stats.packets;
	rxq->stats.bytes += m->pkt_len;
	hn_update_packet_stats(&rxq->stats, m);

	if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
		++rxq->stats.ring_full;
		PMD_RX_LOG(DEBUG, "rx ring full");
		if (use_extbuf)
			rte_pktmbuf_detach_extbuf(m);
		rte_pktmbuf_free(m);
	}
}

static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
			     struct hn_rx_bufinfo *rxb,
			     void *data, uint32_t dlen)
{
	unsigned int data_off, data_len, pktinfo_off, pktinfo_len;
	const struct rndis_packet_msg *pkt = data;
	struct hn_rxinfo info = {
		.vlan_info = HN_NDIS_VLAN_INFO_INVALID,
		.csum_info = HN_NDIS_RXCSUM_INFO_INVALID,
		.hash_info = HN_NDIS_HASH_INFO_INVALID,
	};
	int err;

	hn_rndis_dump(pkt);

	if (unlikely(dlen < sizeof(*pkt)))
		goto error;

	if (unlikely(dlen < pkt->len))
		goto error; /* truncated RNDIS from host */

	if (unlikely(pkt->len < pkt->datalen
		     + pkt->oobdatalen + pkt->pktinfolen))
		goto error;

	if (unlikely(pkt->datalen == 0))
		goto error;

	/* Check offsets. */
	if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN))
		goto error;

	if (likely(pkt->pktinfooffset > 0) &&
	    unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN ||
		     (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)))
		goto error;

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	data_len = pkt->datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset);
	pktinfo_len = pkt->pktinfolen;

	if (likely(pktinfo_len > 0)) {
		err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off,
				      pktinfo_len, &info);
		if (err)
			goto error;
	}

	if (unlikely(data_off + data_len > pkt->len))
		goto error;

	if (unlikely(data_len < RTE_ETHER_HDR_LEN))
		goto error;

	hn_rxpkt(rxq, rxb, data, data_off, data_len, &info);
	return;
error:
	++rxq->stats.errors;
}

static void
hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
		 struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
{
	const struct rndis_msghdr *hdr = buf;

	switch (hdr->type) {
	case RNDIS_PACKET_MSG:
		if (dev->data->dev_started)
			hn_rndis_rx_data(rxq, rxb, buf, len);
		break;

	case RNDIS_INDICATE_STATUS_MSG:
		hn_rndis_link_status(dev, buf);
		break;

	case RNDIS_INITIALIZE_CMPLT:
	case RNDIS_QUERY_CMPLT:
	case RNDIS_SET_CMPLT:
		hn_rndis_receive_response(rxq->hv, buf, len);
		break;

	default:
		PMD_DRV_LOG(NOTICE,
			    "unexpected RNDIS message (type %#x len %u)",
			    hdr->type, len);
		break;
	}
}

static void
hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
		    struct hn_data *hv,
		    struct hn_rx_queue *rxq,
		    const struct vmbus_chanpkt_hdr *hdr,
		    const void *buf)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr = buf;
	uint32_t rxbuf_sz = hv->rxbuf_res->len;
	char *rxbuf = hv->rxbuf_res->addr;
	unsigned int i, hlen, count;
	struct hn_rx_bufinfo *rxb;

	/* At minimum we need type header */
	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) {
		PMD_RX_LOG(ERR, "invalid receive nvs RNDIS");
		return;
	}

	/* Make sure that this is a RNDIS message. */
	if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) {
		PMD_RX_LOG(ERR, "nvs type %u, not RNDIS",
			   nvs_hdr->type);
		return;
	}

	hlen = vmbus_chanpkt_getlen(hdr->hlen);
	if (unlikely(hlen < sizeof(*pkt))) {
		PMD_RX_LOG(ERR, "invalid rxbuf chanpkt");
		return;
	}

	pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr);
	if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) {
		PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x",
			   pkt->rxbuf_id);
		return;
	}

	count = pkt->rxbuf_cnt;
	if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf,
				     rxbuf[count]))) {
		PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count);
		return;
	}

	if (pkt->hdr.xactid > hv->rxbuf_section_cnt) {
		PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64,
			   pkt->hdr.xactid);
		return;
	}

	/* Setup receive buffer info to allow for callback */
	rxb = hn_rx_buf_init(rxq, pkt);

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		unsigned int ofs, len;

		ofs = pkt->rxbuf[i].ofs;
		len = pkt->rxbuf[i].len;

		if (unlikely(ofs + len > rxbuf_sz)) {
			PMD_RX_LOG(ERR,
				   "%uth RNDIS msg overflow ofs %u, len %u",
				   i, ofs, len);
			continue;
		}

		if (unlikely(len == 0)) {
			PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len);
			continue;
		}

		hn_rndis_receive(dev, rxq, rxb,
				 rxbuf + ofs, len);
	}

	/* Send ACK now if external mbuf not used */
	if (rte_mbuf_ext_refcnt_update(&rxb->shinfo, -1) == 0)
		hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

/*
 * Called when NVS inband events are received.
 * Send up a two part message with port_id and the NVS message
 * to the pipe to the netvsc-vf-event control thread.
 */
static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
				 const struct vmbus_chanpkt_hdr *pkt,
				 const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_TXTBL_NOTE:
		/* Transmit indirection table has locking problems
		 * in DPDK and therefore not implemented
		 */
		PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
		break;

	case NVS_TYPE_VFASSOC_NOTE:
		hn_nvs_handle_vfassoc(dev, pkt, data);
		break;

	default:
		PMD_DRV_LOG(INFO,
			    "got notify, nvs type %u", hdr->type);
	}
}

struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
				      uint16_t queue_id,
				      unsigned int socket_id)
{
	struct hn_rx_queue *rxq;

	rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq)
		return NULL;

	rxq->hv = hv;
	rxq->chan = hv->channels[queue_id];
	rte_spinlock_init(&rxq->ring_lock);
	rxq->port_id = hv->port_id;
	rxq->queue_id = queue_id;
	rxq->event_sz = HN_RXQ_EVENT_DEFAULT;
	rxq->event_buf = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT,
					   RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq->event_buf) {
		rte_free(rxq);
		return NULL;
	}

	return rxq;
}

void
hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_rxq_info *qinfo)
{
	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_id];

	qinfo->mp = rxq->mb_pool;
	qinfo->nb_desc = rxq->rx_ring->size;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
}

int
hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_rxconf *rx_conf,
		      struct rte_mempool *mp)
{
	struct hn_data *hv = dev->data->dev_private;
	char ring_name[RTE_RING_NAMESIZE];
	struct hn_rx_queue *rxq;
	unsigned int count;
	int error = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	if (queue_idx == 0) {
		rxq = hv->primary;
	} else {
		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
		if (!rxq)
			return -ENOMEM;
	}

	rxq->mb_pool = mp;
	count = rte_mempool_avail_count(mp) / dev->data->nb_rx_queues;
	if (nb_desc == 0 || nb_desc > count)
		nb_desc = count;

	/*
	 * Staging ring from receive event logic to rx_pkts.
	 * rx_pkts assumes caller is handling multi-thread issue.
	 * event logic has locking.
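	 * Ring size is nb_desc rounded up to the next power of two.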
	 */
	snprintf(ring_name, sizeof(ring_name),
		 "hn_rx_%u_%u", dev->data->port_id, queue_idx);
	rxq->rx_ring = rte_ring_create(ring_name,
				       rte_align32pow2(nb_desc),
				       socket_id, 0);
	if (!rxq->rx_ring)
		goto fail;

	error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
				     socket_id, rx_conf, mp);
	if (error)
		goto fail;

	dev->data->rx_queues[queue_idx] = rxq;
	return 0;

fail:
	rte_ring_free(rxq->rx_ring);
	rte_free(rxq->event_buf);
	rte_free(rxq);
	return error;
}

static void
hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary)
{
	if (!rxq)
		return;

	rte_ring_free(rxq->rx_ring);
	rxq->rx_ring = NULL;
	rxq->mb_pool = NULL;

	hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);

	/* Keep primary queue to allow for control operations */
	if (keep_primary && rxq == rxq->hv->primary)
		return;

	rte_free(rxq->event_buf);
	rte_free(rxq);
}

void
hn_dev_rx_queue_release(void *arg)
{
	struct hn_rx_queue *rxq = arg;

	PMD_INIT_FUNC_TRACE();

	hn_rx_queue_free(rxq, true);
}

/*
 * Get the number of used descriptors in an rx queue.
 * For this device that means how many packets are pending in the ring.
 */
uint32_t
hn_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_id)
{
	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_id];

	return rte_ring_count(rxq->rx_ring);
}

/*
 * Check the status of a Rx descriptor in the queue
 *
 * returns:
 *  - -EINVAL               - offset outside of the ring
 *  - RTE_ETH_RX_DESC_AVAIL - no data available yet
 *  - RTE_ETH_RX_DESC_DONE  - data is waiting in the staging ring
 */
int hn_dev_rx_queue_status(void *arg, uint16_t offset)
{
	const struct hn_rx_queue *rxq = arg;

	hn_process_events(rxq->hv, rxq->queue_id, 0);
	if (offset >= rxq->rx_ring->capacity)
		return -EINVAL;

	if (offset < rte_ring_count(rxq->rx_ring))
		return RTE_ETH_RX_DESC_DONE;
	else
		return RTE_ETH_RX_DESC_AVAIL;
}

int
hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
{
	struct hn_tx_queue *txq = arg;

	return hn_process_events(txq->hv, txq->queue_id, free_cnt);
}

/*
 * Process pending events on the channel.
 * Called from both Rx queue poll and Tx cleanup
 */
uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
			   uint32_t tx_limit)
{
	struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
	struct hn_rx_queue *rxq;
	uint32_t bytes_read = 0;
	uint32_t tx_done = 0;
	int ret = 0;

	rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];

	/*
	 * Since the channel is shared between the Rx and Tx queue, a lock is
	 * needed because DPDK does not force the same CPU to be used for Rx/Tx.
	 */
	if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
		return 0;

	for (;;) {
		const struct vmbus_chanpkt_hdr *pkt;
		uint32_t len = rxq->event_sz;
		const void *data;

retry:
		ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
		if (ret == -EAGAIN)
			break;	/* ring is empty */

		if (unlikely(ret == -ENOBUFS)) {
			/* event buffer not large enough to read ring */

			PMD_DRV_LOG(DEBUG,
				    "event buffer expansion (need %u)", len);
			rxq->event_sz = len + len / 4;
			rxq->event_buf = rte_realloc(rxq->event_buf, rxq->event_sz,
						     RTE_CACHE_LINE_SIZE);
			if (rxq->event_buf)
				goto retry;
			/* out of memory, no more events now */
			rxq->event_sz = 0;
			break;
		}

		if (unlikely(ret <= 0)) {
			/* This indicates a failure to communicate (or worse) */
			rte_exit(EXIT_FAILURE,
				 "vmbus ring buffer error: %d", ret);
		}

		bytes_read += ret;
		pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
		data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen);

		switch (pkt->type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			++tx_done;
			hn_nvs_handle_comp(dev, queue_id, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(dev, pkt, data);
			break;

		default:
			PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type);
			break;
		}

		if (tx_limit && tx_done >= tx_limit)
			break;
	}

	if (bytes_read > 0)
		rte_vmbus_chan_signal_read(rxq->chan, bytes_read);

	rte_spinlock_unlock(&rxq->ring_lock);

	return tx_done;
}

static void hn_append_to_chim(struct hn_tx_queue *txq,
			      struct rndis_packet_msg *pkt,
			      const struct rte_mbuf *m)
{
	struct hn_txdesc *txd = txq->agg_txd;
	uint8_t *buf = (uint8_t *)pkt;
	unsigned int data_offs;

	hn_rndis_dump(pkt);

	data_offs = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	txd->chim_size += pkt->len;
	txd->data_size += m->pkt_len;
	++txd->packets;
	hn_update_packet_stats(&txq->stats, m);

	for (; m; m = m->next) {
		uint16_t len = rte_pktmbuf_data_len(m);

		rte_memcpy(buf + data_offs,
			   rte_pktmbuf_mtod(m, const char *), len);
		data_offs += len;
	}
}

/*
 * Send pending aggregated data in chimney buffer (if any).
 * Returns error if send was unsuccessful because channel ring buffer
 * was full.
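 * On failure the aggregated descriptor is kept so the caller can retry
 * the flush on a later transmit.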
 */
static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
{
	struct hn_txdesc *txd = txq->agg_txd;
	struct hn_nvs_rndis rndis;
	int ret;

	if (!txd)
		return 0;

	rndis = (struct hn_nvs_rndis) {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_idx = txd->chim_index,
		.chim_sz = txd->chim_size,
	};

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index, txd->chim_size);

	ret = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC,
			  &rndis, sizeof(rndis), (uintptr_t)txd, need_sig);

	if (likely(ret == 0))
		hn_reset_txagg(txq);
	else if (ret == -EAGAIN) {
		PMD_TX_LOG(DEBUG, "port %u:%u channel full",
			   txq->port_id, txq->queue_id);
		++txq->stats.channel_full;
	} else {
		++txq->stats.errors;

		PMD_DRV_LOG(NOTICE, "port %u:%u send failed: %d",
			    txq->port_id, txq->queue_id, ret);
	}
	return ret;
}

/*
 * Try to find a place in a send chimney buffer to put
 * the small packet. If space is available, this routine
 * returns a pointer to where to place the data.
 * If no space, caller should try direct transmit.
 */
static void *
hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq,
	     struct hn_txdesc *txd, uint32_t pktsize)
{
	struct hn_txdesc *agg_txd = txq->agg_txd;
	struct rndis_packet_msg *pkt;
	void *chim;

	if (agg_txd) {
		unsigned int padding, olen;

		/*
		 * Update the previous RNDIS packet's total length,
		 * it can be increased due to the mandatory alignment
		 * padding for this RNDIS packet. And update the
		 * aggregating txdesc's chimney sending buffer size
		 * accordingly.
		 *
		 * Zero-out the padding, as required by the RNDIS spec.
		 */
		pkt = txq->agg_prevpkt;
		olen = pkt->len;
		padding = RTE_ALIGN(olen, txq->agg_align) - olen;
		if (padding > 0) {
			agg_txd->chim_size += padding;
			pkt->len += padding;
			memset((uint8_t *)pkt + olen, 0, padding);
		}

		chim = (uint8_t *)pkt + pkt->len;
		txq->agg_prevpkt = chim;
		txq->agg_pktleft--;
		txq->agg_szleft -= pktsize;
		if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
			/*
			 * Probably can't aggregate more packets,
			 * flush this aggregating txdesc proactively.
			 */
			txq->agg_pktleft = 0;
		}

		hn_txd_put(txq, txd);
		return chim;
	}

	txd->chim_index = hn_chim_alloc(hv);
	if (txd->chim_index == NVS_CHIM_IDX_INVALID)
		return NULL;

	chim = (uint8_t *)hv->chim_res->addr
			+ txd->chim_index * hv->chim_szmax;

	txq->agg_txd = txd;
	txq->agg_pktleft = txq->agg_pktmax - 1;
	txq->agg_szleft = txq->agg_szmax - pktsize;
	txq->agg_prevpkt = chim;

	return chim;
}

static inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt,
			uint32_t pi_dlen, uint32_t pi_type)
{
	const uint32_t pi_size = RNDIS_PKTINFO_SIZE(pi_dlen);
	struct rndis_pktinfo *pi;

	/*
	 * Per-packet-info does not move; it only grows.
	 *
	 * NOTE:
	 * pktinfooffset in this phase counts from the beginning
	 * of rndis_packet_msg.
	 */
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + hn_rndis_pktlen(pkt));

	pkt->pktinfolen += pi_size;

	pi->size = pi_size;
	pi->type = pi_type;
	pi->offset = RNDIS_PKTINFO_OFFSET;

	return pi->data;
}

/* Put RNDIS header and packet info on packet */
static void hn_encap(struct rndis_packet_msg *pkt,
		     uint16_t queue_id,
		     const struct rte_mbuf *m)
{
	unsigned int hlen = m->l2_len + m->l3_len;
	uint32_t *pi_data;
	uint32_t pkt_hlen;

	pkt->type = RNDIS_PACKET_MSG;
	pkt->len = m->pkt_len;
	pkt->dataoffset = 0;
	pkt->datalen = m->pkt_len;
	pkt->oobdataoffset = 0;
	pkt->oobdatalen = 0;
	pkt->oobdataelements = 0;
	pkt->pktinfooffset = sizeof(*pkt);
	pkt->pktinfolen = 0;
	pkt->vchandle = 0;
	pkt->reserved = 0;

	/*
	 * Set the hash value for this packet, to the queue_id to cause
	 * TX done event for this packet on the right channel.
	 */
	pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE,
					  NDIS_PKTINFO_TYPE_HASHVAL);
	*pi_data = queue_id;

	if (m->ol_flags & PKT_TX_VLAN_PKT) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_VLAN);
		*pi_data = m->vlan_tci;
	}

	if (m->ol_flags & PKT_TX_TCP_SEG) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_LSO);

		if (m->ol_flags & PKT_TX_IPV6) {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen,
							   m->tso_segsz);
		} else {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen,
							   m->tso_segsz);
		}
	} else if (m->ol_flags &
		   (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM)) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_CSUM);
		*pi_data = 0;

		if (m->ol_flags & PKT_TX_IPV6)
			*pi_data |= NDIS_TXCSUM_INFO_IPV6;
		if (m->ol_flags & PKT_TX_IPV4) {
			*pi_data |= NDIS_TXCSUM_INFO_IPV4;

			if (m->ol_flags & PKT_TX_IP_CKSUM)
				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
		}

		if (m->ol_flags & PKT_TX_TCP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen);
		else if (m->ol_flags & PKT_TX_UDP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen);
	}

	pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen;
	/* Fixup RNDIS packet message total length */
	pkt->len += pkt_hlen;

	/* Convert RNDIS packet message offsets */
	pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
	pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset);
}

/* How many scatter gather list elements are needed */
static unsigned int hn_get_slots(const struct rte_mbuf *m)
{
	unsigned int slots = 1; /* for RNDIS header */

	while (m) {
		unsigned int size = rte_pktmbuf_data_len(m);
		unsigned int offs = rte_mbuf_data_iova(m) & PAGE_MASK;

		slots += (offs + size + PAGE_SIZE - 1) / PAGE_SIZE;
		m = m->next;
	}

	return slots;
}

/* Build scatter gather list from chained mbuf */
static unsigned int hn_fill_sg(struct vmbus_gpa *sg,
			       const struct rte_mbuf *m)
{
	unsigned int segs = 0;

	while (m) {
		rte_iova_t addr = rte_mbuf_data_iova(m);
		unsigned int page = addr / PAGE_SIZE;
		unsigned int offset = addr & PAGE_MASK;
		unsigned int len = rte_pktmbuf_data_len(m);

		while (len > 0) {
			unsigned int bytes = RTE_MIN(len, PAGE_SIZE - offset);
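
			/* one GPA element per page touched by this segment */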
			sg[segs].page = page;
			sg[segs].ofs = offset;
			sg[segs].len = bytes;
			segs++;

			++page;
			offset = 0;
			len -= bytes;
		}
		m = m->next;
	}

	return segs;
}

/* Transmit directly from mbuf */
static int hn_xmit_sg(struct hn_tx_queue *txq,
		      const struct hn_txdesc *txd, const struct rte_mbuf *m,
		      bool *need_sig)
{
	struct vmbus_gpa sg[hn_get_slots(m)];
	struct hn_nvs_rndis nvs_rndis = {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_sz = txd->chim_size,
	};
	rte_iova_t addr;
	unsigned int segs;

	/* attach aggregation data if present */
	if (txd->chim_size > 0)
		nvs_rndis.chim_idx = txd->chim_index;
	else
		nvs_rndis.chim_idx = NVS_CHIM_IDX_INVALID;

	hn_rndis_dump(txd->rndis_pkt);

	/* pass IOVA of rndis header in first segment */
	addr = rte_malloc_virt2iova(txd->rndis_pkt);
	if (unlikely(addr == RTE_BAD_IOVA)) {
		PMD_DRV_LOG(ERR, "RNDIS transmit can not get iova");
		return -EINVAL;
	}

	sg[0].page = addr / PAGE_SIZE;
	sg[0].ofs = addr & PAGE_MASK;
	sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt));
	segs = 1;

	hn_update_packet_stats(&txq->stats, m);

	segs += hn_fill_sg(sg + 1, m);

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index,
		   segs, nvs_rndis.chim_sz);

	return hn_nvs_send_sglist(txq->chan, sg, segs,
				  &nvs_rndis, sizeof(nvs_rndis),
				  (uintptr_t)txd, need_sig);
}

uint16_t
hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct hn_tx_queue *txq = ptxq;
	uint16_t queue_id = txq->queue_id;
	struct hn_data *hv = txq->hv;
	struct rte_eth_dev *vf_dev;
	bool need_sig = false;
	uint16_t nb_tx, tx_thresh;
	int ret;

	if (unlikely(hv->closed))
		return 0;

	/*
	 * Always check for events on the primary channel
	 * because that is where hotplug notifications occur.
	 */
	tx_thresh = RTE_MAX(txq->free_thresh, nb_pkts);
	if (txq->queue_id == 0 ||
	    rte_mempool_avail_count(txq->txdesc_pool) < tx_thresh)
		hn_process_events(hv, txq->queue_id, 0);

	/* Transmit over VF if present and up */
	rte_rwlock_read_lock(&hv->vf_lock);
	vf_dev = hn_get_vf_dev(hv);
	if (vf_dev && vf_dev->data->dev_started) {
		void *sub_q = vf_dev->data->tx_queues[queue_id];

		nb_tx = (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
		rte_rwlock_read_unlock(&hv->vf_lock);
		return nb_tx;
	}
	rte_rwlock_read_unlock(&hv->vf_lock);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *m = tx_pkts[nb_tx];
		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
		struct rndis_packet_msg *pkt;
		struct hn_txdesc *txd;

		txd = hn_txd_get(txq);
		if (txd == NULL)
			break;

		/* For small packets aggregate them in chimney buffer */
		if (m->pkt_len < HN_TXCOPY_THRESHOLD &&
		    pkt_size <= txq->agg_szmax) {
			/* If this packet will not fit, then flush  */
			if (txq->agg_pktleft == 0 ||
			    RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) {
				if (hn_flush_txagg(txq, &need_sig))
					goto fail;
			}

			pkt = hn_try_txagg(hv, txq, txd, pkt_size);
			if (unlikely(!pkt))
				break;

			hn_encap(pkt, queue_id, m);
			hn_append_to_chim(txq, pkt, m);

			rte_pktmbuf_free(m);

			/* if buffer is full, flush */
			if (txq->agg_pktleft == 0 &&
			    hn_flush_txagg(txq, &need_sig))
				goto fail;
		} else {
			/* Send any outstanding packets in buffer */
			if (txq->agg_txd && hn_flush_txagg(txq, &need_sig))
				goto fail;

			pkt = txd->rndis_pkt;
			txd->m = m;
			txd->data_size = m->pkt_len;
			++txd->packets;

			hn_encap(pkt, queue_id, m);

			ret = hn_xmit_sg(txq, txd, m, &need_sig);
			if (unlikely(ret != 0)) {
				if (ret == -EAGAIN) {
					PMD_TX_LOG(DEBUG, "sg channel full");
					++txq->stats.channel_full;
				} else {
					PMD_DRV_LOG(NOTICE, "sg send failed: %d", ret);
					++txq->stats.errors;
				}
				hn_txd_put(txq, txd);
				goto fail;
			}
		}
	}

	/* If a partial buffer is left, then try to send it.
	 * If that fails, then reuse it on the next send.
	 */
	hn_flush_txagg(txq, &need_sig);

fail:
	if (need_sig)
		rte_vmbus_chan_signal_tx(txq->chan);

	return nb_tx;
}

static uint16_t
hn_recv_vf(uint16_t vf_port, const struct hn_rx_queue *rxq,
	   struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t i, n;

	if (unlikely(nb_pkts == 0))
		return 0;

	n = rte_eth_rx_burst(vf_port, rxq->queue_id, rx_pkts, nb_pkts);

	/* relabel the received mbufs */
	for (i = 0; i < n; i++)
		rx_pkts[i]->port = rxq->port_id;

	return n;
}

uint16_t
hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct hn_rx_queue *rxq = prxq;
	struct hn_data *hv = rxq->hv;
	struct rte_eth_dev *vf_dev;
	uint16_t nb_rcv;

	if (unlikely(hv->closed))
		return 0;

	/* Check for new completions (and hotplug) */
	if (likely(rte_ring_count(rxq->rx_ring) < nb_pkts))
		hn_process_events(hv, rxq->queue_id, 0);

	/* Always check the vmbus path for multicast and new flows */
	nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
					   (void **)rx_pkts, nb_pkts, NULL);

	/* If VF is available, check that as well */
	rte_rwlock_read_lock(&hv->vf_lock);
	vf_dev = hn_get_vf_dev(hv);
	if (vf_dev && vf_dev->data->dev_started)
		nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq,
				     rx_pkts + nb_rcv, nb_pkts - nb_rcv);

	rte_rwlock_read_unlock(&hv->vf_lock);
	return nb_rcv;
}

void
hn_dev_free_queues(struct rte_eth_dev *dev)
{
	unsigned int i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct hn_rx_queue *rxq = dev->data->rx_queues[i];

		hn_rx_queue_free(rxq, false);
		dev->data->rx_queues[i] = NULL;
	}
	dev->data->nb_rx_queues = 0;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		hn_dev_tx_queue_release(dev->data->tx_queues[i]);
		dev->data->tx_queues[i] = NULL;
	}
	dev->data->nb_tx_queues = 0;
}