/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2018 Microsoft Corporation
 * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
 * All rights reserved.
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <strings.h>
#include <malloc.h>

#include <rte_ethdev.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_bitmap.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_dev.h>
#include <rte_net.h>
#include <rte_bus_vmbus.h>
#include <rte_spinlock.h>

#include "hn_logs.h"
#include "hn_var.h"
#include "hn_rndis.h"
#include "hn_nvs.h"
#include "ndis.h"

#define HN_NVS_SEND_MSG_SIZE \
	(sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis))

#define HN_TXD_CACHE_SIZE	32 /* per cpu tx_descriptor pool cache */
#define HN_TXCOPY_THRESHOLD	512

#define HN_RXCOPY_THRESHOLD	256
#define HN_RXQ_EVENT_DEFAULT	2048

struct hn_rxinfo {
	uint32_t	vlan_info;
	uint32_t	csum_info;
	uint32_t	hash_info;
	uint32_t	hash_value;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
	(HN_RXINFO_VLAN |		\
	 HN_RXINFO_CSUM |		\
	 HN_RXINFO_HASHINF |		\
	 HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0

/*
 * Per-transmit bookkeeping.
 * A slot in the transmit ring (chim_index) is reserved for each transmit.
 *
 * There are two types of transmit:
 *   - buffered transmit where the chimney buffer is used and the RNDIS
 *     header is in the buffer. mbuf == NULL for this case.
 *
 *   - direct transmit where the RNDIS header is in the rndis_pkt.
 *     The mbuf is freed after transmit.
 *
 * Descriptors come from a per-port pool which is used
 * to limit the number of outstanding requests per device.
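 *
 * On send completion the chimney slot (if any) is returned to the
 * per-device bitmap and the descriptor goes back to the per-queue pool.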
 */
struct hn_txdesc {
	struct rte_mbuf *m;

	uint16_t	queue_id;
	uint32_t	chim_index;
	uint32_t	chim_size;
	uint32_t	data_size;
	uint32_t	packets;

	struct rndis_packet_msg *rndis_pkt;
};

#define HN_RNDIS_PKT_LEN				\
	(sizeof(struct rndis_packet_msg) +		\
	 RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +	\
	 RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))

#define HN_RNDIS_PKT_ALIGNED	RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE)

/* Minimum space required for a packet */
#define HN_PKTSIZE_MIN(align) \
	RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)

#define DEFAULT_TX_FREE_THRESH 32

static void
hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
{
	uint32_t s = m->pkt_len;
	const struct rte_ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(m, const struct rte_ether_addr *);
	if (rte_is_multicast_ether_addr(ea)) {
		if (rte_is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt)
{
	return pkt->pktinfooffset + pkt->pktinfolen;
}

static inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{
	return ofs - offsetof(struct rndis_packet_msg, dataoffset);
}

static void hn_txd_init(struct rte_mempool *mp __rte_unused,
			void *opaque, void *obj, unsigned int idx)
{
	struct hn_tx_queue *txq = opaque;
	struct hn_txdesc *txd = obj;

	memset(txd, 0, sizeof(*txd));

	txd->queue_id = txq->queue_id;
	txd->chim_index = NVS_CHIM_IDX_INVALID;
	txd->rndis_pkt = (struct rndis_packet_msg *)((char *)txq->tx_rndis
		+ idx * HN_RNDIS_PKT_ALIGNED);
}

int
hn_chim_init(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;
	uint32_t i, chim_bmp_size;

	rte_spinlock_init(&hv->chim_lock);
	chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt);
	hv->chim_bmem = rte_zmalloc("hn_chim_bitmap", chim_bmp_size,
				    RTE_CACHE_LINE_SIZE);
	if (hv->chim_bmem == NULL) {
		PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u",
			     chim_bmp_size);
		return -1;
	}

	hv->chim_bmap = rte_bitmap_init(hv->chim_cnt,
					hv->chim_bmem, chim_bmp_size);
	if (hv->chim_bmap == NULL) {
		PMD_INIT_LOG(ERR, "failed to init chim bitmap");
		return -1;
	}

	for (i = 0; i < hv->chim_cnt; i++)
		rte_bitmap_set(hv->chim_bmap, i);

	return 0;
}

void
hn_chim_uninit(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;

	rte_bitmap_free(hv->chim_bmap);
	rte_free(hv->chim_bmem);
	hv->chim_bmem = NULL;
}

static uint32_t hn_chim_alloc(struct hn_data *hv)
{
	uint32_t index = NVS_CHIM_IDX_INVALID;
	uint64_t slab = 0;

	rte_spinlock_lock(&hv->chim_lock);
	if (rte_bitmap_scan(hv->chim_bmap, &index, &slab)) {
		index += rte_bsf64(slab);
		rte_bitmap_clear(hv->chim_bmap, index);
	}

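	/* If no free slot was found, index stays NVS_CHIM_IDX_INVALID */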
	rte_spinlock_unlock(&hv->chim_lock);

	return index;
}

static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx)
{
	if (chim_idx >= hv->chim_cnt) {
		PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx);
	} else {
		rte_spinlock_lock(&hv->chim_lock);
		rte_bitmap_set(hv->chim_bmap, chim_idx);
		rte_spinlock_unlock(&hv->chim_lock);
	}
}

static void hn_reset_txagg(struct hn_tx_queue *txq)
{
	txq->agg_szleft = txq->agg_szmax;
	txq->agg_pktleft = txq->agg_pktmax;
	txq->agg_txd = NULL;
	txq->agg_prevpkt = NULL;
}

int
hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_txconf *tx_conf)
{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_tx_queue *txq;
	char name[RTE_MEMPOOL_NAMESIZE];
	uint32_t tx_free_thresh;
	int err = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE,
				 socket_id);
	if (!txq)
		return -ENOMEM;

	txq->hv = hv;
	txq->chan = hv->channels[queue_idx];
	txq->port_id = dev->data->port_id;
	txq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh = RTE_MIN(nb_desc / 4,
					 DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh + 3 >= nb_desc) {
		PMD_INIT_LOG(ERR,
			     "tx_free_thresh must be less than the number of TX entries minus 3 (%u)."
			     " (tx_free_thresh=%u port=%u queue=%u)",
			     nb_desc - 3,
			     tx_free_thresh, dev->data->port_id, queue_idx);
		/* don't leak the queue structure on invalid configuration */
		rte_free(txq);
		return -EINVAL;
	}

	txq->free_thresh = tx_free_thresh;

	snprintf(name, sizeof(name),
		 "hn_txd_%u_%u", dev->data->port_id, queue_idx);

	PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu",
		     name, nb_desc, sizeof(struct hn_txdesc));

	txq->tx_rndis = rte_calloc("hn_txq_rndis", nb_desc,
				   HN_RNDIS_PKT_ALIGNED, RTE_CACHE_LINE_SIZE);
	if (txq->tx_rndis == NULL)
		goto error;

	txq->txdesc_pool = rte_mempool_create(name, nb_desc,
					      sizeof(struct hn_txdesc),
					      0, 0, NULL, NULL,
					      hn_txd_init, txq,
					      dev->device->numa_node, 0);
	if (txq->txdesc_pool == NULL) {
		PMD_DRV_LOG(ERR,
			    "mempool %s create failed: %d", name, rte_errno);
		goto error;
	}

	txq->agg_szmax = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
	txq->agg_pktmax = hv->rndis_agg_pkts;
	txq->agg_align = hv->rndis_agg_align;

	hn_reset_txagg(txq);

	err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
				   socket_id, tx_conf);
	if (err == 0) {
		dev->data->tx_queues[queue_idx] = txq;
		return 0;
	}

error:
	if (txq->txdesc_pool)
		rte_mempool_free(txq->txdesc_pool);
	rte_free(txq->tx_rndis);
	rte_free(txq);
	return err;
}

void
hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_txq_info *qinfo)
{
	struct hn_tx_queue *txq = dev->data->tx_queues[queue_id];

	qinfo->nb_desc = txq->txdesc_pool->size;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}

static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq)
{
	struct hn_txdesc *txd;

	if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) {
		++txq->stats.ring_full;
		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
		return NULL;
	}

	txd->m = NULL;
	txd->packets = 0;
	txd->data_size = 0;
	txd->chim_size = 0;

	return txd;
}
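
/* Return a transmit descriptor to the per-queue pool */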
static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd)
{
	rte_mempool_put(txq->txdesc_pool, txd);
}

void
hn_dev_tx_queue_release(void *arg)
{
	struct hn_tx_queue *txq = arg;

	PMD_INIT_FUNC_TRACE();

	if (!txq)
		return;

	if (txq->txdesc_pool)
		rte_mempool_free(txq->txdesc_pool);

	rte_free(txq->tx_rndis);
	rte_free(txq);
}

/*
 * Check the status of a Tx descriptor in the queue.
 *
 * returns:
 *  - -EINVAL              - offset outside of tx_descriptor pool.
 *  - RTE_ETH_TX_DESC_FULL - descriptor is not acknowledged by host.
 *  - RTE_ETH_TX_DESC_DONE - descriptor is available.
 */
int hn_dev_tx_descriptor_status(void *arg, uint16_t offset)
{
	const struct hn_tx_queue *txq = arg;

	hn_process_events(txq->hv, txq->queue_id, 0);

	if (offset >= rte_mempool_avail_count(txq->txdesc_pool))
		return -EINVAL;

	if (offset < rte_mempool_in_use_count(txq->txdesc_pool))
		return RTE_ETH_TX_DESC_FULL;
	else
		return RTE_ETH_TX_DESC_DONE;
}

static void
hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
{
	struct hn_data *hv = dev->data->dev_private;
	struct hn_txdesc *txd = (struct hn_txdesc *)xactid;
	struct hn_tx_queue *txq;

	/* Control packets are sent with xactid == 0 */
	if (!txd)
		return;

	txq = dev->data->tx_queues[queue_id];
	if (likely(ack->status == NVS_STATUS_OK)) {
		PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u",
			   txq->port_id, txq->queue_id, txd->chim_index,
			   txd->packets, txd->data_size);
		txq->stats.bytes += txd->data_size;
		txq->stats.packets += txd->packets;
	} else {
		PMD_DRV_LOG(NOTICE, "port %u:%u complete tx %u failed status %u",
			    txq->port_id, txq->queue_id, txd->chim_index,
			    ack->status);
		++txq->stats.errors;
	}

	if (txd->chim_index != NVS_CHIM_IDX_INVALID) {
		hn_chim_free(hv, txd->chim_index);
		/* clear the index so a reused descriptor does not
		 * free the chimney slot a second time
		 */
		txd->chim_index = NVS_CHIM_IDX_INVALID;
	}

	rte_pktmbuf_free(txd->m);
	hn_txd_put(txq, txd);
}

/* Handle transmit completion events */
static void
hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id,
		   const struct vmbus_chanpkt_hdr *pkt,
		   const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_RNDIS_ACK:
		hn_nvs_send_completed(dev, queue_id, pkt->xactid, data);
		break;

	default:
		PMD_DRV_LOG(NOTICE, "unexpected send completion type %u",
			    hdr->type);
	}
}

/* Parse per-packet info (metadata) */
static int
hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen,
		struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (unlikely(info_dlen < sizeof(*pi)))
			return -EINVAL;

		if (unlikely(info_dlen < pi->size))
			return -EINVAL;
		info_dlen -= pi->size;

		if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return -EINVAL;
		if (unlikely(pi->size < pi->offset))
			return -EINVAL;

		dlen = pi->size - pi->offset;
		data = pi->data;

		switch (pi->type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (unlikely(dlen < NDIS_VLAN_INFO_SIZE))
				return -EINVAL;
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE))
				return -EINVAL;
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case NDIS_PKTINFO_TYPE_HASHVAL:
			if (unlikely(dlen < NDIS_HASH_VALUE_SIZE))
				return -EINVAL;
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case NDIS_PKTINFO_TYPE_HASHINF:
			if (unlikely(dlen < NDIS_HASH_INFO_SIZE))
				return -EINVAL;
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL)
			break; /* All found; done */
next:
		pi = (const struct rndis_pktinfo *)
			((const uint8_t *)pi + pi->size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if (!(mask & HN_RXINFO_HASHVAL))
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return 0;
}

static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque)
{
	struct hn_rx_bufinfo *rxb = opaque;
	struct hn_data *hv = rxb->hv;

	rte_atomic32_dec(&hv->rxbuf_outstanding);
	hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

static struct hn_rx_bufinfo *hn_rx_buf_init(const struct hn_rx_queue *rxq,
					    const struct vmbus_chanpkt_rxbuf *pkt)
{
	struct hn_rx_bufinfo *rxb;

	rxb = rxq->hv->rxbuf_info + pkt->hdr.xactid;
	rxb->chan = rxq->chan;
	rxb->xactid = pkt->hdr.xactid;
	rxb->hv = rxq->hv;

	rxb->shinfo.free_cb = hn_rx_buf_free_cb;
	rxb->shinfo.fcb_opaque = rxb;
	rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1);
	return rxb;
}

static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
		     uint8_t *data, unsigned int headroom, unsigned int dlen,
		     const struct hn_rxinfo *info)
{
	struct hn_data *hv = rxq->hv;
	struct rte_mbuf *m;
	bool use_extbuf = false;

	m = rte_pktmbuf_alloc(rxq->mb_pool);
	if (unlikely(!m)) {
		struct rte_eth_dev *dev =
			&rte_eth_devices[rxq->port_id];

		dev->data->rx_mbuf_alloc_failed++;
		return;
	}

	/*
	 * For large packets, avoid copy if possible but need to keep
	 * some space available in receive area for later packets.
	 */
	if (dlen >= HN_RXCOPY_THRESHOLD &&
	    (uint32_t)rte_atomic32_read(&hv->rxbuf_outstanding) <
			hv->rxbuf_section_cnt / 2) {
		struct rte_mbuf_ext_shared_info *shinfo;
		const void *rxbuf;
		rte_iova_t iova;

		/*
		 * Build an external mbuf that points to the receive area.
		 * Use refcount to handle multiple packets in same
		 * receive buffer section.
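		 * The receive buffer section is only acknowledged back to
		 * the host once the last reference is dropped
		 * (see hn_rx_buf_free_cb).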
		 */
		rxbuf = hv->rxbuf_res->addr;
		iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf);
		shinfo = &rxb->shinfo;

		/* shinfo is already set to 1 by the caller */
		if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 2)
			rte_atomic32_inc(&hv->rxbuf_outstanding);

		rte_pktmbuf_attach_extbuf(m, data, iova,
					  dlen + headroom, shinfo);
		m->data_off = headroom;
		use_extbuf = true;
	} else {
		/* Mbufs in the pool must be large enough to hold small packets */
		if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) {
			rte_pktmbuf_free_seg(m);
			++rxq->stats.errors;
			return;
		}
		rte_memcpy(rte_pktmbuf_mtod(m, void *),
			   data + headroom, dlen);
	}

	m->port = rxq->port_id;
	m->pkt_len = dlen;
	m->data_len = dlen;
	m->packet_type = rte_net_get_ptype(m, NULL,
					   RTE_PTYPE_L2_MASK |
					   RTE_PTYPE_L3_MASK |
					   RTE_PTYPE_L4_MASK);

	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
		m->vlan_tci = info->vlan_info;
		m->ol_flags |= PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;

		/* NDIS always strips the tag, put it back if necessary */
		if (!hv->vlan_strip && rte_vlan_insert(&m)) {
			PMD_DRV_LOG(DEBUG, "vlan insert failed");
			++rxq->stats.errors;
			if (use_extbuf)
				rte_pktmbuf_detach_extbuf(m);
			rte_pktmbuf_free(m);
			return;
		}
	}

	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
		if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK)
			m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;

		if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK
				       | NDIS_RXCSUM_INFO_TCPCS_OK))
			m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
		else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED
					    | NDIS_RXCSUM_INFO_UDPCS_FAILED))
			m->ol_flags |= PKT_RX_L4_CKSUM_BAD;
	}

	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
		m->ol_flags |= PKT_RX_RSS_HASH;
		m->hash.rss = info->hash_value;
	}

	PMD_RX_LOG(DEBUG,
		   "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64,
		   rxq->port_id, rxq->queue_id, rxb->xactid,
		   m->pkt_len, m->packet_type, m->ol_flags);

	++rxq->stats.packets;
	rxq->stats.bytes += m->pkt_len;
	hn_update_packet_stats(&rxq->stats, m);

	if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
		++rxq->stats.ring_full;
		PMD_RX_LOG(DEBUG, "rx ring full");
		if (use_extbuf)
			rte_pktmbuf_detach_extbuf(m);
		rte_pktmbuf_free(m);
	}
}

static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
			     struct hn_rx_bufinfo *rxb,
			     void *data, uint32_t dlen)
{
	unsigned int data_off, data_len, pktinfo_off, pktinfo_len;
	const struct rndis_packet_msg *pkt = data;
	struct hn_rxinfo info = {
		.vlan_info = HN_NDIS_VLAN_INFO_INVALID,
		.csum_info = HN_NDIS_RXCSUM_INFO_INVALID,
		.hash_info = HN_NDIS_HASH_INFO_INVALID,
	};
	int err;

	hn_rndis_dump(pkt);

	if (unlikely(dlen < sizeof(*pkt)))
		goto error;

	if (unlikely(dlen < pkt->len))
		goto error; /* truncated RNDIS from host */

	if (unlikely(pkt->len < pkt->datalen
		     + pkt->oobdatalen + pkt->pktinfolen))
		goto error;

	if (unlikely(pkt->datalen == 0))
		goto error;

	/* Check offsets.
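	 * The RNDIS offsets are relative to the dataoffset field of the
	 * message; RNDIS_PACKET_MSG_OFFSET_ABS() converts them to offsets
	 * from the start of the message (the inverse of
	 * hn_rndis_pktmsg_offset()).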
	 */
	if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN))
		goto error;

	if (likely(pkt->pktinfooffset > 0) &&
	    unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN ||
		     (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)))
		goto error;

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	data_len = pkt->datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset);
	pktinfo_len = pkt->pktinfolen;

	if (likely(pktinfo_len > 0)) {
		err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off,
				      pktinfo_len, &info);
		if (err)
			goto error;
	}

	if (unlikely(data_off + data_len > pkt->len))
		goto error;

	if (unlikely(data_len < RTE_ETHER_HDR_LEN))
		goto error;

	hn_rxpkt(rxq, rxb, data, data_off, data_len, &info);
	return;
error:
	++rxq->stats.errors;
}

static void
hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
		 struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
{
	const struct rndis_msghdr *hdr = buf;

	switch (hdr->type) {
	case RNDIS_PACKET_MSG:
		if (dev->data->dev_started)
			hn_rndis_rx_data(rxq, rxb, buf, len);
		break;

	case RNDIS_INDICATE_STATUS_MSG:
		hn_rndis_link_status(dev, buf);
		break;

	case RNDIS_INITIALIZE_CMPLT:
	case RNDIS_QUERY_CMPLT:
	case RNDIS_SET_CMPLT:
		hn_rndis_receive_response(rxq->hv, buf, len);
		break;

	default:
		PMD_DRV_LOG(NOTICE,
			    "unexpected RNDIS message (type %#x len %u)",
			    hdr->type, len);
		break;
	}
}

static void
hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
		    struct hn_data *hv,
		    struct hn_rx_queue *rxq,
		    const struct vmbus_chanpkt_hdr *hdr,
		    const void *buf)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr = buf;
	uint32_t rxbuf_sz = hv->rxbuf_res->len;
	char *rxbuf = hv->rxbuf_res->addr;
	unsigned int i, hlen, count;
	struct hn_rx_bufinfo *rxb;

	/* At a minimum we need the type header */
	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) {
		PMD_RX_LOG(ERR, "invalid receive nvs RNDIS");
		return;
	}

	/* Make sure that this is an RNDIS message. */
	if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) {
		PMD_RX_LOG(ERR, "nvs type %u, not RNDIS",
			   nvs_hdr->type);
		return;
	}

	hlen = vmbus_chanpkt_getlen(hdr->hlen);
	if (unlikely(hlen < sizeof(*pkt))) {
		PMD_RX_LOG(ERR, "invalid rxbuf chanpkt");
		return;
	}

	pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr);
	if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) {
		PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x",
			   pkt->rxbuf_id);
		return;
	}

	count = pkt->rxbuf_cnt;
	if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf,
				     rxbuf[count]))) {
		PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count);
		return;
	}

	if (pkt->hdr.xactid > hv->rxbuf_section_cnt) {
		PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64,
			   pkt->hdr.xactid);
		return;
	}

	/* Setup receive buffer info to allow for callback */
	rxb = hn_rx_buf_init(rxq, pkt);

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		unsigned int ofs, len;

		ofs = pkt->rxbuf[i].ofs;
		len = pkt->rxbuf[i].len;

		if (unlikely(ofs + len > rxbuf_sz)) {
			PMD_RX_LOG(ERR,
				   "%uth RNDIS msg overflow ofs %u, len %u",
				   i, ofs, len);
			continue;
		}

		if (unlikely(len == 0)) {
			PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len);
			continue;
		}

		hn_rndis_receive(dev, rxq, rxb,
				 rxbuf + ofs, len);
	}

	/* Send ACK now if external mbuf not used */
	if (rte_mbuf_ext_refcnt_update(&rxb->shinfo, -1) == 0)
		hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
}

/*
 * Called when NVS inband events are received.
 * Send up a two part message with port_id and the NVS message
 * to the pipe to the netvsc-vf-event control thread.
 */
static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
				 const struct vmbus_chanpkt_hdr *pkt,
				 const void *data)
{
	const struct hn_nvs_hdr *hdr = data;

	switch (hdr->type) {
	case NVS_TYPE_TXTBL_NOTE:
		/* Transmit indirection table has locking problems
		 * in DPDK and is therefore not implemented
		 */
		PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
		break;

	case NVS_TYPE_VFASSOC_NOTE:
		hn_nvs_handle_vfassoc(dev, pkt, data);
		break;

	default:
		PMD_DRV_LOG(INFO,
			    "got notify, nvs type %u", hdr->type);
	}
}

struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
				      uint16_t queue_id,
				      unsigned int socket_id)
{
	struct hn_rx_queue *rxq;

	rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq)
		return NULL;

	rxq->hv = hv;
	rxq->chan = hv->channels[queue_id];
	rte_spinlock_init(&rxq->ring_lock);
	rxq->port_id = hv->port_id;
	rxq->queue_id = queue_id;
	rxq->event_sz = HN_RXQ_EVENT_DEFAULT;
	rxq->event_buf = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT,
					   RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq->event_buf) {
		rte_free(rxq);
		return NULL;
	}

	return rxq;
}

void
hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		     struct rte_eth_rxq_info *qinfo)
{
	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_id];

	qinfo->mp = rxq->mb_pool;
	qinfo->nb_desc = rxq->rx_ring->size;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
}

int
hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
		      uint16_t queue_idx, uint16_t nb_desc,
		      unsigned int socket_id,
		      const struct rte_eth_rxconf *rx_conf,
		      struct rte_mempool *mp)
{
	struct hn_data *hv = dev->data->dev_private;
	char ring_name[RTE_RING_NAMESIZE];
	struct hn_rx_queue *rxq;
	unsigned int count;
	int error = -ENOMEM;

	PMD_INIT_FUNC_TRACE();

	if (queue_idx == 0) {
		rxq = hv->primary;
	} else {
		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
		if (!rxq)
			return -ENOMEM;
	}

	rxq->mb_pool = mp;
	count = rte_mempool_avail_count(mp) / dev->data->nb_rx_queues;
	if (nb_desc == 0 || nb_desc > count)
		nb_desc = count;

	/*
	 * Staging ring from receive event logic to rx_pkts.
	 * rx_pkts assumes the caller handles multi-thread issues;
	 * the event logic has its own locking.
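	 * The ring size is rounded up to a power of two, as required by
	 * rte_ring_create().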
937 */ 938 snprintf(ring_name, sizeof(ring_name), 939 "hn_rx_%u_%u", dev->data->port_id, queue_idx); 940 rxq->rx_ring = rte_ring_create(ring_name, 941 rte_align32pow2(nb_desc), 942 socket_id, 0); 943 if (!rxq->rx_ring) 944 goto fail; 945 946 error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc, 947 socket_id, rx_conf, mp); 948 if (error) 949 goto fail; 950 951 dev->data->rx_queues[queue_idx] = rxq; 952 return 0; 953 954 fail: 955 rte_ring_free(rxq->rx_ring); 956 rte_free(rxq->event_buf); 957 rte_free(rxq); 958 return error; 959 } 960 961 static void 962 hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary) 963 { 964 965 if (!rxq) 966 return; 967 968 rte_ring_free(rxq->rx_ring); 969 rxq->rx_ring = NULL; 970 rxq->mb_pool = NULL; 971 972 hn_vf_rx_queue_release(rxq->hv, rxq->queue_id); 973 974 /* Keep primary queue to allow for control operations */ 975 if (keep_primary && rxq == rxq->hv->primary) 976 return; 977 978 rte_free(rxq->event_buf); 979 rte_free(rxq); 980 } 981 982 void 983 hn_dev_rx_queue_release(void *arg) 984 { 985 struct hn_rx_queue *rxq = arg; 986 987 PMD_INIT_FUNC_TRACE(); 988 989 hn_rx_queue_free(rxq, true); 990 } 991 992 /* 993 * Get the number of used descriptor in a rx queue 994 * For this device that means how many packets are pending in the ring. 995 */ 996 uint32_t 997 hn_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_id) 998 { 999 struct hn_rx_queue *rxq = dev->data->rx_queues[queue_id]; 1000 1001 return rte_ring_count(rxq->rx_ring); 1002 } 1003 1004 /* 1005 * Check the status of a Rx descriptor in the queue 1006 * 1007 * returns: 1008 * - -EINVAL - offset outside of ring 1009 * - RTE_ETH_RX_DESC_AVAIL - no data available yet 1010 * - RTE_ETH_RX_DESC_DONE - data is waiting in stagin ring 1011 */ 1012 int hn_dev_rx_queue_status(void *arg, uint16_t offset) 1013 { 1014 const struct hn_rx_queue *rxq = arg; 1015 1016 hn_process_events(rxq->hv, rxq->queue_id, 0); 1017 if (offset >= rxq->rx_ring->capacity) 1018 return -EINVAL; 1019 1020 if (offset < rte_ring_count(rxq->rx_ring)) 1021 return RTE_ETH_RX_DESC_DONE; 1022 else 1023 return RTE_ETH_RX_DESC_AVAIL; 1024 } 1025 1026 int 1027 hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt) 1028 { 1029 struct hn_tx_queue *txq = arg; 1030 1031 return hn_process_events(txq->hv, txq->queue_id, free_cnt); 1032 } 1033 1034 /* 1035 * Process pending events on the channel. 1036 * Called from both Rx queue poll and Tx cleanup 1037 */ 1038 uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id, 1039 uint32_t tx_limit) 1040 { 1041 struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id]; 1042 struct hn_rx_queue *rxq; 1043 uint32_t bytes_read = 0; 1044 uint32_t tx_done = 0; 1045 int ret = 0; 1046 1047 rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id]; 1048 1049 /* 1050 * Since channel is shared between Rx and TX queue need to have a lock 1051 * since DPDK does not force same CPU to be used for Rx/Tx. 
	 */
	if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
		return 0;

	for (;;) {
		const struct vmbus_chanpkt_hdr *pkt;
		uint32_t len = rxq->event_sz;
		const void *data;

retry:
		ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
		if (ret == -EAGAIN)
			break;	/* ring is empty */

		if (unlikely(ret == -ENOBUFS)) {
			/* event buffer not large enough to read ring */
			void *event_buf;

			PMD_DRV_LOG(DEBUG,
				    "event buffer expansion (need %u)", len);
			rxq->event_sz = len + len / 4;
			/* keep the old buffer if the allocation fails */
			event_buf = rte_realloc(rxq->event_buf, rxq->event_sz,
						RTE_CACHE_LINE_SIZE);
			if (event_buf) {
				rxq->event_buf = event_buf;
				goto retry;
			}
			/* out of memory, no more events now */
			rxq->event_sz = 0;
			break;
		}

		if (unlikely(ret <= 0)) {
			/* This indicates a failure to communicate (or worse) */
			rte_exit(EXIT_FAILURE,
				 "vmbus ring buffer error: %d", ret);
		}

		bytes_read += ret;
		pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
		data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen);

		switch (pkt->type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			++tx_done;
			hn_nvs_handle_comp(dev, queue_id, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(dev, pkt, data);
			break;

		default:
			PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type);
			break;
		}

		if (tx_limit && tx_done >= tx_limit)
			break;
	}

	if (bytes_read > 0)
		rte_vmbus_chan_signal_read(rxq->chan, bytes_read);

	rte_spinlock_unlock(&rxq->ring_lock);

	return tx_done;
}

static void hn_append_to_chim(struct hn_tx_queue *txq,
			      struct rndis_packet_msg *pkt,
			      const struct rte_mbuf *m)
{
	struct hn_txdesc *txd = txq->agg_txd;
	uint8_t *buf = (uint8_t *)pkt;
	unsigned int data_offs;

	hn_rndis_dump(pkt);

	data_offs = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
	txd->chim_size += pkt->len;
	txd->data_size += m->pkt_len;
	++txd->packets;
	hn_update_packet_stats(&txq->stats, m);

	for (; m; m = m->next) {
		uint16_t len = rte_pktmbuf_data_len(m);

		rte_memcpy(buf + data_offs,
			   rte_pktmbuf_mtod(m, const char *), len);
		data_offs += len;
	}
}

/*
 * Send pending aggregated data in chimney buffer (if any).
 * Returns an error if the send was unsuccessful because the channel
 * ring buffer was full.
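 * On failure the aggregated data is left in place so that the next
 * flush attempt can retry it.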
1151 */ 1152 static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig) 1153 1154 { 1155 struct hn_txdesc *txd = txq->agg_txd; 1156 struct hn_nvs_rndis rndis; 1157 int ret; 1158 1159 if (!txd) 1160 return 0; 1161 1162 rndis = (struct hn_nvs_rndis) { 1163 .type = NVS_TYPE_RNDIS, 1164 .rndis_mtype = NVS_RNDIS_MTYPE_DATA, 1165 .chim_idx = txd->chim_index, 1166 .chim_sz = txd->chim_size, 1167 }; 1168 1169 PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u", 1170 txq->port_id, txq->queue_id, txd->chim_index, txd->chim_size); 1171 1172 ret = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC, 1173 &rndis, sizeof(rndis), (uintptr_t)txd, need_sig); 1174 1175 if (likely(ret == 0)) 1176 hn_reset_txagg(txq); 1177 else if (ret == -EAGAIN) { 1178 PMD_TX_LOG(DEBUG, "port %u:%u channel full", 1179 txq->port_id, txq->queue_id); 1180 ++txq->stats.channel_full; 1181 } else { 1182 ++txq->stats.errors; 1183 1184 PMD_DRV_LOG(NOTICE, "port %u:%u send failed: %d", 1185 txq->port_id, txq->queue_id, ret); 1186 } 1187 return ret; 1188 } 1189 1190 /* 1191 * Try and find a place in a send chimney buffer to put 1192 * the small packet. If space is available, this routine 1193 * returns a pointer of where to place the data. 1194 * If no space, caller should try direct transmit. 1195 */ 1196 static void * 1197 hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, 1198 struct hn_txdesc *txd, uint32_t pktsize) 1199 { 1200 struct hn_txdesc *agg_txd = txq->agg_txd; 1201 struct rndis_packet_msg *pkt; 1202 void *chim; 1203 1204 if (agg_txd) { 1205 unsigned int padding, olen; 1206 1207 /* 1208 * Update the previous RNDIS packet's total length, 1209 * it can be increased due to the mandatory alignment 1210 * padding for this RNDIS packet. And update the 1211 * aggregating txdesc's chimney sending buffer size 1212 * accordingly. 1213 * 1214 * Zero-out the padding, as required by the RNDIS spec. 1215 */ 1216 pkt = txq->agg_prevpkt; 1217 olen = pkt->len; 1218 padding = RTE_ALIGN(olen, txq->agg_align) - olen; 1219 if (padding > 0) { 1220 agg_txd->chim_size += padding; 1221 pkt->len += padding; 1222 memset((uint8_t *)pkt + olen, 0, padding); 1223 } 1224 1225 chim = (uint8_t *)pkt + pkt->len; 1226 txq->agg_prevpkt = chim; 1227 txq->agg_pktleft--; 1228 txq->agg_szleft -= pktsize; 1229 if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) { 1230 /* 1231 * Probably can't aggregate more packets, 1232 * flush this aggregating txdesc proactively. 1233 */ 1234 txq->agg_pktleft = 0; 1235 } 1236 1237 hn_txd_put(txq, txd); 1238 return chim; 1239 } 1240 1241 txd->chim_index = hn_chim_alloc(hv); 1242 if (txd->chim_index == NVS_CHIM_IDX_INVALID) 1243 return NULL; 1244 1245 chim = (uint8_t *)hv->chim_res->addr 1246 + txd->chim_index * hv->chim_szmax; 1247 1248 txq->agg_txd = txd; 1249 txq->agg_pktleft = txq->agg_pktmax - 1; 1250 txq->agg_szleft = txq->agg_szmax - pktsize; 1251 txq->agg_prevpkt = chim; 1252 1253 return chim; 1254 } 1255 1256 static inline void * 1257 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, 1258 uint32_t pi_dlen, uint32_t pi_type) 1259 { 1260 const uint32_t pi_size = RNDIS_PKTINFO_SIZE(pi_dlen); 1261 struct rndis_pktinfo *pi; 1262 1263 /* 1264 * Per-packet-info does not move; it only grows. 1265 * 1266 * NOTE: 1267 * pktinfooffset in this phase counts from the beginning 1268 * of rndis_packet_msg. 
	 */
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + hn_rndis_pktlen(pkt));

	pkt->pktinfolen += pi_size;

	pi->size = pi_size;
	pi->type = pi_type;
	pi->offset = RNDIS_PKTINFO_OFFSET;

	return pi->data;
}

/* Put RNDIS header and packet info on packet */
static void hn_encap(struct rndis_packet_msg *pkt,
		     uint16_t queue_id,
		     const struct rte_mbuf *m)
{
	unsigned int hlen = m->l2_len + m->l3_len;
	uint32_t *pi_data;
	uint32_t pkt_hlen;

	pkt->type = RNDIS_PACKET_MSG;
	pkt->len = m->pkt_len;
	pkt->dataoffset = 0;
	pkt->datalen = m->pkt_len;
	pkt->oobdataoffset = 0;
	pkt->oobdatalen = 0;
	pkt->oobdataelements = 0;
	pkt->pktinfooffset = sizeof(*pkt);
	pkt->pktinfolen = 0;
	pkt->vchandle = 0;
	pkt->reserved = 0;

	/*
	 * Set the hash value for this packet to the queue_id so that
	 * the TX done event for this packet arrives on the right channel.
	 */
	pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE,
					  NDIS_PKTINFO_TYPE_HASHVAL);
	*pi_data = queue_id;

	if (m->ol_flags & PKT_TX_VLAN_PKT) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_VLAN);
		*pi_data = m->vlan_tci;
	}

	if (m->ol_flags & PKT_TX_TCP_SEG) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_LSO);

		if (m->ol_flags & PKT_TX_IPV6) {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen,
							   m->tso_segsz);
		} else {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen,
							   m->tso_segsz);
		}
	} else if (m->ol_flags &
		   (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM)) {
		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE,
						  NDIS_PKTINFO_TYPE_CSUM);
		*pi_data = 0;

		if (m->ol_flags & PKT_TX_IPV6)
			*pi_data |= NDIS_TXCSUM_INFO_IPV6;
		if (m->ol_flags & PKT_TX_IPV4) {
			*pi_data |= NDIS_TXCSUM_INFO_IPV4;

			if (m->ol_flags & PKT_TX_IP_CKSUM)
				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
		}

		if (m->ol_flags & PKT_TX_TCP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen);
		else if (m->ol_flags & PKT_TX_UDP_CKSUM)
			*pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen);
	}

	pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen;
	/* Fixup RNDIS packet message total length */
	pkt->len += pkt_hlen;

	/* Convert RNDIS packet message offsets */
	pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
	pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset);
}

/* How many scatter gather list elements are needed */
static unsigned int hn_get_slots(const struct rte_mbuf *m)
{
	unsigned int slots = 1; /* for RNDIS header */

	while (m) {
		unsigned int size = rte_pktmbuf_data_len(m);
		unsigned int offs = rte_mbuf_data_iova(m) & PAGE_MASK;

		slots += (offs + size + PAGE_SIZE - 1) / PAGE_SIZE;
		m = m->next;
	}

	return slots;
}

/* Build scatter gather list from chained mbuf */
static unsigned int hn_fill_sg(struct vmbus_gpa *sg,
			       const struct rte_mbuf *m)
{
	unsigned int segs = 0;

	while (m) {
		rte_iova_t addr = rte_mbuf_data_iova(m);
		unsigned int page = addr / PAGE_SIZE;
		unsigned int offset = addr & PAGE_MASK;
		unsigned int len = rte_pktmbuf_data_len(m);

		while (len > 0) {
			unsigned int bytes = RTE_MIN(len, PAGE_SIZE - offset);

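			/* One GPA entry per page-sized chunk of the segment */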
			sg[segs].page = page;
			sg[segs].ofs = offset;
			sg[segs].len = bytes;
			segs++;

			++page;
			offset = 0;
			len -= bytes;
		}
		m = m->next;
	}

	return segs;
}

/* Transmit directly from mbuf */
static int hn_xmit_sg(struct hn_tx_queue *txq,
		      const struct hn_txdesc *txd, const struct rte_mbuf *m,
		      bool *need_sig)
{
	struct vmbus_gpa sg[hn_get_slots(m)];
	struct hn_nvs_rndis nvs_rndis = {
		.type = NVS_TYPE_RNDIS,
		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
		.chim_sz = txd->chim_size,
	};
	rte_iova_t addr;
	unsigned int segs;

	/* attach aggregation data if present */
	if (txd->chim_size > 0)
		nvs_rndis.chim_idx = txd->chim_index;
	else
		nvs_rndis.chim_idx = NVS_CHIM_IDX_INVALID;

	hn_rndis_dump(txd->rndis_pkt);

	/* pass IOVA of rndis header in first segment */
	addr = rte_malloc_virt2iova(txq->tx_rndis);
	if (unlikely(addr == RTE_BAD_IOVA)) {
		PMD_DRV_LOG(ERR, "RNDIS transmit cannot get iova");
		return -EINVAL;
	}
	addr = addr + ((char *)txd->rndis_pkt - (char *)txq->tx_rndis);

	sg[0].page = addr / PAGE_SIZE;
	sg[0].ofs = addr & PAGE_MASK;
	sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt));
	segs = 1;

	hn_update_packet_stats(&txq->stats, m);

	segs += hn_fill_sg(sg + 1, m);

	PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u",
		   txq->port_id, txq->queue_id, txd->chim_index,
		   segs, nvs_rndis.chim_sz);

	return hn_nvs_send_sglist(txq->chan, sg, segs,
				  &nvs_rndis, sizeof(nvs_rndis),
				  (uintptr_t)txd, need_sig);
}

uint16_t
hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct hn_tx_queue *txq = ptxq;
	uint16_t queue_id = txq->queue_id;
	struct hn_data *hv = txq->hv;
	struct rte_eth_dev *vf_dev;
	bool need_sig = false;
	uint16_t nb_tx, tx_thresh;
	int ret;

	if (unlikely(hv->closed))
		return 0;

	/*
	 * Always check for events on the primary channel
	 * because that is where hotplug notifications occur.
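	 * Also poll for completions when the descriptor pool is running
	 * low so that completed transmits can be reclaimed.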
	 */
	tx_thresh = RTE_MAX(txq->free_thresh, nb_pkts);
	if (txq->queue_id == 0 ||
	    rte_mempool_avail_count(txq->txdesc_pool) < tx_thresh)
		hn_process_events(hv, txq->queue_id, 0);

	/* Transmit over VF if present and up */
	rte_rwlock_read_lock(&hv->vf_lock);
	vf_dev = hn_get_vf_dev(hv);
	if (vf_dev && vf_dev->data->dev_started) {
		void *sub_q = vf_dev->data->tx_queues[queue_id];

		nb_tx = (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
		rte_rwlock_read_unlock(&hv->vf_lock);
		return nb_tx;
	}
	rte_rwlock_read_unlock(&hv->vf_lock);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *m = tx_pkts[nb_tx];
		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
		struct rndis_packet_msg *pkt;
		struct hn_txdesc *txd;

		txd = hn_txd_get(txq);
		if (txd == NULL)
			break;

		/* For small packets aggregate them in chimney buffer */
		if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) {
			/* If this packet will not fit, then flush */
			if (txq->agg_pktleft == 0 ||
			    RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) {
				if (hn_flush_txagg(txq, &need_sig))
					goto fail;
			}

			pkt = hn_try_txagg(hv, txq, txd, pkt_size);
			if (unlikely(!pkt))
				break;

			hn_encap(pkt, queue_id, m);
			hn_append_to_chim(txq, pkt, m);

			rte_pktmbuf_free(m);

			/* if buffer is full, flush */
			if (txq->agg_pktleft == 0 &&
			    hn_flush_txagg(txq, &need_sig))
				goto fail;
		} else {
			/* Send any outstanding packets in buffer */
			if (txq->agg_txd && hn_flush_txagg(txq, &need_sig))
				goto fail;

			pkt = txd->rndis_pkt;
			txd->m = m;
			txd->data_size = m->pkt_len;
			++txd->packets;

			hn_encap(pkt, queue_id, m);

			ret = hn_xmit_sg(txq, txd, m, &need_sig);
			if (unlikely(ret != 0)) {
				if (ret == -EAGAIN) {
					PMD_TX_LOG(DEBUG, "sg channel full");
					++txq->stats.channel_full;
				} else {
					PMD_DRV_LOG(NOTICE, "sg send failed: %d",
						    ret);
					++txq->stats.errors;
				}
				hn_txd_put(txq, txd);
				goto fail;
			}
		}
	}

	/* If a partial buffer is left, then try to send it;
	 * if that fails, then reuse it on the next send.
1548 */ 1549 hn_flush_txagg(txq, &need_sig); 1550 1551 fail: 1552 if (need_sig) 1553 rte_vmbus_chan_signal_tx(txq->chan); 1554 1555 return nb_tx; 1556 } 1557 1558 static uint16_t 1559 hn_recv_vf(uint16_t vf_port, const struct hn_rx_queue *rxq, 1560 struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 1561 { 1562 uint16_t i, n; 1563 1564 if (unlikely(nb_pkts == 0)) 1565 return 0; 1566 1567 n = rte_eth_rx_burst(vf_port, rxq->queue_id, rx_pkts, nb_pkts); 1568 1569 /* relabel the received mbufs */ 1570 for (i = 0; i < n; i++) 1571 rx_pkts[i]->port = rxq->port_id; 1572 1573 return n; 1574 } 1575 1576 uint16_t 1577 hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 1578 { 1579 struct hn_rx_queue *rxq = prxq; 1580 struct hn_data *hv = rxq->hv; 1581 struct rte_eth_dev *vf_dev; 1582 uint16_t nb_rcv; 1583 1584 if (unlikely(hv->closed)) 1585 return 0; 1586 1587 /* Check for new completions (and hotplug) */ 1588 if (likely(rte_ring_count(rxq->rx_ring) < nb_pkts)) 1589 hn_process_events(hv, rxq->queue_id, 0); 1590 1591 /* Always check the vmbus path for multicast and new flows */ 1592 nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring, 1593 (void **)rx_pkts, nb_pkts, NULL); 1594 1595 /* If VF is available, check that as well */ 1596 rte_rwlock_read_lock(&hv->vf_lock); 1597 vf_dev = hn_get_vf_dev(hv); 1598 if (vf_dev && vf_dev->data->dev_started) 1599 nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq, 1600 rx_pkts + nb_rcv, nb_pkts - nb_rcv); 1601 1602 rte_rwlock_read_unlock(&hv->vf_lock); 1603 return nb_rcv; 1604 } 1605 1606 void 1607 hn_dev_free_queues(struct rte_eth_dev *dev) 1608 { 1609 unsigned int i; 1610 1611 for (i = 0; i < dev->data->nb_rx_queues; i++) { 1612 struct hn_rx_queue *rxq = dev->data->rx_queues[i]; 1613 1614 hn_rx_queue_free(rxq, false); 1615 dev->data->rx_queues[i] = NULL; 1616 } 1617 dev->data->nb_rx_queues = 0; 1618 1619 for (i = 0; i < dev->data->nb_tx_queues; i++) { 1620 hn_dev_tx_queue_release(dev->data->tx_queues[i]); 1621 dev->data->tx_queues[i] = NULL; 1622 } 1623 dev->data->nb_tx_queues = 0; 1624 } 1625