1 /* 2 * BSD LICENSE 3 * 4 * Copyright (c) 2013-2017, Wind River Systems, Inc. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * 1) Redistributions of source code must retain the above copyright notice, 10 * this list of conditions and the following disclaimer. 11 * 12 * 2) Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 16 * 3) Neither the name of Wind River Systems nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 
*/

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

#include <rte_ethdev.h>
#include <rte_ethdev_pci.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_spinlock.h>
#include <rte_byteorder.h>
#include <rte_dev.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_io.h>

#include "rte_avp_common.h"
#include "rte_avp_fifo.h"

#include "avp_logs.h"


/*
 * Forward declaration: avp_dev_attach() calls this before its definition
 * appears in the file.
 */
static int avp_dev_create(struct rte_pci_device *pci_dev,
			  struct rte_eth_dev *eth_dev);

/* ethdev control-path callbacks; wired into avp_eth_dev_ops below */
static int avp_dev_configure(struct rte_eth_dev *dev);
static int avp_dev_start(struct rte_eth_dev *dev);
static void avp_dev_stop(struct rte_eth_dev *dev);
static void avp_dev_close(struct rte_eth_dev *dev);
static void avp_dev_info_get(struct rte_eth_dev *dev,
			     struct rte_eth_dev_info *dev_info);
static int avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);

/* queue setup callbacks; wired into avp_eth_dev_ops below */
static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
				  uint16_t rx_queue_id,
				  uint16_t nb_rx_desc,
				  unsigned int socket_id,
				  const struct rte_eth_rxconf *rx_conf,
				  struct rte_mempool *pool);

static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
				  uint16_t tx_queue_id,
				  uint16_t nb_tx_desc,
				  unsigned int socket_id,
				  const struct rte_eth_txconf *tx_conf);

/*
 * Burst receive/transmit handlers.  eth_avp_dev_init() installs the plain
 * variants by default and the scattered variants when
 * eth_dev->data->scattered_rx is set in a secondary process.
 */
static uint16_t avp_recv_scattered_pkts(void *rx_queue,
					struct rte_mbuf **rx_pkts,
					uint16_t nb_pkts);

static uint16_t avp_recv_pkts(void *rx_queue,
			      struct rte_mbuf **rx_pkts,
			      uint16_t nb_pkts);

static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
					struct rte_mbuf **tx_pkts,
					uint16_t nb_pkts);

static uint16_t avp_xmit_pkts(void *tx_queue,
			      struct rte_mbuf **tx_pkts,
			      uint16_t nb_pkts);

/* queue release callbacks; wired into avp_eth_dev_ops below */
static void avp_dev_rx_queue_release(void *rxq);
static void avp_dev_tx_queue_release(void *txq);

/* statistics callbacks; wired into avp_eth_dev_ops below */
static int avp_dev_stats_get(struct rte_eth_dev *dev,
			     struct rte_eth_stats *stats);
static void avp_dev_stats_reset(struct rte_eth_dev *dev);


/* NOTE(review): users of these limits are outside this part of the file */
#define AVP_MAX_RX_BURST 64
#define AVP_MAX_TX_BURST 64
#define AVP_MAX_MAC_ADDRS 1
#define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN


/*
 * Defines the number of microseconds to wait before checking the response
 * queue for completion.
 */
#define AVP_REQUEST_DELAY_USECS (5000)

/*
 * Defines the number of times to check the response queue for completion
 * before declaring a timeout.
 */
#define AVP_MAX_REQUEST_RETRY (100)

/* Defines the current PCI driver version number */
#define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION

/*
 * The set of PCI devices this driver supports
 */
static const struct rte_pci_id pci_id_avp_map[] = {
	{ .vendor_id = RTE_AVP_PCI_VENDOR_ID,
	  .device_id = RTE_AVP_PCI_DEVICE_ID,
	  .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
	  .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
	  .class_id = RTE_CLASS_ANY_ID,
	},

	{ .vendor_id = 0, /* sentinel */
	},
};

/*
 * dev_ops for avp, bare necessities for basic operation
 */
static const struct eth_dev_ops avp_eth_dev_ops = {
	.dev_configure       = avp_dev_configure,
	.dev_start           = avp_dev_start,
	.dev_stop            = avp_dev_stop,
	.dev_close           = avp_dev_close,
	.dev_infos_get       = avp_dev_info_get,
	.vlan_offload_set    = avp_vlan_offload_set,
	.stats_get           = avp_dev_stats_get,
	.stats_reset         = avp_dev_stats_reset,
	.link_update         = avp_dev_link_update,
	.promiscuous_enable  = avp_dev_promiscuous_enable,
	.promiscuous_disable = avp_dev_promiscuous_disable,
	.rx_queue_setup      = avp_dev_rx_queue_setup,
	.rx_queue_release    = avp_dev_rx_queue_release,
	.tx_queue_setup      = avp_dev_tx_queue_setup,
	.tx_queue_release    = avp_dev_tx_queue_release,
};

/**@{ AVP device flags */
#define AVP_F_PROMISC (1 << 1)
#define AVP_F_CONFIGURED (1 << 2)
#define AVP_F_LINKUP (1 << 3)
#define AVP_F_DETACHED (1 << 4)
/**@} */

/* Ethernet device validation marker */
#define AVP_ETHDEV_MAGIC 0x92972862

/*
 * Defines the AVP device attributes which are attached to an RTE ethernet
 * device
 */
struct avp_dev {
	uint32_t magic; /**< Memory validation marker */
	uint64_t device_id; /**< Unique system identifier */
	struct ether_addr ethaddr; /**< Host specified MAC address */
	struct rte_eth_dev_data *dev_data;
	/**< Back pointer to ethernet device data */
	volatile uint32_t flags; /**< Device operational flags */
	uint16_t port_id; /**< Ethernet port identifier */
	struct rte_mempool *pool; /**< pkt mbuf mempool */
	unsigned int guest_mbuf_size; /**< local pool mbuf size */
	unsigned int host_mbuf_size; /**< host mbuf size */
	unsigned int max_rx_pkt_len; /**< maximum receive unit */
	uint32_t host_features; /**< Supported feature bitmap */
	uint32_t features; /**< Enabled feature bitmap */
	unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
	unsigned int max_tx_queues; /**< Maximum number of transmit queues */
	unsigned int num_rx_queues; /**< Negotiated number of receive queues */
	unsigned int max_rx_queues; /**< Maximum number of receive queues */

	struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
	struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
	struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
	/**< Allocated mbufs queue */
	struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
	/**< To be freed mbufs queue */

	/* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
	rte_spinlock_t lock;

	/* For request & response */
	struct rte_avp_fifo *req_q; /**< Request queue */
	struct rte_avp_fifo *resp_q; /**< Response queue */
	void *host_sync_addr; /**< (host) Req/Resp Mem address */
	void *sync_addr; /**< Req/Resp Mem address */
	void *host_mbuf_addr; /**< (host) MBUF pool start address */
	void *mbuf_addr; /**< MBUF pool start address */
} __rte_cache_aligned;

/* RTE ethernet private data */
struct avp_adapter {
	struct avp_dev avp;
} __rte_cache_aligned;


/* 32-bit MMIO register write */
#define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))

/* 32-bit MMIO register read */
#define AVP_READ32(_addr) rte_read32_relaxed((_addr))

/* Macro to cast the ethernet device private data to a AVP object */
#define AVP_DEV_PRIVATE_TO_HW(adapter) \
	(&((struct avp_adapter *)adapter)->avp)

/*
 * Defines the structure of a AVP device queue for the purpose of handling the
 * receive and transmit burst callback functions
 */
struct avp_queue {
	struct rte_eth_dev_data *dev_data;
	/**< Backpointer to ethernet device data */
	struct avp_dev *avp; /**< Backpointer to AVP device */
	uint16_t queue_id;
	/**< Queue identifier used for indexing current queue */
	uint16_t queue_base;
	/**< Base queue identifier for queue servicing */
	uint16_t queue_limit;
	/**< Maximum queue identifier for queue servicing */

	/* per-queue counters */
	uint64_t packets;
	uint64_t bytes;
	uint64_t errors;
};

/* send a request and wait for a response
 *
 * @warning must be called while holding the avp->lock spinlock.
263 */ 264 static int 265 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request) 266 { 267 unsigned int retry = AVP_MAX_REQUEST_RETRY; 268 void *resp_addr = NULL; 269 unsigned int count; 270 int ret; 271 272 PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id); 273 274 request->result = -ENOTSUP; 275 276 /* Discard any stale responses before starting a new request */ 277 while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1)) 278 PMD_DRV_LOG(DEBUG, "Discarding stale response\n"); 279 280 rte_memcpy(avp->sync_addr, request, sizeof(*request)); 281 count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1); 282 if (count < 1) { 283 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n", 284 request->req_id); 285 ret = -EBUSY; 286 goto done; 287 } 288 289 while (retry--) { 290 /* wait for a response */ 291 usleep(AVP_REQUEST_DELAY_USECS); 292 293 count = avp_fifo_count(avp->resp_q); 294 if (count >= 1) { 295 /* response received */ 296 break; 297 } 298 299 if ((count < 1) && (retry == 0)) { 300 PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n", 301 request->req_id); 302 ret = -ETIME; 303 goto done; 304 } 305 } 306 307 /* retrieve the response */ 308 count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1); 309 if ((count != 1) || (resp_addr != avp->host_sync_addr)) { 310 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n", 311 count, resp_addr, avp->host_sync_addr); 312 ret = -ENODATA; 313 goto done; 314 } 315 316 /* copy to user buffer */ 317 rte_memcpy(request, avp->sync_addr, sizeof(*request)); 318 ret = 0; 319 320 PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n", 321 request->result, request->req_id); 322 323 done: 324 return ret; 325 } 326 327 static int 328 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state) 329 { 330 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 331 struct rte_avp_request request; 332 int ret; 333 334 /* 
setup a link state change request */ 335 memset(&request, 0, sizeof(request)); 336 request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF; 337 request.if_up = state; 338 339 ret = avp_dev_process_request(avp, &request); 340 341 return ret == 0 ? request.result : ret; 342 } 343 344 static int 345 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev, 346 struct rte_avp_device_config *config) 347 { 348 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 349 struct rte_avp_request request; 350 int ret; 351 352 /* setup a configure request */ 353 memset(&request, 0, sizeof(request)); 354 request.req_id = RTE_AVP_REQ_CFG_DEVICE; 355 memcpy(&request.config, config, sizeof(request.config)); 356 357 ret = avp_dev_process_request(avp, &request); 358 359 return ret == 0 ? request.result : ret; 360 } 361 362 static int 363 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev) 364 { 365 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 366 struct rte_avp_request request; 367 int ret; 368 369 /* setup a shutdown request */ 370 memset(&request, 0, sizeof(request)); 371 request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE; 372 373 ret = avp_dev_process_request(avp, &request); 374 375 return ret == 0 ? 
request.result : ret; 376 } 377 378 /* translate from host mbuf virtual address to guest virtual address */ 379 static inline void * 380 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address) 381 { 382 return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address, 383 (uintptr_t)avp->host_mbuf_addr), 384 (uintptr_t)avp->mbuf_addr); 385 } 386 387 /* translate from host physical address to guest virtual address */ 388 static void * 389 avp_dev_translate_address(struct rte_eth_dev *eth_dev, 390 rte_iova_t host_phys_addr) 391 { 392 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 393 struct rte_mem_resource *resource; 394 struct rte_avp_memmap_info *info; 395 struct rte_avp_memmap *map; 396 off_t offset; 397 void *addr; 398 unsigned int i; 399 400 addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr; 401 resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR]; 402 info = (struct rte_avp_memmap_info *)resource->addr; 403 404 offset = 0; 405 for (i = 0; i < info->nb_maps; i++) { 406 /* search all segments looking for a matching address */ 407 map = &info->maps[i]; 408 409 if ((host_phys_addr >= map->phys_addr) && 410 (host_phys_addr < (map->phys_addr + map->length))) { 411 /* address is within this segment */ 412 offset += (host_phys_addr - map->phys_addr); 413 addr = RTE_PTR_ADD(addr, offset); 414 415 PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n", 416 host_phys_addr, addr); 417 418 return addr; 419 } 420 offset += map->length; 421 } 422 423 return NULL; 424 } 425 426 /* verify that the incoming device version is compatible with our version */ 427 static int 428 avp_dev_version_check(uint32_t version) 429 { 430 uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION); 431 uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version); 432 433 if (device <= driver) { 434 /* the host driver version is less than or equal to ours */ 435 return 0; 436 } 437 438 return 1; 439 } 440 441 /* verify that memory 
regions have expected version and validation markers */ 442 static int 443 avp_dev_check_regions(struct rte_eth_dev *eth_dev) 444 { 445 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 446 struct rte_avp_memmap_info *memmap; 447 struct rte_avp_device_info *info; 448 struct rte_mem_resource *resource; 449 unsigned int i; 450 451 /* Dump resource info for debug */ 452 for (i = 0; i < PCI_MAX_RESOURCE; i++) { 453 resource = &pci_dev->mem_resource[i]; 454 if ((resource->phys_addr == 0) || (resource->len == 0)) 455 continue; 456 457 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n", 458 i, resource->phys_addr, 459 resource->len, resource->addr); 460 461 switch (i) { 462 case RTE_AVP_PCI_MEMMAP_BAR: 463 memmap = (struct rte_avp_memmap_info *)resource->addr; 464 if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) || 465 (memmap->version != RTE_AVP_MEMMAP_VERSION)) { 466 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n", 467 memmap->magic, memmap->version); 468 return -EINVAL; 469 } 470 break; 471 472 case RTE_AVP_PCI_DEVICE_BAR: 473 info = (struct rte_avp_device_info *)resource->addr; 474 if ((info->magic != RTE_AVP_DEVICE_MAGIC) || 475 avp_dev_version_check(info->version)) { 476 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n", 477 info->magic, info->version, 478 AVP_DPDK_DRIVER_VERSION); 479 return -EINVAL; 480 } 481 break; 482 483 case RTE_AVP_PCI_MEMORY_BAR: 484 case RTE_AVP_PCI_MMIO_BAR: 485 if (resource->addr == NULL) { 486 PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n", 487 i); 488 return -EINVAL; 489 } 490 break; 491 492 case RTE_AVP_PCI_MSIX_BAR: 493 default: 494 /* no validation required */ 495 break; 496 } 497 } 498 499 return 0; 500 } 501 502 static int 503 avp_dev_detach(struct rte_eth_dev *eth_dev) 504 { 505 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 506 int ret; 507 508 PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n", 
509 eth_dev->data->port_id, avp->device_id); 510 511 rte_spinlock_lock(&avp->lock); 512 513 if (avp->flags & AVP_F_DETACHED) { 514 PMD_DRV_LOG(NOTICE, "port %u already detached\n", 515 eth_dev->data->port_id); 516 ret = 0; 517 goto unlock; 518 } 519 520 /* shutdown the device first so the host stops sending us packets. */ 521 ret = avp_dev_ctrl_shutdown(eth_dev); 522 if (ret < 0) { 523 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n", 524 ret); 525 avp->flags &= ~AVP_F_DETACHED; 526 goto unlock; 527 } 528 529 avp->flags |= AVP_F_DETACHED; 530 rte_wmb(); 531 532 /* wait for queues to acknowledge the presence of the detach flag */ 533 rte_delay_ms(1); 534 535 ret = 0; 536 537 unlock: 538 rte_spinlock_unlock(&avp->lock); 539 return ret; 540 } 541 542 static void 543 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) 544 { 545 struct avp_dev *avp = 546 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 547 struct avp_queue *rxq; 548 uint16_t queue_count; 549 uint16_t remainder; 550 551 rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id]; 552 553 /* 554 * Must map all AVP fifos as evenly as possible between the configured 555 * device queues. Each device queue will service a subset of the AVP 556 * fifos. If there is an odd number of device queues the first set of 557 * device queues will get the extra AVP fifos. 
558 */ 559 queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues; 560 remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues; 561 if (rx_queue_id < remainder) { 562 /* these queues must service one extra FIFO */ 563 rxq->queue_base = rx_queue_id * (queue_count + 1); 564 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1; 565 } else { 566 /* these queues service the regular number of FIFO */ 567 rxq->queue_base = ((remainder * (queue_count + 1)) + 568 ((rx_queue_id - remainder) * queue_count)); 569 rxq->queue_limit = rxq->queue_base + queue_count - 1; 570 } 571 572 PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n", 573 rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit); 574 575 rxq->queue_id = rxq->queue_base; 576 } 577 578 static void 579 _avp_set_queue_counts(struct rte_eth_dev *eth_dev) 580 { 581 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 582 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 583 struct rte_avp_device_info *host_info; 584 void *addr; 585 586 addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; 587 host_info = (struct rte_avp_device_info *)addr; 588 589 /* 590 * the transmit direction is not negotiated beyond respecting the max 591 * number of queues because the host can handle arbitrary guest tx 592 * queues (host rx queues). 593 */ 594 avp->num_tx_queues = eth_dev->data->nb_tx_queues; 595 596 /* 597 * the receive direction is more restrictive. The host requires a 598 * minimum number of guest rx queues (host tx queues) therefore 599 * negotiate a value that is at least as large as the host minimum 600 * requirement. If the host and guest values are not identical then a 601 * mapping will be established in the receive_queue_setup function. 
602 */ 603 avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues, 604 eth_dev->data->nb_rx_queues); 605 606 PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n", 607 avp->num_tx_queues, avp->num_rx_queues); 608 } 609 610 static int 611 avp_dev_attach(struct rte_eth_dev *eth_dev) 612 { 613 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 614 struct rte_avp_device_config config; 615 unsigned int i; 616 int ret; 617 618 PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n", 619 eth_dev->data->port_id, avp->device_id); 620 621 rte_spinlock_lock(&avp->lock); 622 623 if (!(avp->flags & AVP_F_DETACHED)) { 624 PMD_DRV_LOG(NOTICE, "port %u already attached\n", 625 eth_dev->data->port_id); 626 ret = 0; 627 goto unlock; 628 } 629 630 /* 631 * make sure that the detached flag is set prior to reconfiguring the 632 * queues. 633 */ 634 avp->flags |= AVP_F_DETACHED; 635 rte_wmb(); 636 637 /* 638 * re-run the device create utility which will parse the new host info 639 * and setup the AVP device queue pointers. 640 */ 641 ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev); 642 if (ret < 0) { 643 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n", 644 ret); 645 goto unlock; 646 } 647 648 if (avp->flags & AVP_F_CONFIGURED) { 649 /* 650 * Update the receive queue mapping to handle cases where the 651 * source and destination hosts have different queue 652 * requirements. As long as the DETACHED flag is asserted the 653 * queue table should not be referenced so it should be safe to 654 * update it. 655 */ 656 _avp_set_queue_counts(eth_dev); 657 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) 658 _avp_set_rx_queue_mappings(eth_dev, i); 659 660 /* 661 * Update the host with our config details so that it knows the 662 * device is active. 
663 */ 664 memset(&config, 0, sizeof(config)); 665 config.device_id = avp->device_id; 666 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; 667 config.driver_version = AVP_DPDK_DRIVER_VERSION; 668 config.features = avp->features; 669 config.num_tx_queues = avp->num_tx_queues; 670 config.num_rx_queues = avp->num_rx_queues; 671 config.if_up = !!(avp->flags & AVP_F_LINKUP); 672 673 ret = avp_dev_ctrl_set_config(eth_dev, &config); 674 if (ret < 0) { 675 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", 676 ret); 677 goto unlock; 678 } 679 } 680 681 rte_wmb(); 682 avp->flags &= ~AVP_F_DETACHED; 683 684 ret = 0; 685 686 unlock: 687 rte_spinlock_unlock(&avp->lock); 688 return ret; 689 } 690 691 static void 692 avp_dev_interrupt_handler(void *data) 693 { 694 struct rte_eth_dev *eth_dev = data; 695 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 696 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 697 uint32_t status, value; 698 int ret; 699 700 if (registers == NULL) 701 rte_panic("no mapped MMIO register space\n"); 702 703 /* read the interrupt status register 704 * note: this register clears on read so all raised interrupts must be 705 * handled or remembered for later processing 706 */ 707 status = AVP_READ32( 708 RTE_PTR_ADD(registers, 709 RTE_AVP_INTERRUPT_STATUS_OFFSET)); 710 711 if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) { 712 /* handle interrupt based on current status */ 713 value = AVP_READ32( 714 RTE_PTR_ADD(registers, 715 RTE_AVP_MIGRATION_STATUS_OFFSET)); 716 switch (value) { 717 case RTE_AVP_MIGRATION_DETACHED: 718 ret = avp_dev_detach(eth_dev); 719 break; 720 case RTE_AVP_MIGRATION_ATTACHED: 721 ret = avp_dev_attach(eth_dev); 722 break; 723 default: 724 PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n", 725 value); 726 ret = -EINVAL; 727 } 728 729 /* acknowledge the request by writing out our current status */ 730 value = (ret == 0 ? 
value : RTE_AVP_MIGRATION_ERROR); 731 AVP_WRITE32(value, 732 RTE_PTR_ADD(registers, 733 RTE_AVP_MIGRATION_ACK_OFFSET)); 734 735 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n"); 736 } 737 738 if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK) 739 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n", 740 status); 741 742 /* re-enable UIO interrupt handling */ 743 ret = rte_intr_enable(&pci_dev->intr_handle); 744 if (ret < 0) { 745 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", 746 ret); 747 /* continue */ 748 } 749 } 750 751 static int 752 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev) 753 { 754 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 755 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 756 int ret; 757 758 if (registers == NULL) 759 return -EINVAL; 760 761 /* enable UIO interrupt handling */ 762 ret = rte_intr_enable(&pci_dev->intr_handle); 763 if (ret < 0) { 764 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n", 765 ret); 766 return ret; 767 } 768 769 /* inform the device that all interrupts are enabled */ 770 AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK, 771 RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); 772 773 return 0; 774 } 775 776 static int 777 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev) 778 { 779 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 780 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 781 int ret; 782 783 if (registers == NULL) 784 return 0; 785 786 /* inform the device that all interrupts are disabled */ 787 AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK, 788 RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); 789 790 /* enable UIO interrupt handling */ 791 ret = rte_intr_disable(&pci_dev->intr_handle); 792 if (ret < 0) { 793 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n", 794 ret); 795 return ret; 796 } 797 798 return 0; 799 } 800 801 static int 802 avp_dev_setup_interrupts(struct 
rte_eth_dev *eth_dev) 803 { 804 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 805 int ret; 806 807 /* register a callback handler with UIO for interrupt notifications */ 808 ret = rte_intr_callback_register(&pci_dev->intr_handle, 809 avp_dev_interrupt_handler, 810 (void *)eth_dev); 811 if (ret < 0) { 812 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n", 813 ret); 814 return ret; 815 } 816 817 /* enable interrupt processing */ 818 return avp_dev_enable_interrupts(eth_dev); 819 } 820 821 static int 822 avp_dev_migration_pending(struct rte_eth_dev *eth_dev) 823 { 824 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 825 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 826 uint32_t value; 827 828 if (registers == NULL) 829 return 0; 830 831 value = AVP_READ32(RTE_PTR_ADD(registers, 832 RTE_AVP_MIGRATION_STATUS_OFFSET)); 833 if (value == RTE_AVP_MIGRATION_DETACHED) { 834 /* migration is in progress; ack it if we have not already */ 835 AVP_WRITE32(value, 836 RTE_PTR_ADD(registers, 837 RTE_AVP_MIGRATION_ACK_OFFSET)); 838 return 1; 839 } 840 return 0; 841 } 842 843 /* 844 * create a AVP device using the supplied device info by first translating it 845 * to guest address space(s). 
846 */ 847 static int 848 avp_dev_create(struct rte_pci_device *pci_dev, 849 struct rte_eth_dev *eth_dev) 850 { 851 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 852 struct rte_avp_device_info *host_info; 853 struct rte_mem_resource *resource; 854 unsigned int i; 855 856 resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR]; 857 if (resource->addr == NULL) { 858 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n", 859 RTE_AVP_PCI_DEVICE_BAR); 860 return -EFAULT; 861 } 862 host_info = (struct rte_avp_device_info *)resource->addr; 863 864 if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) || 865 avp_dev_version_check(host_info->version)) { 866 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n", 867 host_info->magic, host_info->version, 868 AVP_DPDK_DRIVER_VERSION); 869 return -EINVAL; 870 } 871 872 PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n", 873 RTE_AVP_GET_RELEASE_VERSION(host_info->version), 874 RTE_AVP_GET_MAJOR_VERSION(host_info->version), 875 RTE_AVP_GET_MINOR_VERSION(host_info->version)); 876 877 PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n", 878 host_info->min_tx_queues, host_info->max_tx_queues); 879 PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n", 880 host_info->min_rx_queues, host_info->max_rx_queues); 881 PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n", 882 host_info->features); 883 884 if (avp->magic != AVP_ETHDEV_MAGIC) { 885 /* 886 * First time initialization (i.e., not during a VM 887 * migration) 888 */ 889 memset(avp, 0, sizeof(*avp)); 890 avp->magic = AVP_ETHDEV_MAGIC; 891 avp->dev_data = eth_dev->data; 892 avp->port_id = eth_dev->data->port_id; 893 avp->host_mbuf_size = host_info->mbuf_size; 894 avp->host_features = host_info->features; 895 rte_spinlock_init(&avp->lock); 896 memcpy(&avp->ethaddr.addr_bytes[0], 897 host_info->ethaddr, ETHER_ADDR_LEN); 898 /* adjust max values to not exceed our max */ 899 avp->max_tx_queues = 900 
RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES); 901 avp->max_rx_queues = 902 RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES); 903 } else { 904 /* Re-attaching during migration */ 905 906 /* TODO... requires validation of host values */ 907 if ((host_info->features & avp->features) != avp->features) { 908 PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n", 909 avp->features, host_info->features); 910 /* this should not be possible; continue for now */ 911 } 912 } 913 914 /* the device id is allowed to change over migrations */ 915 avp->device_id = host_info->device_id; 916 917 /* translate incoming host addresses to guest address space */ 918 PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n", 919 host_info->tx_phys); 920 PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n", 921 host_info->alloc_phys); 922 for (i = 0; i < avp->max_tx_queues; i++) { 923 avp->tx_q[i] = avp_dev_translate_address(eth_dev, 924 host_info->tx_phys + (i * host_info->tx_size)); 925 926 avp->alloc_q[i] = avp_dev_translate_address(eth_dev, 927 host_info->alloc_phys + (i * host_info->alloc_size)); 928 } 929 930 PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n", 931 host_info->rx_phys); 932 PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n", 933 host_info->free_phys); 934 for (i = 0; i < avp->max_rx_queues; i++) { 935 avp->rx_q[i] = avp_dev_translate_address(eth_dev, 936 host_info->rx_phys + (i * host_info->rx_size)); 937 avp->free_q[i] = avp_dev_translate_address(eth_dev, 938 host_info->free_phys + (i * host_info->free_size)); 939 } 940 941 PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n", 942 host_info->req_phys); 943 PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n", 944 host_info->resp_phys); 945 PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n", 946 host_info->sync_phys); 947 PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n", 948 host_info->mbuf_phys); 
949 avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys); 950 avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys); 951 avp->sync_addr = 952 avp_dev_translate_address(eth_dev, host_info->sync_phys); 953 avp->mbuf_addr = 954 avp_dev_translate_address(eth_dev, host_info->mbuf_phys); 955 956 /* 957 * store the host mbuf virtual address so that we can calculate 958 * relative offsets for each mbuf as they are processed 959 */ 960 avp->host_mbuf_addr = host_info->mbuf_va; 961 avp->host_sync_addr = host_info->sync_va; 962 963 /* 964 * store the maximum packet length that is supported by the host. 965 */ 966 avp->max_rx_pkt_len = host_info->max_rx_pkt_len; 967 PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n", 968 host_info->max_rx_pkt_len); 969 970 return 0; 971 } 972 973 /* 974 * This function is based on probe() function in avp_pci.c 975 * It returns 0 on success. 976 */ 977 static int 978 eth_avp_dev_init(struct rte_eth_dev *eth_dev) 979 { 980 struct avp_dev *avp = 981 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 982 struct rte_pci_device *pci_dev; 983 int ret; 984 985 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 986 eth_dev->dev_ops = &avp_eth_dev_ops; 987 eth_dev->rx_pkt_burst = &avp_recv_pkts; 988 eth_dev->tx_pkt_burst = &avp_xmit_pkts; 989 990 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 991 /* 992 * no setup required on secondary processes. All data is saved 993 * in dev_private by the primary process. All resource should 994 * be mapped to the same virtual address so all pointers should 995 * be valid. 
996 */ 997 if (eth_dev->data->scattered_rx) { 998 PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); 999 eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; 1000 eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; 1001 } 1002 return 0; 1003 } 1004 1005 rte_eth_copy_pci_info(eth_dev, pci_dev); 1006 1007 /* Check current migration status */ 1008 if (avp_dev_migration_pending(eth_dev)) { 1009 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n"); 1010 return -EBUSY; 1011 } 1012 1013 /* Check BAR resources */ 1014 ret = avp_dev_check_regions(eth_dev); 1015 if (ret < 0) { 1016 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n", 1017 ret); 1018 return ret; 1019 } 1020 1021 /* Enable interrupts */ 1022 ret = avp_dev_setup_interrupts(eth_dev); 1023 if (ret < 0) { 1024 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret); 1025 return ret; 1026 } 1027 1028 /* Handle each subtype */ 1029 ret = avp_dev_create(pci_dev, eth_dev); 1030 if (ret < 0) { 1031 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret); 1032 return ret; 1033 } 1034 1035 /* Allocate memory for storing MAC addresses */ 1036 eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0); 1037 if (eth_dev->data->mac_addrs == NULL) { 1038 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n", 1039 ETHER_ADDR_LEN); 1040 return -ENOMEM; 1041 } 1042 1043 /* Get a mac from device config */ 1044 ether_addr_copy(&avp->ethaddr, ð_dev->data->mac_addrs[0]); 1045 1046 return 0; 1047 } 1048 1049 static int 1050 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev) 1051 { 1052 int ret; 1053 1054 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1055 return -EPERM; 1056 1057 if (eth_dev->data == NULL) 1058 return 0; 1059 1060 ret = avp_dev_disable_interrupts(eth_dev); 1061 if (ret != 0) { 1062 PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret); 1063 return ret; 1064 } 1065 1066 if (eth_dev->data->mac_addrs != NULL) { 1067 
rte_free(eth_dev->data->mac_addrs); 1068 eth_dev->data->mac_addrs = NULL; 1069 } 1070 1071 return 0; 1072 } 1073 1074 static int 1075 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 1076 struct rte_pci_device *pci_dev) 1077 { 1078 struct rte_eth_dev *eth_dev; 1079 int ret; 1080 1081 eth_dev = rte_eth_dev_pci_allocate(pci_dev, 1082 sizeof(struct avp_adapter)); 1083 if (eth_dev == NULL) 1084 return -ENOMEM; 1085 1086 ret = eth_avp_dev_init(eth_dev); 1087 if (ret) 1088 rte_eth_dev_pci_release(eth_dev); 1089 1090 return ret; 1091 } 1092 1093 static int 1094 eth_avp_pci_remove(struct rte_pci_device *pci_dev) 1095 { 1096 return rte_eth_dev_pci_generic_remove(pci_dev, 1097 eth_avp_dev_uninit); 1098 } 1099 1100 static struct rte_pci_driver rte_avp_pmd = { 1101 .id_table = pci_id_avp_map, 1102 .drv_flags = RTE_PCI_DRV_NEED_MAPPING, 1103 .probe = eth_avp_pci_probe, 1104 .remove = eth_avp_pci_remove, 1105 }; 1106 1107 static int 1108 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev, 1109 struct avp_dev *avp) 1110 { 1111 unsigned int max_rx_pkt_len; 1112 1113 max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len; 1114 1115 if ((max_rx_pkt_len > avp->guest_mbuf_size) || 1116 (max_rx_pkt_len > avp->host_mbuf_size)) { 1117 /* 1118 * If the guest MTU is greater than either the host or guest 1119 * buffers then chained mbufs have to be enabled in the TX 1120 * direction. It is assumed that the application will not need 1121 * to send packets larger than their max_rx_pkt_len (MRU). 1122 */ 1123 return 1; 1124 } 1125 1126 if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) || 1127 (avp->max_rx_pkt_len > avp->host_mbuf_size)) { 1128 /* 1129 * If the host MRU is greater than its own mbuf size or the 1130 * guest mbuf size then chained mbufs have to be enabled in the 1131 * RX direction. 
1132 */ 1133 return 1; 1134 } 1135 1136 return 0; 1137 } 1138 1139 static int 1140 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, 1141 uint16_t rx_queue_id, 1142 uint16_t nb_rx_desc, 1143 unsigned int socket_id, 1144 const struct rte_eth_rxconf *rx_conf, 1145 struct rte_mempool *pool) 1146 { 1147 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 1148 struct rte_pktmbuf_pool_private *mbp_priv; 1149 struct avp_queue *rxq; 1150 1151 if (rx_queue_id >= eth_dev->data->nb_rx_queues) { 1152 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n", 1153 rx_queue_id, eth_dev->data->nb_rx_queues); 1154 return -EINVAL; 1155 } 1156 1157 /* Save mbuf pool pointer */ 1158 avp->pool = pool; 1159 1160 /* Save the local mbuf size */ 1161 mbp_priv = rte_mempool_get_priv(pool); 1162 avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size); 1163 avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM; 1164 1165 if (avp_dev_enable_scattered(eth_dev, avp)) { 1166 if (!eth_dev->data->scattered_rx) { 1167 PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); 1168 eth_dev->data->scattered_rx = 1; 1169 eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; 1170 eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; 1171 } 1172 } 1173 1174 PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n", 1175 avp->max_rx_pkt_len, 1176 eth_dev->data->dev_conf.rxmode.max_rx_pkt_len, 1177 avp->host_mbuf_size, 1178 avp->guest_mbuf_size); 1179 1180 /* allocate a queue object */ 1181 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue), 1182 RTE_CACHE_LINE_SIZE, socket_id); 1183 if (rxq == NULL) { 1184 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n"); 1185 return -ENOMEM; 1186 } 1187 1188 /* save back pointers to AVP and Ethernet devices */ 1189 rxq->avp = avp; 1190 rxq->dev_data = eth_dev->data; 1191 eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq; 1192 1193 /* setup the queue receive mapping for the current 
queue. */ 1194 _avp_set_rx_queue_mappings(eth_dev, rx_queue_id); 1195 1196 PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq); 1197 1198 (void)nb_rx_desc; 1199 (void)rx_conf; 1200 return 0; 1201 } 1202 1203 static int 1204 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev, 1205 uint16_t tx_queue_id, 1206 uint16_t nb_tx_desc, 1207 unsigned int socket_id, 1208 const struct rte_eth_txconf *tx_conf) 1209 { 1210 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 1211 struct avp_queue *txq; 1212 1213 if (tx_queue_id >= eth_dev->data->nb_tx_queues) { 1214 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n", 1215 tx_queue_id, eth_dev->data->nb_tx_queues); 1216 return -EINVAL; 1217 } 1218 1219 /* allocate a queue object */ 1220 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue), 1221 RTE_CACHE_LINE_SIZE, socket_id); 1222 if (txq == NULL) { 1223 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n"); 1224 return -ENOMEM; 1225 } 1226 1227 /* only the configured set of transmit queues are used */ 1228 txq->queue_id = tx_queue_id; 1229 txq->queue_base = tx_queue_id; 1230 txq->queue_limit = tx_queue_id; 1231 1232 /* save back pointers to AVP and Ethernet devices */ 1233 txq->avp = avp; 1234 txq->dev_data = eth_dev->data; 1235 eth_dev->data->tx_queues[tx_queue_id] = (void *)txq; 1236 1237 PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq); 1238 1239 (void)nb_tx_desc; 1240 (void)tx_conf; 1241 return 0; 1242 } 1243 1244 static inline int 1245 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b) 1246 { 1247 uint16_t *_a = (uint16_t *)&a->addr_bytes[0]; 1248 uint16_t *_b = (uint16_t *)&b->addr_bytes[0]; 1249 return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]); 1250 } 1251 1252 static inline int 1253 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m) 1254 { 1255 struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *); 1256 1257 if 
(likely(_avp_cmp_ether_addr(&avp->ethaddr, ð->d_addr) == 0)) { 1258 /* allow all packets destined to our address */ 1259 return 0; 1260 } 1261 1262 if (likely(is_broadcast_ether_addr(ð->d_addr))) { 1263 /* allow all broadcast packets */ 1264 return 0; 1265 } 1266 1267 if (likely(is_multicast_ether_addr(ð->d_addr))) { 1268 /* allow all multicast packets */ 1269 return 0; 1270 } 1271 1272 if (avp->flags & AVP_F_PROMISC) { 1273 /* allow all packets when in promiscuous mode */ 1274 return 0; 1275 } 1276 1277 return -1; 1278 } 1279 1280 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS 1281 static inline void 1282 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf) 1283 { 1284 struct rte_avp_desc *first_buf; 1285 struct rte_avp_desc *pkt_buf; 1286 unsigned int pkt_len; 1287 unsigned int nb_segs; 1288 void *pkt_data; 1289 unsigned int i; 1290 1291 first_buf = avp_dev_translate_buffer(avp, buf); 1292 1293 i = 0; 1294 pkt_len = 0; 1295 nb_segs = first_buf->nb_segs; 1296 do { 1297 /* Adjust pointers for guest addressing */ 1298 pkt_buf = avp_dev_translate_buffer(avp, buf); 1299 if (pkt_buf == NULL) 1300 rte_panic("bad buffer: segment %u has an invalid address %p\n", 1301 i, buf); 1302 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1303 if (pkt_data == NULL) 1304 rte_panic("bad buffer: segment %u has a NULL data pointer\n", 1305 i); 1306 if (pkt_buf->data_len == 0) 1307 rte_panic("bad buffer: segment %u has 0 data length\n", 1308 i); 1309 pkt_len += pkt_buf->data_len; 1310 nb_segs--; 1311 i++; 1312 1313 } while (nb_segs && (buf = pkt_buf->next) != NULL); 1314 1315 if (nb_segs != 0) 1316 rte_panic("bad buffer: expected %u segments found %u\n", 1317 first_buf->nb_segs, (first_buf->nb_segs - nb_segs)); 1318 if (pkt_len != first_buf->pkt_len) 1319 rte_panic("bad buffer: expected length %u found %u\n", 1320 first_buf->pkt_len, pkt_len); 1321 } 1322 1323 #define avp_dev_buffer_sanity_check(a, b) \ 1324 __avp_dev_buffer_sanity_check((a), (b)) 1325 1326 #else /* 
RTE_LIBRTE_AVP_DEBUG_BUFFERS */

/* buffer sanity checking compiles to nothing outside of debug builds */
#define avp_dev_buffer_sanity_check(a, b) do {} while (0)

#endif

/*
 * Copy a host buffer chain to a set of mbufs.  This function assumes that
 * there are exactly the required number of mbufs to copy all source bytes.
 *
 * avp:   device used to translate host buffer addresses
 * buf:   host address of the first descriptor of the chain
 * mbufs: destination mbufs; they are linked together in array order
 * count: number of entries in mbufs
 *
 * Returns mbufs[0] with ol_flags, nb_segs, pkt_len and vlan_tci filled in
 * from the first host descriptor.
 *
 * NOTE(review): the fill level of each destination is tracked through its
 * data_len, so the mbufs are presumably expected to arrive with
 * data_len == 0 (freshly allocated) -- confirm against callers.
 */
static inline struct rte_mbuf *
avp_dev_copy_from_buffers(struct avp_dev *avp,
			  struct rte_avp_desc *buf,
			  struct rte_mbuf **mbufs,
			  unsigned int count)
{
	struct rte_mbuf *m_previous = NULL;
	struct rte_avp_desc *pkt_buf;
	unsigned int total_length = 0;
	unsigned int copy_length;
	unsigned int src_offset;
	struct rte_mbuf *m;
	uint16_t ol_flags;
	uint16_t vlan_tci;
	void *pkt_data;
	unsigned int i;

	avp_dev_buffer_sanity_check(avp, buf);

	/* setup the first source buffer */
	pkt_buf = avp_dev_translate_buffer(avp, buf);
	pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
	total_length = pkt_buf->pkt_len;
	src_offset = 0;

	/* VLAN information is only carried by the first descriptor */
	if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
		ol_flags = PKT_RX_VLAN;
		vlan_tci = pkt_buf->vlan_tci;
	} else {
		ol_flags = 0;
		vlan_tci = 0;
	}

	for (i = 0; (i < count) && (buf != NULL); i++) {
		/* fill each destination buffer */
		m = mbufs[i];

		if (m_previous != NULL)
			m_previous->next = m;

		m_previous = m;

		do {
			/*
			 * Copy as many source buffers as will fit in the
			 * destination buffer.
			 */
			copy_length = RTE_MIN((avp->guest_mbuf_size -
					       rte_pktmbuf_data_len(m)),
					      (pkt_buf->data_len -
					       src_offset));
			rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
					       rte_pktmbuf_data_len(m)),
				   RTE_PTR_ADD(pkt_data, src_offset),
				   copy_length);
			rte_pktmbuf_data_len(m) += copy_length;
			src_offset += copy_length;

			if (likely(src_offset == pkt_buf->data_len)) {
				/* need a new source buffer */
				buf = pkt_buf->next;
				if (buf != NULL) {
					pkt_buf = avp_dev_translate_buffer(
						avp, buf);
					pkt_data = avp_dev_translate_buffer(
						avp, pkt_buf->data);
					src_offset = 0;
				}
			}

			if (unlikely(rte_pktmbuf_data_len(m) ==
				     avp->guest_mbuf_size)) {
				/* need a new destination mbuf */
				break;
			}

		} while (buf != NULL);
	}

	/* packet level metadata lives in the first mbuf of the chain */
	m = mbufs[0];
	m->ol_flags = ol_flags;
	m->nb_segs = count;
	rte_pktmbuf_pkt_len(m) = total_length;
	m->vlan_tci = vlan_tci;

	__rte_mbuf_sanity_check(m, 1);

	return m;
}

/*
 * Burst receive handler used when chained mbufs are enabled.  Each host
 * buffer chain is copied into as many guest mbufs as its length requires.
 * Returns the number of packets stored in rx_pkts.
 */
static uint16_t
avp_recv_scattered_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
	struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
	struct avp_dev *avp = rxq->avp;
	struct rte_avp_desc *pkt_buf;
	struct rte_avp_fifo *free_q;
	struct rte_avp_fifo *rx_q;
	struct rte_avp_desc *buf;
	unsigned int count, avail, n;
	unsigned int guest_mbuf_size;
	struct rte_mbuf *m;
	unsigned int required;
	unsigned int buf_len;
	unsigned int port_id;
	unsigned int i;

	if (unlikely(avp->flags & AVP_F_DETACHED)) {
		/* VM live migration in progress */
		return 0;
	}

	guest_mbuf_size = avp->guest_mbuf_size;
	port_id = avp->port_id;
	rx_q = avp->rx_q[rxq->queue_id];
	free_q =
avp->free_q[rxq->queue_id]; 1456 1457 /* setup next queue to service */ 1458 rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? 1459 (rxq->queue_id + 1) : rxq->queue_base; 1460 1461 /* determine how many slots are available in the free queue */ 1462 count = avp_fifo_free_count(free_q); 1463 1464 /* determine how many packets are available in the rx queue */ 1465 avail = avp_fifo_count(rx_q); 1466 1467 /* determine how many packets can be received */ 1468 count = RTE_MIN(count, avail); 1469 count = RTE_MIN(count, nb_pkts); 1470 count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); 1471 1472 if (unlikely(count == 0)) { 1473 /* no free buffers, or no buffers on the rx queue */ 1474 return 0; 1475 } 1476 1477 /* retrieve pending packets */ 1478 n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); 1479 PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", 1480 count, rx_q); 1481 1482 count = 0; 1483 for (i = 0; i < n; i++) { 1484 /* prefetch next entry while processing current one */ 1485 if (i + 1 < n) { 1486 pkt_buf = avp_dev_translate_buffer(avp, 1487 avp_bufs[i + 1]); 1488 rte_prefetch0(pkt_buf); 1489 } 1490 buf = avp_bufs[i]; 1491 1492 /* Peek into the first buffer to determine the total length */ 1493 pkt_buf = avp_dev_translate_buffer(avp, buf); 1494 buf_len = pkt_buf->pkt_len; 1495 1496 /* Allocate enough mbufs to receive the entire packet */ 1497 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size; 1498 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) { 1499 rxq->dev_data->rx_mbuf_alloc_failed++; 1500 continue; 1501 } 1502 1503 /* Copy the data from the buffers to our mbufs */ 1504 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required); 1505 1506 /* finalize mbuf */ 1507 m->port = port_id; 1508 1509 if (_avp_mac_filter(avp, m) != 0) { 1510 /* silently discard packets not destined to our MAC */ 1511 rte_pktmbuf_free(m); 1512 continue; 1513 } 1514 1515 /* return new mbuf to caller */ 1516 rx_pkts[count++] = m; 1517 rxq->bytes += 
buf_len; 1518 } 1519 1520 rxq->packets += count; 1521 1522 /* return the buffers to the free queue */ 1523 avp_fifo_put(free_q, (void **)&avp_bufs[0], n); 1524 1525 return count; 1526 } 1527 1528 1529 static uint16_t 1530 avp_recv_pkts(void *rx_queue, 1531 struct rte_mbuf **rx_pkts, 1532 uint16_t nb_pkts) 1533 { 1534 struct avp_queue *rxq = (struct avp_queue *)rx_queue; 1535 struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST]; 1536 struct avp_dev *avp = rxq->avp; 1537 struct rte_avp_desc *pkt_buf; 1538 struct rte_avp_fifo *free_q; 1539 struct rte_avp_fifo *rx_q; 1540 unsigned int count, avail, n; 1541 unsigned int pkt_len; 1542 struct rte_mbuf *m; 1543 char *pkt_data; 1544 unsigned int i; 1545 1546 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1547 /* VM live migration in progress */ 1548 return 0; 1549 } 1550 1551 rx_q = avp->rx_q[rxq->queue_id]; 1552 free_q = avp->free_q[rxq->queue_id]; 1553 1554 /* setup next queue to service */ 1555 rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? 1556 (rxq->queue_id + 1) : rxq->queue_base; 1557 1558 /* determine how many slots are available in the free queue */ 1559 count = avp_fifo_free_count(free_q); 1560 1561 /* determine how many packets are available in the rx queue */ 1562 avail = avp_fifo_count(rx_q); 1563 1564 /* determine how many packets can be received */ 1565 count = RTE_MIN(count, avail); 1566 count = RTE_MIN(count, nb_pkts); 1567 count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); 1568 1569 if (unlikely(count == 0)) { 1570 /* no free buffers, or no buffers on the rx queue */ 1571 return 0; 1572 } 1573 1574 /* retrieve pending packets */ 1575 n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); 1576 PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", 1577 count, rx_q); 1578 1579 count = 0; 1580 for (i = 0; i < n; i++) { 1581 /* prefetch next entry while processing current one */ 1582 if (i < n - 1) { 1583 pkt_buf = avp_dev_translate_buffer(avp, 1584 avp_bufs[i + 1]); 1585 rte_prefetch0(pkt_buf); 1586 
}

		/* Adjust host pointers for guest addressing */
		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
		pkt_len = pkt_buf->pkt_len;

		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
			     (pkt_buf->nb_segs > 1))) {
			/*
			 * application should be using the scattered receive
			 * function
			 */
			rxq->errors++;
			continue;
		}

		/* allocate a local mbuf to hold the received packet */
		m = rte_pktmbuf_alloc(avp->pool);
		if (unlikely(m == NULL)) {
			rxq->dev_data->rx_mbuf_alloc_failed++;
			continue;
		}

		/* copy data out of the host buffer to our buffer */
		m->data_off = RTE_PKTMBUF_HEADROOM;
		rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);

		/* initialize the local mbuf */
		rte_pktmbuf_data_len(m) = pkt_len;
		rte_pktmbuf_pkt_len(m) = pkt_len;
		m->port = avp->port_id;

		/* propagate the VLAN tag reported by the host, if any */
		if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
			m->ol_flags = PKT_RX_VLAN;
			m->vlan_tci = pkt_buf->vlan_tci;
		}

		if (_avp_mac_filter(avp, m) != 0) {
			/* silently discard packets not destined to our MAC */
			rte_pktmbuf_free(m);
			continue;
		}

		/* return new mbuf to caller */
		rx_pkts[count++] = m;
		rxq->bytes += pkt_len;
	}

	rxq->packets += count;

	/*
	 * return the buffers to the free queue; this includes the buffers of
	 * packets that were dropped above
	 */
	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);

	return count;
}

/*
 * Copy a chained mbuf to a set of host buffers.  This function assumes that
 * there are sufficient destination buffers to contain the entire source
 * packet.
*/
/*
 * avp:     device used to translate host buffer addresses
 * mbuf:    first segment of the packet to copy
 * buffers: host addresses of the destination descriptors
 * count:   number of entries in buffers
 *
 * The destination descriptors are chained in array order and the first one
 * receives nb_segs, pkt_len and (when requested by the mbuf) the VLAN tag.
 * Returns the packet length.
 *
 * NOTE(review): the fill level of each descriptor is tracked through its
 * data_len, which is never reset here; presumably buffers taken from the
 * alloc queue arrive with data_len == 0 -- confirm against the host side.
 * NOTE(review): the return type is uint16_t while total_length is size_t,
 * so lengths above 65535 would be truncated -- confirm this cannot occur.
 */
static inline uint16_t
avp_dev_copy_to_buffers(struct avp_dev *avp,
			struct rte_mbuf *mbuf,
			struct rte_avp_desc **buffers,
			unsigned int count)
{
	struct rte_avp_desc *previous_buf = NULL;
	struct rte_avp_desc *first_buf = NULL;
	struct rte_avp_desc *pkt_buf;
	struct rte_avp_desc *buf;
	size_t total_length;
	struct rte_mbuf *m;
	size_t copy_length;
	size_t src_offset;
	char *pkt_data;
	unsigned int i;

	__rte_mbuf_sanity_check(mbuf, 1);

	m = mbuf;
	src_offset = 0;
	total_length = rte_pktmbuf_pkt_len(m);
	for (i = 0; (i < count) && (m != NULL); i++) {
		/* fill each destination buffer */
		buf = buffers[i];

		if (i < count - 1) {
			/* prefetch next entry while processing this one */
			pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
			rte_prefetch0(pkt_buf);
		}

		/* Adjust pointers for guest addressing */
		pkt_buf = avp_dev_translate_buffer(avp, buf);
		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);

		/*
		 * setup the buffer chain; the link stores the host address
		 * (buf), not the translated guest pointer
		 */
		if (previous_buf != NULL)
			previous_buf->next = buf;
		else
			first_buf = pkt_buf;

		previous_buf = pkt_buf;

		do {
			/*
			 * copy as many source mbuf segments as will fit in the
			 * destination buffer.
			 */
			copy_length = RTE_MIN((avp->host_mbuf_size -
					       pkt_buf->data_len),
					      (rte_pktmbuf_data_len(m) -
					       src_offset));
			rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
				   RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
					       src_offset),
				   copy_length);
			pkt_buf->data_len += copy_length;
			src_offset += copy_length;

			if (likely(src_offset == rte_pktmbuf_data_len(m))) {
				/* need a new source buffer */
				m = m->next;
				src_offset = 0;
			}

			if (unlikely(pkt_buf->data_len ==
				     avp->host_mbuf_size)) {
				/* need a new destination buffer */
				break;
			}

		} while (m != NULL);
	}

	/* packet level metadata lives in the first descriptor */
	first_buf->nb_segs = count;
	first_buf->pkt_len = total_length;

	if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
		first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
		first_buf->vlan_tci = mbuf->vlan_tci;
	}

	avp_dev_buffer_sanity_check(avp, buffers[0]);

	return total_length;
}


/*
 * Burst transmit handler used when chained mbufs are enabled.  Each packet
 * is copied into as many host buffers as its length requires.  Returns the
 * number of packets placed on the host transmit queue.
 */
static uint16_t
avp_xmit_scattered_pkts(void *tx_queue,
			struct rte_mbuf **tx_pkts,
			uint16_t nb_pkts)
{
	struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
				       RTE_AVP_MAX_MBUF_SEGMENTS)];
	struct avp_queue *txq = (struct avp_queue *)tx_queue;
	struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
	struct avp_dev *avp = txq->avp;
	struct rte_avp_fifo *alloc_q;
	struct rte_avp_fifo *tx_q;
	unsigned int count, avail, n;
	unsigned int orig_nb_pkts;
	struct rte_mbuf *m;
	unsigned int required;
	unsigned int segments;
	unsigned int tx_bytes;
	unsigned int i;

	orig_nb_pkts = nb_pkts;
	if (unlikely(avp->flags & AVP_F_DETACHED)) {
		/* VM live migration in progress */
		/* TODO ... buffer for X packets then drop?
*/
		txq->errors += nb_pkts;
		return 0;
	}

	tx_q = avp->tx_q[txq->queue_id];
	alloc_q = avp->alloc_q[txq->queue_id];

	/* limit the number of transmitted packets to the max burst size */
	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
		nb_pkts = AVP_MAX_TX_BURST;

	/*
	 * determine how many buffers are available to copy into; capped at
	 * the capacity of the local avp_bufs[] array
	 */
	avail = avp_fifo_count(alloc_q);
	if (unlikely(avail > (AVP_MAX_TX_BURST *
			      RTE_AVP_MAX_MBUF_SEGMENTS)))
		avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;

	/* determine how many slots are available in the transmit queue */
	count = avp_fifo_free_count(tx_q);

	/* determine how many packets can be sent */
	nb_pkts = RTE_MIN(count, nb_pkts);

	/* determine how many packets will fit in the available buffers */
	count = 0;
	segments = 0;
	for (i = 0; i < nb_pkts; i++) {
		m = tx_pkts[i];
		if (likely(i < (unsigned int)nb_pkts - 1)) {
			/* prefetch next entry while processing this one */
			rte_prefetch0(tx_pkts[i + 1]);
		}
		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
			avp->host_mbuf_size;

		/* stop at the first packet that cannot be sent */
		if (unlikely((required == 0) ||
			     (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
			break;
		else if (unlikely(required + segments > avail))
			break;
		segments += required;
		count++;
	}
	nb_pkts = count;

	if (unlikely(nb_pkts == 0)) {
		/* no available buffers, or no space on the tx queue */
		txq->errors += orig_nb_pkts;
		return 0;
	}

	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
		   nb_pkts, tx_q);

	/* retrieve sufficient send buffers */
	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
	if (unlikely(n != segments)) {
		/*
		 * NOTE(review): any buffers that were dequeued here are not
		 * put back on the alloc queue -- confirm whether
		 * avp_fifo_get() can return a partial count.
		 */
		PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
			   "n=%u, segments=%u, orig=%u\n",
			   n, segments, orig_nb_pkts);
		txq->errors += orig_nb_pkts;
		return 0;
	}

	tx_bytes = 0;
	count = 0;
	for (i = 0; i < nb_pkts; i++) {
		/* process each packet to be transmitted */
		m = tx_pkts[i];

		/* determine how many buffers are required for this packet */
		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
			avp->host_mbuf_size;

		/* count indexes the first unused entry of avp_bufs[] */
		tx_bytes += avp_dev_copy_to_buffers(avp, m,
						    &avp_bufs[count], required);
		tx_bufs[i] = avp_bufs[count];
		count += required;

		/* free the original mbuf */
		rte_pktmbuf_free(m);
	}

	txq->packets += nb_pkts;
	txq->bytes += tx_bytes;

#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
	for (i = 0; i < nb_pkts; i++)
		avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
#endif

	/* send the packets; only the head of each chain is queued */
	n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
	if (unlikely(n != orig_nb_pkts))
		txq->errors += (orig_nb_pkts - n);

	return n;
}


/*
 * Burst transmit handler used when every packet fits in a single host
 * buffer.  Returns the number of packets placed on the host transmit queue.
 */
static uint16_t
avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct avp_queue *txq = (struct avp_queue *)tx_queue;
	struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
	struct avp_dev *avp = txq->avp;
	struct rte_avp_desc *pkt_buf;
	struct rte_avp_fifo *alloc_q;
	struct rte_avp_fifo *tx_q;
	unsigned int count, avail, n;
	struct rte_mbuf *m;
	unsigned int pkt_len;
	unsigned int tx_bytes;
	char *pkt_data;
	unsigned int i;

	if (unlikely(avp->flags & AVP_F_DETACHED)) {
		/* VM live migration in progress */
		/* TODO ... buffer for X packets then drop?!
*/ 1880 txq->errors++; 1881 return 0; 1882 } 1883 1884 tx_q = avp->tx_q[txq->queue_id]; 1885 alloc_q = avp->alloc_q[txq->queue_id]; 1886 1887 /* limit the number of transmitted packets to the max burst size */ 1888 if (unlikely(nb_pkts > AVP_MAX_TX_BURST)) 1889 nb_pkts = AVP_MAX_TX_BURST; 1890 1891 /* determine how many buffers are available to copy into */ 1892 avail = avp_fifo_count(alloc_q); 1893 1894 /* determine how many slots are available in the transmit queue */ 1895 count = avp_fifo_free_count(tx_q); 1896 1897 /* determine how many packets can be sent */ 1898 count = RTE_MIN(count, avail); 1899 count = RTE_MIN(count, nb_pkts); 1900 1901 if (unlikely(count == 0)) { 1902 /* no available buffers, or no space on the tx queue */ 1903 txq->errors += nb_pkts; 1904 return 0; 1905 } 1906 1907 PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n", 1908 count, tx_q); 1909 1910 /* retrieve sufficient send buffers */ 1911 n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count); 1912 if (unlikely(n != count)) { 1913 txq->errors++; 1914 return 0; 1915 } 1916 1917 tx_bytes = 0; 1918 for (i = 0; i < count; i++) { 1919 /* prefetch next entry while processing the current one */ 1920 if (i < count - 1) { 1921 pkt_buf = avp_dev_translate_buffer(avp, 1922 avp_bufs[i + 1]); 1923 rte_prefetch0(pkt_buf); 1924 } 1925 1926 /* process each packet to be transmitted */ 1927 m = tx_pkts[i]; 1928 1929 /* Adjust pointers for guest addressing */ 1930 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]); 1931 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1932 pkt_len = rte_pktmbuf_pkt_len(m); 1933 1934 if (unlikely((pkt_len > avp->guest_mbuf_size) || 1935 (pkt_len > avp->host_mbuf_size))) { 1936 /* 1937 * application should be using the scattered transmit 1938 * function; send it truncated to avoid the performance 1939 * hit of having to manage returning the already 1940 * allocated buffer to the free list. 
This should not 1941 * happen since the application should have set the 1942 * max_rx_pkt_len based on its MTU and it should be 1943 * policing its own packet sizes. 1944 */ 1945 txq->errors++; 1946 pkt_len = RTE_MIN(avp->guest_mbuf_size, 1947 avp->host_mbuf_size); 1948 } 1949 1950 /* copy data out of our mbuf and into the AVP buffer */ 1951 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len); 1952 pkt_buf->pkt_len = pkt_len; 1953 pkt_buf->data_len = pkt_len; 1954 pkt_buf->nb_segs = 1; 1955 pkt_buf->next = NULL; 1956 1957 if (m->ol_flags & PKT_TX_VLAN_PKT) { 1958 pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT; 1959 pkt_buf->vlan_tci = m->vlan_tci; 1960 } 1961 1962 tx_bytes += pkt_len; 1963 1964 /* free the original mbuf */ 1965 rte_pktmbuf_free(m); 1966 } 1967 1968 txq->packets += count; 1969 txq->bytes += tx_bytes; 1970 1971 /* send the packets */ 1972 n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count); 1973 1974 return n; 1975 } 1976 1977 static void 1978 avp_dev_rx_queue_release(void *rx_queue) 1979 { 1980 struct avp_queue *rxq = (struct avp_queue *)rx_queue; 1981 struct avp_dev *avp = rxq->avp; 1982 struct rte_eth_dev_data *data = avp->dev_data; 1983 unsigned int i; 1984 1985 for (i = 0; i < avp->num_rx_queues; i++) { 1986 if (data->rx_queues[i] == rxq) 1987 data->rx_queues[i] = NULL; 1988 } 1989 } 1990 1991 static void 1992 avp_dev_tx_queue_release(void *tx_queue) 1993 { 1994 struct avp_queue *txq = (struct avp_queue *)tx_queue; 1995 struct avp_dev *avp = txq->avp; 1996 struct rte_eth_dev_data *data = avp->dev_data; 1997 unsigned int i; 1998 1999 for (i = 0; i < avp->num_tx_queues; i++) { 2000 if (data->tx_queues[i] == txq) 2001 data->tx_queues[i] = NULL; 2002 } 2003 } 2004 2005 static int 2006 avp_dev_configure(struct rte_eth_dev *eth_dev) 2007 { 2008 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 2009 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2010 struct rte_avp_device_info *host_info; 2011 struct 
rte_avp_device_config config; 2012 int mask = 0; 2013 void *addr; 2014 int ret; 2015 2016 rte_spinlock_lock(&avp->lock); 2017 if (avp->flags & AVP_F_DETACHED) { 2018 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2019 ret = -ENOTSUP; 2020 goto unlock; 2021 } 2022 2023 addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; 2024 host_info = (struct rte_avp_device_info *)addr; 2025 2026 /* Setup required number of queues */ 2027 _avp_set_queue_counts(eth_dev); 2028 2029 mask = (ETH_VLAN_STRIP_MASK | 2030 ETH_VLAN_FILTER_MASK | 2031 ETH_VLAN_EXTEND_MASK); 2032 ret = avp_vlan_offload_set(eth_dev, mask); 2033 if (ret < 0) { 2034 PMD_DRV_LOG(ERR, "VLAN offload set failed by host, ret=%d\n", 2035 ret); 2036 goto unlock; 2037 } 2038 2039 /* update device config */ 2040 memset(&config, 0, sizeof(config)); 2041 config.device_id = host_info->device_id; 2042 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; 2043 config.driver_version = AVP_DPDK_DRIVER_VERSION; 2044 config.features = avp->features; 2045 config.num_tx_queues = avp->num_tx_queues; 2046 config.num_rx_queues = avp->num_rx_queues; 2047 2048 ret = avp_dev_ctrl_set_config(eth_dev, &config); 2049 if (ret < 0) { 2050 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", 2051 ret); 2052 goto unlock; 2053 } 2054 2055 avp->flags |= AVP_F_CONFIGURED; 2056 ret = 0; 2057 2058 unlock: 2059 rte_spinlock_unlock(&avp->lock); 2060 return ret; 2061 } 2062 2063 static int 2064 avp_dev_start(struct rte_eth_dev *eth_dev) 2065 { 2066 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2067 int ret; 2068 2069 rte_spinlock_lock(&avp->lock); 2070 if (avp->flags & AVP_F_DETACHED) { 2071 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2072 ret = -ENOTSUP; 2073 goto unlock; 2074 } 2075 2076 /* disable features that we do not support */ 2077 eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0; 2078 eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0; 2079 
eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0; 2080 eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0; 2081 2082 /* update link state */ 2083 ret = avp_dev_ctrl_set_link_state(eth_dev, 1); 2084 if (ret < 0) { 2085 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", 2086 ret); 2087 goto unlock; 2088 } 2089 2090 /* remember current link state */ 2091 avp->flags |= AVP_F_LINKUP; 2092 2093 ret = 0; 2094 2095 unlock: 2096 rte_spinlock_unlock(&avp->lock); 2097 return ret; 2098 } 2099 2100 static void 2101 avp_dev_stop(struct rte_eth_dev *eth_dev) 2102 { 2103 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2104 int ret; 2105 2106 rte_spinlock_lock(&avp->lock); 2107 if (avp->flags & AVP_F_DETACHED) { 2108 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2109 goto unlock; 2110 } 2111 2112 /* remember current link state */ 2113 avp->flags &= ~AVP_F_LINKUP; 2114 2115 /* update link state */ 2116 ret = avp_dev_ctrl_set_link_state(eth_dev, 0); 2117 if (ret < 0) { 2118 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", 2119 ret); 2120 } 2121 2122 unlock: 2123 rte_spinlock_unlock(&avp->lock); 2124 } 2125 2126 static void 2127 avp_dev_close(struct rte_eth_dev *eth_dev) 2128 { 2129 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2130 int ret; 2131 2132 rte_spinlock_lock(&avp->lock); 2133 if (avp->flags & AVP_F_DETACHED) { 2134 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2135 goto unlock; 2136 } 2137 2138 /* remember current link state */ 2139 avp->flags &= ~AVP_F_LINKUP; 2140 avp->flags &= ~AVP_F_CONFIGURED; 2141 2142 ret = avp_dev_disable_interrupts(eth_dev); 2143 if (ret < 0) { 2144 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n"); 2145 /* continue */ 2146 } 2147 2148 /* update device state */ 2149 ret = avp_dev_ctrl_shutdown(eth_dev); 2150 if (ret < 0) { 2151 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n", 2152 ret); 2153 /* continue */ 
2154 } 2155 2156 unlock: 2157 rte_spinlock_unlock(&avp->lock); 2158 } 2159 2160 static int 2161 avp_dev_link_update(struct rte_eth_dev *eth_dev, 2162 __rte_unused int wait_to_complete) 2163 { 2164 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2165 struct rte_eth_link *link = ð_dev->data->dev_link; 2166 2167 link->link_speed = ETH_SPEED_NUM_10G; 2168 link->link_duplex = ETH_LINK_FULL_DUPLEX; 2169 link->link_status = !!(avp->flags & AVP_F_LINKUP); 2170 2171 return -1; 2172 } 2173 2174 static void 2175 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev) 2176 { 2177 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2178 2179 rte_spinlock_lock(&avp->lock); 2180 if ((avp->flags & AVP_F_PROMISC) == 0) { 2181 avp->flags |= AVP_F_PROMISC; 2182 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n", 2183 eth_dev->data->port_id); 2184 } 2185 rte_spinlock_unlock(&avp->lock); 2186 } 2187 2188 static void 2189 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev) 2190 { 2191 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2192 2193 rte_spinlock_lock(&avp->lock); 2194 if ((avp->flags & AVP_F_PROMISC) != 0) { 2195 avp->flags &= ~AVP_F_PROMISC; 2196 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n", 2197 eth_dev->data->port_id); 2198 } 2199 rte_spinlock_unlock(&avp->lock); 2200 } 2201 2202 static void 2203 avp_dev_info_get(struct rte_eth_dev *eth_dev, 2204 struct rte_eth_dev_info *dev_info) 2205 { 2206 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2207 2208 dev_info->pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 2209 dev_info->max_rx_queues = avp->max_rx_queues; 2210 dev_info->max_tx_queues = avp->max_tx_queues; 2211 dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE; 2212 dev_info->max_rx_pktlen = avp->max_rx_pkt_len; 2213 dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS; 2214 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) { 2215 dev_info->rx_offload_capa = 
DEV_RX_OFFLOAD_VLAN_STRIP; 2216 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; 2217 } 2218 } 2219 2220 static int 2221 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask) 2222 { 2223 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2224 2225 if (mask & ETH_VLAN_STRIP_MASK) { 2226 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) { 2227 if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip) 2228 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD; 2229 else 2230 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD; 2231 } else { 2232 PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n"); 2233 } 2234 } 2235 2236 if (mask & ETH_VLAN_FILTER_MASK) { 2237 if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter) 2238 PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n"); 2239 } 2240 2241 if (mask & ETH_VLAN_EXTEND_MASK) { 2242 if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend) 2243 PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n"); 2244 } 2245 2246 return 0; 2247 } 2248 2249 static int 2250 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats) 2251 { 2252 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2253 unsigned int i; 2254 2255 for (i = 0; i < avp->num_rx_queues; i++) { 2256 struct avp_queue *rxq = avp->dev_data->rx_queues[i]; 2257 2258 if (rxq) { 2259 stats->ipackets += rxq->packets; 2260 stats->ibytes += rxq->bytes; 2261 stats->ierrors += rxq->errors; 2262 2263 stats->q_ipackets[i] += rxq->packets; 2264 stats->q_ibytes[i] += rxq->bytes; 2265 stats->q_errors[i] += rxq->errors; 2266 } 2267 } 2268 2269 for (i = 0; i < avp->num_tx_queues; i++) { 2270 struct avp_queue *txq = avp->dev_data->tx_queues[i]; 2271 2272 if (txq) { 2273 stats->opackets += txq->packets; 2274 stats->obytes += txq->bytes; 2275 stats->oerrors += txq->errors; 2276 2277 stats->q_opackets[i] += txq->packets; 2278 stats->q_obytes[i] += txq->bytes; 2279 stats->q_errors[i] += txq->errors; 2280 } 2281 } 2282 2283 return 0; 2284 
} 2285 2286 static void 2287 avp_dev_stats_reset(struct rte_eth_dev *eth_dev) 2288 { 2289 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2290 unsigned int i; 2291 2292 for (i = 0; i < avp->num_rx_queues; i++) { 2293 struct avp_queue *rxq = avp->dev_data->rx_queues[i]; 2294 2295 if (rxq) { 2296 rxq->bytes = 0; 2297 rxq->packets = 0; 2298 rxq->errors = 0; 2299 } 2300 } 2301 2302 for (i = 0; i < avp->num_tx_queues; i++) { 2303 struct avp_queue *txq = avp->dev_data->tx_queues[i]; 2304 2305 if (txq) { 2306 txq->bytes = 0; 2307 txq->packets = 0; 2308 txq->errors = 0; 2309 } 2310 } 2311 } 2312 2313 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd); 2314 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map); 2315