1 /* 2 * BSD LICENSE 3 * 4 * Copyright (c) 2013-2017, Wind River Systems, Inc. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * 1) Redistributions of source code must retain the above copyright notice, 10 * this list of conditions and the following disclaimer. 11 * 12 * 2) Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 16 * 3) Neither the name of Wind River Systems nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 
*/

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

#include <rte_ethdev.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_spinlock.h>
#include <rte_byteorder.h>
#include <rte_dev.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_io.h>

#include "rte_avp_common.h"
#include "rte_avp_fifo.h"

#include "avp_logs.h"


/*
 * Forward declaration: avp_dev_create() is called by the attach path before
 * its definition appears in this file.
 */
static int avp_dev_create(struct rte_pci_device *pci_dev,
			  struct rte_eth_dev *eth_dev);

/* Device-level callbacks referenced by the avp_eth_dev_ops table */
static int avp_dev_configure(struct rte_eth_dev *dev);
static int avp_dev_start(struct rte_eth_dev *dev);
static void avp_dev_stop(struct rte_eth_dev *dev);
static void avp_dev_close(struct rte_eth_dev *dev);
static void avp_dev_info_get(struct rte_eth_dev *dev,
			     struct rte_eth_dev_info *dev_info);
static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int avp_dev_link_update(struct rte_eth_dev *dev,
			       __rte_unused int wait_to_complete);
static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);

/* Queue setup callbacks referenced by the avp_eth_dev_ops table */
static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
				  uint16_t rx_queue_id,
				  uint16_t nb_rx_desc,
				  unsigned int socket_id,
				  const struct rte_eth_rxconf *rx_conf,
				  struct rte_mempool *pool);

static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
				  uint16_t tx_queue_id,
				  uint16_t nb_tx_desc,
				  unsigned int socket_id,
				  const struct rte_eth_txconf *tx_conf);

/*
 * Burst receive/transmit handlers.  The "scattered" variants are the ones
 * installed when the device is configured for chained mbufs.
 */
static uint16_t avp_recv_scattered_pkts(void *rx_queue,
					struct rte_mbuf **rx_pkts,
					uint16_t nb_pkts);

static uint16_t avp_recv_pkts(void *rx_queue,
			      struct rte_mbuf **rx_pkts,
			      uint16_t nb_pkts);

static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
					struct rte_mbuf **tx_pkts,
					uint16_t nb_pkts);

static uint16_t avp_xmit_pkts(void *tx_queue,
			      struct rte_mbuf **tx_pkts,
			      uint16_t nb_pkts);

/* Queue release callbacks referenced by the avp_eth_dev_ops table */
static void avp_dev_rx_queue_release(void *rxq);
static void avp_dev_tx_queue_release(void *txq);

/* Statistics callbacks referenced by the avp_eth_dev_ops table */
static void avp_dev_stats_get(struct rte_eth_dev *dev,
			      struct rte_eth_stats *stats);
static void avp_dev_stats_reset(struct rte_eth_dev *dev);


/* Resolve the PCI device that backs an ethernet device */
#define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)


#define AVP_MAX_RX_BURST 64
#define AVP_MAX_TX_BURST 64
#define AVP_MAX_MAC_ADDRS 1
#define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN


/*
 * Defines the number of microseconds to wait before checking the response
 * queue for completion.
 */
#define AVP_REQUEST_DELAY_USECS (5000)

/*
 * Defines the number of times to check the response queue for completion before
 * declaring a timeout.
*/
#define AVP_MAX_REQUEST_RETRY (100)

/* Defines the current PCI driver version number */
#define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION

/*
 * The set of PCI devices this driver supports
 */
static const struct rte_pci_id pci_id_avp_map[] = {
	{ .vendor_id = RTE_AVP_PCI_VENDOR_ID,
	  .device_id = RTE_AVP_PCI_DEVICE_ID,
	  .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
	  .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
	  .class_id = RTE_CLASS_ANY_ID,
	},

	{ .vendor_id = 0, /* sentinel */
	},
};

/*
 * dev_ops for avp, bare necessities for basic operation
 */
static const struct eth_dev_ops avp_eth_dev_ops = {
	.dev_configure       = avp_dev_configure,
	.dev_start           = avp_dev_start,
	.dev_stop            = avp_dev_stop,
	.dev_close           = avp_dev_close,
	.dev_infos_get       = avp_dev_info_get,
	.vlan_offload_set    = avp_vlan_offload_set,
	.stats_get           = avp_dev_stats_get,
	.stats_reset         = avp_dev_stats_reset,
	.link_update         = avp_dev_link_update,
	.promiscuous_enable  = avp_dev_promiscuous_enable,
	.promiscuous_disable = avp_dev_promiscuous_disable,
	.rx_queue_setup      = avp_dev_rx_queue_setup,
	.rx_queue_release    = avp_dev_rx_queue_release,
	.tx_queue_setup      = avp_dev_tx_queue_setup,
	.tx_queue_release    = avp_dev_tx_queue_release,
};

/**@{ AVP device flags (bit values stored in avp_dev.flags) */
#define AVP_F_PROMISC (1 << 1)
#define AVP_F_CONFIGURED (1 << 2)
#define AVP_F_LINKUP (1 << 3)
#define AVP_F_DETACHED (1 << 4)
/**@} */

/* Ethernet device validation marker */
#define AVP_ETHDEV_MAGIC 0x92972862

/*
 * Defines the AVP device attributes which are attached to an RTE ethernet
 * device
 */
struct avp_dev {
	uint32_t magic; /**< Memory validation marker */
	uint64_t device_id; /**< Unique system identifier */
	struct ether_addr ethaddr; /**< Host specified MAC address */
	struct rte_eth_dev_data *dev_data;
	/**< Back pointer to ethernet device data */
	volatile uint32_t flags; /**< Device operational flags */
	uint8_t port_id; /**< Ethernet port identifier */
	struct rte_mempool *pool; /**< pkt mbuf mempool */
	unsigned int guest_mbuf_size; /**< local pool mbuf size */
	unsigned int host_mbuf_size; /**< host mbuf size */
	unsigned int max_rx_pkt_len; /**< maximum receive unit */
	uint32_t host_features; /**< Supported feature bitmap */
	uint32_t features; /**< Enabled feature bitmap */
	unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
	unsigned int max_tx_queues; /**< Maximum number of transmit queues */
	unsigned int num_rx_queues; /**< Negotiated number of receive queues */
	unsigned int max_rx_queues; /**< Maximum number of receive queues */

	struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
	struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
	struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
	/**< Allocated mbufs queue */
	struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
	/**< To be freed mbufs queue */

	/* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
	rte_spinlock_t lock;

	/* For request & response */
	struct rte_avp_fifo *req_q; /**< Request queue */
	struct rte_avp_fifo *resp_q; /**< Response queue */
	void *host_sync_addr; /**< (host) Req/Resp Mem address */
	void *sync_addr; /**< Req/Resp Mem address */
	void *host_mbuf_addr; /**< (host) MBUF pool start address */
	void *mbuf_addr; /**< MBUF pool start address */
} __rte_cache_aligned;

/* RTE ethernet private data */
struct avp_adapter {
	struct avp_dev avp;
} __rte_cache_aligned;


/* 32-bit MMIO register write */
#define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))

/* 32-bit MMIO register read */
#define AVP_READ32(_addr) rte_read32_relaxed((_addr))

/* Macro to cast the ethernet
device private data to a AVP object */ 240 #define AVP_DEV_PRIVATE_TO_HW(adapter) \ 241 (&((struct avp_adapter *)adapter)->avp) 242 243 /* 244 * Defines the structure of a AVP device queue for the purpose of handling the 245 * receive and transmit burst callback functions 246 */ 247 struct avp_queue { 248 struct rte_eth_dev_data *dev_data; 249 /**< Backpointer to ethernet device data */ 250 struct avp_dev *avp; /**< Backpointer to AVP device */ 251 uint16_t queue_id; 252 /**< Queue identifier used for indexing current queue */ 253 uint16_t queue_base; 254 /**< Base queue identifier for queue servicing */ 255 uint16_t queue_limit; 256 /**< Maximum queue identifier for queue servicing */ 257 258 uint64_t packets; 259 uint64_t bytes; 260 uint64_t errors; 261 }; 262 263 /* send a request and wait for a response 264 * 265 * @warning must be called while holding the avp->lock spinlock. 266 */ 267 static int 268 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request) 269 { 270 unsigned int retry = AVP_MAX_REQUEST_RETRY; 271 void *resp_addr = NULL; 272 unsigned int count; 273 int ret; 274 275 PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id); 276 277 request->result = -ENOTSUP; 278 279 /* Discard any stale responses before starting a new request */ 280 while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1)) 281 PMD_DRV_LOG(DEBUG, "Discarding stale response\n"); 282 283 rte_memcpy(avp->sync_addr, request, sizeof(*request)); 284 count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1); 285 if (count < 1) { 286 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n", 287 request->req_id); 288 ret = -EBUSY; 289 goto done; 290 } 291 292 while (retry--) { 293 /* wait for a response */ 294 usleep(AVP_REQUEST_DELAY_USECS); 295 296 count = avp_fifo_count(avp->resp_q); 297 if (count >= 1) { 298 /* response received */ 299 break; 300 } 301 302 if ((count < 1) && (retry == 0)) { 303 PMD_DRV_LOG(ERR, "Timeout while waiting for a response for 
%u\n", 304 request->req_id); 305 ret = -ETIME; 306 goto done; 307 } 308 } 309 310 /* retrieve the response */ 311 count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1); 312 if ((count != 1) || (resp_addr != avp->host_sync_addr)) { 313 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n", 314 count, resp_addr, avp->host_sync_addr); 315 ret = -ENODATA; 316 goto done; 317 } 318 319 /* copy to user buffer */ 320 rte_memcpy(request, avp->sync_addr, sizeof(*request)); 321 ret = 0; 322 323 PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n", 324 request->result, request->req_id); 325 326 done: 327 return ret; 328 } 329 330 static int 331 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state) 332 { 333 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 334 struct rte_avp_request request; 335 int ret; 336 337 /* setup a link state change request */ 338 memset(&request, 0, sizeof(request)); 339 request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF; 340 request.if_up = state; 341 342 ret = avp_dev_process_request(avp, &request); 343 344 return ret == 0 ? request.result : ret; 345 } 346 347 static int 348 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev, 349 struct rte_avp_device_config *config) 350 { 351 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 352 struct rte_avp_request request; 353 int ret; 354 355 /* setup a configure request */ 356 memset(&request, 0, sizeof(request)); 357 request.req_id = RTE_AVP_REQ_CFG_DEVICE; 358 memcpy(&request.config, config, sizeof(request.config)); 359 360 ret = avp_dev_process_request(avp, &request); 361 362 return ret == 0 ? 
request.result : ret; 363 } 364 365 static int 366 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev) 367 { 368 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 369 struct rte_avp_request request; 370 int ret; 371 372 /* setup a shutdown request */ 373 memset(&request, 0, sizeof(request)); 374 request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE; 375 376 ret = avp_dev_process_request(avp, &request); 377 378 return ret == 0 ? request.result : ret; 379 } 380 381 /* translate from host mbuf virtual address to guest virtual address */ 382 static inline void * 383 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address) 384 { 385 return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address, 386 (uintptr_t)avp->host_mbuf_addr), 387 (uintptr_t)avp->mbuf_addr); 388 } 389 390 /* translate from host physical address to guest virtual address */ 391 static void * 392 avp_dev_translate_address(struct rte_eth_dev *eth_dev, 393 phys_addr_t host_phys_addr) 394 { 395 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 396 struct rte_mem_resource *resource; 397 struct rte_avp_memmap_info *info; 398 struct rte_avp_memmap *map; 399 off_t offset; 400 void *addr; 401 unsigned int i; 402 403 addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr; 404 resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR]; 405 info = (struct rte_avp_memmap_info *)resource->addr; 406 407 offset = 0; 408 for (i = 0; i < info->nb_maps; i++) { 409 /* search all segments looking for a matching address */ 410 map = &info->maps[i]; 411 412 if ((host_phys_addr >= map->phys_addr) && 413 (host_phys_addr < (map->phys_addr + map->length))) { 414 /* address is within this segment */ 415 offset += (host_phys_addr - map->phys_addr); 416 addr = RTE_PTR_ADD(addr, offset); 417 418 PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n", 419 host_phys_addr, addr); 420 421 return addr; 422 } 423 offset += map->length; 424 } 425 426 return NULL; 427 } 428 429 /* 
verify that the incoming device version is compatible with our version */ 430 static int 431 avp_dev_version_check(uint32_t version) 432 { 433 uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION); 434 uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version); 435 436 if (device <= driver) { 437 /* the host driver version is less than or equal to ours */ 438 return 0; 439 } 440 441 return 1; 442 } 443 444 /* verify that memory regions have expected version and validation markers */ 445 static int 446 avp_dev_check_regions(struct rte_eth_dev *eth_dev) 447 { 448 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 449 struct rte_avp_memmap_info *memmap; 450 struct rte_avp_device_info *info; 451 struct rte_mem_resource *resource; 452 unsigned int i; 453 454 /* Dump resource info for debug */ 455 for (i = 0; i < PCI_MAX_RESOURCE; i++) { 456 resource = &pci_dev->mem_resource[i]; 457 if ((resource->phys_addr == 0) || (resource->len == 0)) 458 continue; 459 460 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n", 461 i, resource->phys_addr, 462 resource->len, resource->addr); 463 464 switch (i) { 465 case RTE_AVP_PCI_MEMMAP_BAR: 466 memmap = (struct rte_avp_memmap_info *)resource->addr; 467 if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) || 468 (memmap->version != RTE_AVP_MEMMAP_VERSION)) { 469 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n", 470 memmap->magic, memmap->version); 471 return -EINVAL; 472 } 473 break; 474 475 case RTE_AVP_PCI_DEVICE_BAR: 476 info = (struct rte_avp_device_info *)resource->addr; 477 if ((info->magic != RTE_AVP_DEVICE_MAGIC) || 478 avp_dev_version_check(info->version)) { 479 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n", 480 info->magic, info->version, 481 AVP_DPDK_DRIVER_VERSION); 482 return -EINVAL; 483 } 484 break; 485 486 case RTE_AVP_PCI_MEMORY_BAR: 487 case RTE_AVP_PCI_MMIO_BAR: 488 if (resource->addr == NULL) { 489 PMD_DRV_LOG(ERR, "Missing 
address space for BAR%u\n", 490 i); 491 return -EINVAL; 492 } 493 break; 494 495 case RTE_AVP_PCI_MSIX_BAR: 496 default: 497 /* no validation required */ 498 break; 499 } 500 } 501 502 return 0; 503 } 504 505 static int 506 avp_dev_detach(struct rte_eth_dev *eth_dev) 507 { 508 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 509 int ret; 510 511 PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n", 512 eth_dev->data->port_id, avp->device_id); 513 514 rte_spinlock_lock(&avp->lock); 515 516 if (avp->flags & AVP_F_DETACHED) { 517 PMD_DRV_LOG(NOTICE, "port %u already detached\n", 518 eth_dev->data->port_id); 519 ret = 0; 520 goto unlock; 521 } 522 523 /* shutdown the device first so the host stops sending us packets. */ 524 ret = avp_dev_ctrl_shutdown(eth_dev); 525 if (ret < 0) { 526 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n", 527 ret); 528 avp->flags &= ~AVP_F_DETACHED; 529 goto unlock; 530 } 531 532 avp->flags |= AVP_F_DETACHED; 533 rte_wmb(); 534 535 /* wait for queues to acknowledge the presence of the detach flag */ 536 rte_delay_ms(1); 537 538 ret = 0; 539 540 unlock: 541 rte_spinlock_unlock(&avp->lock); 542 return ret; 543 } 544 545 static void 546 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) 547 { 548 struct avp_dev *avp = 549 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 550 struct avp_queue *rxq; 551 uint16_t queue_count; 552 uint16_t remainder; 553 554 rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id]; 555 556 /* 557 * Must map all AVP fifos as evenly as possible between the configured 558 * device queues. Each device queue will service a subset of the AVP 559 * fifos. If there is an odd number of device queues the first set of 560 * device queues will get the extra AVP fifos. 
561 */ 562 queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues; 563 remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues; 564 if (rx_queue_id < remainder) { 565 /* these queues must service one extra FIFO */ 566 rxq->queue_base = rx_queue_id * (queue_count + 1); 567 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1; 568 } else { 569 /* these queues service the regular number of FIFO */ 570 rxq->queue_base = ((remainder * (queue_count + 1)) + 571 ((rx_queue_id - remainder) * queue_count)); 572 rxq->queue_limit = rxq->queue_base + queue_count - 1; 573 } 574 575 PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n", 576 rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit); 577 578 rxq->queue_id = rxq->queue_base; 579 } 580 581 static void 582 _avp_set_queue_counts(struct rte_eth_dev *eth_dev) 583 { 584 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 585 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 586 struct rte_avp_device_info *host_info; 587 void *addr; 588 589 addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; 590 host_info = (struct rte_avp_device_info *)addr; 591 592 /* 593 * the transmit direction is not negotiated beyond respecting the max 594 * number of queues because the host can handle arbitrary guest tx 595 * queues (host rx queues). 596 */ 597 avp->num_tx_queues = eth_dev->data->nb_tx_queues; 598 599 /* 600 * the receive direction is more restrictive. The host requires a 601 * minimum number of guest rx queues (host tx queues) therefore 602 * negotiate a value that is at least as large as the host minimum 603 * requirement. If the host and guest values are not identical then a 604 * mapping will be established in the receive_queue_setup function. 
605 */ 606 avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues, 607 eth_dev->data->nb_rx_queues); 608 609 PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n", 610 avp->num_tx_queues, avp->num_rx_queues); 611 } 612 613 static int 614 avp_dev_attach(struct rte_eth_dev *eth_dev) 615 { 616 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 617 struct rte_avp_device_config config; 618 unsigned int i; 619 int ret; 620 621 PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n", 622 eth_dev->data->port_id, avp->device_id); 623 624 rte_spinlock_lock(&avp->lock); 625 626 if (!(avp->flags & AVP_F_DETACHED)) { 627 PMD_DRV_LOG(NOTICE, "port %u already attached\n", 628 eth_dev->data->port_id); 629 ret = 0; 630 goto unlock; 631 } 632 633 /* 634 * make sure that the detached flag is set prior to reconfiguring the 635 * queues. 636 */ 637 avp->flags |= AVP_F_DETACHED; 638 rte_wmb(); 639 640 /* 641 * re-run the device create utility which will parse the new host info 642 * and setup the AVP device queue pointers. 643 */ 644 ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev); 645 if (ret < 0) { 646 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n", 647 ret); 648 goto unlock; 649 } 650 651 if (avp->flags & AVP_F_CONFIGURED) { 652 /* 653 * Update the receive queue mapping to handle cases where the 654 * source and destination hosts have different queue 655 * requirements. As long as the DETACHED flag is asserted the 656 * queue table should not be referenced so it should be safe to 657 * update it. 658 */ 659 _avp_set_queue_counts(eth_dev); 660 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) 661 _avp_set_rx_queue_mappings(eth_dev, i); 662 663 /* 664 * Update the host with our config details so that it knows the 665 * device is active. 
666 */ 667 memset(&config, 0, sizeof(config)); 668 config.device_id = avp->device_id; 669 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; 670 config.driver_version = AVP_DPDK_DRIVER_VERSION; 671 config.features = avp->features; 672 config.num_tx_queues = avp->num_tx_queues; 673 config.num_rx_queues = avp->num_rx_queues; 674 config.if_up = !!(avp->flags & AVP_F_LINKUP); 675 676 ret = avp_dev_ctrl_set_config(eth_dev, &config); 677 if (ret < 0) { 678 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", 679 ret); 680 goto unlock; 681 } 682 } 683 684 rte_wmb(); 685 avp->flags &= ~AVP_F_DETACHED; 686 687 ret = 0; 688 689 unlock: 690 rte_spinlock_unlock(&avp->lock); 691 return ret; 692 } 693 694 static void 695 avp_dev_interrupt_handler(struct rte_intr_handle *intr_handle, 696 void *data) 697 { 698 struct rte_eth_dev *eth_dev = data; 699 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 700 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 701 uint32_t status, value; 702 int ret; 703 704 if (registers == NULL) 705 rte_panic("no mapped MMIO register space\n"); 706 707 /* read the interrupt status register 708 * note: this register clears on read so all raised interrupts must be 709 * handled or remembered for later processing 710 */ 711 status = AVP_READ32( 712 RTE_PTR_ADD(registers, 713 RTE_AVP_INTERRUPT_STATUS_OFFSET)); 714 715 if (status | RTE_AVP_MIGRATION_INTERRUPT_MASK) { 716 /* handle interrupt based on current status */ 717 value = AVP_READ32( 718 RTE_PTR_ADD(registers, 719 RTE_AVP_MIGRATION_STATUS_OFFSET)); 720 switch (value) { 721 case RTE_AVP_MIGRATION_DETACHED: 722 ret = avp_dev_detach(eth_dev); 723 break; 724 case RTE_AVP_MIGRATION_ATTACHED: 725 ret = avp_dev_attach(eth_dev); 726 break; 727 default: 728 PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n", 729 value); 730 ret = -EINVAL; 731 } 732 733 /* acknowledge the request by writing out our current status */ 734 value = (ret == 0 ? 
value : RTE_AVP_MIGRATION_ERROR); 735 AVP_WRITE32(value, 736 RTE_PTR_ADD(registers, 737 RTE_AVP_MIGRATION_ACK_OFFSET)); 738 739 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n"); 740 } 741 742 if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK) 743 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n", 744 status); 745 746 /* re-enable UIO interrupt handling */ 747 ret = rte_intr_enable(intr_handle); 748 if (ret < 0) { 749 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", 750 ret); 751 /* continue */ 752 } 753 } 754 755 static int 756 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev) 757 { 758 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 759 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 760 int ret; 761 762 if (registers == NULL) 763 return -EINVAL; 764 765 /* enable UIO interrupt handling */ 766 ret = rte_intr_enable(&pci_dev->intr_handle); 767 if (ret < 0) { 768 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n", 769 ret); 770 return ret; 771 } 772 773 /* inform the device that all interrupts are enabled */ 774 AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK, 775 RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); 776 777 return 0; 778 } 779 780 static int 781 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev) 782 { 783 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 784 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 785 int ret; 786 787 if (registers == NULL) 788 return 0; 789 790 /* inform the device that all interrupts are disabled */ 791 AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK, 792 RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); 793 794 /* enable UIO interrupt handling */ 795 ret = rte_intr_disable(&pci_dev->intr_handle); 796 if (ret < 0) { 797 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n", 798 ret); 799 return ret; 800 } 801 802 return 0; 803 } 804 805 static int 806 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev) 807 { 
808 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 809 int ret; 810 811 /* register a callback handler with UIO for interrupt notifications */ 812 ret = rte_intr_callback_register(&pci_dev->intr_handle, 813 avp_dev_interrupt_handler, 814 (void *)eth_dev); 815 if (ret < 0) { 816 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n", 817 ret); 818 return ret; 819 } 820 821 /* enable interrupt processing */ 822 return avp_dev_enable_interrupts(eth_dev); 823 } 824 825 static int 826 avp_dev_migration_pending(struct rte_eth_dev *eth_dev) 827 { 828 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 829 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 830 uint32_t value; 831 832 if (registers == NULL) 833 return 0; 834 835 value = AVP_READ32(RTE_PTR_ADD(registers, 836 RTE_AVP_MIGRATION_STATUS_OFFSET)); 837 if (value == RTE_AVP_MIGRATION_DETACHED) { 838 /* migration is in progress; ack it if we have not already */ 839 AVP_WRITE32(value, 840 RTE_PTR_ADD(registers, 841 RTE_AVP_MIGRATION_ACK_OFFSET)); 842 return 1; 843 } 844 return 0; 845 } 846 847 /* 848 * create a AVP device using the supplied device info by first translating it 849 * to guest address space(s). 
850 */ 851 static int 852 avp_dev_create(struct rte_pci_device *pci_dev, 853 struct rte_eth_dev *eth_dev) 854 { 855 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 856 struct rte_avp_device_info *host_info; 857 struct rte_mem_resource *resource; 858 unsigned int i; 859 860 resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR]; 861 if (resource->addr == NULL) { 862 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n", 863 RTE_AVP_PCI_DEVICE_BAR); 864 return -EFAULT; 865 } 866 host_info = (struct rte_avp_device_info *)resource->addr; 867 868 if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) || 869 avp_dev_version_check(host_info->version)) { 870 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n", 871 host_info->magic, host_info->version, 872 AVP_DPDK_DRIVER_VERSION); 873 return -EINVAL; 874 } 875 876 PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n", 877 RTE_AVP_GET_RELEASE_VERSION(host_info->version), 878 RTE_AVP_GET_MAJOR_VERSION(host_info->version), 879 RTE_AVP_GET_MINOR_VERSION(host_info->version)); 880 881 PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n", 882 host_info->min_tx_queues, host_info->max_tx_queues); 883 PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n", 884 host_info->min_rx_queues, host_info->max_rx_queues); 885 PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n", 886 host_info->features); 887 888 if (avp->magic != AVP_ETHDEV_MAGIC) { 889 /* 890 * First time initialization (i.e., not during a VM 891 * migration) 892 */ 893 memset(avp, 0, sizeof(*avp)); 894 avp->magic = AVP_ETHDEV_MAGIC; 895 avp->dev_data = eth_dev->data; 896 avp->port_id = eth_dev->data->port_id; 897 avp->host_mbuf_size = host_info->mbuf_size; 898 avp->host_features = host_info->features; 899 rte_spinlock_init(&avp->lock); 900 memcpy(&avp->ethaddr.addr_bytes[0], 901 host_info->ethaddr, ETHER_ADDR_LEN); 902 /* adjust max values to not exceed our max */ 903 avp->max_tx_queues = 904 
RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES); 905 avp->max_rx_queues = 906 RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES); 907 } else { 908 /* Re-attaching during migration */ 909 910 /* TODO... requires validation of host values */ 911 if ((host_info->features & avp->features) != avp->features) { 912 PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n", 913 avp->features, host_info->features); 914 /* this should not be possible; continue for now */ 915 } 916 } 917 918 /* the device id is allowed to change over migrations */ 919 avp->device_id = host_info->device_id; 920 921 /* translate incoming host addresses to guest address space */ 922 PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n", 923 host_info->tx_phys); 924 PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n", 925 host_info->alloc_phys); 926 for (i = 0; i < avp->max_tx_queues; i++) { 927 avp->tx_q[i] = avp_dev_translate_address(eth_dev, 928 host_info->tx_phys + (i * host_info->tx_size)); 929 930 avp->alloc_q[i] = avp_dev_translate_address(eth_dev, 931 host_info->alloc_phys + (i * host_info->alloc_size)); 932 } 933 934 PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n", 935 host_info->rx_phys); 936 PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n", 937 host_info->free_phys); 938 for (i = 0; i < avp->max_rx_queues; i++) { 939 avp->rx_q[i] = avp_dev_translate_address(eth_dev, 940 host_info->rx_phys + (i * host_info->rx_size)); 941 avp->free_q[i] = avp_dev_translate_address(eth_dev, 942 host_info->free_phys + (i * host_info->free_size)); 943 } 944 945 PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n", 946 host_info->req_phys); 947 PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n", 948 host_info->resp_phys); 949 PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n", 950 host_info->sync_phys); 951 PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n", 952 host_info->mbuf_phys); 
953 avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys); 954 avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys); 955 avp->sync_addr = 956 avp_dev_translate_address(eth_dev, host_info->sync_phys); 957 avp->mbuf_addr = 958 avp_dev_translate_address(eth_dev, host_info->mbuf_phys); 959 960 /* 961 * store the host mbuf virtual address so that we can calculate 962 * relative offsets for each mbuf as they are processed 963 */ 964 avp->host_mbuf_addr = host_info->mbuf_va; 965 avp->host_sync_addr = host_info->sync_va; 966 967 /* 968 * store the maximum packet length that is supported by the host. 969 */ 970 avp->max_rx_pkt_len = host_info->max_rx_pkt_len; 971 PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n", 972 host_info->max_rx_pkt_len); 973 974 return 0; 975 } 976 977 /* 978 * This function is based on probe() function in avp_pci.c 979 * It returns 0 on success. 980 */ 981 static int 982 eth_avp_dev_init(struct rte_eth_dev *eth_dev) 983 { 984 struct avp_dev *avp = 985 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 986 struct rte_pci_device *pci_dev; 987 int ret; 988 989 pci_dev = AVP_DEV_TO_PCI(eth_dev); 990 eth_dev->dev_ops = &avp_eth_dev_ops; 991 eth_dev->rx_pkt_burst = &avp_recv_pkts; 992 eth_dev->tx_pkt_burst = &avp_xmit_pkts; 993 994 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 995 /* 996 * no setup required on secondary processes. All data is saved 997 * in dev_private by the primary process. All resource should 998 * be mapped to the same virtual address so all pointers should 999 * be valid. 
1000 */ 1001 if (eth_dev->data->scattered_rx) { 1002 PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); 1003 eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; 1004 eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; 1005 } 1006 return 0; 1007 } 1008 1009 rte_eth_copy_pci_info(eth_dev, pci_dev); 1010 1011 eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE; 1012 1013 /* Check current migration status */ 1014 if (avp_dev_migration_pending(eth_dev)) { 1015 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n"); 1016 return -EBUSY; 1017 } 1018 1019 /* Check BAR resources */ 1020 ret = avp_dev_check_regions(eth_dev); 1021 if (ret < 0) { 1022 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n", 1023 ret); 1024 return ret; 1025 } 1026 1027 /* Enable interrupts */ 1028 ret = avp_dev_setup_interrupts(eth_dev); 1029 if (ret < 0) { 1030 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret); 1031 return ret; 1032 } 1033 1034 /* Handle each subtype */ 1035 ret = avp_dev_create(pci_dev, eth_dev); 1036 if (ret < 0) { 1037 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret); 1038 return ret; 1039 } 1040 1041 /* Allocate memory for storing MAC addresses */ 1042 eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0); 1043 if (eth_dev->data->mac_addrs == NULL) { 1044 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n", 1045 ETHER_ADDR_LEN); 1046 return -ENOMEM; 1047 } 1048 1049 /* Get a mac from device config */ 1050 ether_addr_copy(&avp->ethaddr, ð_dev->data->mac_addrs[0]); 1051 1052 return 0; 1053 } 1054 1055 static int 1056 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev) 1057 { 1058 int ret; 1059 1060 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1061 return -EPERM; 1062 1063 if (eth_dev->data == NULL) 1064 return 0; 1065 1066 ret = avp_dev_disable_interrupts(eth_dev); 1067 if (ret != 0) { 1068 PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret); 1069 return ret; 1070 } 1071 1072 
if (eth_dev->data->mac_addrs != NULL) { 1073 rte_free(eth_dev->data->mac_addrs); 1074 eth_dev->data->mac_addrs = NULL; 1075 } 1076 1077 return 0; 1078 } 1079 1080 1081 static struct eth_driver rte_avp_pmd = { 1082 { 1083 .id_table = pci_id_avp_map, 1084 .drv_flags = RTE_PCI_DRV_NEED_MAPPING, 1085 .probe = rte_eth_dev_pci_probe, 1086 .remove = rte_eth_dev_pci_remove, 1087 }, 1088 .eth_dev_init = eth_avp_dev_init, 1089 .eth_dev_uninit = eth_avp_dev_uninit, 1090 .dev_private_size = sizeof(struct avp_adapter), 1091 }; 1092 1093 static int 1094 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev, 1095 struct avp_dev *avp) 1096 { 1097 unsigned int max_rx_pkt_len; 1098 1099 max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len; 1100 1101 if ((max_rx_pkt_len > avp->guest_mbuf_size) || 1102 (max_rx_pkt_len > avp->host_mbuf_size)) { 1103 /* 1104 * If the guest MTU is greater than either the host or guest 1105 * buffers then chained mbufs have to be enabled in the TX 1106 * direction. It is assumed that the application will not need 1107 * to send packets larger than their max_rx_pkt_len (MRU). 1108 */ 1109 return 1; 1110 } 1111 1112 if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) || 1113 (avp->max_rx_pkt_len > avp->host_mbuf_size)) { 1114 /* 1115 * If the host MRU is greater than its own mbuf size or the 1116 * guest mbuf size then chained mbufs have to be enabled in the 1117 * RX direction. 
1118 */ 1119 return 1; 1120 } 1121 1122 return 0; 1123 } 1124 1125 static int 1126 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, 1127 uint16_t rx_queue_id, 1128 uint16_t nb_rx_desc, 1129 unsigned int socket_id, 1130 const struct rte_eth_rxconf *rx_conf, 1131 struct rte_mempool *pool) 1132 { 1133 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 1134 struct rte_pktmbuf_pool_private *mbp_priv; 1135 struct avp_queue *rxq; 1136 1137 if (rx_queue_id >= eth_dev->data->nb_rx_queues) { 1138 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n", 1139 rx_queue_id, eth_dev->data->nb_rx_queues); 1140 return -EINVAL; 1141 } 1142 1143 /* Save mbuf pool pointer */ 1144 avp->pool = pool; 1145 1146 /* Save the local mbuf size */ 1147 mbp_priv = rte_mempool_get_priv(pool); 1148 avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size); 1149 avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM; 1150 1151 if (avp_dev_enable_scattered(eth_dev, avp)) { 1152 if (!eth_dev->data->scattered_rx) { 1153 PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); 1154 eth_dev->data->scattered_rx = 1; 1155 eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; 1156 eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; 1157 } 1158 } 1159 1160 PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n", 1161 avp->max_rx_pkt_len, 1162 eth_dev->data->dev_conf.rxmode.max_rx_pkt_len, 1163 avp->host_mbuf_size, 1164 avp->guest_mbuf_size); 1165 1166 /* allocate a queue object */ 1167 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue), 1168 RTE_CACHE_LINE_SIZE, socket_id); 1169 if (rxq == NULL) { 1170 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n"); 1171 return -ENOMEM; 1172 } 1173 1174 /* save back pointers to AVP and Ethernet devices */ 1175 rxq->avp = avp; 1176 rxq->dev_data = eth_dev->data; 1177 eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq; 1178 1179 /* setup the queue receive mapping for the current 
queue. */ 1180 _avp_set_rx_queue_mappings(eth_dev, rx_queue_id); 1181 1182 PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq); 1183 1184 (void)nb_rx_desc; 1185 (void)rx_conf; 1186 return 0; 1187 } 1188 1189 static int 1190 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev, 1191 uint16_t tx_queue_id, 1192 uint16_t nb_tx_desc, 1193 unsigned int socket_id, 1194 const struct rte_eth_txconf *tx_conf) 1195 { 1196 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 1197 struct avp_queue *txq; 1198 1199 if (tx_queue_id >= eth_dev->data->nb_tx_queues) { 1200 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n", 1201 tx_queue_id, eth_dev->data->nb_tx_queues); 1202 return -EINVAL; 1203 } 1204 1205 /* allocate a queue object */ 1206 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue), 1207 RTE_CACHE_LINE_SIZE, socket_id); 1208 if (txq == NULL) { 1209 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n"); 1210 return -ENOMEM; 1211 } 1212 1213 /* only the configured set of transmit queues are used */ 1214 txq->queue_id = tx_queue_id; 1215 txq->queue_base = tx_queue_id; 1216 txq->queue_limit = tx_queue_id; 1217 1218 /* save back pointers to AVP and Ethernet devices */ 1219 txq->avp = avp; 1220 txq->dev_data = eth_dev->data; 1221 eth_dev->data->tx_queues[tx_queue_id] = (void *)txq; 1222 1223 PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq); 1224 1225 (void)nb_tx_desc; 1226 (void)tx_conf; 1227 return 0; 1228 } 1229 1230 static inline int 1231 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b) 1232 { 1233 uint16_t *_a = (uint16_t *)&a->addr_bytes[0]; 1234 uint16_t *_b = (uint16_t *)&b->addr_bytes[0]; 1235 return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]); 1236 } 1237 1238 static inline int 1239 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m) 1240 { 1241 struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *); 1242 1243 if 
(likely(_avp_cmp_ether_addr(&avp->ethaddr, ð->d_addr) == 0)) { 1244 /* allow all packets destined to our address */ 1245 return 0; 1246 } 1247 1248 if (likely(is_broadcast_ether_addr(ð->d_addr))) { 1249 /* allow all broadcast packets */ 1250 return 0; 1251 } 1252 1253 if (likely(is_multicast_ether_addr(ð->d_addr))) { 1254 /* allow all multicast packets */ 1255 return 0; 1256 } 1257 1258 if (avp->flags & AVP_F_PROMISC) { 1259 /* allow all packets when in promiscuous mode */ 1260 return 0; 1261 } 1262 1263 return -1; 1264 } 1265 1266 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS 1267 static inline void 1268 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf) 1269 { 1270 struct rte_avp_desc *first_buf; 1271 struct rte_avp_desc *pkt_buf; 1272 unsigned int pkt_len; 1273 unsigned int nb_segs; 1274 void *pkt_data; 1275 unsigned int i; 1276 1277 first_buf = avp_dev_translate_buffer(avp, buf); 1278 1279 i = 0; 1280 pkt_len = 0; 1281 nb_segs = first_buf->nb_segs; 1282 do { 1283 /* Adjust pointers for guest addressing */ 1284 pkt_buf = avp_dev_translate_buffer(avp, buf); 1285 if (pkt_buf == NULL) 1286 rte_panic("bad buffer: segment %u has an invalid address %p\n", 1287 i, buf); 1288 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1289 if (pkt_data == NULL) 1290 rte_panic("bad buffer: segment %u has a NULL data pointer\n", 1291 i); 1292 if (pkt_buf->data_len == 0) 1293 rte_panic("bad buffer: segment %u has 0 data length\n", 1294 i); 1295 pkt_len += pkt_buf->data_len; 1296 nb_segs--; 1297 i++; 1298 1299 } while (nb_segs && (buf = pkt_buf->next) != NULL); 1300 1301 if (nb_segs != 0) 1302 rte_panic("bad buffer: expected %u segments found %u\n", 1303 first_buf->nb_segs, (first_buf->nb_segs - nb_segs)); 1304 if (pkt_len != first_buf->pkt_len) 1305 rte_panic("bad buffer: expected length %u found %u\n", 1306 first_buf->pkt_len, pkt_len); 1307 } 1308 1309 #define avp_dev_buffer_sanity_check(a, b) \ 1310 __avp_dev_buffer_sanity_check((a), (b)) 1311 1312 #else /* 
RTE_LIBRTE_AVP_DEBUG_BUFFERS */ 1313 1314 #define avp_dev_buffer_sanity_check(a, b) do {} while (0) 1315 1316 #endif 1317 1318 /* 1319 * Copy a host buffer chain to a set of mbufs. This function assumes that 1320 * there exactly the required number of mbufs to copy all source bytes. 1321 */ 1322 static inline struct rte_mbuf * 1323 avp_dev_copy_from_buffers(struct avp_dev *avp, 1324 struct rte_avp_desc *buf, 1325 struct rte_mbuf **mbufs, 1326 unsigned int count) 1327 { 1328 struct rte_mbuf *m_previous = NULL; 1329 struct rte_avp_desc *pkt_buf; 1330 unsigned int total_length = 0; 1331 unsigned int copy_length; 1332 unsigned int src_offset; 1333 struct rte_mbuf *m; 1334 uint16_t ol_flags; 1335 uint16_t vlan_tci; 1336 void *pkt_data; 1337 unsigned int i; 1338 1339 avp_dev_buffer_sanity_check(avp, buf); 1340 1341 /* setup the first source buffer */ 1342 pkt_buf = avp_dev_translate_buffer(avp, buf); 1343 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1344 total_length = pkt_buf->pkt_len; 1345 src_offset = 0; 1346 1347 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) { 1348 ol_flags = PKT_RX_VLAN_PKT; 1349 vlan_tci = pkt_buf->vlan_tci; 1350 } else { 1351 ol_flags = 0; 1352 vlan_tci = 0; 1353 } 1354 1355 for (i = 0; (i < count) && (buf != NULL); i++) { 1356 /* fill each destination buffer */ 1357 m = mbufs[i]; 1358 1359 if (m_previous != NULL) 1360 m_previous->next = m; 1361 1362 m_previous = m; 1363 1364 do { 1365 /* 1366 * Copy as many source buffers as will fit in the 1367 * destination buffer. 
1368 */ 1369 copy_length = RTE_MIN((avp->guest_mbuf_size - 1370 rte_pktmbuf_data_len(m)), 1371 (pkt_buf->data_len - 1372 src_offset)); 1373 rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *), 1374 rte_pktmbuf_data_len(m)), 1375 RTE_PTR_ADD(pkt_data, src_offset), 1376 copy_length); 1377 rte_pktmbuf_data_len(m) += copy_length; 1378 src_offset += copy_length; 1379 1380 if (likely(src_offset == pkt_buf->data_len)) { 1381 /* need a new source buffer */ 1382 buf = pkt_buf->next; 1383 if (buf != NULL) { 1384 pkt_buf = avp_dev_translate_buffer( 1385 avp, buf); 1386 pkt_data = avp_dev_translate_buffer( 1387 avp, pkt_buf->data); 1388 src_offset = 0; 1389 } 1390 } 1391 1392 if (unlikely(rte_pktmbuf_data_len(m) == 1393 avp->guest_mbuf_size)) { 1394 /* need a new destination mbuf */ 1395 break; 1396 } 1397 1398 } while (buf != NULL); 1399 } 1400 1401 m = mbufs[0]; 1402 m->ol_flags = ol_flags; 1403 m->nb_segs = count; 1404 rte_pktmbuf_pkt_len(m) = total_length; 1405 m->vlan_tci = vlan_tci; 1406 1407 __rte_mbuf_sanity_check(m, 1); 1408 1409 return m; 1410 } 1411 1412 static uint16_t 1413 avp_recv_scattered_pkts(void *rx_queue, 1414 struct rte_mbuf **rx_pkts, 1415 uint16_t nb_pkts) 1416 { 1417 struct avp_queue *rxq = (struct avp_queue *)rx_queue; 1418 struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST]; 1419 struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS]; 1420 struct avp_dev *avp = rxq->avp; 1421 struct rte_avp_desc *pkt_buf; 1422 struct rte_avp_fifo *free_q; 1423 struct rte_avp_fifo *rx_q; 1424 struct rte_avp_desc *buf; 1425 unsigned int count, avail, n; 1426 unsigned int guest_mbuf_size; 1427 struct rte_mbuf *m; 1428 unsigned int required; 1429 unsigned int buf_len; 1430 unsigned int port_id; 1431 unsigned int i; 1432 1433 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1434 /* VM live migration in progress */ 1435 return 0; 1436 } 1437 1438 guest_mbuf_size = avp->guest_mbuf_size; 1439 port_id = avp->port_id; 1440 rx_q = avp->rx_q[rxq->queue_id]; 1441 free_q = 
avp->free_q[rxq->queue_id]; 1442 1443 /* setup next queue to service */ 1444 rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? 1445 (rxq->queue_id + 1) : rxq->queue_base; 1446 1447 /* determine how many slots are available in the free queue */ 1448 count = avp_fifo_free_count(free_q); 1449 1450 /* determine how many packets are available in the rx queue */ 1451 avail = avp_fifo_count(rx_q); 1452 1453 /* determine how many packets can be received */ 1454 count = RTE_MIN(count, avail); 1455 count = RTE_MIN(count, nb_pkts); 1456 count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); 1457 1458 if (unlikely(count == 0)) { 1459 /* no free buffers, or no buffers on the rx queue */ 1460 return 0; 1461 } 1462 1463 /* retrieve pending packets */ 1464 n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); 1465 PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", 1466 count, rx_q); 1467 1468 count = 0; 1469 for (i = 0; i < n; i++) { 1470 /* prefetch next entry while processing current one */ 1471 if (i + 1 < n) { 1472 pkt_buf = avp_dev_translate_buffer(avp, 1473 avp_bufs[i + 1]); 1474 rte_prefetch0(pkt_buf); 1475 } 1476 buf = avp_bufs[i]; 1477 1478 /* Peek into the first buffer to determine the total length */ 1479 pkt_buf = avp_dev_translate_buffer(avp, buf); 1480 buf_len = pkt_buf->pkt_len; 1481 1482 /* Allocate enough mbufs to receive the entire packet */ 1483 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size; 1484 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) { 1485 rxq->dev_data->rx_mbuf_alloc_failed++; 1486 continue; 1487 } 1488 1489 /* Copy the data from the buffers to our mbufs */ 1490 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required); 1491 1492 /* finalize mbuf */ 1493 m->port = port_id; 1494 1495 if (_avp_mac_filter(avp, m) != 0) { 1496 /* silently discard packets not destined to our MAC */ 1497 rte_pktmbuf_free(m); 1498 continue; 1499 } 1500 1501 /* return new mbuf to caller */ 1502 rx_pkts[count++] = m; 1503 rxq->bytes += 
buf_len; 1504 } 1505 1506 rxq->packets += count; 1507 1508 /* return the buffers to the free queue */ 1509 avp_fifo_put(free_q, (void **)&avp_bufs[0], n); 1510 1511 return count; 1512 } 1513 1514 1515 static uint16_t 1516 avp_recv_pkts(void *rx_queue, 1517 struct rte_mbuf **rx_pkts, 1518 uint16_t nb_pkts) 1519 { 1520 struct avp_queue *rxq = (struct avp_queue *)rx_queue; 1521 struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST]; 1522 struct avp_dev *avp = rxq->avp; 1523 struct rte_avp_desc *pkt_buf; 1524 struct rte_avp_fifo *free_q; 1525 struct rte_avp_fifo *rx_q; 1526 unsigned int count, avail, n; 1527 unsigned int pkt_len; 1528 struct rte_mbuf *m; 1529 char *pkt_data; 1530 unsigned int i; 1531 1532 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1533 /* VM live migration in progress */ 1534 return 0; 1535 } 1536 1537 rx_q = avp->rx_q[rxq->queue_id]; 1538 free_q = avp->free_q[rxq->queue_id]; 1539 1540 /* setup next queue to service */ 1541 rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? 1542 (rxq->queue_id + 1) : rxq->queue_base; 1543 1544 /* determine how many slots are available in the free queue */ 1545 count = avp_fifo_free_count(free_q); 1546 1547 /* determine how many packets are available in the rx queue */ 1548 avail = avp_fifo_count(rx_q); 1549 1550 /* determine how many packets can be received */ 1551 count = RTE_MIN(count, avail); 1552 count = RTE_MIN(count, nb_pkts); 1553 count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); 1554 1555 if (unlikely(count == 0)) { 1556 /* no free buffers, or no buffers on the rx queue */ 1557 return 0; 1558 } 1559 1560 /* retrieve pending packets */ 1561 n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); 1562 PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", 1563 count, rx_q); 1564 1565 count = 0; 1566 for (i = 0; i < n; i++) { 1567 /* prefetch next entry while processing current one */ 1568 if (i < n - 1) { 1569 pkt_buf = avp_dev_translate_buffer(avp, 1570 avp_bufs[i + 1]); 1571 rte_prefetch0(pkt_buf); 1572 
} 1573 1574 /* Adjust host pointers for guest addressing */ 1575 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]); 1576 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1577 pkt_len = pkt_buf->pkt_len; 1578 1579 if (unlikely((pkt_len > avp->guest_mbuf_size) || 1580 (pkt_buf->nb_segs > 1))) { 1581 /* 1582 * application should be using the scattered receive 1583 * function 1584 */ 1585 rxq->errors++; 1586 continue; 1587 } 1588 1589 /* process each packet to be transmitted */ 1590 m = rte_pktmbuf_alloc(avp->pool); 1591 if (unlikely(m == NULL)) { 1592 rxq->dev_data->rx_mbuf_alloc_failed++; 1593 continue; 1594 } 1595 1596 /* copy data out of the host buffer to our buffer */ 1597 m->data_off = RTE_PKTMBUF_HEADROOM; 1598 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len); 1599 1600 /* initialize the local mbuf */ 1601 rte_pktmbuf_data_len(m) = pkt_len; 1602 rte_pktmbuf_pkt_len(m) = pkt_len; 1603 m->port = avp->port_id; 1604 1605 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) { 1606 m->ol_flags = PKT_RX_VLAN_PKT; 1607 m->vlan_tci = pkt_buf->vlan_tci; 1608 } 1609 1610 if (_avp_mac_filter(avp, m) != 0) { 1611 /* silently discard packets not destined to our MAC */ 1612 rte_pktmbuf_free(m); 1613 continue; 1614 } 1615 1616 /* return new mbuf to caller */ 1617 rx_pkts[count++] = m; 1618 rxq->bytes += pkt_len; 1619 } 1620 1621 rxq->packets += count; 1622 1623 /* return the buffers to the free queue */ 1624 avp_fifo_put(free_q, (void **)&avp_bufs[0], n); 1625 1626 return count; 1627 } 1628 1629 /* 1630 * Copy a chained mbuf to a set of host buffers. This function assumes that 1631 * there are sufficient destination buffers to contain the entire source 1632 * packet. 
1633 */ 1634 static inline uint16_t 1635 avp_dev_copy_to_buffers(struct avp_dev *avp, 1636 struct rte_mbuf *mbuf, 1637 struct rte_avp_desc **buffers, 1638 unsigned int count) 1639 { 1640 struct rte_avp_desc *previous_buf = NULL; 1641 struct rte_avp_desc *first_buf = NULL; 1642 struct rte_avp_desc *pkt_buf; 1643 struct rte_avp_desc *buf; 1644 size_t total_length; 1645 struct rte_mbuf *m; 1646 size_t copy_length; 1647 size_t src_offset; 1648 char *pkt_data; 1649 unsigned int i; 1650 1651 __rte_mbuf_sanity_check(mbuf, 1); 1652 1653 m = mbuf; 1654 src_offset = 0; 1655 total_length = rte_pktmbuf_pkt_len(m); 1656 for (i = 0; (i < count) && (m != NULL); i++) { 1657 /* fill each destination buffer */ 1658 buf = buffers[i]; 1659 1660 if (i < count - 1) { 1661 /* prefetch next entry while processing this one */ 1662 pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]); 1663 rte_prefetch0(pkt_buf); 1664 } 1665 1666 /* Adjust pointers for guest addressing */ 1667 pkt_buf = avp_dev_translate_buffer(avp, buf); 1668 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1669 1670 /* setup the buffer chain */ 1671 if (previous_buf != NULL) 1672 previous_buf->next = buf; 1673 else 1674 first_buf = pkt_buf; 1675 1676 previous_buf = pkt_buf; 1677 1678 do { 1679 /* 1680 * copy as many source mbuf segments as will fit in the 1681 * destination buffer. 
1682 */ 1683 copy_length = RTE_MIN((avp->host_mbuf_size - 1684 pkt_buf->data_len), 1685 (rte_pktmbuf_data_len(m) - 1686 src_offset)); 1687 rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len), 1688 RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *), 1689 src_offset), 1690 copy_length); 1691 pkt_buf->data_len += copy_length; 1692 src_offset += copy_length; 1693 1694 if (likely(src_offset == rte_pktmbuf_data_len(m))) { 1695 /* need a new source buffer */ 1696 m = m->next; 1697 src_offset = 0; 1698 } 1699 1700 if (unlikely(pkt_buf->data_len == 1701 avp->host_mbuf_size)) { 1702 /* need a new destination buffer */ 1703 break; 1704 } 1705 1706 } while (m != NULL); 1707 } 1708 1709 first_buf->nb_segs = count; 1710 first_buf->pkt_len = total_length; 1711 1712 if (mbuf->ol_flags & PKT_TX_VLAN_PKT) { 1713 first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT; 1714 first_buf->vlan_tci = mbuf->vlan_tci; 1715 } 1716 1717 avp_dev_buffer_sanity_check(avp, buffers[0]); 1718 1719 return total_length; 1720 } 1721 1722 1723 static uint16_t 1724 avp_xmit_scattered_pkts(void *tx_queue, 1725 struct rte_mbuf **tx_pkts, 1726 uint16_t nb_pkts) 1727 { 1728 struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST * 1729 RTE_AVP_MAX_MBUF_SEGMENTS)]; 1730 struct avp_queue *txq = (struct avp_queue *)tx_queue; 1731 struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST]; 1732 struct avp_dev *avp = txq->avp; 1733 struct rte_avp_fifo *alloc_q; 1734 struct rte_avp_fifo *tx_q; 1735 unsigned int count, avail, n; 1736 unsigned int orig_nb_pkts; 1737 struct rte_mbuf *m; 1738 unsigned int required; 1739 unsigned int segments; 1740 unsigned int tx_bytes; 1741 unsigned int i; 1742 1743 orig_nb_pkts = nb_pkts; 1744 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1745 /* VM live migration in progress */ 1746 /* TODO ... buffer for X packets then drop? 
*/ 1747 txq->errors += nb_pkts; 1748 return 0; 1749 } 1750 1751 tx_q = avp->tx_q[txq->queue_id]; 1752 alloc_q = avp->alloc_q[txq->queue_id]; 1753 1754 /* limit the number of transmitted packets to the max burst size */ 1755 if (unlikely(nb_pkts > AVP_MAX_TX_BURST)) 1756 nb_pkts = AVP_MAX_TX_BURST; 1757 1758 /* determine how many buffers are available to copy into */ 1759 avail = avp_fifo_count(alloc_q); 1760 if (unlikely(avail > (AVP_MAX_TX_BURST * 1761 RTE_AVP_MAX_MBUF_SEGMENTS))) 1762 avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS; 1763 1764 /* determine how many slots are available in the transmit queue */ 1765 count = avp_fifo_free_count(tx_q); 1766 1767 /* determine how many packets can be sent */ 1768 nb_pkts = RTE_MIN(count, nb_pkts); 1769 1770 /* determine how many packets will fit in the available buffers */ 1771 count = 0; 1772 segments = 0; 1773 for (i = 0; i < nb_pkts; i++) { 1774 m = tx_pkts[i]; 1775 if (likely(i < (unsigned int)nb_pkts - 1)) { 1776 /* prefetch next entry while processing this one */ 1777 rte_prefetch0(tx_pkts[i + 1]); 1778 } 1779 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) / 1780 avp->host_mbuf_size; 1781 1782 if (unlikely((required == 0) || 1783 (required > RTE_AVP_MAX_MBUF_SEGMENTS))) 1784 break; 1785 else if (unlikely(required + segments > avail)) 1786 break; 1787 segments += required; 1788 count++; 1789 } 1790 nb_pkts = count; 1791 1792 if (unlikely(nb_pkts == 0)) { 1793 /* no available buffers, or no space on the tx queue */ 1794 txq->errors += orig_nb_pkts; 1795 return 0; 1796 } 1797 1798 PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n", 1799 nb_pkts, tx_q); 1800 1801 /* retrieve sufficient send buffers */ 1802 n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments); 1803 if (unlikely(n != segments)) { 1804 PMD_TX_LOG(DEBUG, "Failed to allocate buffers " 1805 "n=%u, segments=%u, orig=%u\n", 1806 n, segments, orig_nb_pkts); 1807 txq->errors += orig_nb_pkts; 1808 return 0; 1809 } 1810 1811 
tx_bytes = 0; 1812 count = 0; 1813 for (i = 0; i < nb_pkts; i++) { 1814 /* process each packet to be transmitted */ 1815 m = tx_pkts[i]; 1816 1817 /* determine how many buffers are required for this packet */ 1818 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) / 1819 avp->host_mbuf_size; 1820 1821 tx_bytes += avp_dev_copy_to_buffers(avp, m, 1822 &avp_bufs[count], required); 1823 tx_bufs[i] = avp_bufs[count]; 1824 count += required; 1825 1826 /* free the original mbuf */ 1827 rte_pktmbuf_free(m); 1828 } 1829 1830 txq->packets += nb_pkts; 1831 txq->bytes += tx_bytes; 1832 1833 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS 1834 for (i = 0; i < nb_pkts; i++) 1835 avp_dev_buffer_sanity_check(avp, tx_bufs[i]); 1836 #endif 1837 1838 /* send the packets */ 1839 n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts); 1840 if (unlikely(n != orig_nb_pkts)) 1841 txq->errors += (orig_nb_pkts - n); 1842 1843 return n; 1844 } 1845 1846 1847 static uint16_t 1848 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 1849 { 1850 struct avp_queue *txq = (struct avp_queue *)tx_queue; 1851 struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST]; 1852 struct avp_dev *avp = txq->avp; 1853 struct rte_avp_desc *pkt_buf; 1854 struct rte_avp_fifo *alloc_q; 1855 struct rte_avp_fifo *tx_q; 1856 unsigned int count, avail, n; 1857 struct rte_mbuf *m; 1858 unsigned int pkt_len; 1859 unsigned int tx_bytes; 1860 char *pkt_data; 1861 unsigned int i; 1862 1863 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1864 /* VM live migration in progress */ 1865 /* TODO ... buffer for X packets then drop?! 
*/ 1866 txq->errors++; 1867 return 0; 1868 } 1869 1870 tx_q = avp->tx_q[txq->queue_id]; 1871 alloc_q = avp->alloc_q[txq->queue_id]; 1872 1873 /* limit the number of transmitted packets to the max burst size */ 1874 if (unlikely(nb_pkts > AVP_MAX_TX_BURST)) 1875 nb_pkts = AVP_MAX_TX_BURST; 1876 1877 /* determine how many buffers are available to copy into */ 1878 avail = avp_fifo_count(alloc_q); 1879 1880 /* determine how many slots are available in the transmit queue */ 1881 count = avp_fifo_free_count(tx_q); 1882 1883 /* determine how many packets can be sent */ 1884 count = RTE_MIN(count, avail); 1885 count = RTE_MIN(count, nb_pkts); 1886 1887 if (unlikely(count == 0)) { 1888 /* no available buffers, or no space on the tx queue */ 1889 txq->errors += nb_pkts; 1890 return 0; 1891 } 1892 1893 PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n", 1894 count, tx_q); 1895 1896 /* retrieve sufficient send buffers */ 1897 n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count); 1898 if (unlikely(n != count)) { 1899 txq->errors++; 1900 return 0; 1901 } 1902 1903 tx_bytes = 0; 1904 for (i = 0; i < count; i++) { 1905 /* prefetch next entry while processing the current one */ 1906 if (i < count - 1) { 1907 pkt_buf = avp_dev_translate_buffer(avp, 1908 avp_bufs[i + 1]); 1909 rte_prefetch0(pkt_buf); 1910 } 1911 1912 /* process each packet to be transmitted */ 1913 m = tx_pkts[i]; 1914 1915 /* Adjust pointers for guest addressing */ 1916 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]); 1917 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1918 pkt_len = rte_pktmbuf_pkt_len(m); 1919 1920 if (unlikely((pkt_len > avp->guest_mbuf_size) || 1921 (pkt_len > avp->host_mbuf_size))) { 1922 /* 1923 * application should be using the scattered transmit 1924 * function; send it truncated to avoid the performance 1925 * hit of having to manage returning the already 1926 * allocated buffer to the free list. 
This should not 1927 * happen since the application should have set the 1928 * max_rx_pkt_len based on its MTU and it should be 1929 * policing its own packet sizes. 1930 */ 1931 txq->errors++; 1932 pkt_len = RTE_MIN(avp->guest_mbuf_size, 1933 avp->host_mbuf_size); 1934 } 1935 1936 /* copy data out of our mbuf and into the AVP buffer */ 1937 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len); 1938 pkt_buf->pkt_len = pkt_len; 1939 pkt_buf->data_len = pkt_len; 1940 pkt_buf->nb_segs = 1; 1941 pkt_buf->next = NULL; 1942 1943 if (m->ol_flags & PKT_TX_VLAN_PKT) { 1944 pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT; 1945 pkt_buf->vlan_tci = m->vlan_tci; 1946 } 1947 1948 tx_bytes += pkt_len; 1949 1950 /* free the original mbuf */ 1951 rte_pktmbuf_free(m); 1952 } 1953 1954 txq->packets += count; 1955 txq->bytes += tx_bytes; 1956 1957 /* send the packets */ 1958 n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count); 1959 1960 return n; 1961 } 1962 1963 static void 1964 avp_dev_rx_queue_release(void *rx_queue) 1965 { 1966 struct avp_queue *rxq = (struct avp_queue *)rx_queue; 1967 struct avp_dev *avp = rxq->avp; 1968 struct rte_eth_dev_data *data = avp->dev_data; 1969 unsigned int i; 1970 1971 for (i = 0; i < avp->num_rx_queues; i++) { 1972 if (data->rx_queues[i] == rxq) 1973 data->rx_queues[i] = NULL; 1974 } 1975 } 1976 1977 static void 1978 avp_dev_tx_queue_release(void *tx_queue) 1979 { 1980 struct avp_queue *txq = (struct avp_queue *)tx_queue; 1981 struct avp_dev *avp = txq->avp; 1982 struct rte_eth_dev_data *data = avp->dev_data; 1983 unsigned int i; 1984 1985 for (i = 0; i < avp->num_tx_queues; i++) { 1986 if (data->tx_queues[i] == txq) 1987 data->tx_queues[i] = NULL; 1988 } 1989 } 1990 1991 static int 1992 avp_dev_configure(struct rte_eth_dev *eth_dev) 1993 { 1994 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 1995 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 1996 struct rte_avp_device_info *host_info; 1997 struct 
rte_avp_device_config config; 1998 int mask = 0; 1999 void *addr; 2000 int ret; 2001 2002 rte_spinlock_lock(&avp->lock); 2003 if (avp->flags & AVP_F_DETACHED) { 2004 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2005 ret = -ENOTSUP; 2006 goto unlock; 2007 } 2008 2009 addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; 2010 host_info = (struct rte_avp_device_info *)addr; 2011 2012 /* Setup required number of queues */ 2013 _avp_set_queue_counts(eth_dev); 2014 2015 mask = (ETH_VLAN_STRIP_MASK | 2016 ETH_VLAN_FILTER_MASK | 2017 ETH_VLAN_EXTEND_MASK); 2018 avp_vlan_offload_set(eth_dev, mask); 2019 2020 /* update device config */ 2021 memset(&config, 0, sizeof(config)); 2022 config.device_id = host_info->device_id; 2023 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; 2024 config.driver_version = AVP_DPDK_DRIVER_VERSION; 2025 config.features = avp->features; 2026 config.num_tx_queues = avp->num_tx_queues; 2027 config.num_rx_queues = avp->num_rx_queues; 2028 2029 ret = avp_dev_ctrl_set_config(eth_dev, &config); 2030 if (ret < 0) { 2031 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", 2032 ret); 2033 goto unlock; 2034 } 2035 2036 avp->flags |= AVP_F_CONFIGURED; 2037 ret = 0; 2038 2039 unlock: 2040 rte_spinlock_unlock(&avp->lock); 2041 return ret; 2042 } 2043 2044 static int 2045 avp_dev_start(struct rte_eth_dev *eth_dev) 2046 { 2047 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2048 int ret; 2049 2050 rte_spinlock_lock(&avp->lock); 2051 if (avp->flags & AVP_F_DETACHED) { 2052 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2053 ret = -ENOTSUP; 2054 goto unlock; 2055 } 2056 2057 /* disable features that we do not support */ 2058 eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0; 2059 eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0; 2060 eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0; 2061 eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0; 2062 2063 /* update link state */ 2064 
ret = avp_dev_ctrl_set_link_state(eth_dev, 1); 2065 if (ret < 0) { 2066 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", 2067 ret); 2068 goto unlock; 2069 } 2070 2071 /* remember current link state */ 2072 avp->flags |= AVP_F_LINKUP; 2073 2074 ret = 0; 2075 2076 unlock: 2077 rte_spinlock_unlock(&avp->lock); 2078 return ret; 2079 } 2080 2081 static void 2082 avp_dev_stop(struct rte_eth_dev *eth_dev) 2083 { 2084 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2085 int ret; 2086 2087 rte_spinlock_lock(&avp->lock); 2088 if (avp->flags & AVP_F_DETACHED) { 2089 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2090 goto unlock; 2091 } 2092 2093 /* remember current link state */ 2094 avp->flags &= ~AVP_F_LINKUP; 2095 2096 /* update link state */ 2097 ret = avp_dev_ctrl_set_link_state(eth_dev, 0); 2098 if (ret < 0) { 2099 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", 2100 ret); 2101 } 2102 2103 unlock: 2104 rte_spinlock_unlock(&avp->lock); 2105 } 2106 2107 static void 2108 avp_dev_close(struct rte_eth_dev *eth_dev) 2109 { 2110 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2111 int ret; 2112 2113 rte_spinlock_lock(&avp->lock); 2114 if (avp->flags & AVP_F_DETACHED) { 2115 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2116 goto unlock; 2117 } 2118 2119 /* remember current link state */ 2120 avp->flags &= ~AVP_F_LINKUP; 2121 avp->flags &= ~AVP_F_CONFIGURED; 2122 2123 ret = avp_dev_disable_interrupts(eth_dev); 2124 if (ret < 0) { 2125 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n"); 2126 /* continue */ 2127 } 2128 2129 /* update device state */ 2130 ret = avp_dev_ctrl_shutdown(eth_dev); 2131 if (ret < 0) { 2132 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n", 2133 ret); 2134 /* continue */ 2135 } 2136 2137 unlock: 2138 rte_spinlock_unlock(&avp->lock); 2139 } 2140 2141 static int 2142 avp_dev_link_update(struct rte_eth_dev *eth_dev, 
2143 __rte_unused int wait_to_complete) 2144 { 2145 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2146 struct rte_eth_link *link = ð_dev->data->dev_link; 2147 2148 link->link_speed = ETH_SPEED_NUM_10G; 2149 link->link_duplex = ETH_LINK_FULL_DUPLEX; 2150 link->link_status = !!(avp->flags & AVP_F_LINKUP); 2151 2152 return -1; 2153 } 2154 2155 static void 2156 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev) 2157 { 2158 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2159 2160 rte_spinlock_lock(&avp->lock); 2161 if ((avp->flags & AVP_F_PROMISC) == 0) { 2162 avp->flags |= AVP_F_PROMISC; 2163 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n", 2164 eth_dev->data->port_id); 2165 } 2166 rte_spinlock_unlock(&avp->lock); 2167 } 2168 2169 static void 2170 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev) 2171 { 2172 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2173 2174 rte_spinlock_lock(&avp->lock); 2175 if ((avp->flags & AVP_F_PROMISC) != 0) { 2176 avp->flags &= ~AVP_F_PROMISC; 2177 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n", 2178 eth_dev->data->port_id); 2179 } 2180 rte_spinlock_unlock(&avp->lock); 2181 } 2182 2183 static void 2184 avp_dev_info_get(struct rte_eth_dev *eth_dev, 2185 struct rte_eth_dev_info *dev_info) 2186 { 2187 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2188 2189 dev_info->driver_name = "rte_avp_pmd"; 2190 dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device); 2191 dev_info->max_rx_queues = avp->max_rx_queues; 2192 dev_info->max_tx_queues = avp->max_tx_queues; 2193 dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE; 2194 dev_info->max_rx_pktlen = avp->max_rx_pkt_len; 2195 dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS; 2196 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) { 2197 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP; 2198 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; 2199 } 2200 } 2201 2202 static 
void 2203 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask) 2204 { 2205 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2206 2207 if (mask & ETH_VLAN_STRIP_MASK) { 2208 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) { 2209 if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip) 2210 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD; 2211 else 2212 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD; 2213 } else { 2214 PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n"); 2215 } 2216 } 2217 2218 if (mask & ETH_VLAN_FILTER_MASK) { 2219 if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter) 2220 PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n"); 2221 } 2222 2223 if (mask & ETH_VLAN_EXTEND_MASK) { 2224 if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend) 2225 PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n"); 2226 } 2227 } 2228 2229 static void 2230 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats) 2231 { 2232 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2233 unsigned int i; 2234 2235 for (i = 0; i < avp->num_rx_queues; i++) { 2236 struct avp_queue *rxq = avp->dev_data->rx_queues[i]; 2237 2238 if (rxq) { 2239 stats->ipackets += rxq->packets; 2240 stats->ibytes += rxq->bytes; 2241 stats->ierrors += rxq->errors; 2242 2243 stats->q_ipackets[i] += rxq->packets; 2244 stats->q_ibytes[i] += rxq->bytes; 2245 stats->q_errors[i] += rxq->errors; 2246 } 2247 } 2248 2249 for (i = 0; i < avp->num_tx_queues; i++) { 2250 struct avp_queue *txq = avp->dev_data->tx_queues[i]; 2251 2252 if (txq) { 2253 stats->opackets += txq->packets; 2254 stats->obytes += txq->bytes; 2255 stats->oerrors += txq->errors; 2256 2257 stats->q_opackets[i] += txq->packets; 2258 stats->q_obytes[i] += txq->bytes; 2259 stats->q_errors[i] += txq->errors; 2260 } 2261 } 2262 } 2263 2264 static void 2265 avp_dev_stats_reset(struct rte_eth_dev *eth_dev) 2266 { 2267 struct avp_dev *avp = 
AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2268 unsigned int i; 2269 2270 for (i = 0; i < avp->num_rx_queues; i++) { 2271 struct avp_queue *rxq = avp->dev_data->rx_queues[i]; 2272 2273 if (rxq) { 2274 rxq->bytes = 0; 2275 rxq->packets = 0; 2276 rxq->errors = 0; 2277 } 2278 } 2279 2280 for (i = 0; i < avp->num_tx_queues; i++) { 2281 struct avp_queue *txq = avp->dev_data->tx_queues[i]; 2282 2283 if (txq) { 2284 txq->bytes = 0; 2285 txq->packets = 0; 2286 txq->errors = 0; 2287 } 2288 } 2289 } 2290 2291 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd.pci_drv); 2292 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map); 2293