1 /* 2 * BSD LICENSE 3 * 4 * Copyright (c) 2013-2017, Wind River Systems, Inc. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * 1) Redistributions of source code must retain the above copyright notice, 10 * this list of conditions and the following disclaimer. 11 * 12 * 2) Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 16 * 3) Neither the name of Wind River Systems nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 
31 */ 32 33 #include <stdint.h> 34 #include <string.h> 35 #include <stdio.h> 36 #include <errno.h> 37 #include <unistd.h> 38 39 #include <rte_ethdev.h> 40 #include <rte_ethdev_pci.h> 41 #include <rte_memcpy.h> 42 #include <rte_string_fns.h> 43 #include <rte_malloc.h> 44 #include <rte_atomic.h> 45 #include <rte_branch_prediction.h> 46 #include <rte_pci.h> 47 #include <rte_bus_pci.h> 48 #include <rte_ether.h> 49 #include <rte_common.h> 50 #include <rte_cycles.h> 51 #include <rte_spinlock.h> 52 #include <rte_byteorder.h> 53 #include <rte_dev.h> 54 #include <rte_memory.h> 55 #include <rte_eal.h> 56 #include <rte_io.h> 57 58 #include "rte_avp_common.h" 59 #include "rte_avp_fifo.h" 60 61 #include "avp_logs.h" 62 63 int avp_logtype_driver; 64 65 static int avp_dev_create(struct rte_pci_device *pci_dev, 66 struct rte_eth_dev *eth_dev); 67 68 static int avp_dev_configure(struct rte_eth_dev *dev); 69 static int avp_dev_start(struct rte_eth_dev *dev); 70 static void avp_dev_stop(struct rte_eth_dev *dev); 71 static void avp_dev_close(struct rte_eth_dev *dev); 72 static void avp_dev_info_get(struct rte_eth_dev *dev, 73 struct rte_eth_dev_info *dev_info); 74 static int avp_vlan_offload_set(struct rte_eth_dev *dev, int mask); 75 static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete); 76 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev); 77 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev); 78 79 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev, 80 uint16_t rx_queue_id, 81 uint16_t nb_rx_desc, 82 unsigned int socket_id, 83 const struct rte_eth_rxconf *rx_conf, 84 struct rte_mempool *pool); 85 86 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev, 87 uint16_t tx_queue_id, 88 uint16_t nb_tx_desc, 89 unsigned int socket_id, 90 const struct rte_eth_txconf *tx_conf); 91 92 static uint16_t avp_recv_scattered_pkts(void *rx_queue, 93 struct rte_mbuf **rx_pkts, 94 uint16_t nb_pkts); 95 96 static uint16_t avp_recv_pkts(void 
*rx_queue, 97 struct rte_mbuf **rx_pkts, 98 uint16_t nb_pkts); 99 100 static uint16_t avp_xmit_scattered_pkts(void *tx_queue, 101 struct rte_mbuf **tx_pkts, 102 uint16_t nb_pkts); 103 104 static uint16_t avp_xmit_pkts(void *tx_queue, 105 struct rte_mbuf **tx_pkts, 106 uint16_t nb_pkts); 107 108 static void avp_dev_rx_queue_release(void *rxq); 109 static void avp_dev_tx_queue_release(void *txq); 110 111 static int avp_dev_stats_get(struct rte_eth_dev *dev, 112 struct rte_eth_stats *stats); 113 static void avp_dev_stats_reset(struct rte_eth_dev *dev); 114 115 116 #define AVP_MAX_RX_BURST 64 117 #define AVP_MAX_TX_BURST 64 118 #define AVP_MAX_MAC_ADDRS 1 119 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN 120 121 122 /* 123 * Defines the number of microseconds to wait before checking the response 124 * queue for completion. 125 */ 126 #define AVP_REQUEST_DELAY_USECS (5000) 127 128 /* 129 * Defines the number times to check the response queue for completion before 130 * declaring a timeout. 131 */ 132 #define AVP_MAX_REQUEST_RETRY (100) 133 134 /* Defines the current PCI driver version number */ 135 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION 136 137 /* 138 * The set of PCI devices this driver supports 139 */ 140 static const struct rte_pci_id pci_id_avp_map[] = { 141 { .vendor_id = RTE_AVP_PCI_VENDOR_ID, 142 .device_id = RTE_AVP_PCI_DEVICE_ID, 143 .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID, 144 .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID, 145 .class_id = RTE_CLASS_ANY_ID, 146 }, 147 148 { .vendor_id = 0, /* sentinel */ 149 }, 150 }; 151 152 /* 153 * dev_ops for avp, bare necessities for basic operation 154 */ 155 static const struct eth_dev_ops avp_eth_dev_ops = { 156 .dev_configure = avp_dev_configure, 157 .dev_start = avp_dev_start, 158 .dev_stop = avp_dev_stop, 159 .dev_close = avp_dev_close, 160 .dev_infos_get = avp_dev_info_get, 161 .vlan_offload_set = avp_vlan_offload_set, 162 .stats_get = avp_dev_stats_get, 163 .stats_reset = 
avp_dev_stats_reset, 164 .link_update = avp_dev_link_update, 165 .promiscuous_enable = avp_dev_promiscuous_enable, 166 .promiscuous_disable = avp_dev_promiscuous_disable, 167 .rx_queue_setup = avp_dev_rx_queue_setup, 168 .rx_queue_release = avp_dev_rx_queue_release, 169 .tx_queue_setup = avp_dev_tx_queue_setup, 170 .tx_queue_release = avp_dev_tx_queue_release, 171 }; 172 173 /**@{ AVP device flags */ 174 #define AVP_F_PROMISC (1 << 1) 175 #define AVP_F_CONFIGURED (1 << 2) 176 #define AVP_F_LINKUP (1 << 3) 177 #define AVP_F_DETACHED (1 << 4) 178 /**@} */ 179 180 /* Ethernet device validation marker */ 181 #define AVP_ETHDEV_MAGIC 0x92972862 182 183 /* 184 * Defines the AVP device attributes which are attached to an RTE ethernet 185 * device 186 */ 187 struct avp_dev { 188 uint32_t magic; /**< Memory validation marker */ 189 uint64_t device_id; /**< Unique system identifier */ 190 struct ether_addr ethaddr; /**< Host specified MAC address */ 191 struct rte_eth_dev_data *dev_data; 192 /**< Back pointer to ethernet device data */ 193 volatile uint32_t flags; /**< Device operational flags */ 194 uint16_t port_id; /**< Ethernet port identifier */ 195 struct rte_mempool *pool; /**< pkt mbuf mempool */ 196 unsigned int guest_mbuf_size; /**< local pool mbuf size */ 197 unsigned int host_mbuf_size; /**< host mbuf size */ 198 unsigned int max_rx_pkt_len; /**< maximum receive unit */ 199 uint32_t host_features; /**< Supported feature bitmap */ 200 uint32_t features; /**< Enabled feature bitmap */ 201 unsigned int num_tx_queues; /**< Negotiated number of transmit queues */ 202 unsigned int max_tx_queues; /**< Maximum number of transmit queues */ 203 unsigned int num_rx_queues; /**< Negotiated number of receive queues */ 204 unsigned int max_rx_queues; /**< Maximum number of receive queues */ 205 206 struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */ 207 struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */ 208 struct rte_avp_fifo 
*alloc_q[RTE_AVP_MAX_QUEUES]; 209 /**< Allocated mbufs queue */ 210 struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES]; 211 /**< To be freed mbufs queue */ 212 213 /* mutual exclusion over the 'flag' and 'resp_q/req_q' fields */ 214 rte_spinlock_t lock; 215 216 /* For request & response */ 217 struct rte_avp_fifo *req_q; /**< Request queue */ 218 struct rte_avp_fifo *resp_q; /**< Response queue */ 219 void *host_sync_addr; /**< (host) Req/Resp Mem address */ 220 void *sync_addr; /**< Req/Resp Mem address */ 221 void *host_mbuf_addr; /**< (host) MBUF pool start address */ 222 void *mbuf_addr; /**< MBUF pool start address */ 223 } __rte_cache_aligned; 224 225 /* RTE ethernet private data */ 226 struct avp_adapter { 227 struct avp_dev avp; 228 } __rte_cache_aligned; 229 230 231 /* 32-bit MMIO register write */ 232 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr)) 233 234 /* 32-bit MMIO register read */ 235 #define AVP_READ32(_addr) rte_read32_relaxed((_addr)) 236 237 /* Macro to cast the ethernet device private data to a AVP object */ 238 #define AVP_DEV_PRIVATE_TO_HW(adapter) \ 239 (&((struct avp_adapter *)adapter)->avp) 240 241 /* 242 * Defines the structure of a AVP device queue for the purpose of handling the 243 * receive and transmit burst callback functions 244 */ 245 struct avp_queue { 246 struct rte_eth_dev_data *dev_data; 247 /**< Backpointer to ethernet device data */ 248 struct avp_dev *avp; /**< Backpointer to AVP device */ 249 uint16_t queue_id; 250 /**< Queue identifier used for indexing current queue */ 251 uint16_t queue_base; 252 /**< Base queue identifier for queue servicing */ 253 uint16_t queue_limit; 254 /**< Maximum queue identifier for queue servicing */ 255 256 uint64_t packets; 257 uint64_t bytes; 258 uint64_t errors; 259 }; 260 261 /* send a request and wait for a response 262 * 263 * @warning must be called while holding the avp->lock spinlock. 
264 */ 265 static int 266 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request) 267 { 268 unsigned int retry = AVP_MAX_REQUEST_RETRY; 269 void *resp_addr = NULL; 270 unsigned int count; 271 int ret; 272 273 PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id); 274 275 request->result = -ENOTSUP; 276 277 /* Discard any stale responses before starting a new request */ 278 while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1)) 279 PMD_DRV_LOG(DEBUG, "Discarding stale response\n"); 280 281 rte_memcpy(avp->sync_addr, request, sizeof(*request)); 282 count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1); 283 if (count < 1) { 284 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n", 285 request->req_id); 286 ret = -EBUSY; 287 goto done; 288 } 289 290 while (retry--) { 291 /* wait for a response */ 292 usleep(AVP_REQUEST_DELAY_USECS); 293 294 count = avp_fifo_count(avp->resp_q); 295 if (count >= 1) { 296 /* response received */ 297 break; 298 } 299 300 if ((count < 1) && (retry == 0)) { 301 PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n", 302 request->req_id); 303 ret = -ETIME; 304 goto done; 305 } 306 } 307 308 /* retrieve the response */ 309 count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1); 310 if ((count != 1) || (resp_addr != avp->host_sync_addr)) { 311 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n", 312 count, resp_addr, avp->host_sync_addr); 313 ret = -ENODATA; 314 goto done; 315 } 316 317 /* copy to user buffer */ 318 rte_memcpy(request, avp->sync_addr, sizeof(*request)); 319 ret = 0; 320 321 PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n", 322 request->result, request->req_id); 323 324 done: 325 return ret; 326 } 327 328 static int 329 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state) 330 { 331 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 332 struct rte_avp_request request; 333 int ret; 334 335 /* 
setup a link state change request */ 336 memset(&request, 0, sizeof(request)); 337 request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF; 338 request.if_up = state; 339 340 ret = avp_dev_process_request(avp, &request); 341 342 return ret == 0 ? request.result : ret; 343 } 344 345 static int 346 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev, 347 struct rte_avp_device_config *config) 348 { 349 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 350 struct rte_avp_request request; 351 int ret; 352 353 /* setup a configure request */ 354 memset(&request, 0, sizeof(request)); 355 request.req_id = RTE_AVP_REQ_CFG_DEVICE; 356 memcpy(&request.config, config, sizeof(request.config)); 357 358 ret = avp_dev_process_request(avp, &request); 359 360 return ret == 0 ? request.result : ret; 361 } 362 363 static int 364 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev) 365 { 366 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 367 struct rte_avp_request request; 368 int ret; 369 370 /* setup a shutdown request */ 371 memset(&request, 0, sizeof(request)); 372 request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE; 373 374 ret = avp_dev_process_request(avp, &request); 375 376 return ret == 0 ? 
request.result : ret; 377 } 378 379 /* translate from host mbuf virtual address to guest virtual address */ 380 static inline void * 381 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address) 382 { 383 return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address, 384 (uintptr_t)avp->host_mbuf_addr), 385 (uintptr_t)avp->mbuf_addr); 386 } 387 388 /* translate from host physical address to guest virtual address */ 389 static void * 390 avp_dev_translate_address(struct rte_eth_dev *eth_dev, 391 rte_iova_t host_phys_addr) 392 { 393 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 394 struct rte_mem_resource *resource; 395 struct rte_avp_memmap_info *info; 396 struct rte_avp_memmap *map; 397 off_t offset; 398 void *addr; 399 unsigned int i; 400 401 addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr; 402 resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR]; 403 info = (struct rte_avp_memmap_info *)resource->addr; 404 405 offset = 0; 406 for (i = 0; i < info->nb_maps; i++) { 407 /* search all segments looking for a matching address */ 408 map = &info->maps[i]; 409 410 if ((host_phys_addr >= map->phys_addr) && 411 (host_phys_addr < (map->phys_addr + map->length))) { 412 /* address is within this segment */ 413 offset += (host_phys_addr - map->phys_addr); 414 addr = RTE_PTR_ADD(addr, offset); 415 416 PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n", 417 host_phys_addr, addr); 418 419 return addr; 420 } 421 offset += map->length; 422 } 423 424 return NULL; 425 } 426 427 /* verify that the incoming device version is compatible with our version */ 428 static int 429 avp_dev_version_check(uint32_t version) 430 { 431 uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION); 432 uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version); 433 434 if (device <= driver) { 435 /* the host driver version is less than or equal to ours */ 436 return 0; 437 } 438 439 return 1; 440 } 441 442 /* verify that memory 
regions have expected version and validation markers */ 443 static int 444 avp_dev_check_regions(struct rte_eth_dev *eth_dev) 445 { 446 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 447 struct rte_avp_memmap_info *memmap; 448 struct rte_avp_device_info *info; 449 struct rte_mem_resource *resource; 450 unsigned int i; 451 452 /* Dump resource info for debug */ 453 for (i = 0; i < PCI_MAX_RESOURCE; i++) { 454 resource = &pci_dev->mem_resource[i]; 455 if ((resource->phys_addr == 0) || (resource->len == 0)) 456 continue; 457 458 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n", 459 i, resource->phys_addr, 460 resource->len, resource->addr); 461 462 switch (i) { 463 case RTE_AVP_PCI_MEMMAP_BAR: 464 memmap = (struct rte_avp_memmap_info *)resource->addr; 465 if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) || 466 (memmap->version != RTE_AVP_MEMMAP_VERSION)) { 467 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n", 468 memmap->magic, memmap->version); 469 return -EINVAL; 470 } 471 break; 472 473 case RTE_AVP_PCI_DEVICE_BAR: 474 info = (struct rte_avp_device_info *)resource->addr; 475 if ((info->magic != RTE_AVP_DEVICE_MAGIC) || 476 avp_dev_version_check(info->version)) { 477 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n", 478 info->magic, info->version, 479 AVP_DPDK_DRIVER_VERSION); 480 return -EINVAL; 481 } 482 break; 483 484 case RTE_AVP_PCI_MEMORY_BAR: 485 case RTE_AVP_PCI_MMIO_BAR: 486 if (resource->addr == NULL) { 487 PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n", 488 i); 489 return -EINVAL; 490 } 491 break; 492 493 case RTE_AVP_PCI_MSIX_BAR: 494 default: 495 /* no validation required */ 496 break; 497 } 498 } 499 500 return 0; 501 } 502 503 static int 504 avp_dev_detach(struct rte_eth_dev *eth_dev) 505 { 506 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 507 int ret; 508 509 PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n", 
510 eth_dev->data->port_id, avp->device_id); 511 512 rte_spinlock_lock(&avp->lock); 513 514 if (avp->flags & AVP_F_DETACHED) { 515 PMD_DRV_LOG(NOTICE, "port %u already detached\n", 516 eth_dev->data->port_id); 517 ret = 0; 518 goto unlock; 519 } 520 521 /* shutdown the device first so the host stops sending us packets. */ 522 ret = avp_dev_ctrl_shutdown(eth_dev); 523 if (ret < 0) { 524 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n", 525 ret); 526 avp->flags &= ~AVP_F_DETACHED; 527 goto unlock; 528 } 529 530 avp->flags |= AVP_F_DETACHED; 531 rte_wmb(); 532 533 /* wait for queues to acknowledge the presence of the detach flag */ 534 rte_delay_ms(1); 535 536 ret = 0; 537 538 unlock: 539 rte_spinlock_unlock(&avp->lock); 540 return ret; 541 } 542 543 static void 544 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) 545 { 546 struct avp_dev *avp = 547 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 548 struct avp_queue *rxq; 549 uint16_t queue_count; 550 uint16_t remainder; 551 552 rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id]; 553 554 /* 555 * Must map all AVP fifos as evenly as possible between the configured 556 * device queues. Each device queue will service a subset of the AVP 557 * fifos. If there is an odd number of device queues the first set of 558 * device queues will get the extra AVP fifos. 
559 */ 560 queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues; 561 remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues; 562 if (rx_queue_id < remainder) { 563 /* these queues must service one extra FIFO */ 564 rxq->queue_base = rx_queue_id * (queue_count + 1); 565 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1; 566 } else { 567 /* these queues service the regular number of FIFO */ 568 rxq->queue_base = ((remainder * (queue_count + 1)) + 569 ((rx_queue_id - remainder) * queue_count)); 570 rxq->queue_limit = rxq->queue_base + queue_count - 1; 571 } 572 573 PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n", 574 rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit); 575 576 rxq->queue_id = rxq->queue_base; 577 } 578 579 static void 580 _avp_set_queue_counts(struct rte_eth_dev *eth_dev) 581 { 582 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 583 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 584 struct rte_avp_device_info *host_info; 585 void *addr; 586 587 addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; 588 host_info = (struct rte_avp_device_info *)addr; 589 590 /* 591 * the transmit direction is not negotiated beyond respecting the max 592 * number of queues because the host can handle arbitrary guest tx 593 * queues (host rx queues). 594 */ 595 avp->num_tx_queues = eth_dev->data->nb_tx_queues; 596 597 /* 598 * the receive direction is more restrictive. The host requires a 599 * minimum number of guest rx queues (host tx queues) therefore 600 * negotiate a value that is at least as large as the host minimum 601 * requirement. If the host and guest values are not identical then a 602 * mapping will be established in the receive_queue_setup function. 
603 */ 604 avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues, 605 eth_dev->data->nb_rx_queues); 606 607 PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n", 608 avp->num_tx_queues, avp->num_rx_queues); 609 } 610 611 static int 612 avp_dev_attach(struct rte_eth_dev *eth_dev) 613 { 614 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 615 struct rte_avp_device_config config; 616 unsigned int i; 617 int ret; 618 619 PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n", 620 eth_dev->data->port_id, avp->device_id); 621 622 rte_spinlock_lock(&avp->lock); 623 624 if (!(avp->flags & AVP_F_DETACHED)) { 625 PMD_DRV_LOG(NOTICE, "port %u already attached\n", 626 eth_dev->data->port_id); 627 ret = 0; 628 goto unlock; 629 } 630 631 /* 632 * make sure that the detached flag is set prior to reconfiguring the 633 * queues. 634 */ 635 avp->flags |= AVP_F_DETACHED; 636 rte_wmb(); 637 638 /* 639 * re-run the device create utility which will parse the new host info 640 * and setup the AVP device queue pointers. 641 */ 642 ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev); 643 if (ret < 0) { 644 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n", 645 ret); 646 goto unlock; 647 } 648 649 if (avp->flags & AVP_F_CONFIGURED) { 650 /* 651 * Update the receive queue mapping to handle cases where the 652 * source and destination hosts have different queue 653 * requirements. As long as the DETACHED flag is asserted the 654 * queue table should not be referenced so it should be safe to 655 * update it. 656 */ 657 _avp_set_queue_counts(eth_dev); 658 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) 659 _avp_set_rx_queue_mappings(eth_dev, i); 660 661 /* 662 * Update the host with our config details so that it knows the 663 * device is active. 
664 */ 665 memset(&config, 0, sizeof(config)); 666 config.device_id = avp->device_id; 667 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; 668 config.driver_version = AVP_DPDK_DRIVER_VERSION; 669 config.features = avp->features; 670 config.num_tx_queues = avp->num_tx_queues; 671 config.num_rx_queues = avp->num_rx_queues; 672 config.if_up = !!(avp->flags & AVP_F_LINKUP); 673 674 ret = avp_dev_ctrl_set_config(eth_dev, &config); 675 if (ret < 0) { 676 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", 677 ret); 678 goto unlock; 679 } 680 } 681 682 rte_wmb(); 683 avp->flags &= ~AVP_F_DETACHED; 684 685 ret = 0; 686 687 unlock: 688 rte_spinlock_unlock(&avp->lock); 689 return ret; 690 } 691 692 static void 693 avp_dev_interrupt_handler(void *data) 694 { 695 struct rte_eth_dev *eth_dev = data; 696 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 697 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 698 uint32_t status, value; 699 int ret; 700 701 if (registers == NULL) 702 rte_panic("no mapped MMIO register space\n"); 703 704 /* read the interrupt status register 705 * note: this register clears on read so all raised interrupts must be 706 * handled or remembered for later processing 707 */ 708 status = AVP_READ32( 709 RTE_PTR_ADD(registers, 710 RTE_AVP_INTERRUPT_STATUS_OFFSET)); 711 712 if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) { 713 /* handle interrupt based on current status */ 714 value = AVP_READ32( 715 RTE_PTR_ADD(registers, 716 RTE_AVP_MIGRATION_STATUS_OFFSET)); 717 switch (value) { 718 case RTE_AVP_MIGRATION_DETACHED: 719 ret = avp_dev_detach(eth_dev); 720 break; 721 case RTE_AVP_MIGRATION_ATTACHED: 722 ret = avp_dev_attach(eth_dev); 723 break; 724 default: 725 PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n", 726 value); 727 ret = -EINVAL; 728 } 729 730 /* acknowledge the request by writing out our current status */ 731 value = (ret == 0 ? 
value : RTE_AVP_MIGRATION_ERROR); 732 AVP_WRITE32(value, 733 RTE_PTR_ADD(registers, 734 RTE_AVP_MIGRATION_ACK_OFFSET)); 735 736 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n"); 737 } 738 739 if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK) 740 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n", 741 status); 742 743 /* re-enable UIO interrupt handling */ 744 ret = rte_intr_enable(&pci_dev->intr_handle); 745 if (ret < 0) { 746 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", 747 ret); 748 /* continue */ 749 } 750 } 751 752 static int 753 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev) 754 { 755 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 756 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 757 int ret; 758 759 if (registers == NULL) 760 return -EINVAL; 761 762 /* enable UIO interrupt handling */ 763 ret = rte_intr_enable(&pci_dev->intr_handle); 764 if (ret < 0) { 765 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n", 766 ret); 767 return ret; 768 } 769 770 /* inform the device that all interrupts are enabled */ 771 AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK, 772 RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); 773 774 return 0; 775 } 776 777 static int 778 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev) 779 { 780 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 781 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 782 int ret; 783 784 if (registers == NULL) 785 return 0; 786 787 /* inform the device that all interrupts are disabled */ 788 AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK, 789 RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); 790 791 /* enable UIO interrupt handling */ 792 ret = rte_intr_disable(&pci_dev->intr_handle); 793 if (ret < 0) { 794 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n", 795 ret); 796 return ret; 797 } 798 799 return 0; 800 } 801 802 static int 803 avp_dev_setup_interrupts(struct 
rte_eth_dev *eth_dev) 804 { 805 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 806 int ret; 807 808 /* register a callback handler with UIO for interrupt notifications */ 809 ret = rte_intr_callback_register(&pci_dev->intr_handle, 810 avp_dev_interrupt_handler, 811 (void *)eth_dev); 812 if (ret < 0) { 813 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n", 814 ret); 815 return ret; 816 } 817 818 /* enable interrupt processing */ 819 return avp_dev_enable_interrupts(eth_dev); 820 } 821 822 static int 823 avp_dev_migration_pending(struct rte_eth_dev *eth_dev) 824 { 825 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 826 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 827 uint32_t value; 828 829 if (registers == NULL) 830 return 0; 831 832 value = AVP_READ32(RTE_PTR_ADD(registers, 833 RTE_AVP_MIGRATION_STATUS_OFFSET)); 834 if (value == RTE_AVP_MIGRATION_DETACHED) { 835 /* migration is in progress; ack it if we have not already */ 836 AVP_WRITE32(value, 837 RTE_PTR_ADD(registers, 838 RTE_AVP_MIGRATION_ACK_OFFSET)); 839 return 1; 840 } 841 return 0; 842 } 843 844 /* 845 * create a AVP device using the supplied device info by first translating it 846 * to guest address space(s). 
847 */ 848 static int 849 avp_dev_create(struct rte_pci_device *pci_dev, 850 struct rte_eth_dev *eth_dev) 851 { 852 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 853 struct rte_avp_device_info *host_info; 854 struct rte_mem_resource *resource; 855 unsigned int i; 856 857 resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR]; 858 if (resource->addr == NULL) { 859 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n", 860 RTE_AVP_PCI_DEVICE_BAR); 861 return -EFAULT; 862 } 863 host_info = (struct rte_avp_device_info *)resource->addr; 864 865 if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) || 866 avp_dev_version_check(host_info->version)) { 867 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n", 868 host_info->magic, host_info->version, 869 AVP_DPDK_DRIVER_VERSION); 870 return -EINVAL; 871 } 872 873 PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n", 874 RTE_AVP_GET_RELEASE_VERSION(host_info->version), 875 RTE_AVP_GET_MAJOR_VERSION(host_info->version), 876 RTE_AVP_GET_MINOR_VERSION(host_info->version)); 877 878 PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n", 879 host_info->min_tx_queues, host_info->max_tx_queues); 880 PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n", 881 host_info->min_rx_queues, host_info->max_rx_queues); 882 PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n", 883 host_info->features); 884 885 if (avp->magic != AVP_ETHDEV_MAGIC) { 886 /* 887 * First time initialization (i.e., not during a VM 888 * migration) 889 */ 890 memset(avp, 0, sizeof(*avp)); 891 avp->magic = AVP_ETHDEV_MAGIC; 892 avp->dev_data = eth_dev->data; 893 avp->port_id = eth_dev->data->port_id; 894 avp->host_mbuf_size = host_info->mbuf_size; 895 avp->host_features = host_info->features; 896 rte_spinlock_init(&avp->lock); 897 memcpy(&avp->ethaddr.addr_bytes[0], 898 host_info->ethaddr, ETHER_ADDR_LEN); 899 /* adjust max values to not exceed our max */ 900 avp->max_tx_queues = 901 
RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES); 902 avp->max_rx_queues = 903 RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES); 904 } else { 905 /* Re-attaching during migration */ 906 907 /* TODO... requires validation of host values */ 908 if ((host_info->features & avp->features) != avp->features) { 909 PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n", 910 avp->features, host_info->features); 911 /* this should not be possible; continue for now */ 912 } 913 } 914 915 /* the device id is allowed to change over migrations */ 916 avp->device_id = host_info->device_id; 917 918 /* translate incoming host addresses to guest address space */ 919 PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n", 920 host_info->tx_phys); 921 PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n", 922 host_info->alloc_phys); 923 for (i = 0; i < avp->max_tx_queues; i++) { 924 avp->tx_q[i] = avp_dev_translate_address(eth_dev, 925 host_info->tx_phys + (i * host_info->tx_size)); 926 927 avp->alloc_q[i] = avp_dev_translate_address(eth_dev, 928 host_info->alloc_phys + (i * host_info->alloc_size)); 929 } 930 931 PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n", 932 host_info->rx_phys); 933 PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n", 934 host_info->free_phys); 935 for (i = 0; i < avp->max_rx_queues; i++) { 936 avp->rx_q[i] = avp_dev_translate_address(eth_dev, 937 host_info->rx_phys + (i * host_info->rx_size)); 938 avp->free_q[i] = avp_dev_translate_address(eth_dev, 939 host_info->free_phys + (i * host_info->free_size)); 940 } 941 942 PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n", 943 host_info->req_phys); 944 PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n", 945 host_info->resp_phys); 946 PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n", 947 host_info->sync_phys); 948 PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n", 949 host_info->mbuf_phys); 
950 avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys); 951 avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys); 952 avp->sync_addr = 953 avp_dev_translate_address(eth_dev, host_info->sync_phys); 954 avp->mbuf_addr = 955 avp_dev_translate_address(eth_dev, host_info->mbuf_phys); 956 957 /* 958 * store the host mbuf virtual address so that we can calculate 959 * relative offsets for each mbuf as they are processed 960 */ 961 avp->host_mbuf_addr = host_info->mbuf_va; 962 avp->host_sync_addr = host_info->sync_va; 963 964 /* 965 * store the maximum packet length that is supported by the host. 966 */ 967 avp->max_rx_pkt_len = host_info->max_rx_pkt_len; 968 PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n", 969 host_info->max_rx_pkt_len); 970 971 return 0; 972 } 973 974 /* 975 * This function is based on probe() function in avp_pci.c 976 * It returns 0 on success. 977 */ 978 static int 979 eth_avp_dev_init(struct rte_eth_dev *eth_dev) 980 { 981 struct avp_dev *avp = 982 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 983 struct rte_pci_device *pci_dev; 984 int ret; 985 986 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 987 eth_dev->dev_ops = &avp_eth_dev_ops; 988 eth_dev->rx_pkt_burst = &avp_recv_pkts; 989 eth_dev->tx_pkt_burst = &avp_xmit_pkts; 990 991 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 992 /* 993 * no setup required on secondary processes. All data is saved 994 * in dev_private by the primary process. All resource should 995 * be mapped to the same virtual address so all pointers should 996 * be valid. 
997 */ 998 if (eth_dev->data->scattered_rx) { 999 PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); 1000 eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; 1001 eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; 1002 } 1003 return 0; 1004 } 1005 1006 rte_eth_copy_pci_info(eth_dev, pci_dev); 1007 1008 /* Check current migration status */ 1009 if (avp_dev_migration_pending(eth_dev)) { 1010 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n"); 1011 return -EBUSY; 1012 } 1013 1014 /* Check BAR resources */ 1015 ret = avp_dev_check_regions(eth_dev); 1016 if (ret < 0) { 1017 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n", 1018 ret); 1019 return ret; 1020 } 1021 1022 /* Enable interrupts */ 1023 ret = avp_dev_setup_interrupts(eth_dev); 1024 if (ret < 0) { 1025 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret); 1026 return ret; 1027 } 1028 1029 /* Handle each subtype */ 1030 ret = avp_dev_create(pci_dev, eth_dev); 1031 if (ret < 0) { 1032 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret); 1033 return ret; 1034 } 1035 1036 /* Allocate memory for storing MAC addresses */ 1037 eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0); 1038 if (eth_dev->data->mac_addrs == NULL) { 1039 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n", 1040 ETHER_ADDR_LEN); 1041 return -ENOMEM; 1042 } 1043 1044 /* Get a mac from device config */ 1045 ether_addr_copy(&avp->ethaddr, ð_dev->data->mac_addrs[0]); 1046 1047 return 0; 1048 } 1049 1050 static int 1051 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev) 1052 { 1053 int ret; 1054 1055 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1056 return -EPERM; 1057 1058 if (eth_dev->data == NULL) 1059 return 0; 1060 1061 ret = avp_dev_disable_interrupts(eth_dev); 1062 if (ret != 0) { 1063 PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret); 1064 return ret; 1065 } 1066 1067 if (eth_dev->data->mac_addrs != NULL) { 1068 
rte_free(eth_dev->data->mac_addrs); 1069 eth_dev->data->mac_addrs = NULL; 1070 } 1071 1072 return 0; 1073 } 1074 1075 static int 1076 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 1077 struct rte_pci_device *pci_dev) 1078 { 1079 struct rte_eth_dev *eth_dev; 1080 int ret; 1081 1082 eth_dev = rte_eth_dev_pci_allocate(pci_dev, 1083 sizeof(struct avp_adapter)); 1084 if (eth_dev == NULL) 1085 return -ENOMEM; 1086 1087 ret = eth_avp_dev_init(eth_dev); 1088 if (ret) 1089 rte_eth_dev_pci_release(eth_dev); 1090 1091 return ret; 1092 } 1093 1094 static int 1095 eth_avp_pci_remove(struct rte_pci_device *pci_dev) 1096 { 1097 return rte_eth_dev_pci_generic_remove(pci_dev, 1098 eth_avp_dev_uninit); 1099 } 1100 1101 static struct rte_pci_driver rte_avp_pmd = { 1102 .id_table = pci_id_avp_map, 1103 .drv_flags = RTE_PCI_DRV_NEED_MAPPING, 1104 .probe = eth_avp_pci_probe, 1105 .remove = eth_avp_pci_remove, 1106 }; 1107 1108 static int 1109 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev, 1110 struct avp_dev *avp) 1111 { 1112 unsigned int max_rx_pkt_len; 1113 1114 max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len; 1115 1116 if ((max_rx_pkt_len > avp->guest_mbuf_size) || 1117 (max_rx_pkt_len > avp->host_mbuf_size)) { 1118 /* 1119 * If the guest MTU is greater than either the host or guest 1120 * buffers then chained mbufs have to be enabled in the TX 1121 * direction. It is assumed that the application will not need 1122 * to send packets larger than their max_rx_pkt_len (MRU). 1123 */ 1124 return 1; 1125 } 1126 1127 if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) || 1128 (avp->max_rx_pkt_len > avp->host_mbuf_size)) { 1129 /* 1130 * If the host MRU is greater than its own mbuf size or the 1131 * guest mbuf size then chained mbufs have to be enabled in the 1132 * RX direction. 
1133 */ 1134 return 1; 1135 } 1136 1137 return 0; 1138 } 1139 1140 static int 1141 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, 1142 uint16_t rx_queue_id, 1143 uint16_t nb_rx_desc, 1144 unsigned int socket_id, 1145 const struct rte_eth_rxconf *rx_conf, 1146 struct rte_mempool *pool) 1147 { 1148 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 1149 struct rte_pktmbuf_pool_private *mbp_priv; 1150 struct avp_queue *rxq; 1151 1152 if (rx_queue_id >= eth_dev->data->nb_rx_queues) { 1153 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n", 1154 rx_queue_id, eth_dev->data->nb_rx_queues); 1155 return -EINVAL; 1156 } 1157 1158 /* Save mbuf pool pointer */ 1159 avp->pool = pool; 1160 1161 /* Save the local mbuf size */ 1162 mbp_priv = rte_mempool_get_priv(pool); 1163 avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size); 1164 avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM; 1165 1166 if (avp_dev_enable_scattered(eth_dev, avp)) { 1167 if (!eth_dev->data->scattered_rx) { 1168 PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); 1169 eth_dev->data->scattered_rx = 1; 1170 eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; 1171 eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; 1172 } 1173 } 1174 1175 PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n", 1176 avp->max_rx_pkt_len, 1177 eth_dev->data->dev_conf.rxmode.max_rx_pkt_len, 1178 avp->host_mbuf_size, 1179 avp->guest_mbuf_size); 1180 1181 /* allocate a queue object */ 1182 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue), 1183 RTE_CACHE_LINE_SIZE, socket_id); 1184 if (rxq == NULL) { 1185 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n"); 1186 return -ENOMEM; 1187 } 1188 1189 /* save back pointers to AVP and Ethernet devices */ 1190 rxq->avp = avp; 1191 rxq->dev_data = eth_dev->data; 1192 eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq; 1193 1194 /* setup the queue receive mapping for the current 
queue. */ 1195 _avp_set_rx_queue_mappings(eth_dev, rx_queue_id); 1196 1197 PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq); 1198 1199 (void)nb_rx_desc; 1200 (void)rx_conf; 1201 return 0; 1202 } 1203 1204 static int 1205 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev, 1206 uint16_t tx_queue_id, 1207 uint16_t nb_tx_desc, 1208 unsigned int socket_id, 1209 const struct rte_eth_txconf *tx_conf) 1210 { 1211 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 1212 struct avp_queue *txq; 1213 1214 if (tx_queue_id >= eth_dev->data->nb_tx_queues) { 1215 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n", 1216 tx_queue_id, eth_dev->data->nb_tx_queues); 1217 return -EINVAL; 1218 } 1219 1220 /* allocate a queue object */ 1221 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue), 1222 RTE_CACHE_LINE_SIZE, socket_id); 1223 if (txq == NULL) { 1224 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n"); 1225 return -ENOMEM; 1226 } 1227 1228 /* only the configured set of transmit queues are used */ 1229 txq->queue_id = tx_queue_id; 1230 txq->queue_base = tx_queue_id; 1231 txq->queue_limit = tx_queue_id; 1232 1233 /* save back pointers to AVP and Ethernet devices */ 1234 txq->avp = avp; 1235 txq->dev_data = eth_dev->data; 1236 eth_dev->data->tx_queues[tx_queue_id] = (void *)txq; 1237 1238 PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq); 1239 1240 (void)nb_tx_desc; 1241 (void)tx_conf; 1242 return 0; 1243 } 1244 1245 static inline int 1246 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b) 1247 { 1248 uint16_t *_a = (uint16_t *)&a->addr_bytes[0]; 1249 uint16_t *_b = (uint16_t *)&b->addr_bytes[0]; 1250 return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]); 1251 } 1252 1253 static inline int 1254 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m) 1255 { 1256 struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *); 1257 1258 if 
(likely(_avp_cmp_ether_addr(&avp->ethaddr, ð->d_addr) == 0)) { 1259 /* allow all packets destined to our address */ 1260 return 0; 1261 } 1262 1263 if (likely(is_broadcast_ether_addr(ð->d_addr))) { 1264 /* allow all broadcast packets */ 1265 return 0; 1266 } 1267 1268 if (likely(is_multicast_ether_addr(ð->d_addr))) { 1269 /* allow all multicast packets */ 1270 return 0; 1271 } 1272 1273 if (avp->flags & AVP_F_PROMISC) { 1274 /* allow all packets when in promiscuous mode */ 1275 return 0; 1276 } 1277 1278 return -1; 1279 } 1280 1281 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS 1282 static inline void 1283 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf) 1284 { 1285 struct rte_avp_desc *first_buf; 1286 struct rte_avp_desc *pkt_buf; 1287 unsigned int pkt_len; 1288 unsigned int nb_segs; 1289 void *pkt_data; 1290 unsigned int i; 1291 1292 first_buf = avp_dev_translate_buffer(avp, buf); 1293 1294 i = 0; 1295 pkt_len = 0; 1296 nb_segs = first_buf->nb_segs; 1297 do { 1298 /* Adjust pointers for guest addressing */ 1299 pkt_buf = avp_dev_translate_buffer(avp, buf); 1300 if (pkt_buf == NULL) 1301 rte_panic("bad buffer: segment %u has an invalid address %p\n", 1302 i, buf); 1303 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1304 if (pkt_data == NULL) 1305 rte_panic("bad buffer: segment %u has a NULL data pointer\n", 1306 i); 1307 if (pkt_buf->data_len == 0) 1308 rte_panic("bad buffer: segment %u has 0 data length\n", 1309 i); 1310 pkt_len += pkt_buf->data_len; 1311 nb_segs--; 1312 i++; 1313 1314 } while (nb_segs && (buf = pkt_buf->next) != NULL); 1315 1316 if (nb_segs != 0) 1317 rte_panic("bad buffer: expected %u segments found %u\n", 1318 first_buf->nb_segs, (first_buf->nb_segs - nb_segs)); 1319 if (pkt_len != first_buf->pkt_len) 1320 rte_panic("bad buffer: expected length %u found %u\n", 1321 first_buf->pkt_len, pkt_len); 1322 } 1323 1324 #define avp_dev_buffer_sanity_check(a, b) \ 1325 __avp_dev_buffer_sanity_check((a), (b)) 1326 1327 #else /* 
RTE_LIBRTE_AVP_DEBUG_BUFFERS */ 1328 1329 #define avp_dev_buffer_sanity_check(a, b) do {} while (0) 1330 1331 #endif 1332 1333 /* 1334 * Copy a host buffer chain to a set of mbufs. This function assumes that 1335 * there exactly the required number of mbufs to copy all source bytes. 1336 */ 1337 static inline struct rte_mbuf * 1338 avp_dev_copy_from_buffers(struct avp_dev *avp, 1339 struct rte_avp_desc *buf, 1340 struct rte_mbuf **mbufs, 1341 unsigned int count) 1342 { 1343 struct rte_mbuf *m_previous = NULL; 1344 struct rte_avp_desc *pkt_buf; 1345 unsigned int total_length = 0; 1346 unsigned int copy_length; 1347 unsigned int src_offset; 1348 struct rte_mbuf *m; 1349 uint16_t ol_flags; 1350 uint16_t vlan_tci; 1351 void *pkt_data; 1352 unsigned int i; 1353 1354 avp_dev_buffer_sanity_check(avp, buf); 1355 1356 /* setup the first source buffer */ 1357 pkt_buf = avp_dev_translate_buffer(avp, buf); 1358 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1359 total_length = pkt_buf->pkt_len; 1360 src_offset = 0; 1361 1362 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) { 1363 ol_flags = PKT_RX_VLAN; 1364 vlan_tci = pkt_buf->vlan_tci; 1365 } else { 1366 ol_flags = 0; 1367 vlan_tci = 0; 1368 } 1369 1370 for (i = 0; (i < count) && (buf != NULL); i++) { 1371 /* fill each destination buffer */ 1372 m = mbufs[i]; 1373 1374 if (m_previous != NULL) 1375 m_previous->next = m; 1376 1377 m_previous = m; 1378 1379 do { 1380 /* 1381 * Copy as many source buffers as will fit in the 1382 * destination buffer. 
1383 */ 1384 copy_length = RTE_MIN((avp->guest_mbuf_size - 1385 rte_pktmbuf_data_len(m)), 1386 (pkt_buf->data_len - 1387 src_offset)); 1388 rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *), 1389 rte_pktmbuf_data_len(m)), 1390 RTE_PTR_ADD(pkt_data, src_offset), 1391 copy_length); 1392 rte_pktmbuf_data_len(m) += copy_length; 1393 src_offset += copy_length; 1394 1395 if (likely(src_offset == pkt_buf->data_len)) { 1396 /* need a new source buffer */ 1397 buf = pkt_buf->next; 1398 if (buf != NULL) { 1399 pkt_buf = avp_dev_translate_buffer( 1400 avp, buf); 1401 pkt_data = avp_dev_translate_buffer( 1402 avp, pkt_buf->data); 1403 src_offset = 0; 1404 } 1405 } 1406 1407 if (unlikely(rte_pktmbuf_data_len(m) == 1408 avp->guest_mbuf_size)) { 1409 /* need a new destination mbuf */ 1410 break; 1411 } 1412 1413 } while (buf != NULL); 1414 } 1415 1416 m = mbufs[0]; 1417 m->ol_flags = ol_flags; 1418 m->nb_segs = count; 1419 rte_pktmbuf_pkt_len(m) = total_length; 1420 m->vlan_tci = vlan_tci; 1421 1422 __rte_mbuf_sanity_check(m, 1); 1423 1424 return m; 1425 } 1426 1427 static uint16_t 1428 avp_recv_scattered_pkts(void *rx_queue, 1429 struct rte_mbuf **rx_pkts, 1430 uint16_t nb_pkts) 1431 { 1432 struct avp_queue *rxq = (struct avp_queue *)rx_queue; 1433 struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST]; 1434 struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS]; 1435 struct avp_dev *avp = rxq->avp; 1436 struct rte_avp_desc *pkt_buf; 1437 struct rte_avp_fifo *free_q; 1438 struct rte_avp_fifo *rx_q; 1439 struct rte_avp_desc *buf; 1440 unsigned int count, avail, n; 1441 unsigned int guest_mbuf_size; 1442 struct rte_mbuf *m; 1443 unsigned int required; 1444 unsigned int buf_len; 1445 unsigned int port_id; 1446 unsigned int i; 1447 1448 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1449 /* VM live migration in progress */ 1450 return 0; 1451 } 1452 1453 guest_mbuf_size = avp->guest_mbuf_size; 1454 port_id = avp->port_id; 1455 rx_q = avp->rx_q[rxq->queue_id]; 1456 free_q = 
avp->free_q[rxq->queue_id]; 1457 1458 /* setup next queue to service */ 1459 rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? 1460 (rxq->queue_id + 1) : rxq->queue_base; 1461 1462 /* determine how many slots are available in the free queue */ 1463 count = avp_fifo_free_count(free_q); 1464 1465 /* determine how many packets are available in the rx queue */ 1466 avail = avp_fifo_count(rx_q); 1467 1468 /* determine how many packets can be received */ 1469 count = RTE_MIN(count, avail); 1470 count = RTE_MIN(count, nb_pkts); 1471 count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); 1472 1473 if (unlikely(count == 0)) { 1474 /* no free buffers, or no buffers on the rx queue */ 1475 return 0; 1476 } 1477 1478 /* retrieve pending packets */ 1479 n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); 1480 PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", 1481 count, rx_q); 1482 1483 count = 0; 1484 for (i = 0; i < n; i++) { 1485 /* prefetch next entry while processing current one */ 1486 if (i + 1 < n) { 1487 pkt_buf = avp_dev_translate_buffer(avp, 1488 avp_bufs[i + 1]); 1489 rte_prefetch0(pkt_buf); 1490 } 1491 buf = avp_bufs[i]; 1492 1493 /* Peek into the first buffer to determine the total length */ 1494 pkt_buf = avp_dev_translate_buffer(avp, buf); 1495 buf_len = pkt_buf->pkt_len; 1496 1497 /* Allocate enough mbufs to receive the entire packet */ 1498 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size; 1499 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) { 1500 rxq->dev_data->rx_mbuf_alloc_failed++; 1501 continue; 1502 } 1503 1504 /* Copy the data from the buffers to our mbufs */ 1505 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required); 1506 1507 /* finalize mbuf */ 1508 m->port = port_id; 1509 1510 if (_avp_mac_filter(avp, m) != 0) { 1511 /* silently discard packets not destined to our MAC */ 1512 rte_pktmbuf_free(m); 1513 continue; 1514 } 1515 1516 /* return new mbuf to caller */ 1517 rx_pkts[count++] = m; 1518 rxq->bytes += 
buf_len; 1519 } 1520 1521 rxq->packets += count; 1522 1523 /* return the buffers to the free queue */ 1524 avp_fifo_put(free_q, (void **)&avp_bufs[0], n); 1525 1526 return count; 1527 } 1528 1529 1530 static uint16_t 1531 avp_recv_pkts(void *rx_queue, 1532 struct rte_mbuf **rx_pkts, 1533 uint16_t nb_pkts) 1534 { 1535 struct avp_queue *rxq = (struct avp_queue *)rx_queue; 1536 struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST]; 1537 struct avp_dev *avp = rxq->avp; 1538 struct rte_avp_desc *pkt_buf; 1539 struct rte_avp_fifo *free_q; 1540 struct rte_avp_fifo *rx_q; 1541 unsigned int count, avail, n; 1542 unsigned int pkt_len; 1543 struct rte_mbuf *m; 1544 char *pkt_data; 1545 unsigned int i; 1546 1547 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1548 /* VM live migration in progress */ 1549 return 0; 1550 } 1551 1552 rx_q = avp->rx_q[rxq->queue_id]; 1553 free_q = avp->free_q[rxq->queue_id]; 1554 1555 /* setup next queue to service */ 1556 rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? 1557 (rxq->queue_id + 1) : rxq->queue_base; 1558 1559 /* determine how many slots are available in the free queue */ 1560 count = avp_fifo_free_count(free_q); 1561 1562 /* determine how many packets are available in the rx queue */ 1563 avail = avp_fifo_count(rx_q); 1564 1565 /* determine how many packets can be received */ 1566 count = RTE_MIN(count, avail); 1567 count = RTE_MIN(count, nb_pkts); 1568 count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); 1569 1570 if (unlikely(count == 0)) { 1571 /* no free buffers, or no buffers on the rx queue */ 1572 return 0; 1573 } 1574 1575 /* retrieve pending packets */ 1576 n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); 1577 PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", 1578 count, rx_q); 1579 1580 count = 0; 1581 for (i = 0; i < n; i++) { 1582 /* prefetch next entry while processing current one */ 1583 if (i < n - 1) { 1584 pkt_buf = avp_dev_translate_buffer(avp, 1585 avp_bufs[i + 1]); 1586 rte_prefetch0(pkt_buf); 1587 
} 1588 1589 /* Adjust host pointers for guest addressing */ 1590 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]); 1591 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1592 pkt_len = pkt_buf->pkt_len; 1593 1594 if (unlikely((pkt_len > avp->guest_mbuf_size) || 1595 (pkt_buf->nb_segs > 1))) { 1596 /* 1597 * application should be using the scattered receive 1598 * function 1599 */ 1600 rxq->errors++; 1601 continue; 1602 } 1603 1604 /* process each packet to be transmitted */ 1605 m = rte_pktmbuf_alloc(avp->pool); 1606 if (unlikely(m == NULL)) { 1607 rxq->dev_data->rx_mbuf_alloc_failed++; 1608 continue; 1609 } 1610 1611 /* copy data out of the host buffer to our buffer */ 1612 m->data_off = RTE_PKTMBUF_HEADROOM; 1613 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len); 1614 1615 /* initialize the local mbuf */ 1616 rte_pktmbuf_data_len(m) = pkt_len; 1617 rte_pktmbuf_pkt_len(m) = pkt_len; 1618 m->port = avp->port_id; 1619 1620 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) { 1621 m->ol_flags = PKT_RX_VLAN; 1622 m->vlan_tci = pkt_buf->vlan_tci; 1623 } 1624 1625 if (_avp_mac_filter(avp, m) != 0) { 1626 /* silently discard packets not destined to our MAC */ 1627 rte_pktmbuf_free(m); 1628 continue; 1629 } 1630 1631 /* return new mbuf to caller */ 1632 rx_pkts[count++] = m; 1633 rxq->bytes += pkt_len; 1634 } 1635 1636 rxq->packets += count; 1637 1638 /* return the buffers to the free queue */ 1639 avp_fifo_put(free_q, (void **)&avp_bufs[0], n); 1640 1641 return count; 1642 } 1643 1644 /* 1645 * Copy a chained mbuf to a set of host buffers. This function assumes that 1646 * there are sufficient destination buffers to contain the entire source 1647 * packet. 
/*
 * Copy a chained mbuf to a set of host buffers.  This function assumes that
 * there are sufficient destination buffers to contain the entire source
 * packet.
 *
 * avp:     device whose host_mbuf_size bounds each destination buffer
 * mbuf:    head of the source mbuf chain
 * buffers: host-address descriptors to fill, in order
 * count:   number of entries in 'buffers'; written to the first
 *          descriptor's nb_segs
 *
 * Returns the packet length of 'mbuf' (note the value is held in a size_t
 * and returned through a uint16_t).
 *
 * NOTE(review): each destination's data_len is used as the starting fill
 * offset and is never reset here, so the descriptors presumably arrive with
 * data_len == 0 -- confirm against the host side.  first_buf is dereferenced
 * unconditionally after the loop, so callers must pass count >= 1 and a
 * non-NULL mbuf.
 */
static inline uint16_t
avp_dev_copy_to_buffers(struct avp_dev *avp,
			struct rte_mbuf *mbuf,
			struct rte_avp_desc **buffers,
			unsigned int count)
{
	struct rte_avp_desc *previous_buf = NULL;
	struct rte_avp_desc *first_buf = NULL;
	struct rte_avp_desc *pkt_buf;
	struct rte_avp_desc *buf;
	size_t total_length;
	struct rte_mbuf *m;
	size_t copy_length;
	size_t src_offset;
	char *pkt_data;
	unsigned int i;

	__rte_mbuf_sanity_check(mbuf, 1);

	m = mbuf;
	src_offset = 0;
	total_length = rte_pktmbuf_pkt_len(m);
	/* outer loop: one iteration per destination buffer */
	for (i = 0; (i < count) && (m != NULL); i++) {
		/* fill each destination buffer */
		buf = buffers[i];

		if (i < count - 1) {
			/* prefetch next entry while processing this one */
			pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
			rte_prefetch0(pkt_buf);
		}

		/* Adjust pointers for guest addressing */
		pkt_buf = avp_dev_translate_buffer(avp, buf);
		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);

		/*
		 * setup the buffer chain: the link stored in 'next' is the
		 * untranslated 'buf' value, while first_buf/previous_buf hold
		 * the translated pointers used for local access
		 */
		if (previous_buf != NULL)
			previous_buf->next = buf;
		else
			first_buf = pkt_buf;

		previous_buf = pkt_buf;

		/* inner loop: drain source segments into this destination */
		do {
			/*
			 * copy as many source mbuf segments as will fit in the
			 * destination buffer.
			 */
			copy_length = RTE_MIN((avp->host_mbuf_size -
					       pkt_buf->data_len),
					      (rte_pktmbuf_data_len(m) -
					       src_offset));
			rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
				   RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
					       src_offset),
				   copy_length);
			pkt_buf->data_len += copy_length;
			src_offset += copy_length;

			if (likely(src_offset == rte_pktmbuf_data_len(m))) {
				/* need a new source buffer */
				m = m->next;
				src_offset = 0;
			}

			if (unlikely(pkt_buf->data_len ==
				     avp->host_mbuf_size)) {
				/* need a new destination buffer */
				break;
			}

		} while (m != NULL);
	}

	/* packet-level metadata lives on the first descriptor only */
	first_buf->nb_segs = count;
	first_buf->pkt_len = total_length;

	/* carry the VLAN tag over when the mbuf requests VLAN on transmit */
	if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
		first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
		first_buf->vlan_tci = mbuf->vlan_tci;
	}

	/* expands to nothing unless RTE_LIBRTE_AVP_DEBUG_BUFFERS is defined */
	avp_dev_buffer_sanity_check(avp, buffers[0]);

	return total_length;
}
*/ 1762 txq->errors += nb_pkts; 1763 return 0; 1764 } 1765 1766 tx_q = avp->tx_q[txq->queue_id]; 1767 alloc_q = avp->alloc_q[txq->queue_id]; 1768 1769 /* limit the number of transmitted packets to the max burst size */ 1770 if (unlikely(nb_pkts > AVP_MAX_TX_BURST)) 1771 nb_pkts = AVP_MAX_TX_BURST; 1772 1773 /* determine how many buffers are available to copy into */ 1774 avail = avp_fifo_count(alloc_q); 1775 if (unlikely(avail > (AVP_MAX_TX_BURST * 1776 RTE_AVP_MAX_MBUF_SEGMENTS))) 1777 avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS; 1778 1779 /* determine how many slots are available in the transmit queue */ 1780 count = avp_fifo_free_count(tx_q); 1781 1782 /* determine how many packets can be sent */ 1783 nb_pkts = RTE_MIN(count, nb_pkts); 1784 1785 /* determine how many packets will fit in the available buffers */ 1786 count = 0; 1787 segments = 0; 1788 for (i = 0; i < nb_pkts; i++) { 1789 m = tx_pkts[i]; 1790 if (likely(i < (unsigned int)nb_pkts - 1)) { 1791 /* prefetch next entry while processing this one */ 1792 rte_prefetch0(tx_pkts[i + 1]); 1793 } 1794 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) / 1795 avp->host_mbuf_size; 1796 1797 if (unlikely((required == 0) || 1798 (required > RTE_AVP_MAX_MBUF_SEGMENTS))) 1799 break; 1800 else if (unlikely(required + segments > avail)) 1801 break; 1802 segments += required; 1803 count++; 1804 } 1805 nb_pkts = count; 1806 1807 if (unlikely(nb_pkts == 0)) { 1808 /* no available buffers, or no space on the tx queue */ 1809 txq->errors += orig_nb_pkts; 1810 return 0; 1811 } 1812 1813 PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n", 1814 nb_pkts, tx_q); 1815 1816 /* retrieve sufficient send buffers */ 1817 n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments); 1818 if (unlikely(n != segments)) { 1819 PMD_TX_LOG(DEBUG, "Failed to allocate buffers " 1820 "n=%u, segments=%u, orig=%u\n", 1821 n, segments, orig_nb_pkts); 1822 txq->errors += orig_nb_pkts; 1823 return 0; 1824 } 1825 1826 
tx_bytes = 0; 1827 count = 0; 1828 for (i = 0; i < nb_pkts; i++) { 1829 /* process each packet to be transmitted */ 1830 m = tx_pkts[i]; 1831 1832 /* determine how many buffers are required for this packet */ 1833 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) / 1834 avp->host_mbuf_size; 1835 1836 tx_bytes += avp_dev_copy_to_buffers(avp, m, 1837 &avp_bufs[count], required); 1838 tx_bufs[i] = avp_bufs[count]; 1839 count += required; 1840 1841 /* free the original mbuf */ 1842 rte_pktmbuf_free(m); 1843 } 1844 1845 txq->packets += nb_pkts; 1846 txq->bytes += tx_bytes; 1847 1848 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS 1849 for (i = 0; i < nb_pkts; i++) 1850 avp_dev_buffer_sanity_check(avp, tx_bufs[i]); 1851 #endif 1852 1853 /* send the packets */ 1854 n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts); 1855 if (unlikely(n != orig_nb_pkts)) 1856 txq->errors += (orig_nb_pkts - n); 1857 1858 return n; 1859 } 1860 1861 1862 static uint16_t 1863 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 1864 { 1865 struct avp_queue *txq = (struct avp_queue *)tx_queue; 1866 struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST]; 1867 struct avp_dev *avp = txq->avp; 1868 struct rte_avp_desc *pkt_buf; 1869 struct rte_avp_fifo *alloc_q; 1870 struct rte_avp_fifo *tx_q; 1871 unsigned int count, avail, n; 1872 struct rte_mbuf *m; 1873 unsigned int pkt_len; 1874 unsigned int tx_bytes; 1875 char *pkt_data; 1876 unsigned int i; 1877 1878 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1879 /* VM live migration in progress */ 1880 /* TODO ... buffer for X packets then drop?! 
*/ 1881 txq->errors++; 1882 return 0; 1883 } 1884 1885 tx_q = avp->tx_q[txq->queue_id]; 1886 alloc_q = avp->alloc_q[txq->queue_id]; 1887 1888 /* limit the number of transmitted packets to the max burst size */ 1889 if (unlikely(nb_pkts > AVP_MAX_TX_BURST)) 1890 nb_pkts = AVP_MAX_TX_BURST; 1891 1892 /* determine how many buffers are available to copy into */ 1893 avail = avp_fifo_count(alloc_q); 1894 1895 /* determine how many slots are available in the transmit queue */ 1896 count = avp_fifo_free_count(tx_q); 1897 1898 /* determine how many packets can be sent */ 1899 count = RTE_MIN(count, avail); 1900 count = RTE_MIN(count, nb_pkts); 1901 1902 if (unlikely(count == 0)) { 1903 /* no available buffers, or no space on the tx queue */ 1904 txq->errors += nb_pkts; 1905 return 0; 1906 } 1907 1908 PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n", 1909 count, tx_q); 1910 1911 /* retrieve sufficient send buffers */ 1912 n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count); 1913 if (unlikely(n != count)) { 1914 txq->errors++; 1915 return 0; 1916 } 1917 1918 tx_bytes = 0; 1919 for (i = 0; i < count; i++) { 1920 /* prefetch next entry while processing the current one */ 1921 if (i < count - 1) { 1922 pkt_buf = avp_dev_translate_buffer(avp, 1923 avp_bufs[i + 1]); 1924 rte_prefetch0(pkt_buf); 1925 } 1926 1927 /* process each packet to be transmitted */ 1928 m = tx_pkts[i]; 1929 1930 /* Adjust pointers for guest addressing */ 1931 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]); 1932 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1933 pkt_len = rte_pktmbuf_pkt_len(m); 1934 1935 if (unlikely((pkt_len > avp->guest_mbuf_size) || 1936 (pkt_len > avp->host_mbuf_size))) { 1937 /* 1938 * application should be using the scattered transmit 1939 * function; send it truncated to avoid the performance 1940 * hit of having to manage returning the already 1941 * allocated buffer to the free list. 
This should not 1942 * happen since the application should have set the 1943 * max_rx_pkt_len based on its MTU and it should be 1944 * policing its own packet sizes. 1945 */ 1946 txq->errors++; 1947 pkt_len = RTE_MIN(avp->guest_mbuf_size, 1948 avp->host_mbuf_size); 1949 } 1950 1951 /* copy data out of our mbuf and into the AVP buffer */ 1952 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len); 1953 pkt_buf->pkt_len = pkt_len; 1954 pkt_buf->data_len = pkt_len; 1955 pkt_buf->nb_segs = 1; 1956 pkt_buf->next = NULL; 1957 1958 if (m->ol_flags & PKT_TX_VLAN_PKT) { 1959 pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT; 1960 pkt_buf->vlan_tci = m->vlan_tci; 1961 } 1962 1963 tx_bytes += pkt_len; 1964 1965 /* free the original mbuf */ 1966 rte_pktmbuf_free(m); 1967 } 1968 1969 txq->packets += count; 1970 txq->bytes += tx_bytes; 1971 1972 /* send the packets */ 1973 n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count); 1974 1975 return n; 1976 } 1977 1978 static void 1979 avp_dev_rx_queue_release(void *rx_queue) 1980 { 1981 struct avp_queue *rxq = (struct avp_queue *)rx_queue; 1982 struct avp_dev *avp = rxq->avp; 1983 struct rte_eth_dev_data *data = avp->dev_data; 1984 unsigned int i; 1985 1986 for (i = 0; i < avp->num_rx_queues; i++) { 1987 if (data->rx_queues[i] == rxq) 1988 data->rx_queues[i] = NULL; 1989 } 1990 } 1991 1992 static void 1993 avp_dev_tx_queue_release(void *tx_queue) 1994 { 1995 struct avp_queue *txq = (struct avp_queue *)tx_queue; 1996 struct avp_dev *avp = txq->avp; 1997 struct rte_eth_dev_data *data = avp->dev_data; 1998 unsigned int i; 1999 2000 for (i = 0; i < avp->num_tx_queues; i++) { 2001 if (data->tx_queues[i] == txq) 2002 data->tx_queues[i] = NULL; 2003 } 2004 } 2005 2006 static int 2007 avp_dev_configure(struct rte_eth_dev *eth_dev) 2008 { 2009 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 2010 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2011 struct rte_avp_device_info *host_info; 2012 struct 
rte_avp_device_config config; 2013 int mask = 0; 2014 void *addr; 2015 int ret; 2016 2017 rte_spinlock_lock(&avp->lock); 2018 if (avp->flags & AVP_F_DETACHED) { 2019 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2020 ret = -ENOTSUP; 2021 goto unlock; 2022 } 2023 2024 addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; 2025 host_info = (struct rte_avp_device_info *)addr; 2026 2027 /* Setup required number of queues */ 2028 _avp_set_queue_counts(eth_dev); 2029 2030 mask = (ETH_VLAN_STRIP_MASK | 2031 ETH_VLAN_FILTER_MASK | 2032 ETH_VLAN_EXTEND_MASK); 2033 ret = avp_vlan_offload_set(eth_dev, mask); 2034 if (ret < 0) { 2035 PMD_DRV_LOG(ERR, "VLAN offload set failed by host, ret=%d\n", 2036 ret); 2037 goto unlock; 2038 } 2039 2040 /* update device config */ 2041 memset(&config, 0, sizeof(config)); 2042 config.device_id = host_info->device_id; 2043 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; 2044 config.driver_version = AVP_DPDK_DRIVER_VERSION; 2045 config.features = avp->features; 2046 config.num_tx_queues = avp->num_tx_queues; 2047 config.num_rx_queues = avp->num_rx_queues; 2048 2049 ret = avp_dev_ctrl_set_config(eth_dev, &config); 2050 if (ret < 0) { 2051 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", 2052 ret); 2053 goto unlock; 2054 } 2055 2056 avp->flags |= AVP_F_CONFIGURED; 2057 ret = 0; 2058 2059 unlock: 2060 rte_spinlock_unlock(&avp->lock); 2061 return ret; 2062 } 2063 2064 static int 2065 avp_dev_start(struct rte_eth_dev *eth_dev) 2066 { 2067 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2068 int ret; 2069 2070 rte_spinlock_lock(&avp->lock); 2071 if (avp->flags & AVP_F_DETACHED) { 2072 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2073 ret = -ENOTSUP; 2074 goto unlock; 2075 } 2076 2077 /* disable features that we do not support */ 2078 eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0; 2079 eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0; 2080 
eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0; 2081 eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0; 2082 2083 /* update link state */ 2084 ret = avp_dev_ctrl_set_link_state(eth_dev, 1); 2085 if (ret < 0) { 2086 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", 2087 ret); 2088 goto unlock; 2089 } 2090 2091 /* remember current link state */ 2092 avp->flags |= AVP_F_LINKUP; 2093 2094 ret = 0; 2095 2096 unlock: 2097 rte_spinlock_unlock(&avp->lock); 2098 return ret; 2099 } 2100 2101 static void 2102 avp_dev_stop(struct rte_eth_dev *eth_dev) 2103 { 2104 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2105 int ret; 2106 2107 rte_spinlock_lock(&avp->lock); 2108 if (avp->flags & AVP_F_DETACHED) { 2109 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2110 goto unlock; 2111 } 2112 2113 /* remember current link state */ 2114 avp->flags &= ~AVP_F_LINKUP; 2115 2116 /* update link state */ 2117 ret = avp_dev_ctrl_set_link_state(eth_dev, 0); 2118 if (ret < 0) { 2119 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", 2120 ret); 2121 } 2122 2123 unlock: 2124 rte_spinlock_unlock(&avp->lock); 2125 } 2126 2127 static void 2128 avp_dev_close(struct rte_eth_dev *eth_dev) 2129 { 2130 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2131 int ret; 2132 2133 rte_spinlock_lock(&avp->lock); 2134 if (avp->flags & AVP_F_DETACHED) { 2135 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2136 goto unlock; 2137 } 2138 2139 /* remember current link state */ 2140 avp->flags &= ~AVP_F_LINKUP; 2141 avp->flags &= ~AVP_F_CONFIGURED; 2142 2143 ret = avp_dev_disable_interrupts(eth_dev); 2144 if (ret < 0) { 2145 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n"); 2146 /* continue */ 2147 } 2148 2149 /* update device state */ 2150 ret = avp_dev_ctrl_shutdown(eth_dev); 2151 if (ret < 0) { 2152 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n", 2153 ret); 2154 /* continue */ 
2155 } 2156 2157 unlock: 2158 rte_spinlock_unlock(&avp->lock); 2159 } 2160 2161 static int 2162 avp_dev_link_update(struct rte_eth_dev *eth_dev, 2163 __rte_unused int wait_to_complete) 2164 { 2165 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2166 struct rte_eth_link *link = ð_dev->data->dev_link; 2167 2168 link->link_speed = ETH_SPEED_NUM_10G; 2169 link->link_duplex = ETH_LINK_FULL_DUPLEX; 2170 link->link_status = !!(avp->flags & AVP_F_LINKUP); 2171 2172 return -1; 2173 } 2174 2175 static void 2176 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev) 2177 { 2178 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2179 2180 rte_spinlock_lock(&avp->lock); 2181 if ((avp->flags & AVP_F_PROMISC) == 0) { 2182 avp->flags |= AVP_F_PROMISC; 2183 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n", 2184 eth_dev->data->port_id); 2185 } 2186 rte_spinlock_unlock(&avp->lock); 2187 } 2188 2189 static void 2190 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev) 2191 { 2192 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2193 2194 rte_spinlock_lock(&avp->lock); 2195 if ((avp->flags & AVP_F_PROMISC) != 0) { 2196 avp->flags &= ~AVP_F_PROMISC; 2197 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n", 2198 eth_dev->data->port_id); 2199 } 2200 rte_spinlock_unlock(&avp->lock); 2201 } 2202 2203 static void 2204 avp_dev_info_get(struct rte_eth_dev *eth_dev, 2205 struct rte_eth_dev_info *dev_info) 2206 { 2207 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2208 2209 dev_info->pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 2210 dev_info->max_rx_queues = avp->max_rx_queues; 2211 dev_info->max_tx_queues = avp->max_tx_queues; 2212 dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE; 2213 dev_info->max_rx_pktlen = avp->max_rx_pkt_len; 2214 dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS; 2215 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) { 2216 dev_info->rx_offload_capa = 
DEV_RX_OFFLOAD_VLAN_STRIP; 2217 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; 2218 } 2219 } 2220 2221 static int 2222 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask) 2223 { 2224 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2225 2226 if (mask & ETH_VLAN_STRIP_MASK) { 2227 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) { 2228 if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip) 2229 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD; 2230 else 2231 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD; 2232 } else { 2233 PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n"); 2234 } 2235 } 2236 2237 if (mask & ETH_VLAN_FILTER_MASK) { 2238 if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter) 2239 PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n"); 2240 } 2241 2242 if (mask & ETH_VLAN_EXTEND_MASK) { 2243 if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend) 2244 PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n"); 2245 } 2246 2247 return 0; 2248 } 2249 2250 static int 2251 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats) 2252 { 2253 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2254 unsigned int i; 2255 2256 for (i = 0; i < avp->num_rx_queues; i++) { 2257 struct avp_queue *rxq = avp->dev_data->rx_queues[i]; 2258 2259 if (rxq) { 2260 stats->ipackets += rxq->packets; 2261 stats->ibytes += rxq->bytes; 2262 stats->ierrors += rxq->errors; 2263 2264 stats->q_ipackets[i] += rxq->packets; 2265 stats->q_ibytes[i] += rxq->bytes; 2266 stats->q_errors[i] += rxq->errors; 2267 } 2268 } 2269 2270 for (i = 0; i < avp->num_tx_queues; i++) { 2271 struct avp_queue *txq = avp->dev_data->tx_queues[i]; 2272 2273 if (txq) { 2274 stats->opackets += txq->packets; 2275 stats->obytes += txq->bytes; 2276 stats->oerrors += txq->errors; 2277 2278 stats->q_opackets[i] += txq->packets; 2279 stats->q_obytes[i] += txq->bytes; 2280 stats->q_errors[i] += txq->errors; 2281 } 2282 } 2283 2284 return 0; 2285 
} 2286 2287 static void 2288 avp_dev_stats_reset(struct rte_eth_dev *eth_dev) 2289 { 2290 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2291 unsigned int i; 2292 2293 for (i = 0; i < avp->num_rx_queues; i++) { 2294 struct avp_queue *rxq = avp->dev_data->rx_queues[i]; 2295 2296 if (rxq) { 2297 rxq->bytes = 0; 2298 rxq->packets = 0; 2299 rxq->errors = 0; 2300 } 2301 } 2302 2303 for (i = 0; i < avp->num_tx_queues; i++) { 2304 struct avp_queue *txq = avp->dev_data->tx_queues[i]; 2305 2306 if (txq) { 2307 txq->bytes = 0; 2308 txq->packets = 0; 2309 txq->errors = 0; 2310 } 2311 } 2312 } 2313 2314 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd); 2315 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map); 2316 2317 RTE_INIT(avp_init_log); 2318 static void 2319 avp_init_log(void) 2320 { 2321 avp_logtype_driver = rte_log_register("pmd.avp.driver"); 2322 if (avp_logtype_driver >= 0) 2323 rte_log_set_level(avp_logtype_driver, RTE_LOG_NOTICE); 2324 } 2325