1 /* 2 * BSD LICENSE 3 * 4 * Copyright (c) 2013-2017, Wind River Systems, Inc. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * 1) Redistributions of source code must retain the above copyright notice, 10 * this list of conditions and the following disclaimer. 11 * 12 * 2) Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 16 * 3) Neither the name of Wind River Systems nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <stdint.h> 34 #include <string.h> 35 #include <stdio.h> 36 #include <errno.h> 37 #include <unistd.h> 38 39 #include <rte_ethdev.h> 40 #include <rte_ethdev_pci.h> 41 #include <rte_memcpy.h> 42 #include <rte_string_fns.h> 43 #include <rte_memzone.h> 44 #include <rte_malloc.h> 45 #include <rte_atomic.h> 46 #include <rte_branch_prediction.h> 47 #include <rte_pci.h> 48 #include <rte_ether.h> 49 #include <rte_common.h> 50 #include <rte_cycles.h> 51 #include <rte_spinlock.h> 52 #include <rte_byteorder.h> 53 #include <rte_dev.h> 54 #include <rte_memory.h> 55 #include <rte_eal.h> 56 #include <rte_io.h> 57 58 #include "rte_avp_common.h" 59 #include "rte_avp_fifo.h" 60 61 #include "avp_logs.h" 62 63 64 static int avp_dev_create(struct rte_pci_device *pci_dev, 65 struct rte_eth_dev *eth_dev); 66 67 static int avp_dev_configure(struct rte_eth_dev *dev); 68 static int avp_dev_start(struct rte_eth_dev *dev); 69 static void avp_dev_stop(struct rte_eth_dev *dev); 70 static void avp_dev_close(struct rte_eth_dev *dev); 71 static void avp_dev_info_get(struct rte_eth_dev *dev, 72 struct rte_eth_dev_info *dev_info); 73 static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask); 74 static int avp_dev_link_update(struct rte_eth_dev *dev, 75 __rte_unused int wait_to_complete); 76 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev); 77 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev); 78 79 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev, 80 uint16_t rx_queue_id, 81 uint16_t nb_rx_desc, 82 unsigned int socket_id, 83 const struct rte_eth_rxconf *rx_conf, 84 struct rte_mempool *pool); 85 86 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev, 87 uint16_t tx_queue_id, 88 uint16_t nb_tx_desc, 89 unsigned int socket_id, 90 const struct rte_eth_txconf *tx_conf); 91 92 static uint16_t avp_recv_scattered_pkts(void *rx_queue, 93 struct rte_mbuf **rx_pkts, 94 uint16_t nb_pkts); 95 96 static uint16_t avp_recv_pkts(void *rx_queue, 97 struct rte_mbuf **rx_pkts, 98 uint16_t nb_pkts); 99 100 static uint16_t avp_xmit_scattered_pkts(void *tx_queue, 101 struct rte_mbuf **tx_pkts, 102 uint16_t nb_pkts); 103 104 static uint16_t avp_xmit_pkts(void *tx_queue, 105 struct rte_mbuf **tx_pkts, 106 uint16_t nb_pkts); 107 108 static void avp_dev_rx_queue_release(void *rxq); 109 static void avp_dev_tx_queue_release(void *txq); 110 111 static void avp_dev_stats_get(struct rte_eth_dev *dev, 112 struct rte_eth_stats *stats); 113 static void avp_dev_stats_reset(struct rte_eth_dev *dev); 114 115 116 #define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device) 117 118 119 #define AVP_MAX_RX_BURST 64 120 #define AVP_MAX_TX_BURST 64 121 #define AVP_MAX_MAC_ADDRS 1 122 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN 123 124 125 /* 126 * Defines the number of microseconds to wait before checking the response 127 * queue for completion. 128 */ 129 #define AVP_REQUEST_DELAY_USECS (5000) 130 131 /* 132 * Defines the number times to check the response queue for completion before 133 * declaring a timeout. 134 */ 135 #define AVP_MAX_REQUEST_RETRY (100) 136 137 /* Defines the current PCI driver version number */ 138 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION 139 140 /* 141 * The set of PCI devices this driver supports 142 */ 143 static const struct rte_pci_id pci_id_avp_map[] = { 144 { .vendor_id = RTE_AVP_PCI_VENDOR_ID, 145 .device_id = RTE_AVP_PCI_DEVICE_ID, 146 .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID, 147 .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID, 148 .class_id = RTE_CLASS_ANY_ID, 149 }, 150 151 { .vendor_id = 0, /* sentinel */ 152 }, 153 }; 154 155 /* 156 * dev_ops for avp, bare necessities for basic operation 157 */ 158 static const struct eth_dev_ops avp_eth_dev_ops = { 159 .dev_configure = avp_dev_configure, 160 .dev_start = avp_dev_start, 161 .dev_stop = avp_dev_stop, 162 .dev_close = avp_dev_close, 163 .dev_infos_get = avp_dev_info_get, 164 .vlan_offload_set = avp_vlan_offload_set, 165 .stats_get = avp_dev_stats_get, 166 .stats_reset = avp_dev_stats_reset, 167 .link_update = avp_dev_link_update, 168 .promiscuous_enable = avp_dev_promiscuous_enable, 169 .promiscuous_disable = avp_dev_promiscuous_disable, 170 .rx_queue_setup = avp_dev_rx_queue_setup, 171 .rx_queue_release = avp_dev_rx_queue_release, 172 .tx_queue_setup = avp_dev_tx_queue_setup, 173 .tx_queue_release = avp_dev_tx_queue_release, 174 }; 175 176 /**@{ AVP device flags */ 177 #define AVP_F_PROMISC (1 << 1) 178 #define AVP_F_CONFIGURED (1 << 2) 179 #define AVP_F_LINKUP (1 << 3) 180 #define AVP_F_DETACHED (1 << 4) 181 /**@} */ 182 183 /* Ethernet device validation marker */ 184 #define AVP_ETHDEV_MAGIC 0x92972862 185 186 /* 187 * Defines the AVP device attributes which are attached to an RTE ethernet 188 * device 189 */ 190 struct avp_dev { 191 uint32_t magic; /**< Memory validation marker */ 192 uint64_t device_id; /**< Unique system identifier */ 193 struct ether_addr ethaddr; /**< Host specified MAC address */ 194 struct rte_eth_dev_data *dev_data; 195 /**< Back pointer to ethernet device data */ 196 volatile uint32_t flags; /**< Device operational flags */ 197 uint8_t port_id; /**< Ethernet port identifier */ 198 struct rte_mempool *pool; /**< pkt mbuf mempool */ 199 unsigned int guest_mbuf_size; /**< local pool mbuf size */ 200 unsigned int host_mbuf_size; /**< host mbuf size */ 201 unsigned int max_rx_pkt_len; /**< maximum receive unit */ 202 uint32_t host_features; /**< Supported feature bitmap */ 203 uint32_t features; /**< Enabled feature bitmap */ 204 unsigned int num_tx_queues; /**< Negotiated number of transmit queues */ 205 unsigned int max_tx_queues; /**< Maximum number of transmit queues */ 206 unsigned int num_rx_queues; /**< Negotiated number of receive queues */ 207 unsigned int max_rx_queues; /**< Maximum number of receive queues */ 208 209 struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */ 210 struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */ 211 struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES]; 212 /**< Allocated mbufs queue */ 213 struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES]; 214 /**< To be freed mbufs queue */ 215 216 /* mutual exclusion over the 'flag' and 'resp_q/req_q' fields */ 217 rte_spinlock_t lock; 218 219 /* For request & response */ 220 struct rte_avp_fifo *req_q; /**< Request queue */ 221 struct rte_avp_fifo *resp_q; /**< Response queue */ 222 void *host_sync_addr; /**< (host) Req/Resp Mem address */ 223 void *sync_addr; /**< Req/Resp Mem address */ 224 void *host_mbuf_addr; /**< (host) MBUF pool start address */ 225 void *mbuf_addr; /**< MBUF pool start address */ 226 } __rte_cache_aligned; 227 228 /* RTE ethernet private data */ 229 struct avp_adapter { 230 struct avp_dev avp; 231 } __rte_cache_aligned; 232 233 234 /* 32-bit MMIO register write */ 235 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr)) 236 237 /* 32-bit MMIO register read */ 238 #define AVP_READ32(_addr) rte_read32_relaxed((_addr)) 239 240 /* Macro to cast the ethernet device private data to a AVP object */ 241 #define AVP_DEV_PRIVATE_TO_HW(adapter) \ 242 (&((struct avp_adapter *)adapter)->avp) 243 244 /* 245 * Defines the structure of a AVP device queue for the purpose of handling the 246 * receive and transmit burst callback functions 247 */ 248 struct avp_queue { 249 struct rte_eth_dev_data *dev_data; 250 /**< Backpointer to ethernet device data */ 251 struct avp_dev *avp; /**< Backpointer to AVP device */ 252 uint16_t queue_id; 253 /**< Queue identifier used for indexing current queue */ 254 uint16_t queue_base; 255 /**< Base queue identifier for queue servicing */ 256 uint16_t queue_limit; 257 /**< Maximum queue identifier for queue servicing */ 258 259 uint64_t packets; 260 uint64_t bytes; 261 uint64_t errors; 262 }; 263 264 /* send a request and wait for a response 265 * 266 * @warning must be called while holding the avp->lock spinlock. 267 */ 268 static int 269 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request) 270 { 271 unsigned int retry = AVP_MAX_REQUEST_RETRY; 272 void *resp_addr = NULL; 273 unsigned int count; 274 int ret; 275 276 PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id); 277 278 request->result = -ENOTSUP; 279 280 /* Discard any stale responses before starting a new request */ 281 while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1)) 282 PMD_DRV_LOG(DEBUG, "Discarding stale response\n"); 283 284 rte_memcpy(avp->sync_addr, request, sizeof(*request)); 285 count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1); 286 if (count < 1) { 287 PMD_DRV_LOG(ERR, "Cannot send request %u to host\n", 288 request->req_id); 289 ret = -EBUSY; 290 goto done; 291 } 292 293 while (retry--) { 294 /* wait for a response */ 295 usleep(AVP_REQUEST_DELAY_USECS); 296 297 count = avp_fifo_count(avp->resp_q); 298 if (count >= 1) { 299 /* response received */ 300 break; 301 } 302 303 if ((count < 1) && (retry == 0)) { 304 PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n", 305 request->req_id); 306 ret = -ETIME; 307 goto done; 308 } 309 } 310 311 /* retrieve the response */ 312 count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1); 313 if ((count != 1) || (resp_addr != avp->host_sync_addr)) { 314 PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n", 315 count, resp_addr, avp->host_sync_addr); 316 ret = -ENODATA; 317 goto done; 318 } 319 320 /* copy to user buffer */ 321 rte_memcpy(request, avp->sync_addr, sizeof(*request)); 322 ret = 0; 323 324 PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n", 325 request->result, request->req_id); 326 327 done: 328 return ret; 329 } 330 331 static int 332 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state) 333 { 334 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 335 struct rte_avp_request request; 336 int ret; 337 338 /* setup a link state change request */ 339 memset(&request, 0, sizeof(request)); 340 request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF; 341 request.if_up = state; 342 343 ret = avp_dev_process_request(avp, &request); 344 345 return ret == 0 ? request.result : ret; 346 } 347 348 static int 349 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev, 350 struct rte_avp_device_config *config) 351 { 352 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 353 struct rte_avp_request request; 354 int ret; 355 356 /* setup a configure request */ 357 memset(&request, 0, sizeof(request)); 358 request.req_id = RTE_AVP_REQ_CFG_DEVICE; 359 memcpy(&request.config, config, sizeof(request.config)); 360 361 ret = avp_dev_process_request(avp, &request); 362 363 return ret == 0 ? request.result : ret; 364 } 365 366 static int 367 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev) 368 { 369 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 370 struct rte_avp_request request; 371 int ret; 372 373 /* setup a shutdown request */ 374 memset(&request, 0, sizeof(request)); 375 request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE; 376 377 ret = avp_dev_process_request(avp, &request); 378 379 return ret == 0 ? request.result : ret; 380 } 381 382 /* translate from host mbuf virtual address to guest virtual address */ 383 static inline void * 384 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address) 385 { 386 return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address, 387 (uintptr_t)avp->host_mbuf_addr), 388 (uintptr_t)avp->mbuf_addr); 389 } 390 391 /* translate from host physical address to guest virtual address */ 392 static void * 393 avp_dev_translate_address(struct rte_eth_dev *eth_dev, 394 phys_addr_t host_phys_addr) 395 { 396 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 397 struct rte_mem_resource *resource; 398 struct rte_avp_memmap_info *info; 399 struct rte_avp_memmap *map; 400 off_t offset; 401 void *addr; 402 unsigned int i; 403 404 addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr; 405 resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR]; 406 info = (struct rte_avp_memmap_info *)resource->addr; 407 408 offset = 0; 409 for (i = 0; i < info->nb_maps; i++) { 410 /* search all segments looking for a matching address */ 411 map = &info->maps[i]; 412 413 if ((host_phys_addr >= map->phys_addr) && 414 (host_phys_addr < (map->phys_addr + map->length))) { 415 /* address is within this segment */ 416 offset += (host_phys_addr - map->phys_addr); 417 addr = RTE_PTR_ADD(addr, offset); 418 419 PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n", 420 host_phys_addr, addr); 421 422 return addr; 423 } 424 offset += map->length; 425 } 426 427 return NULL; 428 } 429 430 /* verify that the incoming device version is compatible with our version */ 431 static int 432 avp_dev_version_check(uint32_t version) 433 { 434 uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION); 435 uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version); 436 437 if (device <= driver) { 438 /* the host driver version is less than or equal to ours */ 439 return 0; 440 } 441 442 return 1; 443 } 444 445 /* verify that memory regions have expected version and validation markers */ 446 static int 447 avp_dev_check_regions(struct rte_eth_dev *eth_dev) 448 { 449 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 450 struct rte_avp_memmap_info *memmap; 451 struct rte_avp_device_info *info; 452 struct rte_mem_resource *resource; 453 unsigned int i; 454 455 /* Dump resource info for debug */ 456 for (i = 0; i < PCI_MAX_RESOURCE; i++) { 457 resource = &pci_dev->mem_resource[i]; 458 if ((resource->phys_addr == 0) || (resource->len == 0)) 459 continue; 460 461 PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n", 462 i, resource->phys_addr, 463 resource->len, resource->addr); 464 465 switch (i) { 466 case RTE_AVP_PCI_MEMMAP_BAR: 467 memmap = (struct rte_avp_memmap_info *)resource->addr; 468 if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) || 469 (memmap->version != RTE_AVP_MEMMAP_VERSION)) { 470 PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n", 471 memmap->magic, memmap->version); 472 return -EINVAL; 473 } 474 break; 475 476 case RTE_AVP_PCI_DEVICE_BAR: 477 info = (struct rte_avp_device_info *)resource->addr; 478 if ((info->magic != RTE_AVP_DEVICE_MAGIC) || 479 avp_dev_version_check(info->version)) { 480 PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n", 481 info->magic, info->version, 482 AVP_DPDK_DRIVER_VERSION); 483 return -EINVAL; 484 } 485 break; 486 487 case RTE_AVP_PCI_MEMORY_BAR: 488 case RTE_AVP_PCI_MMIO_BAR: 489 if (resource->addr == NULL) { 490 PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n", 491 i); 492 return -EINVAL; 493 } 494 break; 495 496 case RTE_AVP_PCI_MSIX_BAR: 497 default: 498 /* no validation required */ 499 break; 500 } 501 } 502 503 return 0; 504 } 505 506 static int 507 avp_dev_detach(struct rte_eth_dev *eth_dev) 508 { 509 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 510 int ret; 511 512 PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n", 513 eth_dev->data->port_id, avp->device_id); 514 515 rte_spinlock_lock(&avp->lock); 516 517 if (avp->flags & AVP_F_DETACHED) { 518 PMD_DRV_LOG(NOTICE, "port %u already detached\n", 519 eth_dev->data->port_id); 520 ret = 0; 521 goto unlock; 522 } 523 524 /* shutdown the device first so the host stops sending us packets. */ 525 ret = avp_dev_ctrl_shutdown(eth_dev); 526 if (ret < 0) { 527 PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n", 528 ret); 529 avp->flags &= ~AVP_F_DETACHED; 530 goto unlock; 531 } 532 533 avp->flags |= AVP_F_DETACHED; 534 rte_wmb(); 535 536 /* wait for queues to acknowledge the presence of the detach flag */ 537 rte_delay_ms(1); 538 539 ret = 0; 540 541 unlock: 542 rte_spinlock_unlock(&avp->lock); 543 return ret; 544 } 545 546 static void 547 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) 548 { 549 struct avp_dev *avp = 550 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 551 struct avp_queue *rxq; 552 uint16_t queue_count; 553 uint16_t remainder; 554 555 rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id]; 556 557 /* 558 * Must map all AVP fifos as evenly as possible between the configured 559 * device queues. Each device queue will service a subset of the AVP 560 * fifos. If there is an odd number of device queues the first set of 561 * device queues will get the extra AVP fifos. 562 */ 563 queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues; 564 remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues; 565 if (rx_queue_id < remainder) { 566 /* these queues must service one extra FIFO */ 567 rxq->queue_base = rx_queue_id * (queue_count + 1); 568 rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1; 569 } else { 570 /* these queues service the regular number of FIFO */ 571 rxq->queue_base = ((remainder * (queue_count + 1)) + 572 ((rx_queue_id - remainder) * queue_count)); 573 rxq->queue_limit = rxq->queue_base + queue_count - 1; 574 } 575 576 PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n", 577 rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit); 578 579 rxq->queue_id = rxq->queue_base; 580 } 581 582 static void 583 _avp_set_queue_counts(struct rte_eth_dev *eth_dev) 584 { 585 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 586 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 587 struct rte_avp_device_info *host_info; 588 void *addr; 589 590 addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; 591 host_info = (struct rte_avp_device_info *)addr; 592 593 /* 594 * the transmit direction is not negotiated beyond respecting the max 595 * number of queues because the host can handle arbitrary guest tx 596 * queues (host rx queues). 597 */ 598 avp->num_tx_queues = eth_dev->data->nb_tx_queues; 599 600 /* 601 * the receive direction is more restrictive. The host requires a 602 * minimum number of guest rx queues (host tx queues) therefore 603 * negotiate a value that is at least as large as the host minimum 604 * requirement. If the host and guest values are not identical then a 605 * mapping will be established in the receive_queue_setup function. 606 */ 607 avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues, 608 eth_dev->data->nb_rx_queues); 609 610 PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n", 611 avp->num_tx_queues, avp->num_rx_queues); 612 } 613 614 static int 615 avp_dev_attach(struct rte_eth_dev *eth_dev) 616 { 617 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 618 struct rte_avp_device_config config; 619 unsigned int i; 620 int ret; 621 622 PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n", 623 eth_dev->data->port_id, avp->device_id); 624 625 rte_spinlock_lock(&avp->lock); 626 627 if (!(avp->flags & AVP_F_DETACHED)) { 628 PMD_DRV_LOG(NOTICE, "port %u already attached\n", 629 eth_dev->data->port_id); 630 ret = 0; 631 goto unlock; 632 } 633 634 /* 635 * make sure that the detached flag is set prior to reconfiguring the 636 * queues. 637 */ 638 avp->flags |= AVP_F_DETACHED; 639 rte_wmb(); 640 641 /* 642 * re-run the device create utility which will parse the new host info 643 * and setup the AVP device queue pointers. 644 */ 645 ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev); 646 if (ret < 0) { 647 PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n", 648 ret); 649 goto unlock; 650 } 651 652 if (avp->flags & AVP_F_CONFIGURED) { 653 /* 654 * Update the receive queue mapping to handle cases where the 655 * source and destination hosts have different queue 656 * requirements. As long as the DETACHED flag is asserted the 657 * queue table should not be referenced so it should be safe to 658 * update it. 659 */ 660 _avp_set_queue_counts(eth_dev); 661 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) 662 _avp_set_rx_queue_mappings(eth_dev, i); 663 664 /* 665 * Update the host with our config details so that it knows the 666 * device is active. 667 */ 668 memset(&config, 0, sizeof(config)); 669 config.device_id = avp->device_id; 670 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; 671 config.driver_version = AVP_DPDK_DRIVER_VERSION; 672 config.features = avp->features; 673 config.num_tx_queues = avp->num_tx_queues; 674 config.num_rx_queues = avp->num_rx_queues; 675 config.if_up = !!(avp->flags & AVP_F_LINKUP); 676 677 ret = avp_dev_ctrl_set_config(eth_dev, &config); 678 if (ret < 0) { 679 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", 680 ret); 681 goto unlock; 682 } 683 } 684 685 rte_wmb(); 686 avp->flags &= ~AVP_F_DETACHED; 687 688 ret = 0; 689 690 unlock: 691 rte_spinlock_unlock(&avp->lock); 692 return ret; 693 } 694 695 static void 696 avp_dev_interrupt_handler(void *data) 697 { 698 struct rte_eth_dev *eth_dev = data; 699 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 700 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 701 uint32_t status, value; 702 int ret; 703 704 if (registers == NULL) 705 rte_panic("no mapped MMIO register space\n"); 706 707 /* read the interrupt status register 708 * note: this register clears on read so all raised interrupts must be 709 * handled or remembered for later processing 710 */ 711 status = AVP_READ32( 712 RTE_PTR_ADD(registers, 713 RTE_AVP_INTERRUPT_STATUS_OFFSET)); 714 715 if (status | RTE_AVP_MIGRATION_INTERRUPT_MASK) { 716 /* handle interrupt based on current status */ 717 value = AVP_READ32( 718 RTE_PTR_ADD(registers, 719 RTE_AVP_MIGRATION_STATUS_OFFSET)); 720 switch (value) { 721 case RTE_AVP_MIGRATION_DETACHED: 722 ret = avp_dev_detach(eth_dev); 723 break; 724 case RTE_AVP_MIGRATION_ATTACHED: 725 ret = avp_dev_attach(eth_dev); 726 break; 727 default: 728 PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n", 729 value); 730 ret = -EINVAL; 731 } 732 733 /* acknowledge the request by writing out our current status */ 734 value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR); 735 AVP_WRITE32(value, 736 RTE_PTR_ADD(registers, 737 RTE_AVP_MIGRATION_ACK_OFFSET)); 738 739 PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n"); 740 } 741 742 if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK) 743 PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n", 744 status); 745 746 /* re-enable UIO interrupt handling */ 747 ret = rte_intr_enable(&pci_dev->intr_handle); 748 if (ret < 0) { 749 PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", 750 ret); 751 /* continue */ 752 } 753 } 754 755 static int 756 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev) 757 { 758 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 759 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 760 int ret; 761 762 if (registers == NULL) 763 return -EINVAL; 764 765 /* enable UIO interrupt handling */ 766 ret = rte_intr_enable(&pci_dev->intr_handle); 767 if (ret < 0) { 768 PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n", 769 ret); 770 return ret; 771 } 772 773 /* inform the device that all interrupts are enabled */ 774 AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK, 775 RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); 776 777 return 0; 778 } 779 780 static int 781 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev) 782 { 783 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 784 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 785 int ret; 786 787 if (registers == NULL) 788 return 0; 789 790 /* inform the device that all interrupts are disabled */ 791 AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK, 792 RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); 793 794 /* enable UIO interrupt handling */ 795 ret = rte_intr_disable(&pci_dev->intr_handle); 796 if (ret < 0) { 797 PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n", 798 ret); 799 return ret; 800 } 801 802 return 0; 803 } 804 805 static int 806 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev) 807 { 808 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 809 int ret; 810 811 /* register a callback handler with UIO for interrupt notifications */ 812 ret = rte_intr_callback_register(&pci_dev->intr_handle, 813 avp_dev_interrupt_handler, 814 (void *)eth_dev); 815 if (ret < 0) { 816 PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n", 817 ret); 818 return ret; 819 } 820 821 /* enable interrupt processing */ 822 return avp_dev_enable_interrupts(eth_dev); 823 } 824 825 static int 826 avp_dev_migration_pending(struct rte_eth_dev *eth_dev) 827 { 828 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 829 void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; 830 uint32_t value; 831 832 if (registers == NULL) 833 return 0; 834 835 value = AVP_READ32(RTE_PTR_ADD(registers, 836 RTE_AVP_MIGRATION_STATUS_OFFSET)); 837 if (value == RTE_AVP_MIGRATION_DETACHED) { 838 /* migration is in progress; ack it if we have not already */ 839 AVP_WRITE32(value, 840 RTE_PTR_ADD(registers, 841 RTE_AVP_MIGRATION_ACK_OFFSET)); 842 return 1; 843 } 844 return 0; 845 } 846 847 /* 848 * create a AVP device using the supplied device info by first translating it 849 * to guest address space(s). 850 */ 851 static int 852 avp_dev_create(struct rte_pci_device *pci_dev, 853 struct rte_eth_dev *eth_dev) 854 { 855 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 856 struct rte_avp_device_info *host_info; 857 struct rte_mem_resource *resource; 858 unsigned int i; 859 860 resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR]; 861 if (resource->addr == NULL) { 862 PMD_DRV_LOG(ERR, "BAR%u is not mapped\n", 863 RTE_AVP_PCI_DEVICE_BAR); 864 return -EFAULT; 865 } 866 host_info = (struct rte_avp_device_info *)resource->addr; 867 868 if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) || 869 avp_dev_version_check(host_info->version)) { 870 PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n", 871 host_info->magic, host_info->version, 872 AVP_DPDK_DRIVER_VERSION); 873 return -EINVAL; 874 } 875 876 PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n", 877 RTE_AVP_GET_RELEASE_VERSION(host_info->version), 878 RTE_AVP_GET_MAJOR_VERSION(host_info->version), 879 RTE_AVP_GET_MINOR_VERSION(host_info->version)); 880 881 PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n", 882 host_info->min_tx_queues, host_info->max_tx_queues); 883 PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n", 884 host_info->min_rx_queues, host_info->max_rx_queues); 885 PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n", 886 host_info->features); 887 888 if (avp->magic != AVP_ETHDEV_MAGIC) { 889 /* 890 * First time initialization (i.e., not during a VM 891 * migration) 892 */ 893 memset(avp, 0, sizeof(*avp)); 894 avp->magic = AVP_ETHDEV_MAGIC; 895 avp->dev_data = eth_dev->data; 896 avp->port_id = eth_dev->data->port_id; 897 avp->host_mbuf_size = host_info->mbuf_size; 898 avp->host_features = host_info->features; 899 rte_spinlock_init(&avp->lock); 900 memcpy(&avp->ethaddr.addr_bytes[0], 901 host_info->ethaddr, ETHER_ADDR_LEN); 902 /* adjust max values to not exceed our max */ 903 avp->max_tx_queues = 904 RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES); 905 avp->max_rx_queues = 906 RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES); 907 } else { 908 /* Re-attaching during migration */ 909 910 /* TODO... requires validation of host values */ 911 if ((host_info->features & avp->features) != avp->features) { 912 PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n", 913 avp->features, host_info->features); 914 /* this should not be possible; continue for now */ 915 } 916 } 917 918 /* the device id is allowed to change over migrations */ 919 avp->device_id = host_info->device_id; 920 921 /* translate incoming host addresses to guest address space */ 922 PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n", 923 host_info->tx_phys); 924 PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n", 925 host_info->alloc_phys); 926 for (i = 0; i < avp->max_tx_queues; i++) { 927 avp->tx_q[i] = avp_dev_translate_address(eth_dev, 928 host_info->tx_phys + (i * host_info->tx_size)); 929 930 avp->alloc_q[i] = avp_dev_translate_address(eth_dev, 931 host_info->alloc_phys + (i * host_info->alloc_size)); 932 } 933 934 PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n", 935 host_info->rx_phys); 936 PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n", 937 host_info->free_phys); 938 for (i = 0; i < avp->max_rx_queues; i++) { 939 avp->rx_q[i] = avp_dev_translate_address(eth_dev, 940 host_info->rx_phys + (i * host_info->rx_size)); 941 avp->free_q[i] = avp_dev_translate_address(eth_dev, 942 host_info->free_phys + (i * host_info->free_size)); 943 } 944 945 PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n", 946 host_info->req_phys); 947 PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n", 948 host_info->resp_phys); 949 PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n", 950 host_info->sync_phys); 951 PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n", 952 host_info->mbuf_phys); 953 avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys); 954 avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys); 955 avp->sync_addr = 956 avp_dev_translate_address(eth_dev, host_info->sync_phys); 957 avp->mbuf_addr = 958 avp_dev_translate_address(eth_dev, host_info->mbuf_phys); 959 960 /* 961 * store the host mbuf virtual address so that we can calculate 962 * relative offsets for each mbuf as they are processed 963 */ 964 avp->host_mbuf_addr = host_info->mbuf_va; 965 avp->host_sync_addr = host_info->sync_va; 966 967 /* 968 * store the maximum packet length that is supported by the host. 969 */ 970 avp->max_rx_pkt_len = host_info->max_rx_pkt_len; 971 PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n", 972 host_info->max_rx_pkt_len); 973 974 return 0; 975 } 976 977 /* 978 * This function is based on probe() function in avp_pci.c 979 * It returns 0 on success. 980 */ 981 static int 982 eth_avp_dev_init(struct rte_eth_dev *eth_dev) 983 { 984 struct avp_dev *avp = 985 AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 986 struct rte_pci_device *pci_dev; 987 int ret; 988 989 pci_dev = AVP_DEV_TO_PCI(eth_dev); 990 eth_dev->dev_ops = &avp_eth_dev_ops; 991 eth_dev->rx_pkt_burst = &avp_recv_pkts; 992 eth_dev->tx_pkt_burst = &avp_xmit_pkts; 993 994 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 995 /* 996 * no setup required on secondary processes. All data is saved 997 * in dev_private by the primary process. All resource should 998 * be mapped to the same virtual address so all pointers should 999 * be valid. 1000 */ 1001 if (eth_dev->data->scattered_rx) { 1002 PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); 1003 eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; 1004 eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; 1005 } 1006 return 0; 1007 } 1008 1009 rte_eth_copy_pci_info(eth_dev, pci_dev); 1010 1011 eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE; 1012 1013 /* Check current migration status */ 1014 if (avp_dev_migration_pending(eth_dev)) { 1015 PMD_DRV_LOG(ERR, "VM live migration operation in progress\n"); 1016 return -EBUSY; 1017 } 1018 1019 /* Check BAR resources */ 1020 ret = avp_dev_check_regions(eth_dev); 1021 if (ret < 0) { 1022 PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n", 1023 ret); 1024 return ret; 1025 } 1026 1027 /* Enable interrupts */ 1028 ret = avp_dev_setup_interrupts(eth_dev); 1029 if (ret < 0) { 1030 PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret); 1031 return ret; 1032 } 1033 1034 /* Handle each subtype */ 1035 ret = avp_dev_create(pci_dev, eth_dev); 1036 if (ret < 0) { 1037 PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret); 1038 return ret; 1039 } 1040 1041 /* Allocate memory for storing MAC addresses */ 1042 eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0); 1043 if (eth_dev->data->mac_addrs == NULL) { 1044 PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n", 1045 ETHER_ADDR_LEN); 1046 return -ENOMEM; 1047 } 1048 1049 /* Get a mac from device config */ 1050 ether_addr_copy(&avp->ethaddr, ð_dev->data->mac_addrs[0]); 1051 1052 return 0; 1053 } 1054 1055 static int 1056 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev) 1057 { 1058 int ret; 1059 1060 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1061 return -EPERM; 1062 1063 if (eth_dev->data == NULL) 1064 return 0; 1065 1066 ret = avp_dev_disable_interrupts(eth_dev); 1067 if (ret != 0) { 1068 PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret); 1069 return ret; 1070 } 1071 1072 if (eth_dev->data->mac_addrs != NULL) { 1073 rte_free(eth_dev->data->mac_addrs); 1074 eth_dev->data->mac_addrs = NULL; 1075 } 1076 1077 return 0; 1078 } 1079 1080 static int 1081 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 1082 struct rte_pci_device *pci_dev) 1083 { 1084 struct rte_eth_dev *eth_dev; 1085 int ret; 1086 1087 eth_dev = rte_eth_dev_pci_allocate(pci_dev, 1088 sizeof(struct avp_adapter)); 1089 if (eth_dev == NULL) 1090 return -ENOMEM; 1091 1092 ret = eth_avp_dev_init(eth_dev); 1093 if (ret) 1094 rte_eth_dev_pci_release(eth_dev); 1095 1096 return ret; 1097 } 1098 1099 static int 1100 eth_avp_pci_remove(struct rte_pci_device *pci_dev) 1101 { 1102 return rte_eth_dev_pci_generic_remove(pci_dev, 1103 eth_avp_dev_uninit); 1104 } 1105 1106 static struct rte_pci_driver rte_avp_pmd = { 1107 .id_table = pci_id_avp_map, 1108 .drv_flags = RTE_PCI_DRV_NEED_MAPPING, 1109 .probe = eth_avp_pci_probe, 1110 .remove = eth_avp_pci_remove, 1111 }; 1112 1113 static int 1114 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev, 1115 struct avp_dev *avp) 1116 { 1117 unsigned int max_rx_pkt_len; 1118 1119 max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len; 1120 1121 if ((max_rx_pkt_len > avp->guest_mbuf_size) || 1122 (max_rx_pkt_len > avp->host_mbuf_size)) { 1123 /* 1124 * If the guest MTU is greater than either the host or guest 1125 * buffers then chained mbufs have to be enabled in the TX 1126 * direction. It is assumed that the application will not need 1127 * to send packets larger than their max_rx_pkt_len (MRU). 1128 */ 1129 return 1; 1130 } 1131 1132 if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) || 1133 (avp->max_rx_pkt_len > avp->host_mbuf_size)) { 1134 /* 1135 * If the host MRU is greater than its own mbuf size or the 1136 * guest mbuf size then chained mbufs have to be enabled in the 1137 * RX direction. 1138 */ 1139 return 1; 1140 } 1141 1142 return 0; 1143 } 1144 1145 static int 1146 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, 1147 uint16_t rx_queue_id, 1148 uint16_t nb_rx_desc, 1149 unsigned int socket_id, 1150 const struct rte_eth_rxconf *rx_conf, 1151 struct rte_mempool *pool) 1152 { 1153 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 1154 struct rte_pktmbuf_pool_private *mbp_priv; 1155 struct avp_queue *rxq; 1156 1157 if (rx_queue_id >= eth_dev->data->nb_rx_queues) { 1158 PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n", 1159 rx_queue_id, eth_dev->data->nb_rx_queues); 1160 return -EINVAL; 1161 } 1162 1163 /* Save mbuf pool pointer */ 1164 avp->pool = pool; 1165 1166 /* Save the local mbuf size */ 1167 mbp_priv = rte_mempool_get_priv(pool); 1168 avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size); 1169 avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM; 1170 1171 if (avp_dev_enable_scattered(eth_dev, avp)) { 1172 if (!eth_dev->data->scattered_rx) { 1173 PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n"); 1174 eth_dev->data->scattered_rx = 1; 1175 eth_dev->rx_pkt_burst = avp_recv_scattered_pkts; 1176 eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts; 1177 } 1178 } 1179 1180 PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n", 1181 avp->max_rx_pkt_len, 1182 eth_dev->data->dev_conf.rxmode.max_rx_pkt_len, 1183 avp->host_mbuf_size, 1184 avp->guest_mbuf_size); 1185 1186 /* allocate a queue object */ 1187 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue), 1188 RTE_CACHE_LINE_SIZE, socket_id); 1189 if (rxq == NULL) { 1190 PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n"); 1191 return -ENOMEM; 1192 } 1193 1194 /* save back pointers to AVP and Ethernet devices */ 1195 rxq->avp = avp; 1196 rxq->dev_data = eth_dev->data; 1197 eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq; 1198 1199 /* setup the queue receive mapping for the current queue. */ 1200 _avp_set_rx_queue_mappings(eth_dev, rx_queue_id); 1201 1202 PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq); 1203 1204 (void)nb_rx_desc; 1205 (void)rx_conf; 1206 return 0; 1207 } 1208 1209 static int 1210 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev, 1211 uint16_t tx_queue_id, 1212 uint16_t nb_tx_desc, 1213 unsigned int socket_id, 1214 const struct rte_eth_txconf *tx_conf) 1215 { 1216 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 1217 struct avp_queue *txq; 1218 1219 if (tx_queue_id >= eth_dev->data->nb_tx_queues) { 1220 PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n", 1221 tx_queue_id, eth_dev->data->nb_tx_queues); 1222 return -EINVAL; 1223 } 1224 1225 /* allocate a queue object */ 1226 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue), 1227 RTE_CACHE_LINE_SIZE, socket_id); 1228 if (txq == NULL) { 1229 PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n"); 1230 return -ENOMEM; 1231 } 1232 1233 /* only the configured set of transmit queues are used */ 1234 txq->queue_id = tx_queue_id; 1235 txq->queue_base = tx_queue_id; 1236 txq->queue_limit = tx_queue_id; 1237 1238 /* save back pointers to AVP and Ethernet devices */ 1239 txq->avp = avp; 1240 txq->dev_data = eth_dev->data; 1241 eth_dev->data->tx_queues[tx_queue_id] = (void *)txq; 1242 1243 PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq); 1244 1245 (void)nb_tx_desc; 1246 (void)tx_conf; 1247 return 0; 1248 } 1249 1250 static inline int 1251 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b) 1252 { 1253 uint16_t *_a = (uint16_t *)&a->addr_bytes[0]; 1254 uint16_t *_b = (uint16_t *)&b->addr_bytes[0]; 1255 return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]); 1256 } 1257 1258 static inline int 1259 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m) 1260 { 1261 struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *); 1262 1263 if (likely(_avp_cmp_ether_addr(&avp->ethaddr, ð->d_addr) == 0)) { 1264 /* allow all packets destined to our address */ 1265 return 0; 1266 } 1267 1268 if (likely(is_broadcast_ether_addr(ð->d_addr))) { 1269 /* allow all broadcast packets */ 1270 return 0; 1271 } 1272 1273 if (likely(is_multicast_ether_addr(ð->d_addr))) { 1274 /* allow all multicast packets */ 1275 return 0; 1276 } 1277 1278 if (avp->flags & AVP_F_PROMISC) { 1279 /* allow all packets when in promiscuous mode */ 1280 return 0; 1281 } 1282 1283 return -1; 1284 } 1285 1286 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS 1287 static inline void 1288 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf) 1289 { 1290 struct rte_avp_desc *first_buf; 1291 struct rte_avp_desc *pkt_buf; 1292 unsigned int pkt_len; 1293 unsigned int nb_segs; 1294 void *pkt_data; 1295 unsigned int i; 1296 1297 first_buf = avp_dev_translate_buffer(avp, buf); 1298 1299 i = 0; 1300 pkt_len = 0; 1301 nb_segs = first_buf->nb_segs; 1302 do { 1303 /* Adjust pointers for guest addressing */ 1304 pkt_buf = avp_dev_translate_buffer(avp, buf); 1305 if (pkt_buf == NULL) 1306 rte_panic("bad buffer: segment %u has an invalid address %p\n", 1307 i, buf); 1308 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1309 if (pkt_data == NULL) 1310 rte_panic("bad buffer: segment %u has a NULL data pointer\n", 1311 i); 1312 if (pkt_buf->data_len == 0) 1313 rte_panic("bad buffer: segment %u has 0 data length\n", 1314 i); 1315 pkt_len += pkt_buf->data_len; 1316 nb_segs--; 1317 i++; 1318 1319 } while (nb_segs && (buf = pkt_buf->next) != NULL); 1320 1321 if (nb_segs != 0) 1322 rte_panic("bad buffer: expected %u segments found %u\n", 1323 first_buf->nb_segs, (first_buf->nb_segs - nb_segs)); 1324 if (pkt_len != first_buf->pkt_len) 1325 rte_panic("bad buffer: expected length %u found %u\n", 1326 first_buf->pkt_len, pkt_len); 1327 } 1328 1329 #define avp_dev_buffer_sanity_check(a, b) \ 1330 __avp_dev_buffer_sanity_check((a), (b)) 1331 1332 #else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */ 1333 1334 #define avp_dev_buffer_sanity_check(a, b) do {} while (0) 1335 1336 #endif 1337 1338 /* 1339 * Copy a host buffer chain to a set of mbufs. This function assumes that 1340 * there exactly the required number of mbufs to copy all source bytes. 1341 */ 1342 static inline struct rte_mbuf * 1343 avp_dev_copy_from_buffers(struct avp_dev *avp, 1344 struct rte_avp_desc *buf, 1345 struct rte_mbuf **mbufs, 1346 unsigned int count) 1347 { 1348 struct rte_mbuf *m_previous = NULL; 1349 struct rte_avp_desc *pkt_buf; 1350 unsigned int total_length = 0; 1351 unsigned int copy_length; 1352 unsigned int src_offset; 1353 struct rte_mbuf *m; 1354 uint16_t ol_flags; 1355 uint16_t vlan_tci; 1356 void *pkt_data; 1357 unsigned int i; 1358 1359 avp_dev_buffer_sanity_check(avp, buf); 1360 1361 /* setup the first source buffer */ 1362 pkt_buf = avp_dev_translate_buffer(avp, buf); 1363 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1364 total_length = pkt_buf->pkt_len; 1365 src_offset = 0; 1366 1367 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) { 1368 ol_flags = PKT_RX_VLAN_PKT; 1369 vlan_tci = pkt_buf->vlan_tci; 1370 } else { 1371 ol_flags = 0; 1372 vlan_tci = 0; 1373 } 1374 1375 for (i = 0; (i < count) && (buf != NULL); i++) { 1376 /* fill each destination buffer */ 1377 m = mbufs[i]; 1378 1379 if (m_previous != NULL) 1380 m_previous->next = m; 1381 1382 m_previous = m; 1383 1384 do { 1385 /* 1386 * Copy as many source buffers as will fit in the 1387 * destination buffer. 1388 */ 1389 copy_length = RTE_MIN((avp->guest_mbuf_size - 1390 rte_pktmbuf_data_len(m)), 1391 (pkt_buf->data_len - 1392 src_offset)); 1393 rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *), 1394 rte_pktmbuf_data_len(m)), 1395 RTE_PTR_ADD(pkt_data, src_offset), 1396 copy_length); 1397 rte_pktmbuf_data_len(m) += copy_length; 1398 src_offset += copy_length; 1399 1400 if (likely(src_offset == pkt_buf->data_len)) { 1401 /* need a new source buffer */ 1402 buf = pkt_buf->next; 1403 if (buf != NULL) { 1404 pkt_buf = avp_dev_translate_buffer( 1405 avp, buf); 1406 pkt_data = avp_dev_translate_buffer( 1407 avp, pkt_buf->data); 1408 src_offset = 0; 1409 } 1410 } 1411 1412 if (unlikely(rte_pktmbuf_data_len(m) == 1413 avp->guest_mbuf_size)) { 1414 /* need a new destination mbuf */ 1415 break; 1416 } 1417 1418 } while (buf != NULL); 1419 } 1420 1421 m = mbufs[0]; 1422 m->ol_flags = ol_flags; 1423 m->nb_segs = count; 1424 rte_pktmbuf_pkt_len(m) = total_length; 1425 m->vlan_tci = vlan_tci; 1426 1427 __rte_mbuf_sanity_check(m, 1); 1428 1429 return m; 1430 } 1431 1432 static uint16_t 1433 avp_recv_scattered_pkts(void *rx_queue, 1434 struct rte_mbuf **rx_pkts, 1435 uint16_t nb_pkts) 1436 { 1437 struct avp_queue *rxq = (struct avp_queue *)rx_queue; 1438 struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST]; 1439 struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS]; 1440 struct avp_dev *avp = rxq->avp; 1441 struct rte_avp_desc *pkt_buf; 1442 struct rte_avp_fifo *free_q; 1443 struct rte_avp_fifo *rx_q; 1444 struct rte_avp_desc *buf; 1445 unsigned int count, avail, n; 1446 unsigned int guest_mbuf_size; 1447 struct rte_mbuf *m; 1448 unsigned int required; 1449 unsigned int buf_len; 1450 unsigned int port_id; 1451 unsigned int i; 1452 1453 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1454 /* VM live migration in progress */ 1455 return 0; 1456 } 1457 1458 guest_mbuf_size = avp->guest_mbuf_size; 1459 port_id = avp->port_id; 1460 rx_q = avp->rx_q[rxq->queue_id]; 1461 free_q = avp->free_q[rxq->queue_id]; 1462 1463 /* setup next queue to service */ 1464 rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? 1465 (rxq->queue_id + 1) : rxq->queue_base; 1466 1467 /* determine how many slots are available in the free queue */ 1468 count = avp_fifo_free_count(free_q); 1469 1470 /* determine how many packets are available in the rx queue */ 1471 avail = avp_fifo_count(rx_q); 1472 1473 /* determine how many packets can be received */ 1474 count = RTE_MIN(count, avail); 1475 count = RTE_MIN(count, nb_pkts); 1476 count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); 1477 1478 if (unlikely(count == 0)) { 1479 /* no free buffers, or no buffers on the rx queue */ 1480 return 0; 1481 } 1482 1483 /* retrieve pending packets */ 1484 n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); 1485 PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", 1486 count, rx_q); 1487 1488 count = 0; 1489 for (i = 0; i < n; i++) { 1490 /* prefetch next entry while processing current one */ 1491 if (i + 1 < n) { 1492 pkt_buf = avp_dev_translate_buffer(avp, 1493 avp_bufs[i + 1]); 1494 rte_prefetch0(pkt_buf); 1495 } 1496 buf = avp_bufs[i]; 1497 1498 /* Peek into the first buffer to determine the total length */ 1499 pkt_buf = avp_dev_translate_buffer(avp, buf); 1500 buf_len = pkt_buf->pkt_len; 1501 1502 /* Allocate enough mbufs to receive the entire packet */ 1503 required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size; 1504 if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) { 1505 rxq->dev_data->rx_mbuf_alloc_failed++; 1506 continue; 1507 } 1508 1509 /* Copy the data from the buffers to our mbufs */ 1510 m = avp_dev_copy_from_buffers(avp, buf, mbufs, required); 1511 1512 /* finalize mbuf */ 1513 m->port = port_id; 1514 1515 if (_avp_mac_filter(avp, m) != 0) { 1516 /* silently discard packets not destined to our MAC */ 1517 rte_pktmbuf_free(m); 1518 continue; 1519 } 1520 1521 /* return new mbuf to caller */ 1522 rx_pkts[count++] = m; 1523 rxq->bytes += buf_len; 1524 } 1525 1526 rxq->packets += count; 1527 1528 /* return the buffers to the free queue */ 1529 avp_fifo_put(free_q, (void **)&avp_bufs[0], n); 1530 1531 return count; 1532 } 1533 1534 1535 static uint16_t 1536 avp_recv_pkts(void *rx_queue, 1537 struct rte_mbuf **rx_pkts, 1538 uint16_t nb_pkts) 1539 { 1540 struct avp_queue *rxq = (struct avp_queue *)rx_queue; 1541 struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST]; 1542 struct avp_dev *avp = rxq->avp; 1543 struct rte_avp_desc *pkt_buf; 1544 struct rte_avp_fifo *free_q; 1545 struct rte_avp_fifo *rx_q; 1546 unsigned int count, avail, n; 1547 unsigned int pkt_len; 1548 struct rte_mbuf *m; 1549 char *pkt_data; 1550 unsigned int i; 1551 1552 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1553 /* VM live migration in progress */ 1554 return 0; 1555 } 1556 1557 rx_q = avp->rx_q[rxq->queue_id]; 1558 free_q = avp->free_q[rxq->queue_id]; 1559 1560 /* setup next queue to service */ 1561 rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ? 1562 (rxq->queue_id + 1) : rxq->queue_base; 1563 1564 /* determine how many slots are available in the free queue */ 1565 count = avp_fifo_free_count(free_q); 1566 1567 /* determine how many packets are available in the rx queue */ 1568 avail = avp_fifo_count(rx_q); 1569 1570 /* determine how many packets can be received */ 1571 count = RTE_MIN(count, avail); 1572 count = RTE_MIN(count, nb_pkts); 1573 count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST); 1574 1575 if (unlikely(count == 0)) { 1576 /* no free buffers, or no buffers on the rx queue */ 1577 return 0; 1578 } 1579 1580 /* retrieve pending packets */ 1581 n = avp_fifo_get(rx_q, (void **)&avp_bufs, count); 1582 PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n", 1583 count, rx_q); 1584 1585 count = 0; 1586 for (i = 0; i < n; i++) { 1587 /* prefetch next entry while processing current one */ 1588 if (i < n - 1) { 1589 pkt_buf = avp_dev_translate_buffer(avp, 1590 avp_bufs[i + 1]); 1591 rte_prefetch0(pkt_buf); 1592 } 1593 1594 /* Adjust host pointers for guest addressing */ 1595 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]); 1596 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1597 pkt_len = pkt_buf->pkt_len; 1598 1599 if (unlikely((pkt_len > avp->guest_mbuf_size) || 1600 (pkt_buf->nb_segs > 1))) { 1601 /* 1602 * application should be using the scattered receive 1603 * function 1604 */ 1605 rxq->errors++; 1606 continue; 1607 } 1608 1609 /* process each packet to be transmitted */ 1610 m = rte_pktmbuf_alloc(avp->pool); 1611 if (unlikely(m == NULL)) { 1612 rxq->dev_data->rx_mbuf_alloc_failed++; 1613 continue; 1614 } 1615 1616 /* copy data out of the host buffer to our buffer */ 1617 m->data_off = RTE_PKTMBUF_HEADROOM; 1618 rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len); 1619 1620 /* initialize the local mbuf */ 1621 rte_pktmbuf_data_len(m) = pkt_len; 1622 rte_pktmbuf_pkt_len(m) = pkt_len; 1623 m->port = avp->port_id; 1624 1625 if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) { 1626 m->ol_flags = PKT_RX_VLAN_PKT; 1627 m->vlan_tci = pkt_buf->vlan_tci; 1628 } 1629 1630 if (_avp_mac_filter(avp, m) != 0) { 1631 /* silently discard packets not destined to our MAC */ 1632 rte_pktmbuf_free(m); 1633 continue; 1634 } 1635 1636 /* return new mbuf to caller */ 1637 rx_pkts[count++] = m; 1638 rxq->bytes += pkt_len; 1639 } 1640 1641 rxq->packets += count; 1642 1643 /* return the buffers to the free queue */ 1644 avp_fifo_put(free_q, (void **)&avp_bufs[0], n); 1645 1646 return count; 1647 } 1648 1649 /* 1650 * Copy a chained mbuf to a set of host buffers. This function assumes that 1651 * there are sufficient destination buffers to contain the entire source 1652 * packet. 1653 */ 1654 static inline uint16_t 1655 avp_dev_copy_to_buffers(struct avp_dev *avp, 1656 struct rte_mbuf *mbuf, 1657 struct rte_avp_desc **buffers, 1658 unsigned int count) 1659 { 1660 struct rte_avp_desc *previous_buf = NULL; 1661 struct rte_avp_desc *first_buf = NULL; 1662 struct rte_avp_desc *pkt_buf; 1663 struct rte_avp_desc *buf; 1664 size_t total_length; 1665 struct rte_mbuf *m; 1666 size_t copy_length; 1667 size_t src_offset; 1668 char *pkt_data; 1669 unsigned int i; 1670 1671 __rte_mbuf_sanity_check(mbuf, 1); 1672 1673 m = mbuf; 1674 src_offset = 0; 1675 total_length = rte_pktmbuf_pkt_len(m); 1676 for (i = 0; (i < count) && (m != NULL); i++) { 1677 /* fill each destination buffer */ 1678 buf = buffers[i]; 1679 1680 if (i < count - 1) { 1681 /* prefetch next entry while processing this one */ 1682 pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]); 1683 rte_prefetch0(pkt_buf); 1684 } 1685 1686 /* Adjust pointers for guest addressing */ 1687 pkt_buf = avp_dev_translate_buffer(avp, buf); 1688 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1689 1690 /* setup the buffer chain */ 1691 if (previous_buf != NULL) 1692 previous_buf->next = buf; 1693 else 1694 first_buf = pkt_buf; 1695 1696 previous_buf = pkt_buf; 1697 1698 do { 1699 /* 1700 * copy as many source mbuf segments as will fit in the 1701 * destination buffer. 1702 */ 1703 copy_length = RTE_MIN((avp->host_mbuf_size - 1704 pkt_buf->data_len), 1705 (rte_pktmbuf_data_len(m) - 1706 src_offset)); 1707 rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len), 1708 RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *), 1709 src_offset), 1710 copy_length); 1711 pkt_buf->data_len += copy_length; 1712 src_offset += copy_length; 1713 1714 if (likely(src_offset == rte_pktmbuf_data_len(m))) { 1715 /* need a new source buffer */ 1716 m = m->next; 1717 src_offset = 0; 1718 } 1719 1720 if (unlikely(pkt_buf->data_len == 1721 avp->host_mbuf_size)) { 1722 /* need a new destination buffer */ 1723 break; 1724 } 1725 1726 } while (m != NULL); 1727 } 1728 1729 first_buf->nb_segs = count; 1730 first_buf->pkt_len = total_length; 1731 1732 if (mbuf->ol_flags & PKT_TX_VLAN_PKT) { 1733 first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT; 1734 first_buf->vlan_tci = mbuf->vlan_tci; 1735 } 1736 1737 avp_dev_buffer_sanity_check(avp, buffers[0]); 1738 1739 return total_length; 1740 } 1741 1742 1743 static uint16_t 1744 avp_xmit_scattered_pkts(void *tx_queue, 1745 struct rte_mbuf **tx_pkts, 1746 uint16_t nb_pkts) 1747 { 1748 struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST * 1749 RTE_AVP_MAX_MBUF_SEGMENTS)]; 1750 struct avp_queue *txq = (struct avp_queue *)tx_queue; 1751 struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST]; 1752 struct avp_dev *avp = txq->avp; 1753 struct rte_avp_fifo *alloc_q; 1754 struct rte_avp_fifo *tx_q; 1755 unsigned int count, avail, n; 1756 unsigned int orig_nb_pkts; 1757 struct rte_mbuf *m; 1758 unsigned int required; 1759 unsigned int segments; 1760 unsigned int tx_bytes; 1761 unsigned int i; 1762 1763 orig_nb_pkts = nb_pkts; 1764 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1765 /* VM live migration in progress */ 1766 /* TODO ... buffer for X packets then drop? */ 1767 txq->errors += nb_pkts; 1768 return 0; 1769 } 1770 1771 tx_q = avp->tx_q[txq->queue_id]; 1772 alloc_q = avp->alloc_q[txq->queue_id]; 1773 1774 /* limit the number of transmitted packets to the max burst size */ 1775 if (unlikely(nb_pkts > AVP_MAX_TX_BURST)) 1776 nb_pkts = AVP_MAX_TX_BURST; 1777 1778 /* determine how many buffers are available to copy into */ 1779 avail = avp_fifo_count(alloc_q); 1780 if (unlikely(avail > (AVP_MAX_TX_BURST * 1781 RTE_AVP_MAX_MBUF_SEGMENTS))) 1782 avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS; 1783 1784 /* determine how many slots are available in the transmit queue */ 1785 count = avp_fifo_free_count(tx_q); 1786 1787 /* determine how many packets can be sent */ 1788 nb_pkts = RTE_MIN(count, nb_pkts); 1789 1790 /* determine how many packets will fit in the available buffers */ 1791 count = 0; 1792 segments = 0; 1793 for (i = 0; i < nb_pkts; i++) { 1794 m = tx_pkts[i]; 1795 if (likely(i < (unsigned int)nb_pkts - 1)) { 1796 /* prefetch next entry while processing this one */ 1797 rte_prefetch0(tx_pkts[i + 1]); 1798 } 1799 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) / 1800 avp->host_mbuf_size; 1801 1802 if (unlikely((required == 0) || 1803 (required > RTE_AVP_MAX_MBUF_SEGMENTS))) 1804 break; 1805 else if (unlikely(required + segments > avail)) 1806 break; 1807 segments += required; 1808 count++; 1809 } 1810 nb_pkts = count; 1811 1812 if (unlikely(nb_pkts == 0)) { 1813 /* no available buffers, or no space on the tx queue */ 1814 txq->errors += orig_nb_pkts; 1815 return 0; 1816 } 1817 1818 PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n", 1819 nb_pkts, tx_q); 1820 1821 /* retrieve sufficient send buffers */ 1822 n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments); 1823 if (unlikely(n != segments)) { 1824 PMD_TX_LOG(DEBUG, "Failed to allocate buffers " 1825 "n=%u, segments=%u, orig=%u\n", 1826 n, segments, orig_nb_pkts); 1827 txq->errors += orig_nb_pkts; 1828 return 0; 1829 } 1830 1831 tx_bytes = 0; 1832 count = 0; 1833 for (i = 0; i < nb_pkts; i++) { 1834 /* process each packet to be transmitted */ 1835 m = tx_pkts[i]; 1836 1837 /* determine how many buffers are required for this packet */ 1838 required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) / 1839 avp->host_mbuf_size; 1840 1841 tx_bytes += avp_dev_copy_to_buffers(avp, m, 1842 &avp_bufs[count], required); 1843 tx_bufs[i] = avp_bufs[count]; 1844 count += required; 1845 1846 /* free the original mbuf */ 1847 rte_pktmbuf_free(m); 1848 } 1849 1850 txq->packets += nb_pkts; 1851 txq->bytes += tx_bytes; 1852 1853 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS 1854 for (i = 0; i < nb_pkts; i++) 1855 avp_dev_buffer_sanity_check(avp, tx_bufs[i]); 1856 #endif 1857 1858 /* send the packets */ 1859 n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts); 1860 if (unlikely(n != orig_nb_pkts)) 1861 txq->errors += (orig_nb_pkts - n); 1862 1863 return n; 1864 } 1865 1866 1867 static uint16_t 1868 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 1869 { 1870 struct avp_queue *txq = (struct avp_queue *)tx_queue; 1871 struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST]; 1872 struct avp_dev *avp = txq->avp; 1873 struct rte_avp_desc *pkt_buf; 1874 struct rte_avp_fifo *alloc_q; 1875 struct rte_avp_fifo *tx_q; 1876 unsigned int count, avail, n; 1877 struct rte_mbuf *m; 1878 unsigned int pkt_len; 1879 unsigned int tx_bytes; 1880 char *pkt_data; 1881 unsigned int i; 1882 1883 if (unlikely(avp->flags & AVP_F_DETACHED)) { 1884 /* VM live migration in progress */ 1885 /* TODO ... buffer for X packets then drop?! */ 1886 txq->errors++; 1887 return 0; 1888 } 1889 1890 tx_q = avp->tx_q[txq->queue_id]; 1891 alloc_q = avp->alloc_q[txq->queue_id]; 1892 1893 /* limit the number of transmitted packets to the max burst size */ 1894 if (unlikely(nb_pkts > AVP_MAX_TX_BURST)) 1895 nb_pkts = AVP_MAX_TX_BURST; 1896 1897 /* determine how many buffers are available to copy into */ 1898 avail = avp_fifo_count(alloc_q); 1899 1900 /* determine how many slots are available in the transmit queue */ 1901 count = avp_fifo_free_count(tx_q); 1902 1903 /* determine how many packets can be sent */ 1904 count = RTE_MIN(count, avail); 1905 count = RTE_MIN(count, nb_pkts); 1906 1907 if (unlikely(count == 0)) { 1908 /* no available buffers, or no space on the tx queue */ 1909 txq->errors += nb_pkts; 1910 return 0; 1911 } 1912 1913 PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n", 1914 count, tx_q); 1915 1916 /* retrieve sufficient send buffers */ 1917 n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count); 1918 if (unlikely(n != count)) { 1919 txq->errors++; 1920 return 0; 1921 } 1922 1923 tx_bytes = 0; 1924 for (i = 0; i < count; i++) { 1925 /* prefetch next entry while processing the current one */ 1926 if (i < count - 1) { 1927 pkt_buf = avp_dev_translate_buffer(avp, 1928 avp_bufs[i + 1]); 1929 rte_prefetch0(pkt_buf); 1930 } 1931 1932 /* process each packet to be transmitted */ 1933 m = tx_pkts[i]; 1934 1935 /* Adjust pointers for guest addressing */ 1936 pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]); 1937 pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data); 1938 pkt_len = rte_pktmbuf_pkt_len(m); 1939 1940 if (unlikely((pkt_len > avp->guest_mbuf_size) || 1941 (pkt_len > avp->host_mbuf_size))) { 1942 /* 1943 * application should be using the scattered transmit 1944 * function; send it truncated to avoid the performance 1945 * hit of having to manage returning the already 1946 * allocated buffer to the free list. This should not 1947 * happen since the application should have set the 1948 * max_rx_pkt_len based on its MTU and it should be 1949 * policing its own packet sizes. 1950 */ 1951 txq->errors++; 1952 pkt_len = RTE_MIN(avp->guest_mbuf_size, 1953 avp->host_mbuf_size); 1954 } 1955 1956 /* copy data out of our mbuf and into the AVP buffer */ 1957 rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len); 1958 pkt_buf->pkt_len = pkt_len; 1959 pkt_buf->data_len = pkt_len; 1960 pkt_buf->nb_segs = 1; 1961 pkt_buf->next = NULL; 1962 1963 if (m->ol_flags & PKT_TX_VLAN_PKT) { 1964 pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT; 1965 pkt_buf->vlan_tci = m->vlan_tci; 1966 } 1967 1968 tx_bytes += pkt_len; 1969 1970 /* free the original mbuf */ 1971 rte_pktmbuf_free(m); 1972 } 1973 1974 txq->packets += count; 1975 txq->bytes += tx_bytes; 1976 1977 /* send the packets */ 1978 n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count); 1979 1980 return n; 1981 } 1982 1983 static void 1984 avp_dev_rx_queue_release(void *rx_queue) 1985 { 1986 struct avp_queue *rxq = (struct avp_queue *)rx_queue; 1987 struct avp_dev *avp = rxq->avp; 1988 struct rte_eth_dev_data *data = avp->dev_data; 1989 unsigned int i; 1990 1991 for (i = 0; i < avp->num_rx_queues; i++) { 1992 if (data->rx_queues[i] == rxq) 1993 data->rx_queues[i] = NULL; 1994 } 1995 } 1996 1997 static void 1998 avp_dev_tx_queue_release(void *tx_queue) 1999 { 2000 struct avp_queue *txq = (struct avp_queue *)tx_queue; 2001 struct avp_dev *avp = txq->avp; 2002 struct rte_eth_dev_data *data = avp->dev_data; 2003 unsigned int i; 2004 2005 for (i = 0; i < avp->num_tx_queues; i++) { 2006 if (data->tx_queues[i] == txq) 2007 data->tx_queues[i] = NULL; 2008 } 2009 } 2010 2011 static int 2012 avp_dev_configure(struct rte_eth_dev *eth_dev) 2013 { 2014 struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); 2015 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2016 struct rte_avp_device_info *host_info; 2017 struct rte_avp_device_config config; 2018 int mask = 0; 2019 void *addr; 2020 int ret; 2021 2022 rte_spinlock_lock(&avp->lock); 2023 if (avp->flags & AVP_F_DETACHED) { 2024 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2025 ret = -ENOTSUP; 2026 goto unlock; 2027 } 2028 2029 addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; 2030 host_info = (struct rte_avp_device_info *)addr; 2031 2032 /* Setup required number of queues */ 2033 _avp_set_queue_counts(eth_dev); 2034 2035 mask = (ETH_VLAN_STRIP_MASK | 2036 ETH_VLAN_FILTER_MASK | 2037 ETH_VLAN_EXTEND_MASK); 2038 avp_vlan_offload_set(eth_dev, mask); 2039 2040 /* update device config */ 2041 memset(&config, 0, sizeof(config)); 2042 config.device_id = host_info->device_id; 2043 config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; 2044 config.driver_version = AVP_DPDK_DRIVER_VERSION; 2045 config.features = avp->features; 2046 config.num_tx_queues = avp->num_tx_queues; 2047 config.num_rx_queues = avp->num_rx_queues; 2048 2049 ret = avp_dev_ctrl_set_config(eth_dev, &config); 2050 if (ret < 0) { 2051 PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", 2052 ret); 2053 goto unlock; 2054 } 2055 2056 avp->flags |= AVP_F_CONFIGURED; 2057 ret = 0; 2058 2059 unlock: 2060 rte_spinlock_unlock(&avp->lock); 2061 return ret; 2062 } 2063 2064 static int 2065 avp_dev_start(struct rte_eth_dev *eth_dev) 2066 { 2067 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2068 int ret; 2069 2070 rte_spinlock_lock(&avp->lock); 2071 if (avp->flags & AVP_F_DETACHED) { 2072 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2073 ret = -ENOTSUP; 2074 goto unlock; 2075 } 2076 2077 /* disable features that we do not support */ 2078 eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0; 2079 eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0; 2080 eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0; 2081 eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0; 2082 2083 /* update link state */ 2084 ret = avp_dev_ctrl_set_link_state(eth_dev, 1); 2085 if (ret < 0) { 2086 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", 2087 ret); 2088 goto unlock; 2089 } 2090 2091 /* remember current link state */ 2092 avp->flags |= AVP_F_LINKUP; 2093 2094 ret = 0; 2095 2096 unlock: 2097 rte_spinlock_unlock(&avp->lock); 2098 return ret; 2099 } 2100 2101 static void 2102 avp_dev_stop(struct rte_eth_dev *eth_dev) 2103 { 2104 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2105 int ret; 2106 2107 rte_spinlock_lock(&avp->lock); 2108 if (avp->flags & AVP_F_DETACHED) { 2109 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2110 goto unlock; 2111 } 2112 2113 /* remember current link state */ 2114 avp->flags &= ~AVP_F_LINKUP; 2115 2116 /* update link state */ 2117 ret = avp_dev_ctrl_set_link_state(eth_dev, 0); 2118 if (ret < 0) { 2119 PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", 2120 ret); 2121 } 2122 2123 unlock: 2124 rte_spinlock_unlock(&avp->lock); 2125 } 2126 2127 static void 2128 avp_dev_close(struct rte_eth_dev *eth_dev) 2129 { 2130 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2131 int ret; 2132 2133 rte_spinlock_lock(&avp->lock); 2134 if (avp->flags & AVP_F_DETACHED) { 2135 PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); 2136 goto unlock; 2137 } 2138 2139 /* remember current link state */ 2140 avp->flags &= ~AVP_F_LINKUP; 2141 avp->flags &= ~AVP_F_CONFIGURED; 2142 2143 ret = avp_dev_disable_interrupts(eth_dev); 2144 if (ret < 0) { 2145 PMD_DRV_LOG(ERR, "Failed to disable interrupts\n"); 2146 /* continue */ 2147 } 2148 2149 /* update device state */ 2150 ret = avp_dev_ctrl_shutdown(eth_dev); 2151 if (ret < 0) { 2152 PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n", 2153 ret); 2154 /* continue */ 2155 } 2156 2157 unlock: 2158 rte_spinlock_unlock(&avp->lock); 2159 } 2160 2161 static int 2162 avp_dev_link_update(struct rte_eth_dev *eth_dev, 2163 __rte_unused int wait_to_complete) 2164 { 2165 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2166 struct rte_eth_link *link = ð_dev->data->dev_link; 2167 2168 link->link_speed = ETH_SPEED_NUM_10G; 2169 link->link_duplex = ETH_LINK_FULL_DUPLEX; 2170 link->link_status = !!(avp->flags & AVP_F_LINKUP); 2171 2172 return -1; 2173 } 2174 2175 static void 2176 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev) 2177 { 2178 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2179 2180 rte_spinlock_lock(&avp->lock); 2181 if ((avp->flags & AVP_F_PROMISC) == 0) { 2182 avp->flags |= AVP_F_PROMISC; 2183 PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n", 2184 eth_dev->data->port_id); 2185 } 2186 rte_spinlock_unlock(&avp->lock); 2187 } 2188 2189 static void 2190 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev) 2191 { 2192 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2193 2194 rte_spinlock_lock(&avp->lock); 2195 if ((avp->flags & AVP_F_PROMISC) != 0) { 2196 avp->flags &= ~AVP_F_PROMISC; 2197 PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n", 2198 eth_dev->data->port_id); 2199 } 2200 rte_spinlock_unlock(&avp->lock); 2201 } 2202 2203 static void 2204 avp_dev_info_get(struct rte_eth_dev *eth_dev, 2205 struct rte_eth_dev_info *dev_info) 2206 { 2207 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2208 2209 dev_info->driver_name = "rte_avp_pmd"; 2210 dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device); 2211 dev_info->max_rx_queues = avp->max_rx_queues; 2212 dev_info->max_tx_queues = avp->max_tx_queues; 2213 dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE; 2214 dev_info->max_rx_pktlen = avp->max_rx_pkt_len; 2215 dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS; 2216 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) { 2217 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP; 2218 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; 2219 } 2220 } 2221 2222 static void 2223 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask) 2224 { 2225 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2226 2227 if (mask & ETH_VLAN_STRIP_MASK) { 2228 if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) { 2229 if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip) 2230 avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD; 2231 else 2232 avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD; 2233 } else { 2234 PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n"); 2235 } 2236 } 2237 2238 if (mask & ETH_VLAN_FILTER_MASK) { 2239 if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter) 2240 PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n"); 2241 } 2242 2243 if (mask & ETH_VLAN_EXTEND_MASK) { 2244 if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend) 2245 PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n"); 2246 } 2247 } 2248 2249 static void 2250 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats) 2251 { 2252 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2253 unsigned int i; 2254 2255 for (i = 0; i < avp->num_rx_queues; i++) { 2256 struct avp_queue *rxq = avp->dev_data->rx_queues[i]; 2257 2258 if (rxq) { 2259 stats->ipackets += rxq->packets; 2260 stats->ibytes += rxq->bytes; 2261 stats->ierrors += rxq->errors; 2262 2263 stats->q_ipackets[i] += rxq->packets; 2264 stats->q_ibytes[i] += rxq->bytes; 2265 stats->q_errors[i] += rxq->errors; 2266 } 2267 } 2268 2269 for (i = 0; i < avp->num_tx_queues; i++) { 2270 struct avp_queue *txq = avp->dev_data->tx_queues[i]; 2271 2272 if (txq) { 2273 stats->opackets += txq->packets; 2274 stats->obytes += txq->bytes; 2275 stats->oerrors += txq->errors; 2276 2277 stats->q_opackets[i] += txq->packets; 2278 stats->q_obytes[i] += txq->bytes; 2279 stats->q_errors[i] += txq->errors; 2280 } 2281 } 2282 } 2283 2284 static void 2285 avp_dev_stats_reset(struct rte_eth_dev *eth_dev) 2286 { 2287 struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); 2288 unsigned int i; 2289 2290 for (i = 0; i < avp->num_rx_queues; i++) { 2291 struct avp_queue *rxq = avp->dev_data->rx_queues[i]; 2292 2293 if (rxq) { 2294 rxq->bytes = 0; 2295 rxq->packets = 0; 2296 rxq->errors = 0; 2297 } 2298 } 2299 2300 for (i = 0; i < avp->num_tx_queues; i++) { 2301 struct avp_queue *txq = avp->dev_data->tx_queues[i]; 2302 2303 if (txq) { 2304 txq->bytes = 0; 2305 txq->packets = 0; 2306 txq->errors = 0; 2307 } 2308 } 2309 } 2310 2311 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd); 2312 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map); 2313