xref: /dpdk/drivers/net/avp/avp_ethdev.c (revision b79e4c00af0e7cfb8601ab0208659d226b82bd10)
1 /*
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2013-2017, Wind River Systems, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * 2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * 3) Neither the name of Wind River Systems nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <stdint.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38 
39 #include <rte_ethdev.h>
40 #include <rte_ethdev_pci.h>
41 #include <rte_memcpy.h>
42 #include <rte_string_fns.h>
43 #include <rte_memzone.h>
44 #include <rte_malloc.h>
45 #include <rte_atomic.h>
46 #include <rte_branch_prediction.h>
47 #include <rte_pci.h>
48 #include <rte_ether.h>
49 #include <rte_common.h>
50 #include <rte_cycles.h>
51 #include <rte_spinlock.h>
52 #include <rte_byteorder.h>
53 #include <rte_dev.h>
54 #include <rte_memory.h>
55 #include <rte_eal.h>
56 #include <rte_io.h>
57 
58 #include "rte_avp_common.h"
59 #include "rte_avp_fifo.h"
60 
61 #include "avp_logs.h"
62 
63 
64 static int avp_dev_create(struct rte_pci_device *pci_dev,
65 			  struct rte_eth_dev *eth_dev);
66 
67 static int avp_dev_configure(struct rte_eth_dev *dev);
68 static int avp_dev_start(struct rte_eth_dev *dev);
69 static void avp_dev_stop(struct rte_eth_dev *dev);
70 static void avp_dev_close(struct rte_eth_dev *dev);
71 static void avp_dev_info_get(struct rte_eth_dev *dev,
72 			     struct rte_eth_dev_info *dev_info);
73 static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
74 static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
75 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
76 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
77 
78 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
79 				  uint16_t rx_queue_id,
80 				  uint16_t nb_rx_desc,
81 				  unsigned int socket_id,
82 				  const struct rte_eth_rxconf *rx_conf,
83 				  struct rte_mempool *pool);
84 
85 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
86 				  uint16_t tx_queue_id,
87 				  uint16_t nb_tx_desc,
88 				  unsigned int socket_id,
89 				  const struct rte_eth_txconf *tx_conf);
90 
91 static uint16_t avp_recv_scattered_pkts(void *rx_queue,
92 					struct rte_mbuf **rx_pkts,
93 					uint16_t nb_pkts);
94 
95 static uint16_t avp_recv_pkts(void *rx_queue,
96 			      struct rte_mbuf **rx_pkts,
97 			      uint16_t nb_pkts);
98 
99 static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
100 					struct rte_mbuf **tx_pkts,
101 					uint16_t nb_pkts);
102 
103 static uint16_t avp_xmit_pkts(void *tx_queue,
104 			      struct rte_mbuf **tx_pkts,
105 			      uint16_t nb_pkts);
106 
107 static void avp_dev_rx_queue_release(void *rxq);
108 static void avp_dev_tx_queue_release(void *txq);
109 
110 static void avp_dev_stats_get(struct rte_eth_dev *dev,
111 			      struct rte_eth_stats *stats);
112 static void avp_dev_stats_reset(struct rte_eth_dev *dev);
113 
114 
115 #define AVP_MAX_RX_BURST 64
116 #define AVP_MAX_TX_BURST 64
117 #define AVP_MAX_MAC_ADDRS 1
118 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN
119 
120 
121 /*
122  * Defines the number of microseconds to wait before checking the response
123  * queue for completion.
124  */
125 #define AVP_REQUEST_DELAY_USECS (5000)
126 
127 /*
128  * Defines the number times to check the response queue for completion before
129  * declaring a timeout.
130  */
131 #define AVP_MAX_REQUEST_RETRY (100)
132 
133 /* Defines the current PCI driver version number */
134 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
135 
136 /*
137  * The set of PCI devices this driver supports
138  */
139 static const struct rte_pci_id pci_id_avp_map[] = {
140 	{ .vendor_id = RTE_AVP_PCI_VENDOR_ID,
141 	  .device_id = RTE_AVP_PCI_DEVICE_ID,
142 	  .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
143 	  .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
144 	  .class_id = RTE_CLASS_ANY_ID,
145 	},
146 
147 	{ .vendor_id = 0, /* sentinel */
148 	},
149 };
150 
151 /*
152  * dev_ops for avp, bare necessities for basic operation
153  */
154 static const struct eth_dev_ops avp_eth_dev_ops = {
155 	.dev_configure       = avp_dev_configure,
156 	.dev_start           = avp_dev_start,
157 	.dev_stop            = avp_dev_stop,
158 	.dev_close           = avp_dev_close,
159 	.dev_infos_get       = avp_dev_info_get,
160 	.vlan_offload_set    = avp_vlan_offload_set,
161 	.stats_get           = avp_dev_stats_get,
162 	.stats_reset         = avp_dev_stats_reset,
163 	.link_update         = avp_dev_link_update,
164 	.promiscuous_enable  = avp_dev_promiscuous_enable,
165 	.promiscuous_disable = avp_dev_promiscuous_disable,
166 	.rx_queue_setup      = avp_dev_rx_queue_setup,
167 	.rx_queue_release    = avp_dev_rx_queue_release,
168 	.tx_queue_setup      = avp_dev_tx_queue_setup,
169 	.tx_queue_release    = avp_dev_tx_queue_release,
170 };
171 
172 /**@{ AVP device flags */
173 #define AVP_F_PROMISC (1 << 1)
174 #define AVP_F_CONFIGURED (1 << 2)
175 #define AVP_F_LINKUP (1 << 3)
176 #define AVP_F_DETACHED (1 << 4)
177 /**@} */
178 
179 /* Ethernet device validation marker */
180 #define AVP_ETHDEV_MAGIC 0x92972862
181 
182 /*
183  * Defines the AVP device attributes which are attached to an RTE ethernet
184  * device
185  */
186 struct avp_dev {
187 	uint32_t magic; /**< Memory validation marker */
188 	uint64_t device_id; /**< Unique system identifier */
189 	struct ether_addr ethaddr; /**< Host specified MAC address */
190 	struct rte_eth_dev_data *dev_data;
191 	/**< Back pointer to ethernet device data */
192 	volatile uint32_t flags; /**< Device operational flags */
193 	uint8_t port_id; /**< Ethernet port identifier */
194 	struct rte_mempool *pool; /**< pkt mbuf mempool */
195 	unsigned int guest_mbuf_size; /**< local pool mbuf size */
196 	unsigned int host_mbuf_size; /**< host mbuf size */
197 	unsigned int max_rx_pkt_len; /**< maximum receive unit */
198 	uint32_t host_features; /**< Supported feature bitmap */
199 	uint32_t features; /**< Enabled feature bitmap */
200 	unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
201 	unsigned int max_tx_queues; /**< Maximum number of transmit queues */
202 	unsigned int num_rx_queues; /**< Negotiated number of receive queues */
203 	unsigned int max_rx_queues; /**< Maximum number of receive queues */
204 
205 	struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
206 	struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
207 	struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
208 	/**< Allocated mbufs queue */
209 	struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
210 	/**< To be freed mbufs queue */
211 
212 	/* mutual exclusion over the 'flag' and 'resp_q/req_q' fields */
213 	rte_spinlock_t lock;
214 
215 	/* For request & response */
216 	struct rte_avp_fifo *req_q; /**< Request queue */
217 	struct rte_avp_fifo *resp_q; /**< Response queue */
218 	void *host_sync_addr; /**< (host) Req/Resp Mem address */
219 	void *sync_addr; /**< Req/Resp Mem address */
220 	void *host_mbuf_addr; /**< (host) MBUF pool start address */
221 	void *mbuf_addr; /**< MBUF pool start address */
222 } __rte_cache_aligned;
223 
224 /* RTE ethernet private data */
225 struct avp_adapter {
226 	struct avp_dev avp;
227 } __rte_cache_aligned;
228 
229 
230 /* 32-bit MMIO register write */
231 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
232 
233 /* 32-bit MMIO register read */
234 #define AVP_READ32(_addr) rte_read32_relaxed((_addr))
235 
236 /* Macro to cast the ethernet device private data to a AVP object */
237 #define AVP_DEV_PRIVATE_TO_HW(adapter) \
238 	(&((struct avp_adapter *)adapter)->avp)
239 
240 /*
241  * Defines the structure of a AVP device queue for the purpose of handling the
242  * receive and transmit burst callback functions
243  */
244 struct avp_queue {
245 	struct rte_eth_dev_data *dev_data;
246 	/**< Backpointer to ethernet device data */
247 	struct avp_dev *avp; /**< Backpointer to AVP device */
248 	uint16_t queue_id;
249 	/**< Queue identifier used for indexing current queue */
250 	uint16_t queue_base;
251 	/**< Base queue identifier for queue servicing */
252 	uint16_t queue_limit;
253 	/**< Maximum queue identifier for queue servicing */
254 
255 	uint64_t packets;
256 	uint64_t bytes;
257 	uint64_t errors;
258 };
259 
260 /* send a request and wait for a response
261  *
262  * @warning must be called while holding the avp->lock spinlock.
263  */
264 static int
265 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
266 {
267 	unsigned int retry = AVP_MAX_REQUEST_RETRY;
268 	void *resp_addr = NULL;
269 	unsigned int count;
270 	int ret;
271 
272 	PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
273 
274 	request->result = -ENOTSUP;
275 
276 	/* Discard any stale responses before starting a new request */
277 	while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
278 		PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
279 
280 	rte_memcpy(avp->sync_addr, request, sizeof(*request));
281 	count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
282 	if (count < 1) {
283 		PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
284 			    request->req_id);
285 		ret = -EBUSY;
286 		goto done;
287 	}
288 
289 	while (retry--) {
290 		/* wait for a response */
291 		usleep(AVP_REQUEST_DELAY_USECS);
292 
293 		count = avp_fifo_count(avp->resp_q);
294 		if (count >= 1) {
295 			/* response received */
296 			break;
297 		}
298 
299 		if ((count < 1) && (retry == 0)) {
300 			PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
301 				    request->req_id);
302 			ret = -ETIME;
303 			goto done;
304 		}
305 	}
306 
307 	/* retrieve the response */
308 	count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
309 	if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
310 		PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
311 			    count, resp_addr, avp->host_sync_addr);
312 		ret = -ENODATA;
313 		goto done;
314 	}
315 
316 	/* copy to user buffer */
317 	rte_memcpy(request, avp->sync_addr, sizeof(*request));
318 	ret = 0;
319 
320 	PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
321 		    request->result, request->req_id);
322 
323 done:
324 	return ret;
325 }
326 
327 static int
328 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
329 {
330 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
331 	struct rte_avp_request request;
332 	int ret;
333 
334 	/* setup a link state change request */
335 	memset(&request, 0, sizeof(request));
336 	request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
337 	request.if_up = state;
338 
339 	ret = avp_dev_process_request(avp, &request);
340 
341 	return ret == 0 ? request.result : ret;
342 }
343 
344 static int
345 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
346 			struct rte_avp_device_config *config)
347 {
348 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
349 	struct rte_avp_request request;
350 	int ret;
351 
352 	/* setup a configure request */
353 	memset(&request, 0, sizeof(request));
354 	request.req_id = RTE_AVP_REQ_CFG_DEVICE;
355 	memcpy(&request.config, config, sizeof(request.config));
356 
357 	ret = avp_dev_process_request(avp, &request);
358 
359 	return ret == 0 ? request.result : ret;
360 }
361 
362 static int
363 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
364 {
365 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
366 	struct rte_avp_request request;
367 	int ret;
368 
369 	/* setup a shutdown request */
370 	memset(&request, 0, sizeof(request));
371 	request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
372 
373 	ret = avp_dev_process_request(avp, &request);
374 
375 	return ret == 0 ? request.result : ret;
376 }
377 
378 /* translate from host mbuf virtual address to guest virtual address */
379 static inline void *
380 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
381 {
382 	return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
383 				       (uintptr_t)avp->host_mbuf_addr),
384 			   (uintptr_t)avp->mbuf_addr);
385 }
386 
387 /* translate from host physical address to guest virtual address */
388 static void *
389 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
390 			  phys_addr_t host_phys_addr)
391 {
392 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
393 	struct rte_mem_resource *resource;
394 	struct rte_avp_memmap_info *info;
395 	struct rte_avp_memmap *map;
396 	off_t offset;
397 	void *addr;
398 	unsigned int i;
399 
400 	addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
401 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
402 	info = (struct rte_avp_memmap_info *)resource->addr;
403 
404 	offset = 0;
405 	for (i = 0; i < info->nb_maps; i++) {
406 		/* search all segments looking for a matching address */
407 		map = &info->maps[i];
408 
409 		if ((host_phys_addr >= map->phys_addr) &&
410 			(host_phys_addr < (map->phys_addr + map->length))) {
411 			/* address is within this segment */
412 			offset += (host_phys_addr - map->phys_addr);
413 			addr = RTE_PTR_ADD(addr, offset);
414 
415 			PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
416 				    host_phys_addr, addr);
417 
418 			return addr;
419 		}
420 		offset += map->length;
421 	}
422 
423 	return NULL;
424 }
425 
426 /* verify that the incoming device version is compatible with our version */
427 static int
428 avp_dev_version_check(uint32_t version)
429 {
430 	uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
431 	uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
432 
433 	if (device <= driver) {
434 		/* the host driver version is less than or equal to ours */
435 		return 0;
436 	}
437 
438 	return 1;
439 }
440 
441 /* verify that memory regions have expected version and validation markers */
442 static int
443 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
444 {
445 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
446 	struct rte_avp_memmap_info *memmap;
447 	struct rte_avp_device_info *info;
448 	struct rte_mem_resource *resource;
449 	unsigned int i;
450 
451 	/* Dump resource info for debug */
452 	for (i = 0; i < PCI_MAX_RESOURCE; i++) {
453 		resource = &pci_dev->mem_resource[i];
454 		if ((resource->phys_addr == 0) || (resource->len == 0))
455 			continue;
456 
457 		PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
458 			    i, resource->phys_addr,
459 			    resource->len, resource->addr);
460 
461 		switch (i) {
462 		case RTE_AVP_PCI_MEMMAP_BAR:
463 			memmap = (struct rte_avp_memmap_info *)resource->addr;
464 			if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
465 			    (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
466 				PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
467 					    memmap->magic, memmap->version);
468 				return -EINVAL;
469 			}
470 			break;
471 
472 		case RTE_AVP_PCI_DEVICE_BAR:
473 			info = (struct rte_avp_device_info *)resource->addr;
474 			if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
475 			    avp_dev_version_check(info->version)) {
476 				PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
477 					    info->magic, info->version,
478 					    AVP_DPDK_DRIVER_VERSION);
479 				return -EINVAL;
480 			}
481 			break;
482 
483 		case RTE_AVP_PCI_MEMORY_BAR:
484 		case RTE_AVP_PCI_MMIO_BAR:
485 			if (resource->addr == NULL) {
486 				PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
487 					    i);
488 				return -EINVAL;
489 			}
490 			break;
491 
492 		case RTE_AVP_PCI_MSIX_BAR:
493 		default:
494 			/* no validation required */
495 			break;
496 		}
497 	}
498 
499 	return 0;
500 }
501 
502 static int
503 avp_dev_detach(struct rte_eth_dev *eth_dev)
504 {
505 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
506 	int ret;
507 
508 	PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
509 		    eth_dev->data->port_id, avp->device_id);
510 
511 	rte_spinlock_lock(&avp->lock);
512 
513 	if (avp->flags & AVP_F_DETACHED) {
514 		PMD_DRV_LOG(NOTICE, "port %u already detached\n",
515 			    eth_dev->data->port_id);
516 		ret = 0;
517 		goto unlock;
518 	}
519 
520 	/* shutdown the device first so the host stops sending us packets. */
521 	ret = avp_dev_ctrl_shutdown(eth_dev);
522 	if (ret < 0) {
523 		PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
524 			    ret);
525 		avp->flags &= ~AVP_F_DETACHED;
526 		goto unlock;
527 	}
528 
529 	avp->flags |= AVP_F_DETACHED;
530 	rte_wmb();
531 
532 	/* wait for queues to acknowledge the presence of the detach flag */
533 	rte_delay_ms(1);
534 
535 	ret = 0;
536 
537 unlock:
538 	rte_spinlock_unlock(&avp->lock);
539 	return ret;
540 }
541 
542 static void
543 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
544 {
545 	struct avp_dev *avp =
546 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
547 	struct avp_queue *rxq;
548 	uint16_t queue_count;
549 	uint16_t remainder;
550 
551 	rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
552 
553 	/*
554 	 * Must map all AVP fifos as evenly as possible between the configured
555 	 * device queues.  Each device queue will service a subset of the AVP
556 	 * fifos. If there is an odd number of device queues the first set of
557 	 * device queues will get the extra AVP fifos.
558 	 */
559 	queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
560 	remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
561 	if (rx_queue_id < remainder) {
562 		/* these queues must service one extra FIFO */
563 		rxq->queue_base = rx_queue_id * (queue_count + 1);
564 		rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
565 	} else {
566 		/* these queues service the regular number of FIFO */
567 		rxq->queue_base = ((remainder * (queue_count + 1)) +
568 				   ((rx_queue_id - remainder) * queue_count));
569 		rxq->queue_limit = rxq->queue_base + queue_count - 1;
570 	}
571 
572 	PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
573 		    rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
574 
575 	rxq->queue_id = rxq->queue_base;
576 }
577 
578 static void
579 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
580 {
581 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
582 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
583 	struct rte_avp_device_info *host_info;
584 	void *addr;
585 
586 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
587 	host_info = (struct rte_avp_device_info *)addr;
588 
589 	/*
590 	 * the transmit direction is not negotiated beyond respecting the max
591 	 * number of queues because the host can handle arbitrary guest tx
592 	 * queues (host rx queues).
593 	 */
594 	avp->num_tx_queues = eth_dev->data->nb_tx_queues;
595 
596 	/*
597 	 * the receive direction is more restrictive.  The host requires a
598 	 * minimum number of guest rx queues (host tx queues) therefore
599 	 * negotiate a value that is at least as large as the host minimum
600 	 * requirement.  If the host and guest values are not identical then a
601 	 * mapping will be established in the receive_queue_setup function.
602 	 */
603 	avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
604 				     eth_dev->data->nb_rx_queues);
605 
606 	PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
607 		    avp->num_tx_queues, avp->num_rx_queues);
608 }
609 
610 static int
611 avp_dev_attach(struct rte_eth_dev *eth_dev)
612 {
613 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
614 	struct rte_avp_device_config config;
615 	unsigned int i;
616 	int ret;
617 
618 	PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
619 		    eth_dev->data->port_id, avp->device_id);
620 
621 	rte_spinlock_lock(&avp->lock);
622 
623 	if (!(avp->flags & AVP_F_DETACHED)) {
624 		PMD_DRV_LOG(NOTICE, "port %u already attached\n",
625 			    eth_dev->data->port_id);
626 		ret = 0;
627 		goto unlock;
628 	}
629 
630 	/*
631 	 * make sure that the detached flag is set prior to reconfiguring the
632 	 * queues.
633 	 */
634 	avp->flags |= AVP_F_DETACHED;
635 	rte_wmb();
636 
637 	/*
638 	 * re-run the device create utility which will parse the new host info
639 	 * and setup the AVP device queue pointers.
640 	 */
641 	ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev);
642 	if (ret < 0) {
643 		PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
644 			    ret);
645 		goto unlock;
646 	}
647 
648 	if (avp->flags & AVP_F_CONFIGURED) {
649 		/*
650 		 * Update the receive queue mapping to handle cases where the
651 		 * source and destination hosts have different queue
652 		 * requirements.  As long as the DETACHED flag is asserted the
653 		 * queue table should not be referenced so it should be safe to
654 		 * update it.
655 		 */
656 		_avp_set_queue_counts(eth_dev);
657 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
658 			_avp_set_rx_queue_mappings(eth_dev, i);
659 
660 		/*
661 		 * Update the host with our config details so that it knows the
662 		 * device is active.
663 		 */
664 		memset(&config, 0, sizeof(config));
665 		config.device_id = avp->device_id;
666 		config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
667 		config.driver_version = AVP_DPDK_DRIVER_VERSION;
668 		config.features = avp->features;
669 		config.num_tx_queues = avp->num_tx_queues;
670 		config.num_rx_queues = avp->num_rx_queues;
671 		config.if_up = !!(avp->flags & AVP_F_LINKUP);
672 
673 		ret = avp_dev_ctrl_set_config(eth_dev, &config);
674 		if (ret < 0) {
675 			PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
676 				    ret);
677 			goto unlock;
678 		}
679 	}
680 
681 	rte_wmb();
682 	avp->flags &= ~AVP_F_DETACHED;
683 
684 	ret = 0;
685 
686 unlock:
687 	rte_spinlock_unlock(&avp->lock);
688 	return ret;
689 }
690 
691 static void
692 avp_dev_interrupt_handler(void *data)
693 {
694 	struct rte_eth_dev *eth_dev = data;
695 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
696 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
697 	uint32_t status, value;
698 	int ret;
699 
700 	if (registers == NULL)
701 		rte_panic("no mapped MMIO register space\n");
702 
703 	/* read the interrupt status register
704 	 * note: this register clears on read so all raised interrupts must be
705 	 *    handled or remembered for later processing
706 	 */
707 	status = AVP_READ32(
708 		RTE_PTR_ADD(registers,
709 			    RTE_AVP_INTERRUPT_STATUS_OFFSET));
710 
711 	if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
712 		/* handle interrupt based on current status */
713 		value = AVP_READ32(
714 			RTE_PTR_ADD(registers,
715 				    RTE_AVP_MIGRATION_STATUS_OFFSET));
716 		switch (value) {
717 		case RTE_AVP_MIGRATION_DETACHED:
718 			ret = avp_dev_detach(eth_dev);
719 			break;
720 		case RTE_AVP_MIGRATION_ATTACHED:
721 			ret = avp_dev_attach(eth_dev);
722 			break;
723 		default:
724 			PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
725 				    value);
726 			ret = -EINVAL;
727 		}
728 
729 		/* acknowledge the request by writing out our current status */
730 		value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
731 		AVP_WRITE32(value,
732 			    RTE_PTR_ADD(registers,
733 					RTE_AVP_MIGRATION_ACK_OFFSET));
734 
735 		PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
736 	}
737 
738 	if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
739 		PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
740 			    status);
741 
742 	/* re-enable UIO interrupt handling */
743 	ret = rte_intr_enable(&pci_dev->intr_handle);
744 	if (ret < 0) {
745 		PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
746 			    ret);
747 		/* continue */
748 	}
749 }
750 
751 static int
752 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
753 {
754 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
755 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
756 	int ret;
757 
758 	if (registers == NULL)
759 		return -EINVAL;
760 
761 	/* enable UIO interrupt handling */
762 	ret = rte_intr_enable(&pci_dev->intr_handle);
763 	if (ret < 0) {
764 		PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
765 			    ret);
766 		return ret;
767 	}
768 
769 	/* inform the device that all interrupts are enabled */
770 	AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
771 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
772 
773 	return 0;
774 }
775 
776 static int
777 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
778 {
779 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
780 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
781 	int ret;
782 
783 	if (registers == NULL)
784 		return 0;
785 
786 	/* inform the device that all interrupts are disabled */
787 	AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
788 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
789 
790 	/* enable UIO interrupt handling */
791 	ret = rte_intr_disable(&pci_dev->intr_handle);
792 	if (ret < 0) {
793 		PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
794 			    ret);
795 		return ret;
796 	}
797 
798 	return 0;
799 }
800 
801 static int
802 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
803 {
804 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
805 	int ret;
806 
807 	/* register a callback handler with UIO for interrupt notifications */
808 	ret = rte_intr_callback_register(&pci_dev->intr_handle,
809 					 avp_dev_interrupt_handler,
810 					 (void *)eth_dev);
811 	if (ret < 0) {
812 		PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
813 			    ret);
814 		return ret;
815 	}
816 
817 	/* enable interrupt processing */
818 	return avp_dev_enable_interrupts(eth_dev);
819 }
820 
821 static int
822 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
823 {
824 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
825 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
826 	uint32_t value;
827 
828 	if (registers == NULL)
829 		return 0;
830 
831 	value = AVP_READ32(RTE_PTR_ADD(registers,
832 				       RTE_AVP_MIGRATION_STATUS_OFFSET));
833 	if (value == RTE_AVP_MIGRATION_DETACHED) {
834 		/* migration is in progress; ack it if we have not already */
835 		AVP_WRITE32(value,
836 			    RTE_PTR_ADD(registers,
837 					RTE_AVP_MIGRATION_ACK_OFFSET));
838 		return 1;
839 	}
840 	return 0;
841 }
842 
843 /*
844  * create a AVP device using the supplied device info by first translating it
845  * to guest address space(s).
846  */
847 static int
848 avp_dev_create(struct rte_pci_device *pci_dev,
849 	       struct rte_eth_dev *eth_dev)
850 {
851 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
852 	struct rte_avp_device_info *host_info;
853 	struct rte_mem_resource *resource;
854 	unsigned int i;
855 
856 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
857 	if (resource->addr == NULL) {
858 		PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
859 			    RTE_AVP_PCI_DEVICE_BAR);
860 		return -EFAULT;
861 	}
862 	host_info = (struct rte_avp_device_info *)resource->addr;
863 
864 	if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
865 		avp_dev_version_check(host_info->version)) {
866 		PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
867 			    host_info->magic, host_info->version,
868 			    AVP_DPDK_DRIVER_VERSION);
869 		return -EINVAL;
870 	}
871 
872 	PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
873 		    RTE_AVP_GET_RELEASE_VERSION(host_info->version),
874 		    RTE_AVP_GET_MAJOR_VERSION(host_info->version),
875 		    RTE_AVP_GET_MINOR_VERSION(host_info->version));
876 
877 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
878 		    host_info->min_tx_queues, host_info->max_tx_queues);
879 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
880 		    host_info->min_rx_queues, host_info->max_rx_queues);
881 	PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
882 		    host_info->features);
883 
884 	if (avp->magic != AVP_ETHDEV_MAGIC) {
885 		/*
886 		 * First time initialization (i.e., not during a VM
887 		 * migration)
888 		 */
889 		memset(avp, 0, sizeof(*avp));
890 		avp->magic = AVP_ETHDEV_MAGIC;
891 		avp->dev_data = eth_dev->data;
892 		avp->port_id = eth_dev->data->port_id;
893 		avp->host_mbuf_size = host_info->mbuf_size;
894 		avp->host_features = host_info->features;
895 		rte_spinlock_init(&avp->lock);
896 		memcpy(&avp->ethaddr.addr_bytes[0],
897 		       host_info->ethaddr, ETHER_ADDR_LEN);
898 		/* adjust max values to not exceed our max */
899 		avp->max_tx_queues =
900 			RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
901 		avp->max_rx_queues =
902 			RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
903 	} else {
904 		/* Re-attaching during migration */
905 
906 		/* TODO... requires validation of host values */
907 		if ((host_info->features & avp->features) != avp->features) {
908 			PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
909 				    avp->features, host_info->features);
910 			/* this should not be possible; continue for now */
911 		}
912 	}
913 
914 	/* the device id is allowed to change over migrations */
915 	avp->device_id = host_info->device_id;
916 
917 	/* translate incoming host addresses to guest address space */
918 	PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
919 		    host_info->tx_phys);
920 	PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
921 		    host_info->alloc_phys);
922 	for (i = 0; i < avp->max_tx_queues; i++) {
923 		avp->tx_q[i] = avp_dev_translate_address(eth_dev,
924 			host_info->tx_phys + (i * host_info->tx_size));
925 
926 		avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
927 			host_info->alloc_phys + (i * host_info->alloc_size));
928 	}
929 
930 	PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
931 		    host_info->rx_phys);
932 	PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
933 		    host_info->free_phys);
934 	for (i = 0; i < avp->max_rx_queues; i++) {
935 		avp->rx_q[i] = avp_dev_translate_address(eth_dev,
936 			host_info->rx_phys + (i * host_info->rx_size));
937 		avp->free_q[i] = avp_dev_translate_address(eth_dev,
938 			host_info->free_phys + (i * host_info->free_size));
939 	}
940 
941 	PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
942 		    host_info->req_phys);
943 	PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
944 		    host_info->resp_phys);
945 	PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
946 		    host_info->sync_phys);
947 	PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
948 		    host_info->mbuf_phys);
949 	avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
950 	avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
951 	avp->sync_addr =
952 		avp_dev_translate_address(eth_dev, host_info->sync_phys);
953 	avp->mbuf_addr =
954 		avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
955 
956 	/*
957 	 * store the host mbuf virtual address so that we can calculate
958 	 * relative offsets for each mbuf as they are processed
959 	 */
960 	avp->host_mbuf_addr = host_info->mbuf_va;
961 	avp->host_sync_addr = host_info->sync_va;
962 
963 	/*
964 	 * store the maximum packet length that is supported by the host.
965 	 */
966 	avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
967 	PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
968 				host_info->max_rx_pkt_len);
969 
970 	return 0;
971 }
972 
973 /*
974  * This function is based on probe() function in avp_pci.c
975  * It returns 0 on success.
976  */
977 static int
978 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
979 {
980 	struct avp_dev *avp =
981 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
982 	struct rte_pci_device *pci_dev;
983 	int ret;
984 
985 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
986 	eth_dev->dev_ops = &avp_eth_dev_ops;
987 	eth_dev->rx_pkt_burst = &avp_recv_pkts;
988 	eth_dev->tx_pkt_burst = &avp_xmit_pkts;
989 
990 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
991 		/*
992 		 * no setup required on secondary processes.  All data is saved
993 		 * in dev_private by the primary process. All resource should
994 		 * be mapped to the same virtual address so all pointers should
995 		 * be valid.
996 		 */
997 		if (eth_dev->data->scattered_rx) {
998 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
999 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1000 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1001 		}
1002 		return 0;
1003 	}
1004 
1005 	rte_eth_copy_pci_info(eth_dev, pci_dev);
1006 
1007 	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
1008 
1009 	/* Check current migration status */
1010 	if (avp_dev_migration_pending(eth_dev)) {
1011 		PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
1012 		return -EBUSY;
1013 	}
1014 
1015 	/* Check BAR resources */
1016 	ret = avp_dev_check_regions(eth_dev);
1017 	if (ret < 0) {
1018 		PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
1019 			    ret);
1020 		return ret;
1021 	}
1022 
1023 	/* Enable interrupts */
1024 	ret = avp_dev_setup_interrupts(eth_dev);
1025 	if (ret < 0) {
1026 		PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1027 		return ret;
1028 	}
1029 
1030 	/* Handle each subtype */
1031 	ret = avp_dev_create(pci_dev, eth_dev);
1032 	if (ret < 0) {
1033 		PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1034 		return ret;
1035 	}
1036 
1037 	/* Allocate memory for storing MAC addresses */
1038 	eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
1039 	if (eth_dev->data->mac_addrs == NULL) {
1040 		PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1041 			    ETHER_ADDR_LEN);
1042 		return -ENOMEM;
1043 	}
1044 
1045 	/* Get a mac from device config */
1046 	ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1047 
1048 	return 0;
1049 }
1050 
1051 static int
1052 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1053 {
1054 	int ret;
1055 
1056 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1057 		return -EPERM;
1058 
1059 	if (eth_dev->data == NULL)
1060 		return 0;
1061 
1062 	ret = avp_dev_disable_interrupts(eth_dev);
1063 	if (ret != 0) {
1064 		PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
1065 		return ret;
1066 	}
1067 
1068 	if (eth_dev->data->mac_addrs != NULL) {
1069 		rte_free(eth_dev->data->mac_addrs);
1070 		eth_dev->data->mac_addrs = NULL;
1071 	}
1072 
1073 	return 0;
1074 }
1075 
1076 static int
1077 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1078 		  struct rte_pci_device *pci_dev)
1079 {
1080 	struct rte_eth_dev *eth_dev;
1081 	int ret;
1082 
1083 	eth_dev = rte_eth_dev_pci_allocate(pci_dev,
1084 					   sizeof(struct avp_adapter));
1085 	if (eth_dev == NULL)
1086 		return -ENOMEM;
1087 
1088 	ret = eth_avp_dev_init(eth_dev);
1089 	if (ret)
1090 		rte_eth_dev_pci_release(eth_dev);
1091 
1092 	return ret;
1093 }
1094 
1095 static int
1096 eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1097 {
1098 	return rte_eth_dev_pci_generic_remove(pci_dev,
1099 					      eth_avp_dev_uninit);
1100 }
1101 
1102 static struct rte_pci_driver rte_avp_pmd = {
1103 	.id_table = pci_id_avp_map,
1104 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1105 	.probe = eth_avp_pci_probe,
1106 	.remove = eth_avp_pci_remove,
1107 };
1108 
1109 static int
1110 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1111 			 struct avp_dev *avp)
1112 {
1113 	unsigned int max_rx_pkt_len;
1114 
1115 	max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1116 
1117 	if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1118 	    (max_rx_pkt_len > avp->host_mbuf_size)) {
1119 		/*
1120 		 * If the guest MTU is greater than either the host or guest
1121 		 * buffers then chained mbufs have to be enabled in the TX
1122 		 * direction.  It is assumed that the application will not need
1123 		 * to send packets larger than their max_rx_pkt_len (MRU).
1124 		 */
1125 		return 1;
1126 	}
1127 
1128 	if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1129 	    (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1130 		/*
1131 		 * If the host MRU is greater than its own mbuf size or the
1132 		 * guest mbuf size then chained mbufs have to be enabled in the
1133 		 * RX direction.
1134 		 */
1135 		return 1;
1136 	}
1137 
1138 	return 0;
1139 }
1140 
1141 static int
1142 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1143 		       uint16_t rx_queue_id,
1144 		       uint16_t nb_rx_desc,
1145 		       unsigned int socket_id,
1146 		       const struct rte_eth_rxconf *rx_conf,
1147 		       struct rte_mempool *pool)
1148 {
1149 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1150 	struct rte_pktmbuf_pool_private *mbp_priv;
1151 	struct avp_queue *rxq;
1152 
1153 	if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1154 		PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1155 			    rx_queue_id, eth_dev->data->nb_rx_queues);
1156 		return -EINVAL;
1157 	}
1158 
1159 	/* Save mbuf pool pointer */
1160 	avp->pool = pool;
1161 
1162 	/* Save the local mbuf size */
1163 	mbp_priv = rte_mempool_get_priv(pool);
1164 	avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1165 	avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1166 
1167 	if (avp_dev_enable_scattered(eth_dev, avp)) {
1168 		if (!eth_dev->data->scattered_rx) {
1169 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1170 			eth_dev->data->scattered_rx = 1;
1171 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1172 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1173 		}
1174 	}
1175 
1176 	PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1177 		    avp->max_rx_pkt_len,
1178 		    eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1179 		    avp->host_mbuf_size,
1180 		    avp->guest_mbuf_size);
1181 
1182 	/* allocate a queue object */
1183 	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1184 				 RTE_CACHE_LINE_SIZE, socket_id);
1185 	if (rxq == NULL) {
1186 		PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1187 		return -ENOMEM;
1188 	}
1189 
1190 	/* save back pointers to AVP and Ethernet devices */
1191 	rxq->avp = avp;
1192 	rxq->dev_data = eth_dev->data;
1193 	eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1194 
1195 	/* setup the queue receive mapping for the current queue. */
1196 	_avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1197 
1198 	PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1199 
1200 	(void)nb_rx_desc;
1201 	(void)rx_conf;
1202 	return 0;
1203 }
1204 
1205 static int
1206 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1207 		       uint16_t tx_queue_id,
1208 		       uint16_t nb_tx_desc,
1209 		       unsigned int socket_id,
1210 		       const struct rte_eth_txconf *tx_conf)
1211 {
1212 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1213 	struct avp_queue *txq;
1214 
1215 	if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1216 		PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1217 			    tx_queue_id, eth_dev->data->nb_tx_queues);
1218 		return -EINVAL;
1219 	}
1220 
1221 	/* allocate a queue object */
1222 	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1223 				 RTE_CACHE_LINE_SIZE, socket_id);
1224 	if (txq == NULL) {
1225 		PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1226 		return -ENOMEM;
1227 	}
1228 
1229 	/* only the configured set of transmit queues are used */
1230 	txq->queue_id = tx_queue_id;
1231 	txq->queue_base = tx_queue_id;
1232 	txq->queue_limit = tx_queue_id;
1233 
1234 	/* save back pointers to AVP and Ethernet devices */
1235 	txq->avp = avp;
1236 	txq->dev_data = eth_dev->data;
1237 	eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1238 
1239 	PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1240 
1241 	(void)nb_tx_desc;
1242 	(void)tx_conf;
1243 	return 0;
1244 }
1245 
1246 static inline int
1247 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
1248 {
1249 	uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1250 	uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1251 	return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1252 }
1253 
1254 static inline int
1255 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1256 {
1257 	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
1258 
1259 	if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1260 		/* allow all packets destined to our address */
1261 		return 0;
1262 	}
1263 
1264 	if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
1265 		/* allow all broadcast packets */
1266 		return 0;
1267 	}
1268 
1269 	if (likely(is_multicast_ether_addr(&eth->d_addr))) {
1270 		/* allow all multicast packets */
1271 		return 0;
1272 	}
1273 
1274 	if (avp->flags & AVP_F_PROMISC) {
1275 		/* allow all packets when in promiscuous mode */
1276 		return 0;
1277 	}
1278 
1279 	return -1;
1280 }
1281 
1282 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1283 static inline void
1284 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
1285 {
1286 	struct rte_avp_desc *first_buf;
1287 	struct rte_avp_desc *pkt_buf;
1288 	unsigned int pkt_len;
1289 	unsigned int nb_segs;
1290 	void *pkt_data;
1291 	unsigned int i;
1292 
1293 	first_buf = avp_dev_translate_buffer(avp, buf);
1294 
1295 	i = 0;
1296 	pkt_len = 0;
1297 	nb_segs = first_buf->nb_segs;
1298 	do {
1299 		/* Adjust pointers for guest addressing */
1300 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1301 		if (pkt_buf == NULL)
1302 			rte_panic("bad buffer: segment %u has an invalid address %p\n",
1303 				  i, buf);
1304 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1305 		if (pkt_data == NULL)
1306 			rte_panic("bad buffer: segment %u has a NULL data pointer\n",
1307 				  i);
1308 		if (pkt_buf->data_len == 0)
1309 			rte_panic("bad buffer: segment %u has 0 data length\n",
1310 				  i);
1311 		pkt_len += pkt_buf->data_len;
1312 		nb_segs--;
1313 		i++;
1314 
1315 	} while (nb_segs && (buf = pkt_buf->next) != NULL);
1316 
1317 	if (nb_segs != 0)
1318 		rte_panic("bad buffer: expected %u segments found %u\n",
1319 			  first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
1320 	if (pkt_len != first_buf->pkt_len)
1321 		rte_panic("bad buffer: expected length %u found %u\n",
1322 			  first_buf->pkt_len, pkt_len);
1323 }
1324 
1325 #define avp_dev_buffer_sanity_check(a, b) \
1326 	__avp_dev_buffer_sanity_check((a), (b))
1327 
1328 #else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */
1329 
1330 #define avp_dev_buffer_sanity_check(a, b) do {} while (0)
1331 
1332 #endif
1333 
1334 /*
1335  * Copy a host buffer chain to a set of mbufs.	This function assumes that
1336  * there exactly the required number of mbufs to copy all source bytes.
1337  */
1338 static inline struct rte_mbuf *
1339 avp_dev_copy_from_buffers(struct avp_dev *avp,
1340 			  struct rte_avp_desc *buf,
1341 			  struct rte_mbuf **mbufs,
1342 			  unsigned int count)
1343 {
1344 	struct rte_mbuf *m_previous = NULL;
1345 	struct rte_avp_desc *pkt_buf;
1346 	unsigned int total_length = 0;
1347 	unsigned int copy_length;
1348 	unsigned int src_offset;
1349 	struct rte_mbuf *m;
1350 	uint16_t ol_flags;
1351 	uint16_t vlan_tci;
1352 	void *pkt_data;
1353 	unsigned int i;
1354 
1355 	avp_dev_buffer_sanity_check(avp, buf);
1356 
1357 	/* setup the first source buffer */
1358 	pkt_buf = avp_dev_translate_buffer(avp, buf);
1359 	pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1360 	total_length = pkt_buf->pkt_len;
1361 	src_offset = 0;
1362 
1363 	if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1364 		ol_flags = PKT_RX_VLAN_PKT;
1365 		vlan_tci = pkt_buf->vlan_tci;
1366 	} else {
1367 		ol_flags = 0;
1368 		vlan_tci = 0;
1369 	}
1370 
1371 	for (i = 0; (i < count) && (buf != NULL); i++) {
1372 		/* fill each destination buffer */
1373 		m = mbufs[i];
1374 
1375 		if (m_previous != NULL)
1376 			m_previous->next = m;
1377 
1378 		m_previous = m;
1379 
1380 		do {
1381 			/*
1382 			 * Copy as many source buffers as will fit in the
1383 			 * destination buffer.
1384 			 */
1385 			copy_length = RTE_MIN((avp->guest_mbuf_size -
1386 					       rte_pktmbuf_data_len(m)),
1387 					      (pkt_buf->data_len -
1388 					       src_offset));
1389 			rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1390 					       rte_pktmbuf_data_len(m)),
1391 				   RTE_PTR_ADD(pkt_data, src_offset),
1392 				   copy_length);
1393 			rte_pktmbuf_data_len(m) += copy_length;
1394 			src_offset += copy_length;
1395 
1396 			if (likely(src_offset == pkt_buf->data_len)) {
1397 				/* need a new source buffer */
1398 				buf = pkt_buf->next;
1399 				if (buf != NULL) {
1400 					pkt_buf = avp_dev_translate_buffer(
1401 						avp, buf);
1402 					pkt_data = avp_dev_translate_buffer(
1403 						avp, pkt_buf->data);
1404 					src_offset = 0;
1405 				}
1406 			}
1407 
1408 			if (unlikely(rte_pktmbuf_data_len(m) ==
1409 				     avp->guest_mbuf_size)) {
1410 				/* need a new destination mbuf */
1411 				break;
1412 			}
1413 
1414 		} while (buf != NULL);
1415 	}
1416 
1417 	m = mbufs[0];
1418 	m->ol_flags = ol_flags;
1419 	m->nb_segs = count;
1420 	rte_pktmbuf_pkt_len(m) = total_length;
1421 	m->vlan_tci = vlan_tci;
1422 
1423 	__rte_mbuf_sanity_check(m, 1);
1424 
1425 	return m;
1426 }
1427 
1428 static uint16_t
1429 avp_recv_scattered_pkts(void *rx_queue,
1430 			struct rte_mbuf **rx_pkts,
1431 			uint16_t nb_pkts)
1432 {
1433 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1434 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1435 	struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1436 	struct avp_dev *avp = rxq->avp;
1437 	struct rte_avp_desc *pkt_buf;
1438 	struct rte_avp_fifo *free_q;
1439 	struct rte_avp_fifo *rx_q;
1440 	struct rte_avp_desc *buf;
1441 	unsigned int count, avail, n;
1442 	unsigned int guest_mbuf_size;
1443 	struct rte_mbuf *m;
1444 	unsigned int required;
1445 	unsigned int buf_len;
1446 	unsigned int port_id;
1447 	unsigned int i;
1448 
1449 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1450 		/* VM live migration in progress */
1451 		return 0;
1452 	}
1453 
1454 	guest_mbuf_size = avp->guest_mbuf_size;
1455 	port_id = avp->port_id;
1456 	rx_q = avp->rx_q[rxq->queue_id];
1457 	free_q = avp->free_q[rxq->queue_id];
1458 
1459 	/* setup next queue to service */
1460 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1461 		(rxq->queue_id + 1) : rxq->queue_base;
1462 
1463 	/* determine how many slots are available in the free queue */
1464 	count = avp_fifo_free_count(free_q);
1465 
1466 	/* determine how many packets are available in the rx queue */
1467 	avail = avp_fifo_count(rx_q);
1468 
1469 	/* determine how many packets can be received */
1470 	count = RTE_MIN(count, avail);
1471 	count = RTE_MIN(count, nb_pkts);
1472 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1473 
1474 	if (unlikely(count == 0)) {
1475 		/* no free buffers, or no buffers on the rx queue */
1476 		return 0;
1477 	}
1478 
1479 	/* retrieve pending packets */
1480 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1481 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1482 		   count, rx_q);
1483 
1484 	count = 0;
1485 	for (i = 0; i < n; i++) {
1486 		/* prefetch next entry while processing current one */
1487 		if (i + 1 < n) {
1488 			pkt_buf = avp_dev_translate_buffer(avp,
1489 							   avp_bufs[i + 1]);
1490 			rte_prefetch0(pkt_buf);
1491 		}
1492 		buf = avp_bufs[i];
1493 
1494 		/* Peek into the first buffer to determine the total length */
1495 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1496 		buf_len = pkt_buf->pkt_len;
1497 
1498 		/* Allocate enough mbufs to receive the entire packet */
1499 		required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1500 		if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1501 			rxq->dev_data->rx_mbuf_alloc_failed++;
1502 			continue;
1503 		}
1504 
1505 		/* Copy the data from the buffers to our mbufs */
1506 		m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1507 
1508 		/* finalize mbuf */
1509 		m->port = port_id;
1510 
1511 		if (_avp_mac_filter(avp, m) != 0) {
1512 			/* silently discard packets not destined to our MAC */
1513 			rte_pktmbuf_free(m);
1514 			continue;
1515 		}
1516 
1517 		/* return new mbuf to caller */
1518 		rx_pkts[count++] = m;
1519 		rxq->bytes += buf_len;
1520 	}
1521 
1522 	rxq->packets += count;
1523 
1524 	/* return the buffers to the free queue */
1525 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1526 
1527 	return count;
1528 }
1529 
1530 
1531 static uint16_t
1532 avp_recv_pkts(void *rx_queue,
1533 	      struct rte_mbuf **rx_pkts,
1534 	      uint16_t nb_pkts)
1535 {
1536 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1537 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1538 	struct avp_dev *avp = rxq->avp;
1539 	struct rte_avp_desc *pkt_buf;
1540 	struct rte_avp_fifo *free_q;
1541 	struct rte_avp_fifo *rx_q;
1542 	unsigned int count, avail, n;
1543 	unsigned int pkt_len;
1544 	struct rte_mbuf *m;
1545 	char *pkt_data;
1546 	unsigned int i;
1547 
1548 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1549 		/* VM live migration in progress */
1550 		return 0;
1551 	}
1552 
1553 	rx_q = avp->rx_q[rxq->queue_id];
1554 	free_q = avp->free_q[rxq->queue_id];
1555 
1556 	/* setup next queue to service */
1557 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1558 		(rxq->queue_id + 1) : rxq->queue_base;
1559 
1560 	/* determine how many slots are available in the free queue */
1561 	count = avp_fifo_free_count(free_q);
1562 
1563 	/* determine how many packets are available in the rx queue */
1564 	avail = avp_fifo_count(rx_q);
1565 
1566 	/* determine how many packets can be received */
1567 	count = RTE_MIN(count, avail);
1568 	count = RTE_MIN(count, nb_pkts);
1569 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1570 
1571 	if (unlikely(count == 0)) {
1572 		/* no free buffers, or no buffers on the rx queue */
1573 		return 0;
1574 	}
1575 
1576 	/* retrieve pending packets */
1577 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1578 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1579 		   count, rx_q);
1580 
1581 	count = 0;
1582 	for (i = 0; i < n; i++) {
1583 		/* prefetch next entry while processing current one */
1584 		if (i < n - 1) {
1585 			pkt_buf = avp_dev_translate_buffer(avp,
1586 							   avp_bufs[i + 1]);
1587 			rte_prefetch0(pkt_buf);
1588 		}
1589 
1590 		/* Adjust host pointers for guest addressing */
1591 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1592 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1593 		pkt_len = pkt_buf->pkt_len;
1594 
1595 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1596 			     (pkt_buf->nb_segs > 1))) {
1597 			/*
1598 			 * application should be using the scattered receive
1599 			 * function
1600 			 */
1601 			rxq->errors++;
1602 			continue;
1603 		}
1604 
1605 		/* process each packet to be transmitted */
1606 		m = rte_pktmbuf_alloc(avp->pool);
1607 		if (unlikely(m == NULL)) {
1608 			rxq->dev_data->rx_mbuf_alloc_failed++;
1609 			continue;
1610 		}
1611 
1612 		/* copy data out of the host buffer to our buffer */
1613 		m->data_off = RTE_PKTMBUF_HEADROOM;
1614 		rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1615 
1616 		/* initialize the local mbuf */
1617 		rte_pktmbuf_data_len(m) = pkt_len;
1618 		rte_pktmbuf_pkt_len(m) = pkt_len;
1619 		m->port = avp->port_id;
1620 
1621 		if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1622 			m->ol_flags = PKT_RX_VLAN_PKT;
1623 			m->vlan_tci = pkt_buf->vlan_tci;
1624 		}
1625 
1626 		if (_avp_mac_filter(avp, m) != 0) {
1627 			/* silently discard packets not destined to our MAC */
1628 			rte_pktmbuf_free(m);
1629 			continue;
1630 		}
1631 
1632 		/* return new mbuf to caller */
1633 		rx_pkts[count++] = m;
1634 		rxq->bytes += pkt_len;
1635 	}
1636 
1637 	rxq->packets += count;
1638 
1639 	/* return the buffers to the free queue */
1640 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1641 
1642 	return count;
1643 }
1644 
1645 /*
1646  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1647  * there are sufficient destination buffers to contain the entire source
1648  * packet.
1649  */
1650 static inline uint16_t
1651 avp_dev_copy_to_buffers(struct avp_dev *avp,
1652 			struct rte_mbuf *mbuf,
1653 			struct rte_avp_desc **buffers,
1654 			unsigned int count)
1655 {
1656 	struct rte_avp_desc *previous_buf = NULL;
1657 	struct rte_avp_desc *first_buf = NULL;
1658 	struct rte_avp_desc *pkt_buf;
1659 	struct rte_avp_desc *buf;
1660 	size_t total_length;
1661 	struct rte_mbuf *m;
1662 	size_t copy_length;
1663 	size_t src_offset;
1664 	char *pkt_data;
1665 	unsigned int i;
1666 
1667 	__rte_mbuf_sanity_check(mbuf, 1);
1668 
1669 	m = mbuf;
1670 	src_offset = 0;
1671 	total_length = rte_pktmbuf_pkt_len(m);
1672 	for (i = 0; (i < count) && (m != NULL); i++) {
1673 		/* fill each destination buffer */
1674 		buf = buffers[i];
1675 
1676 		if (i < count - 1) {
1677 			/* prefetch next entry while processing this one */
1678 			pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1679 			rte_prefetch0(pkt_buf);
1680 		}
1681 
1682 		/* Adjust pointers for guest addressing */
1683 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1684 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1685 
1686 		/* setup the buffer chain */
1687 		if (previous_buf != NULL)
1688 			previous_buf->next = buf;
1689 		else
1690 			first_buf = pkt_buf;
1691 
1692 		previous_buf = pkt_buf;
1693 
1694 		do {
1695 			/*
1696 			 * copy as many source mbuf segments as will fit in the
1697 			 * destination buffer.
1698 			 */
1699 			copy_length = RTE_MIN((avp->host_mbuf_size -
1700 					       pkt_buf->data_len),
1701 					      (rte_pktmbuf_data_len(m) -
1702 					       src_offset));
1703 			rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1704 				   RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1705 					       src_offset),
1706 				   copy_length);
1707 			pkt_buf->data_len += copy_length;
1708 			src_offset += copy_length;
1709 
1710 			if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1711 				/* need a new source buffer */
1712 				m = m->next;
1713 				src_offset = 0;
1714 			}
1715 
1716 			if (unlikely(pkt_buf->data_len ==
1717 				     avp->host_mbuf_size)) {
1718 				/* need a new destination buffer */
1719 				break;
1720 			}
1721 
1722 		} while (m != NULL);
1723 	}
1724 
1725 	first_buf->nb_segs = count;
1726 	first_buf->pkt_len = total_length;
1727 
1728 	if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1729 		first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1730 		first_buf->vlan_tci = mbuf->vlan_tci;
1731 	}
1732 
1733 	avp_dev_buffer_sanity_check(avp, buffers[0]);
1734 
1735 	return total_length;
1736 }
1737 
1738 
1739 static uint16_t
1740 avp_xmit_scattered_pkts(void *tx_queue,
1741 			struct rte_mbuf **tx_pkts,
1742 			uint16_t nb_pkts)
1743 {
1744 	struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1745 				       RTE_AVP_MAX_MBUF_SEGMENTS)];
1746 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1747 	struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1748 	struct avp_dev *avp = txq->avp;
1749 	struct rte_avp_fifo *alloc_q;
1750 	struct rte_avp_fifo *tx_q;
1751 	unsigned int count, avail, n;
1752 	unsigned int orig_nb_pkts;
1753 	struct rte_mbuf *m;
1754 	unsigned int required;
1755 	unsigned int segments;
1756 	unsigned int tx_bytes;
1757 	unsigned int i;
1758 
1759 	orig_nb_pkts = nb_pkts;
1760 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1761 		/* VM live migration in progress */
1762 		/* TODO ... buffer for X packets then drop? */
1763 		txq->errors += nb_pkts;
1764 		return 0;
1765 	}
1766 
1767 	tx_q = avp->tx_q[txq->queue_id];
1768 	alloc_q = avp->alloc_q[txq->queue_id];
1769 
1770 	/* limit the number of transmitted packets to the max burst size */
1771 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1772 		nb_pkts = AVP_MAX_TX_BURST;
1773 
1774 	/* determine how many buffers are available to copy into */
1775 	avail = avp_fifo_count(alloc_q);
1776 	if (unlikely(avail > (AVP_MAX_TX_BURST *
1777 			      RTE_AVP_MAX_MBUF_SEGMENTS)))
1778 		avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1779 
1780 	/* determine how many slots are available in the transmit queue */
1781 	count = avp_fifo_free_count(tx_q);
1782 
1783 	/* determine how many packets can be sent */
1784 	nb_pkts = RTE_MIN(count, nb_pkts);
1785 
1786 	/* determine how many packets will fit in the available buffers */
1787 	count = 0;
1788 	segments = 0;
1789 	for (i = 0; i < nb_pkts; i++) {
1790 		m = tx_pkts[i];
1791 		if (likely(i < (unsigned int)nb_pkts - 1)) {
1792 			/* prefetch next entry while processing this one */
1793 			rte_prefetch0(tx_pkts[i + 1]);
1794 		}
1795 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1796 			avp->host_mbuf_size;
1797 
1798 		if (unlikely((required == 0) ||
1799 			     (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1800 			break;
1801 		else if (unlikely(required + segments > avail))
1802 			break;
1803 		segments += required;
1804 		count++;
1805 	}
1806 	nb_pkts = count;
1807 
1808 	if (unlikely(nb_pkts == 0)) {
1809 		/* no available buffers, or no space on the tx queue */
1810 		txq->errors += orig_nb_pkts;
1811 		return 0;
1812 	}
1813 
1814 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1815 		   nb_pkts, tx_q);
1816 
1817 	/* retrieve sufficient send buffers */
1818 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1819 	if (unlikely(n != segments)) {
1820 		PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1821 			   "n=%u, segments=%u, orig=%u\n",
1822 			   n, segments, orig_nb_pkts);
1823 		txq->errors += orig_nb_pkts;
1824 		return 0;
1825 	}
1826 
1827 	tx_bytes = 0;
1828 	count = 0;
1829 	for (i = 0; i < nb_pkts; i++) {
1830 		/* process each packet to be transmitted */
1831 		m = tx_pkts[i];
1832 
1833 		/* determine how many buffers are required for this packet */
1834 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1835 			avp->host_mbuf_size;
1836 
1837 		tx_bytes += avp_dev_copy_to_buffers(avp, m,
1838 						    &avp_bufs[count], required);
1839 		tx_bufs[i] = avp_bufs[count];
1840 		count += required;
1841 
1842 		/* free the original mbuf */
1843 		rte_pktmbuf_free(m);
1844 	}
1845 
1846 	txq->packets += nb_pkts;
1847 	txq->bytes += tx_bytes;
1848 
1849 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1850 	for (i = 0; i < nb_pkts; i++)
1851 		avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1852 #endif
1853 
1854 	/* send the packets */
1855 	n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1856 	if (unlikely(n != orig_nb_pkts))
1857 		txq->errors += (orig_nb_pkts - n);
1858 
1859 	return n;
1860 }
1861 
1862 
1863 static uint16_t
1864 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1865 {
1866 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1867 	struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1868 	struct avp_dev *avp = txq->avp;
1869 	struct rte_avp_desc *pkt_buf;
1870 	struct rte_avp_fifo *alloc_q;
1871 	struct rte_avp_fifo *tx_q;
1872 	unsigned int count, avail, n;
1873 	struct rte_mbuf *m;
1874 	unsigned int pkt_len;
1875 	unsigned int tx_bytes;
1876 	char *pkt_data;
1877 	unsigned int i;
1878 
1879 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1880 		/* VM live migration in progress */
1881 		/* TODO ... buffer for X packets then drop?! */
1882 		txq->errors++;
1883 		return 0;
1884 	}
1885 
1886 	tx_q = avp->tx_q[txq->queue_id];
1887 	alloc_q = avp->alloc_q[txq->queue_id];
1888 
1889 	/* limit the number of transmitted packets to the max burst size */
1890 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1891 		nb_pkts = AVP_MAX_TX_BURST;
1892 
1893 	/* determine how many buffers are available to copy into */
1894 	avail = avp_fifo_count(alloc_q);
1895 
1896 	/* determine how many slots are available in the transmit queue */
1897 	count = avp_fifo_free_count(tx_q);
1898 
1899 	/* determine how many packets can be sent */
1900 	count = RTE_MIN(count, avail);
1901 	count = RTE_MIN(count, nb_pkts);
1902 
1903 	if (unlikely(count == 0)) {
1904 		/* no available buffers, or no space on the tx queue */
1905 		txq->errors += nb_pkts;
1906 		return 0;
1907 	}
1908 
1909 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1910 		   count, tx_q);
1911 
1912 	/* retrieve sufficient send buffers */
1913 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1914 	if (unlikely(n != count)) {
1915 		txq->errors++;
1916 		return 0;
1917 	}
1918 
1919 	tx_bytes = 0;
1920 	for (i = 0; i < count; i++) {
1921 		/* prefetch next entry while processing the current one */
1922 		if (i < count - 1) {
1923 			pkt_buf = avp_dev_translate_buffer(avp,
1924 							   avp_bufs[i + 1]);
1925 			rte_prefetch0(pkt_buf);
1926 		}
1927 
1928 		/* process each packet to be transmitted */
1929 		m = tx_pkts[i];
1930 
1931 		/* Adjust pointers for guest addressing */
1932 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1933 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1934 		pkt_len = rte_pktmbuf_pkt_len(m);
1935 
1936 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1937 					 (pkt_len > avp->host_mbuf_size))) {
1938 			/*
1939 			 * application should be using the scattered transmit
1940 			 * function; send it truncated to avoid the performance
1941 			 * hit of having to manage returning the already
1942 			 * allocated buffer to the free list.  This should not
1943 			 * happen since the application should have set the
1944 			 * max_rx_pkt_len based on its MTU and it should be
1945 			 * policing its own packet sizes.
1946 			 */
1947 			txq->errors++;
1948 			pkt_len = RTE_MIN(avp->guest_mbuf_size,
1949 					  avp->host_mbuf_size);
1950 		}
1951 
1952 		/* copy data out of our mbuf and into the AVP buffer */
1953 		rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1954 		pkt_buf->pkt_len = pkt_len;
1955 		pkt_buf->data_len = pkt_len;
1956 		pkt_buf->nb_segs = 1;
1957 		pkt_buf->next = NULL;
1958 
1959 		if (m->ol_flags & PKT_TX_VLAN_PKT) {
1960 			pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1961 			pkt_buf->vlan_tci = m->vlan_tci;
1962 		}
1963 
1964 		tx_bytes += pkt_len;
1965 
1966 		/* free the original mbuf */
1967 		rte_pktmbuf_free(m);
1968 	}
1969 
1970 	txq->packets += count;
1971 	txq->bytes += tx_bytes;
1972 
1973 	/* send the packets */
1974 	n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1975 
1976 	return n;
1977 }
1978 
1979 static void
1980 avp_dev_rx_queue_release(void *rx_queue)
1981 {
1982 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1983 	struct avp_dev *avp = rxq->avp;
1984 	struct rte_eth_dev_data *data = avp->dev_data;
1985 	unsigned int i;
1986 
1987 	for (i = 0; i < avp->num_rx_queues; i++) {
1988 		if (data->rx_queues[i] == rxq)
1989 			data->rx_queues[i] = NULL;
1990 	}
1991 }
1992 
1993 static void
1994 avp_dev_tx_queue_release(void *tx_queue)
1995 {
1996 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1997 	struct avp_dev *avp = txq->avp;
1998 	struct rte_eth_dev_data *data = avp->dev_data;
1999 	unsigned int i;
2000 
2001 	for (i = 0; i < avp->num_tx_queues; i++) {
2002 		if (data->tx_queues[i] == txq)
2003 			data->tx_queues[i] = NULL;
2004 	}
2005 }
2006 
2007 static int
2008 avp_dev_configure(struct rte_eth_dev *eth_dev)
2009 {
2010 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2011 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2012 	struct rte_avp_device_info *host_info;
2013 	struct rte_avp_device_config config;
2014 	int mask = 0;
2015 	void *addr;
2016 	int ret;
2017 
2018 	rte_spinlock_lock(&avp->lock);
2019 	if (avp->flags & AVP_F_DETACHED) {
2020 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2021 		ret = -ENOTSUP;
2022 		goto unlock;
2023 	}
2024 
2025 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2026 	host_info = (struct rte_avp_device_info *)addr;
2027 
2028 	/* Setup required number of queues */
2029 	_avp_set_queue_counts(eth_dev);
2030 
2031 	mask = (ETH_VLAN_STRIP_MASK |
2032 		ETH_VLAN_FILTER_MASK |
2033 		ETH_VLAN_EXTEND_MASK);
2034 	avp_vlan_offload_set(eth_dev, mask);
2035 
2036 	/* update device config */
2037 	memset(&config, 0, sizeof(config));
2038 	config.device_id = host_info->device_id;
2039 	config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2040 	config.driver_version = AVP_DPDK_DRIVER_VERSION;
2041 	config.features = avp->features;
2042 	config.num_tx_queues = avp->num_tx_queues;
2043 	config.num_rx_queues = avp->num_rx_queues;
2044 
2045 	ret = avp_dev_ctrl_set_config(eth_dev, &config);
2046 	if (ret < 0) {
2047 		PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2048 			    ret);
2049 		goto unlock;
2050 	}
2051 
2052 	avp->flags |= AVP_F_CONFIGURED;
2053 	ret = 0;
2054 
2055 unlock:
2056 	rte_spinlock_unlock(&avp->lock);
2057 	return ret;
2058 }
2059 
2060 static int
2061 avp_dev_start(struct rte_eth_dev *eth_dev)
2062 {
2063 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2064 	int ret;
2065 
2066 	rte_spinlock_lock(&avp->lock);
2067 	if (avp->flags & AVP_F_DETACHED) {
2068 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2069 		ret = -ENOTSUP;
2070 		goto unlock;
2071 	}
2072 
2073 	/* disable features that we do not support */
2074 	eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
2075 	eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
2076 	eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
2077 	eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
2078 
2079 	/* update link state */
2080 	ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2081 	if (ret < 0) {
2082 		PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2083 			    ret);
2084 		goto unlock;
2085 	}
2086 
2087 	/* remember current link state */
2088 	avp->flags |= AVP_F_LINKUP;
2089 
2090 	ret = 0;
2091 
2092 unlock:
2093 	rte_spinlock_unlock(&avp->lock);
2094 	return ret;
2095 }
2096 
2097 static void
2098 avp_dev_stop(struct rte_eth_dev *eth_dev)
2099 {
2100 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2101 	int ret;
2102 
2103 	rte_spinlock_lock(&avp->lock);
2104 	if (avp->flags & AVP_F_DETACHED) {
2105 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2106 		goto unlock;
2107 	}
2108 
2109 	/* remember current link state */
2110 	avp->flags &= ~AVP_F_LINKUP;
2111 
2112 	/* update link state */
2113 	ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2114 	if (ret < 0) {
2115 		PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2116 			    ret);
2117 	}
2118 
2119 unlock:
2120 	rte_spinlock_unlock(&avp->lock);
2121 }
2122 
2123 static void
2124 avp_dev_close(struct rte_eth_dev *eth_dev)
2125 {
2126 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2127 	int ret;
2128 
2129 	rte_spinlock_lock(&avp->lock);
2130 	if (avp->flags & AVP_F_DETACHED) {
2131 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2132 		goto unlock;
2133 	}
2134 
2135 	/* remember current link state */
2136 	avp->flags &= ~AVP_F_LINKUP;
2137 	avp->flags &= ~AVP_F_CONFIGURED;
2138 
2139 	ret = avp_dev_disable_interrupts(eth_dev);
2140 	if (ret < 0) {
2141 		PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2142 		/* continue */
2143 	}
2144 
2145 	/* update device state */
2146 	ret = avp_dev_ctrl_shutdown(eth_dev);
2147 	if (ret < 0) {
2148 		PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2149 			    ret);
2150 		/* continue */
2151 	}
2152 
2153 unlock:
2154 	rte_spinlock_unlock(&avp->lock);
2155 }
2156 
2157 static int
2158 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2159 					__rte_unused int wait_to_complete)
2160 {
2161 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2162 	struct rte_eth_link *link = &eth_dev->data->dev_link;
2163 
2164 	link->link_speed = ETH_SPEED_NUM_10G;
2165 	link->link_duplex = ETH_LINK_FULL_DUPLEX;
2166 	link->link_status = !!(avp->flags & AVP_F_LINKUP);
2167 
2168 	return -1;
2169 }
2170 
2171 static void
2172 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2173 {
2174 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2175 
2176 	rte_spinlock_lock(&avp->lock);
2177 	if ((avp->flags & AVP_F_PROMISC) == 0) {
2178 		avp->flags |= AVP_F_PROMISC;
2179 		PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2180 			    eth_dev->data->port_id);
2181 	}
2182 	rte_spinlock_unlock(&avp->lock);
2183 }
2184 
2185 static void
2186 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2187 {
2188 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2189 
2190 	rte_spinlock_lock(&avp->lock);
2191 	if ((avp->flags & AVP_F_PROMISC) != 0) {
2192 		avp->flags &= ~AVP_F_PROMISC;
2193 		PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2194 			    eth_dev->data->port_id);
2195 	}
2196 	rte_spinlock_unlock(&avp->lock);
2197 }
2198 
2199 static void
2200 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2201 		 struct rte_eth_dev_info *dev_info)
2202 {
2203 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2204 
2205 	dev_info->pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2206 	dev_info->max_rx_queues = avp->max_rx_queues;
2207 	dev_info->max_tx_queues = avp->max_tx_queues;
2208 	dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2209 	dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2210 	dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2211 	if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2212 		dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2213 		dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2214 	}
2215 }
2216 
2217 static void
2218 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2219 {
2220 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2221 
2222 	if (mask & ETH_VLAN_STRIP_MASK) {
2223 		if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2224 			if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
2225 				avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2226 			else
2227 				avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2228 		} else {
2229 			PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2230 		}
2231 	}
2232 
2233 	if (mask & ETH_VLAN_FILTER_MASK) {
2234 		if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
2235 			PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2236 	}
2237 
2238 	if (mask & ETH_VLAN_EXTEND_MASK) {
2239 		if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
2240 			PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2241 	}
2242 }
2243 
2244 static void
2245 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2246 {
2247 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2248 	unsigned int i;
2249 
2250 	for (i = 0; i < avp->num_rx_queues; i++) {
2251 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2252 
2253 		if (rxq) {
2254 			stats->ipackets += rxq->packets;
2255 			stats->ibytes += rxq->bytes;
2256 			stats->ierrors += rxq->errors;
2257 
2258 			stats->q_ipackets[i] += rxq->packets;
2259 			stats->q_ibytes[i] += rxq->bytes;
2260 			stats->q_errors[i] += rxq->errors;
2261 		}
2262 	}
2263 
2264 	for (i = 0; i < avp->num_tx_queues; i++) {
2265 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2266 
2267 		if (txq) {
2268 			stats->opackets += txq->packets;
2269 			stats->obytes += txq->bytes;
2270 			stats->oerrors += txq->errors;
2271 
2272 			stats->q_opackets[i] += txq->packets;
2273 			stats->q_obytes[i] += txq->bytes;
2274 			stats->q_errors[i] += txq->errors;
2275 		}
2276 	}
2277 }
2278 
2279 static void
2280 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2281 {
2282 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2283 	unsigned int i;
2284 
2285 	for (i = 0; i < avp->num_rx_queues; i++) {
2286 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2287 
2288 		if (rxq) {
2289 			rxq->bytes = 0;
2290 			rxq->packets = 0;
2291 			rxq->errors = 0;
2292 		}
2293 	}
2294 
2295 	for (i = 0; i < avp->num_tx_queues; i++) {
2296 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2297 
2298 		if (txq) {
2299 			txq->bytes = 0;
2300 			txq->packets = 0;
2301 			txq->errors = 0;
2302 		}
2303 	}
2304 }
2305 
2306 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
2307 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);
2308