xref: /dpdk/drivers/net/avp/avp_ethdev.c (revision 4e30ead5e7ca886535e2b30632b2948d2aac1681)
1 /*
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2013-2017, Wind River Systems, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * 2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * 3) Neither the name of Wind River Systems nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <stdint.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38 
39 #include <rte_ethdev.h>
40 #include <rte_ethdev_pci.h>
41 #include <rte_memcpy.h>
42 #include <rte_string_fns.h>
43 #include <rte_memzone.h>
44 #include <rte_malloc.h>
45 #include <rte_atomic.h>
46 #include <rte_branch_prediction.h>
47 #include <rte_pci.h>
48 #include <rte_ether.h>
49 #include <rte_common.h>
50 #include <rte_cycles.h>
51 #include <rte_spinlock.h>
52 #include <rte_byteorder.h>
53 #include <rte_dev.h>
54 #include <rte_memory.h>
55 #include <rte_eal.h>
56 #include <rte_io.h>
57 
58 #include "rte_avp_common.h"
59 #include "rte_avp_fifo.h"
60 
61 #include "avp_logs.h"
62 
63 
64 static int avp_dev_create(struct rte_pci_device *pci_dev,
65 			  struct rte_eth_dev *eth_dev);
66 
67 static int avp_dev_configure(struct rte_eth_dev *dev);
68 static int avp_dev_start(struct rte_eth_dev *dev);
69 static void avp_dev_stop(struct rte_eth_dev *dev);
70 static void avp_dev_close(struct rte_eth_dev *dev);
71 static void avp_dev_info_get(struct rte_eth_dev *dev,
72 			     struct rte_eth_dev_info *dev_info);
73 static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
74 static int avp_dev_link_update(struct rte_eth_dev *dev,
75 			       __rte_unused int wait_to_complete);
76 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
77 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
78 
79 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
80 				  uint16_t rx_queue_id,
81 				  uint16_t nb_rx_desc,
82 				  unsigned int socket_id,
83 				  const struct rte_eth_rxconf *rx_conf,
84 				  struct rte_mempool *pool);
85 
86 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
87 				  uint16_t tx_queue_id,
88 				  uint16_t nb_tx_desc,
89 				  unsigned int socket_id,
90 				  const struct rte_eth_txconf *tx_conf);
91 
92 static uint16_t avp_recv_scattered_pkts(void *rx_queue,
93 					struct rte_mbuf **rx_pkts,
94 					uint16_t nb_pkts);
95 
96 static uint16_t avp_recv_pkts(void *rx_queue,
97 			      struct rte_mbuf **rx_pkts,
98 			      uint16_t nb_pkts);
99 
100 static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
101 					struct rte_mbuf **tx_pkts,
102 					uint16_t nb_pkts);
103 
104 static uint16_t avp_xmit_pkts(void *tx_queue,
105 			      struct rte_mbuf **tx_pkts,
106 			      uint16_t nb_pkts);
107 
108 static void avp_dev_rx_queue_release(void *rxq);
109 static void avp_dev_tx_queue_release(void *txq);
110 
111 static void avp_dev_stats_get(struct rte_eth_dev *dev,
112 			      struct rte_eth_stats *stats);
113 static void avp_dev_stats_reset(struct rte_eth_dev *dev);
114 
115 
116 #define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)
117 
118 
119 #define AVP_MAX_RX_BURST 64
120 #define AVP_MAX_TX_BURST 64
121 #define AVP_MAX_MAC_ADDRS 1
122 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN
123 
124 
125 /*
126  * Defines the number of microseconds to wait before checking the response
127  * queue for completion.
128  */
129 #define AVP_REQUEST_DELAY_USECS (5000)
130 
131 /*
132  * Defines the number of times to check the response queue for completion before
133  * declaring a timeout.
134  */
135 #define AVP_MAX_REQUEST_RETRY (100)
136 
137 /* Defines the current PCI driver version number */
138 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
139 
140 /*
141  * The set of PCI devices this driver supports
142  */
143 static const struct rte_pci_id pci_id_avp_map[] = {
144 	{ .vendor_id = RTE_AVP_PCI_VENDOR_ID,
145 	  .device_id = RTE_AVP_PCI_DEVICE_ID,
146 	  .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
147 	  .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
148 	  .class_id = RTE_CLASS_ANY_ID,
149 	},
150 
151 	{ .vendor_id = 0, /* sentinel */
152 	},
153 };
154 
155 /*
156  * dev_ops for avp, bare necessities for basic operation
157  */
158 static const struct eth_dev_ops avp_eth_dev_ops = {
159 	.dev_configure       = avp_dev_configure,
160 	.dev_start           = avp_dev_start,
161 	.dev_stop            = avp_dev_stop,
162 	.dev_close           = avp_dev_close,
163 	.dev_infos_get       = avp_dev_info_get,
164 	.vlan_offload_set    = avp_vlan_offload_set,
165 	.stats_get           = avp_dev_stats_get,
166 	.stats_reset         = avp_dev_stats_reset,
167 	.link_update         = avp_dev_link_update,
168 	.promiscuous_enable  = avp_dev_promiscuous_enable,
169 	.promiscuous_disable = avp_dev_promiscuous_disable,
170 	.rx_queue_setup      = avp_dev_rx_queue_setup,
171 	.rx_queue_release    = avp_dev_rx_queue_release,
172 	.tx_queue_setup      = avp_dev_tx_queue_setup,
173 	.tx_queue_release    = avp_dev_tx_queue_release,
174 };
175 
176 /**@{ AVP device flags */
177 #define AVP_F_PROMISC (1 << 1)
178 #define AVP_F_CONFIGURED (1 << 2)
179 #define AVP_F_LINKUP (1 << 3)
180 #define AVP_F_DETACHED (1 << 4)
181 /**@} */
182 
183 /* Ethernet device validation marker */
184 #define AVP_ETHDEV_MAGIC 0x92972862
185 
186 /*
187  * Defines the AVP device attributes which are attached to an RTE ethernet
188  * device
189  */
190 struct avp_dev {
191 	uint32_t magic; /**< Memory validation marker */
192 	uint64_t device_id; /**< Unique system identifier */
193 	struct ether_addr ethaddr; /**< Host specified MAC address */
194 	struct rte_eth_dev_data *dev_data;
195 	/**< Back pointer to ethernet device data */
196 	volatile uint32_t flags; /**< Device operational flags */
197 	uint8_t port_id; /**< Ethernet port identifier */
198 	struct rte_mempool *pool; /**< pkt mbuf mempool */
199 	unsigned int guest_mbuf_size; /**< local pool mbuf size */
200 	unsigned int host_mbuf_size; /**< host mbuf size */
201 	unsigned int max_rx_pkt_len; /**< maximum receive unit */
202 	uint32_t host_features; /**< Supported feature bitmap */
203 	uint32_t features; /**< Enabled feature bitmap */
204 	unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
205 	unsigned int max_tx_queues; /**< Maximum number of transmit queues */
206 	unsigned int num_rx_queues; /**< Negotiated number of receive queues */
207 	unsigned int max_rx_queues; /**< Maximum number of receive queues */
208 
209 	struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
210 	struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
211 	struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
212 	/**< Allocated mbufs queue */
213 	struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
214 	/**< To be freed mbufs queue */
215 
216 	/* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
217 	rte_spinlock_t lock;
218 
219 	/* For request & response */
220 	struct rte_avp_fifo *req_q; /**< Request queue */
221 	struct rte_avp_fifo *resp_q; /**< Response queue */
222 	void *host_sync_addr; /**< (host) Req/Resp Mem address */
223 	void *sync_addr; /**< Req/Resp Mem address */
224 	void *host_mbuf_addr; /**< (host) MBUF pool start address */
225 	void *mbuf_addr; /**< MBUF pool start address */
226 } __rte_cache_aligned;
227 
228 /* RTE ethernet private data */
229 struct avp_adapter {
230 	struct avp_dev avp;
231 } __rte_cache_aligned;
232 
233 
234 /* 32-bit MMIO register write */
235 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
236 
237 /* 32-bit MMIO register read */
238 #define AVP_READ32(_addr) rte_read32_relaxed((_addr))
239 
240 /* Macro to cast the ethernet device private data to an AVP object */
241 #define AVP_DEV_PRIVATE_TO_HW(adapter) \
242 	(&((struct avp_adapter *)adapter)->avp)
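/* e.g., struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); */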
243 
244 /*
245  * Defines the structure of an AVP device queue for the purpose of handling the
246  * receive and transmit burst callback functions
247  */
248 struct avp_queue {
249 	struct rte_eth_dev_data *dev_data;
250 	/**< Backpointer to ethernet device data */
251 	struct avp_dev *avp; /**< Backpointer to AVP device */
252 	uint16_t queue_id;
253 	/**< Queue identifier used for indexing current queue */
254 	uint16_t queue_base;
255 	/**< Base queue identifier for queue servicing */
256 	uint16_t queue_limit;
257 	/**< Maximum queue identifier for queue servicing */
258 
259 	uint64_t packets;
260 	uint64_t bytes;
261 	uint64_t errors;
262 };
263 
264 /* send a request and wait for a response
265  *
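 * The request is copied into the shared sync area (avp->sync_addr), the host
 * view of that area (avp->host_sync_addr) is pushed onto the request FIFO,
 * and the response FIFO is then polled every AVP_REQUEST_DELAY_USECS
 * microseconds, up to AVP_MAX_REQUEST_RETRY times, before the result is
 * copied back from the sync area.
 *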
266  * @warning must be called while holding the avp->lock spinlock.
267  */
268 static int
269 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
270 {
271 	unsigned int retry = AVP_MAX_REQUEST_RETRY;
272 	void *resp_addr = NULL;
273 	unsigned int count;
274 	int ret;
275 
276 	PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
277 
278 	request->result = -ENOTSUP;
279 
280 	/* Discard any stale responses before starting a new request */
281 	while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
282 		PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
283 
284 	rte_memcpy(avp->sync_addr, request, sizeof(*request));
285 	count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
286 	if (count < 1) {
287 		PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
288 			    request->req_id);
289 		ret = -EBUSY;
290 		goto done;
291 	}
292 
293 	while (retry--) {
294 		/* wait for a response */
295 		usleep(AVP_REQUEST_DELAY_USECS);
296 
297 		count = avp_fifo_count(avp->resp_q);
298 		if (count >= 1) {
299 			/* response received */
300 			break;
301 		}
302 
303 		if ((count < 1) && (retry == 0)) {
304 			PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
305 				    request->req_id);
306 			ret = -ETIME;
307 			goto done;
308 		}
309 	}
310 
311 	/* retrieve the response */
312 	count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
313 	if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
314 		PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
315 			    count, resp_addr, avp->host_sync_addr);
316 		ret = -ENODATA;
317 		goto done;
318 	}
319 
320 	/* copy to user buffer */
321 	rte_memcpy(request, avp->sync_addr, sizeof(*request));
322 	ret = 0;
323 
324 	PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
325 		    request->result, request->req_id);
326 
327 done:
328 	return ret;
329 }
330 
331 static int
332 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
333 {
334 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
335 	struct rte_avp_request request;
336 	int ret;
337 
338 	/* setup a link state change request */
339 	memset(&request, 0, sizeof(request));
340 	request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
341 	request.if_up = state;
342 
343 	ret = avp_dev_process_request(avp, &request);
344 
345 	return ret == 0 ? request.result : ret;
346 }
347 
348 static int
349 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
350 			struct rte_avp_device_config *config)
351 {
352 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
353 	struct rte_avp_request request;
354 	int ret;
355 
356 	/* setup a configure request */
357 	memset(&request, 0, sizeof(request));
358 	request.req_id = RTE_AVP_REQ_CFG_DEVICE;
359 	memcpy(&request.config, config, sizeof(request.config));
360 
361 	ret = avp_dev_process_request(avp, &request);
362 
363 	return ret == 0 ? request.result : ret;
364 }
365 
366 static int
367 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
368 {
369 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
370 	struct rte_avp_request request;
371 	int ret;
372 
373 	/* setup a shutdown request */
374 	memset(&request, 0, sizeof(request));
375 	request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
376 
377 	ret = avp_dev_process_request(avp, &request);
378 
379 	return ret == 0 ? request.result : ret;
380 }
381 
382 /* translate from host mbuf virtual address to guest virtual address */
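/*
 * The host and guest mappings of the mbuf pool are assumed to be linear, so
 * the translation reduces to:
 *
 *     guest_va = mbuf_addr + (host_va - host_mbuf_addr)
 */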
383 static inline void *
384 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
385 {
386 	return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
387 				       (uintptr_t)avp->host_mbuf_addr),
388 			   (uintptr_t)avp->mbuf_addr);
389 }
390 
391 /* translate from host physical address to guest virtual address */
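/*
 * The memmap BAR describes host physical segments that are assumed to be
 * packed, in order, into the guest memory BAR; 'offset' accumulates the
 * lengths of the segments preceding the match plus the offset of the address
 * within the matching segment.
 */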
392 static void *
393 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
394 			  phys_addr_t host_phys_addr)
395 {
396 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
397 	struct rte_mem_resource *resource;
398 	struct rte_avp_memmap_info *info;
399 	struct rte_avp_memmap *map;
400 	off_t offset;
401 	void *addr;
402 	unsigned int i;
403 
404 	addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
405 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
406 	info = (struct rte_avp_memmap_info *)resource->addr;
407 
408 	offset = 0;
409 	for (i = 0; i < info->nb_maps; i++) {
410 		/* search all segments looking for a matching address */
411 		map = &info->maps[i];
412 
413 		if ((host_phys_addr >= map->phys_addr) &&
414 			(host_phys_addr < (map->phys_addr + map->length))) {
415 			/* address is within this segment */
416 			offset += (host_phys_addr - map->phys_addr);
417 			addr = RTE_PTR_ADD(addr, offset);
418 
419 			PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
420 				    host_phys_addr, addr);
421 
422 			return addr;
423 		}
424 		offset += map->length;
425 	}
426 
427 	return NULL;
428 }
429 
430 /* verify that the incoming device version is compatible with our version */
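/*
 * Note: RTE_AVP_STRIP_MINOR_VERSION discards the minor field, so only the
 * release and major components are compared; minor version differences
 * between host and guest are tolerated.
 */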
431 static int
432 avp_dev_version_check(uint32_t version)
433 {
434 	uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
435 	uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
436 
437 	if (device <= driver) {
438 		/* the host driver version is less than or equal to ours */
439 		return 0;
440 	}
441 
442 	return 1;
443 }
444 
445 /* verify that memory regions have expected version and validation markers */
446 static int
447 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
448 {
449 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
450 	struct rte_avp_memmap_info *memmap;
451 	struct rte_avp_device_info *info;
452 	struct rte_mem_resource *resource;
453 	unsigned int i;
454 
455 	/* Dump resource info for debug */
456 	for (i = 0; i < PCI_MAX_RESOURCE; i++) {
457 		resource = &pci_dev->mem_resource[i];
458 		if ((resource->phys_addr == 0) || (resource->len == 0))
459 			continue;
460 
461 		PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
462 			    i, resource->phys_addr,
463 			    resource->len, resource->addr);
464 
465 		switch (i) {
466 		case RTE_AVP_PCI_MEMMAP_BAR:
467 			memmap = (struct rte_avp_memmap_info *)resource->addr;
468 			if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
469 			    (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
470 				PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
471 					    memmap->magic, memmap->version);
472 				return -EINVAL;
473 			}
474 			break;
475 
476 		case RTE_AVP_PCI_DEVICE_BAR:
477 			info = (struct rte_avp_device_info *)resource->addr;
478 			if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
479 			    avp_dev_version_check(info->version)) {
480 				PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
481 					    info->magic, info->version,
482 					    AVP_DPDK_DRIVER_VERSION);
483 				return -EINVAL;
484 			}
485 			break;
486 
487 		case RTE_AVP_PCI_MEMORY_BAR:
488 		case RTE_AVP_PCI_MMIO_BAR:
489 			if (resource->addr == NULL) {
490 				PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
491 					    i);
492 				return -EINVAL;
493 			}
494 			break;
495 
496 		case RTE_AVP_PCI_MSIX_BAR:
497 		default:
498 			/* no validation required */
499 			break;
500 		}
501 	}
502 
503 	return 0;
504 }
505 
506 static int
507 avp_dev_detach(struct rte_eth_dev *eth_dev)
508 {
509 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
510 	int ret;
511 
512 	PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
513 		    eth_dev->data->port_id, avp->device_id);
514 
515 	rte_spinlock_lock(&avp->lock);
516 
517 	if (avp->flags & AVP_F_DETACHED) {
518 		PMD_DRV_LOG(NOTICE, "port %u already detached\n",
519 			    eth_dev->data->port_id);
520 		ret = 0;
521 		goto unlock;
522 	}
523 
524 	/* shutdown the device first so the host stops sending us packets. */
525 	ret = avp_dev_ctrl_shutdown(eth_dev);
526 	if (ret < 0) {
527 		PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
528 			    ret);
529 		avp->flags &= ~AVP_F_DETACHED;
530 		goto unlock;
531 	}
532 
533 	avp->flags |= AVP_F_DETACHED;
534 	rte_wmb();
535 
536 	/* wait for queues to acknowledge the presence of the detach flag */
537 	rte_delay_ms(1);
538 
539 	ret = 0;
540 
541 unlock:
542 	rte_spinlock_unlock(&avp->lock);
543 	return ret;
544 }
545 
546 static void
547 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
548 {
549 	struct avp_dev *avp =
550 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
551 	struct avp_queue *rxq;
552 	uint16_t queue_count;
553 	uint16_t remainder;
554 
555 	rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
556 
557 	/*
558 	 * Must map all AVP fifos as evenly as possible between the configured
559 	 * device queues.  Each device queue will service a subset of the AVP
560 	 * fifos. If there is an odd number of device queues the first set of
561 	 * device queues will get the extra AVP fifos.
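	 *
	 * For example (illustrative values): with 5 AVP fifos and 2 device
	 * queues, queue 0 services fifos 0-2 and queue 1 services fifos 3-4.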
562 	 */
563 	queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
564 	remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
565 	if (rx_queue_id < remainder) {
566 		/* these queues must service one extra FIFO */
567 		rxq->queue_base = rx_queue_id * (queue_count + 1);
568 		rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
569 	} else {
570 		/* these queues service the regular number of FIFOs */
571 		rxq->queue_base = ((remainder * (queue_count + 1)) +
572 				   ((rx_queue_id - remainder) * queue_count));
573 		rxq->queue_limit = rxq->queue_base + queue_count - 1;
574 	}
575 
576 	PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
577 		    rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
578 
579 	rxq->queue_id = rxq->queue_base;
580 }
581 
582 static void
583 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
584 {
585 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
586 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
587 	struct rte_avp_device_info *host_info;
588 	void *addr;
589 
590 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
591 	host_info = (struct rte_avp_device_info *)addr;
592 
593 	/*
594 	 * the transmit direction is not negotiated beyond respecting the max
595 	 * number of queues because the host can handle arbitrary guest tx
596 	 * queues (host rx queues).
597 	 */
598 	avp->num_tx_queues = eth_dev->data->nb_tx_queues;
599 
600 	/*
601 	 * the receive direction is more restrictive.  The host requires a
602 	 * minimum number of guest rx queues (host tx queues) therefore
603 	 * negotiate a value that is at least as large as the host minimum
604 	 * requirement.  If the host and guest values are not identical then a
605 	 * mapping will be established in the receive_queue_setup function.
606 	 */
607 	avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
608 				     eth_dev->data->nb_rx_queues);
609 
610 	PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
611 		    avp->num_tx_queues, avp->num_rx_queues);
612 }
613 
614 static int
615 avp_dev_attach(struct rte_eth_dev *eth_dev)
616 {
617 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
618 	struct rte_avp_device_config config;
619 	unsigned int i;
620 	int ret;
621 
622 	PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
623 		    eth_dev->data->port_id, avp->device_id);
624 
625 	rte_spinlock_lock(&avp->lock);
626 
627 	if (!(avp->flags & AVP_F_DETACHED)) {
628 		PMD_DRV_LOG(NOTICE, "port %u already attached\n",
629 			    eth_dev->data->port_id);
630 		ret = 0;
631 		goto unlock;
632 	}
633 
634 	/*
635 	 * make sure that the detached flag is set prior to reconfiguring the
636 	 * queues.
637 	 */
638 	avp->flags |= AVP_F_DETACHED;
639 	rte_wmb();
640 
641 	/*
642 	 * re-run the device create utility which will parse the new host info
643 	 * and setup the AVP device queue pointers.
644 	 */
645 	ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev);
646 	if (ret < 0) {
647 		PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
648 			    ret);
649 		goto unlock;
650 	}
651 
652 	if (avp->flags & AVP_F_CONFIGURED) {
653 		/*
654 		 * Update the receive queue mapping to handle cases where the
655 		 * source and destination hosts have different queue
656 		 * requirements.  As long as the DETACHED flag is asserted the
657 		 * queue table should not be referenced so it should be safe to
658 		 * update it.
659 		 */
660 		_avp_set_queue_counts(eth_dev);
661 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
662 			_avp_set_rx_queue_mappings(eth_dev, i);
663 
664 		/*
665 		 * Update the host with our config details so that it knows the
666 		 * device is active.
667 		 */
668 		memset(&config, 0, sizeof(config));
669 		config.device_id = avp->device_id;
670 		config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
671 		config.driver_version = AVP_DPDK_DRIVER_VERSION;
672 		config.features = avp->features;
673 		config.num_tx_queues = avp->num_tx_queues;
674 		config.num_rx_queues = avp->num_rx_queues;
675 		config.if_up = !!(avp->flags & AVP_F_LINKUP);
676 
677 		ret = avp_dev_ctrl_set_config(eth_dev, &config);
678 		if (ret < 0) {
679 			PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
680 				    ret);
681 			goto unlock;
682 		}
683 	}
684 
685 	rte_wmb();
686 	avp->flags &= ~AVP_F_DETACHED;
687 
688 	ret = 0;
689 
690 unlock:
691 	rte_spinlock_unlock(&avp->lock);
692 	return ret;
693 }
694 
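/*
 * UIO interrupt handler.  Reads (and thereby clears) the interrupt status
 * register, processes any migration detach/attach notification from the host,
 * acknowledges it via the migration ACK register, and re-enables the UIO
 * interrupt.
 */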
695 static void
696 avp_dev_interrupt_handler(void *data)
697 {
698 	struct rte_eth_dev *eth_dev = data;
699 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
700 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
701 	uint32_t status, value;
702 	int ret;
703 
704 	if (registers == NULL)
705 		rte_panic("no mapped MMIO register space\n");
706 
707 	/* read the interrupt status register
708 	 * note: this register clears on read so all raised interrupts must be
709 	 *    handled or remembered for later processing
710 	 */
711 	status = AVP_READ32(
712 		RTE_PTR_ADD(registers,
713 			    RTE_AVP_INTERRUPT_STATUS_OFFSET));
714 
715 	if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
716 		/* handle interrupt based on current status */
717 		value = AVP_READ32(
718 			RTE_PTR_ADD(registers,
719 				    RTE_AVP_MIGRATION_STATUS_OFFSET));
720 		switch (value) {
721 		case RTE_AVP_MIGRATION_DETACHED:
722 			ret = avp_dev_detach(eth_dev);
723 			break;
724 		case RTE_AVP_MIGRATION_ATTACHED:
725 			ret = avp_dev_attach(eth_dev);
726 			break;
727 		default:
728 			PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
729 				    value);
730 			ret = -EINVAL;
731 		}
732 
733 		/* acknowledge the request by writing out our current status */
734 		value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
735 		AVP_WRITE32(value,
736 			    RTE_PTR_ADD(registers,
737 					RTE_AVP_MIGRATION_ACK_OFFSET));
738 
739 		PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
740 	}
741 
742 	if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
743 		PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
744 			    status);
745 
746 	/* re-enable UIO interrupt handling */
747 	ret = rte_intr_enable(&pci_dev->intr_handle);
748 	if (ret < 0) {
749 		PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
750 			    ret);
751 		/* continue */
752 	}
753 }
754 
755 static int
756 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
757 {
758 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
759 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
760 	int ret;
761 
762 	if (registers == NULL)
763 		return -EINVAL;
764 
765 	/* enable UIO interrupt handling */
766 	ret = rte_intr_enable(&pci_dev->intr_handle);
767 	if (ret < 0) {
768 		PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
769 			    ret);
770 		return ret;
771 	}
772 
773 	/* inform the device that all interrupts are enabled */
774 	AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
775 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
776 
777 	return 0;
778 }
779 
780 static int
781 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
782 {
783 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
784 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
785 	int ret;
786 
787 	if (registers == NULL)
788 		return 0;
789 
790 	/* inform the device that all interrupts are disabled */
791 	AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
792 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
793 
794 	/* disable UIO interrupt handling */
795 	ret = rte_intr_disable(&pci_dev->intr_handle);
796 	if (ret < 0) {
797 		PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
798 			    ret);
799 		return ret;
800 	}
801 
802 	return 0;
803 }
804 
805 static int
806 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
807 {
808 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
809 	int ret;
810 
811 	/* register a callback handler with UIO for interrupt notifications */
812 	ret = rte_intr_callback_register(&pci_dev->intr_handle,
813 					 avp_dev_interrupt_handler,
814 					 (void *)eth_dev);
815 	if (ret < 0) {
816 		PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
817 			    ret);
818 		return ret;
819 	}
820 
821 	/* enable interrupt processing */
822 	return avp_dev_enable_interrupts(eth_dev);
823 }
824 
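/*
 * Returns 1 if the host has signalled a live-migration detach (acknowledging
 * it if so); used during initialization to defer probing while a migration is
 * in progress.
 */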
825 static int
826 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
827 {
828 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
829 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
830 	uint32_t value;
831 
832 	if (registers == NULL)
833 		return 0;
834 
835 	value = AVP_READ32(RTE_PTR_ADD(registers,
836 				       RTE_AVP_MIGRATION_STATUS_OFFSET));
837 	if (value == RTE_AVP_MIGRATION_DETACHED) {
838 		/* migration is in progress; ack it if we have not already */
839 		AVP_WRITE32(value,
840 			    RTE_PTR_ADD(registers,
841 					RTE_AVP_MIGRATION_ACK_OFFSET));
842 		return 1;
843 	}
844 	return 0;
845 }
846 
847 /*
848  * create an AVP device using the supplied device info by first translating it
849  * to guest address space(s).
850  */
851 static int
852 avp_dev_create(struct rte_pci_device *pci_dev,
853 	       struct rte_eth_dev *eth_dev)
854 {
855 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
856 	struct rte_avp_device_info *host_info;
857 	struct rte_mem_resource *resource;
858 	unsigned int i;
859 
860 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
861 	if (resource->addr == NULL) {
862 		PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
863 			    RTE_AVP_PCI_DEVICE_BAR);
864 		return -EFAULT;
865 	}
866 	host_info = (struct rte_avp_device_info *)resource->addr;
867 
868 	if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
869 		avp_dev_version_check(host_info->version)) {
870 		PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
871 			    host_info->magic, host_info->version,
872 			    AVP_DPDK_DRIVER_VERSION);
873 		return -EINVAL;
874 	}
875 
876 	PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
877 		    RTE_AVP_GET_RELEASE_VERSION(host_info->version),
878 		    RTE_AVP_GET_MAJOR_VERSION(host_info->version),
879 		    RTE_AVP_GET_MINOR_VERSION(host_info->version));
880 
881 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
882 		    host_info->min_tx_queues, host_info->max_tx_queues);
883 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
884 		    host_info->min_rx_queues, host_info->max_rx_queues);
885 	PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
886 		    host_info->features);
887 
888 	if (avp->magic != AVP_ETHDEV_MAGIC) {
889 		/*
890 		 * First time initialization (i.e., not during a VM
891 		 * migration)
892 		 */
893 		memset(avp, 0, sizeof(*avp));
894 		avp->magic = AVP_ETHDEV_MAGIC;
895 		avp->dev_data = eth_dev->data;
896 		avp->port_id = eth_dev->data->port_id;
897 		avp->host_mbuf_size = host_info->mbuf_size;
898 		avp->host_features = host_info->features;
899 		rte_spinlock_init(&avp->lock);
900 		memcpy(&avp->ethaddr.addr_bytes[0],
901 		       host_info->ethaddr, ETHER_ADDR_LEN);
902 		/* adjust max values to not exceed our max */
903 		avp->max_tx_queues =
904 			RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
905 		avp->max_rx_queues =
906 			RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
907 	} else {
908 		/* Re-attaching during migration */
909 
910 		/* TODO... requires validation of host values */
911 		if ((host_info->features & avp->features) != avp->features) {
912 			PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
913 				    avp->features, host_info->features);
914 			/* this should not be possible; continue for now */
915 		}
916 	}
917 
918 	/* the device id is allowed to change over migrations */
919 	avp->device_id = host_info->device_id;
920 
921 	/* translate incoming host addresses to guest address space */
922 	PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
923 		    host_info->tx_phys);
924 	PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
925 		    host_info->alloc_phys);
926 	for (i = 0; i < avp->max_tx_queues; i++) {
927 		avp->tx_q[i] = avp_dev_translate_address(eth_dev,
928 			host_info->tx_phys + (i * host_info->tx_size));
929 
930 		avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
931 			host_info->alloc_phys + (i * host_info->alloc_size));
932 	}
933 
934 	PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
935 		    host_info->rx_phys);
936 	PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
937 		    host_info->free_phys);
938 	for (i = 0; i < avp->max_rx_queues; i++) {
939 		avp->rx_q[i] = avp_dev_translate_address(eth_dev,
940 			host_info->rx_phys + (i * host_info->rx_size));
941 		avp->free_q[i] = avp_dev_translate_address(eth_dev,
942 			host_info->free_phys + (i * host_info->free_size));
943 	}
944 
945 	PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
946 		    host_info->req_phys);
947 	PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
948 		    host_info->resp_phys);
949 	PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
950 		    host_info->sync_phys);
951 	PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
952 		    host_info->mbuf_phys);
953 	avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
954 	avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
955 	avp->sync_addr =
956 		avp_dev_translate_address(eth_dev, host_info->sync_phys);
957 	avp->mbuf_addr =
958 		avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
959 
960 	/*
961 	 * store the host mbuf virtual address so that we can calculate
962 	 * relative offsets for each mbuf as they are processed
963 	 */
964 	avp->host_mbuf_addr = host_info->mbuf_va;
965 	avp->host_sync_addr = host_info->sync_va;
966 
967 	/*
968 	 * store the maximum packet length that is supported by the host.
969 	 */
970 	avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
971 	PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
972 				host_info->max_rx_pkt_len);
973 
974 	return 0;
975 }
976 
977 /*
978  * This function is based on the probe() function in avp_pci.c.
979  * It returns 0 on success.
980  */
981 static int
982 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
983 {
984 	struct avp_dev *avp =
985 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
986 	struct rte_pci_device *pci_dev;
987 	int ret;
988 
989 	pci_dev = AVP_DEV_TO_PCI(eth_dev);
990 	eth_dev->dev_ops = &avp_eth_dev_ops;
991 	eth_dev->rx_pkt_burst = &avp_recv_pkts;
992 	eth_dev->tx_pkt_burst = &avp_xmit_pkts;
993 
994 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
995 		/*
996 		 * no setup required on secondary processes.  All data is saved
997 		 * in dev_private by the primary process.  All resources should
998 		 * be mapped to the same virtual address so all pointers should
999 		 * be valid.
1000 		 */
1001 		if (eth_dev->data->scattered_rx) {
1002 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1003 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1004 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1005 		}
1006 		return 0;
1007 	}
1008 
1009 	rte_eth_copy_pci_info(eth_dev, pci_dev);
1010 
1011 	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
1012 
1013 	/* Check current migration status */
1014 	if (avp_dev_migration_pending(eth_dev)) {
1015 		PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
1016 		return -EBUSY;
1017 	}
1018 
1019 	/* Check BAR resources */
1020 	ret = avp_dev_check_regions(eth_dev);
1021 	if (ret < 0) {
1022 		PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
1023 			    ret);
1024 		return ret;
1025 	}
1026 
1027 	/* Enable interrupts */
1028 	ret = avp_dev_setup_interrupts(eth_dev);
1029 	if (ret < 0) {
1030 		PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1031 		return ret;
1032 	}
1033 
1034 	/* Create the AVP device from the host device info */
1035 	ret = avp_dev_create(pci_dev, eth_dev);
1036 	if (ret < 0) {
1037 		PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1038 		return ret;
1039 	}
1040 
1041 	/* Allocate memory for storing MAC addresses */
1042 	eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
1043 	if (eth_dev->data->mac_addrs == NULL) {
1044 		PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1045 			    ETHER_ADDR_LEN);
1046 		return -ENOMEM;
1047 	}
1048 
1049 	/* Get a MAC address from the device config */
1050 	ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1051 
1052 	return 0;
1053 }
1054 
1055 static int
1056 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1057 {
1058 	int ret;
1059 
1060 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1061 		return -EPERM;
1062 
1063 	if (eth_dev->data == NULL)
1064 		return 0;
1065 
1066 	ret = avp_dev_disable_interrupts(eth_dev);
1067 	if (ret != 0) {
1068 		PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
1069 		return ret;
1070 	}
1071 
1072 	if (eth_dev->data->mac_addrs != NULL) {
1073 		rte_free(eth_dev->data->mac_addrs);
1074 		eth_dev->data->mac_addrs = NULL;
1075 	}
1076 
1077 	return 0;
1078 }
1079 
1080 static int
1081 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1082 		  struct rte_pci_device *pci_dev)
1083 {
1084 	struct rte_eth_dev *eth_dev;
1085 	int ret;
1086 
1087 	eth_dev = rte_eth_dev_pci_allocate(pci_dev,
1088 					   sizeof(struct avp_adapter));
1089 	if (eth_dev == NULL)
1090 		return -ENOMEM;
1091 
1092 	ret = eth_avp_dev_init(eth_dev);
1093 	if (ret)
1094 		rte_eth_dev_pci_release(eth_dev);
1095 
1096 	return ret;
1097 }
1098 
1099 static int
1100 eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1101 {
1102 	return rte_eth_dev_pci_generic_remove(pci_dev,
1103 					      eth_avp_dev_uninit);
1104 }
1105 
1106 static struct rte_pci_driver rte_avp_pmd = {
1107 	.id_table = pci_id_avp_map,
1108 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1109 	.probe = eth_avp_pci_probe,
1110 	.remove = eth_avp_pci_remove,
1111 };
1112 
1113 static int
1114 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1115 			 struct avp_dev *avp)
1116 {
1117 	unsigned int max_rx_pkt_len;
1118 
1119 	max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1120 
1121 	if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1122 	    (max_rx_pkt_len > avp->host_mbuf_size)) {
1123 		/*
1124 		 * If the guest MTU is greater than either the host or guest
1125 		 * buffers then chained mbufs have to be enabled in the TX
1126 		 * direction.  It is assumed that the application will not need
1127 		 * to send packets larger than their max_rx_pkt_len (MRU).
1128 		 */
1129 		return 1;
1130 	}
1131 
1132 	if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1133 	    (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1134 		/*
1135 		 * If the host MRU is greater than its own mbuf size or the
1136 		 * guest mbuf size then chained mbufs have to be enabled in the
1137 		 * RX direction.
1138 		 */
1139 		return 1;
1140 	}
1141 
1142 	return 0;
1143 }
1144 
1145 static int
1146 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1147 		       uint16_t rx_queue_id,
1148 		       uint16_t nb_rx_desc,
1149 		       unsigned int socket_id,
1150 		       const struct rte_eth_rxconf *rx_conf,
1151 		       struct rte_mempool *pool)
1152 {
1153 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1154 	struct rte_pktmbuf_pool_private *mbp_priv;
1155 	struct avp_queue *rxq;
1156 
1157 	if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1158 		PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1159 			    rx_queue_id, eth_dev->data->nb_rx_queues);
1160 		return -EINVAL;
1161 	}
1162 
1163 	/* Save mbuf pool pointer */
1164 	avp->pool = pool;
1165 
1166 	/* Save the local mbuf size */
1167 	mbp_priv = rte_mempool_get_priv(pool);
1168 	avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1169 	avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1170 
1171 	if (avp_dev_enable_scattered(eth_dev, avp)) {
1172 		if (!eth_dev->data->scattered_rx) {
1173 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1174 			eth_dev->data->scattered_rx = 1;
1175 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1176 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1177 		}
1178 	}
1179 
1180 	PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1181 		    avp->max_rx_pkt_len,
1182 		    eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1183 		    avp->host_mbuf_size,
1184 		    avp->guest_mbuf_size);
1185 
1186 	/* allocate a queue object */
1187 	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1188 				 RTE_CACHE_LINE_SIZE, socket_id);
1189 	if (rxq == NULL) {
1190 		PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1191 		return -ENOMEM;
1192 	}
1193 
1194 	/* save back pointers to AVP and Ethernet devices */
1195 	rxq->avp = avp;
1196 	rxq->dev_data = eth_dev->data;
1197 	eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1198 
1199 	/* setup the queue receive mapping for the current queue. */
1200 	_avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1201 
1202 	PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1203 
1204 	(void)nb_rx_desc;
1205 	(void)rx_conf;
1206 	return 0;
1207 }
1208 
1209 static int
1210 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1211 		       uint16_t tx_queue_id,
1212 		       uint16_t nb_tx_desc,
1213 		       unsigned int socket_id,
1214 		       const struct rte_eth_txconf *tx_conf)
1215 {
1216 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1217 	struct avp_queue *txq;
1218 
1219 	if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1220 		PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1221 			    tx_queue_id, eth_dev->data->nb_tx_queues);
1222 		return -EINVAL;
1223 	}
1224 
1225 	/* allocate a queue object */
1226 	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1227 				 RTE_CACHE_LINE_SIZE, socket_id);
1228 	if (txq == NULL) {
1229 		PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1230 		return -ENOMEM;
1231 	}
1232 
1233 	/* only the configured transmit queues are used */
1234 	txq->queue_id = tx_queue_id;
1235 	txq->queue_base = tx_queue_id;
1236 	txq->queue_limit = tx_queue_id;
1237 
1238 	/* save back pointers to AVP and Ethernet devices */
1239 	txq->avp = avp;
1240 	txq->dev_data = eth_dev->data;
1241 	eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1242 
1243 	PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1244 
1245 	(void)nb_tx_desc;
1246 	(void)tx_conf;
1247 	return 0;
1248 }
1249 
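/*
 * Compare two Ethernet addresses as three 16-bit words; returns zero when the
 * addresses are equal and nonzero otherwise.
 */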
1250 static inline int
1251 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
1252 {
1253 	uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1254 	uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1255 	return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1256 }
1257 
1258 static inline int
1259 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1260 {
1261 	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
1262 
1263 	if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1264 		/* allow all packets destined to our address */
1265 		return 0;
1266 	}
1267 
1268 	if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
1269 		/* allow all broadcast packets */
1270 		return 0;
1271 	}
1272 
1273 	if (likely(is_multicast_ether_addr(&eth->d_addr))) {
1274 		/* allow all multicast packets */
1275 		return 0;
1276 	}
1277 
1278 	if (avp->flags & AVP_F_PROMISC) {
1279 		/* allow all packets when in promiscuous mode */
1280 		return 0;
1281 	}
1282 
1283 	return -1;
1284 }
1285 
1286 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1287 static inline void
1288 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
1289 {
1290 	struct rte_avp_desc *first_buf;
1291 	struct rte_avp_desc *pkt_buf;
1292 	unsigned int pkt_len;
1293 	unsigned int nb_segs;
1294 	void *pkt_data;
1295 	unsigned int i;
1296 
1297 	first_buf = avp_dev_translate_buffer(avp, buf);
1298 
1299 	i = 0;
1300 	pkt_len = 0;
1301 	nb_segs = first_buf->nb_segs;
1302 	do {
1303 		/* Adjust pointers for guest addressing */
1304 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1305 		if (pkt_buf == NULL)
1306 			rte_panic("bad buffer: segment %u has an invalid address %p\n",
1307 				  i, buf);
1308 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1309 		if (pkt_data == NULL)
1310 			rte_panic("bad buffer: segment %u has a NULL data pointer\n",
1311 				  i);
1312 		if (pkt_buf->data_len == 0)
1313 			rte_panic("bad buffer: segment %u has 0 data length\n",
1314 				  i);
1315 		pkt_len += pkt_buf->data_len;
1316 		nb_segs--;
1317 		i++;
1318 
1319 	} while (nb_segs && (buf = pkt_buf->next) != NULL);
1320 
1321 	if (nb_segs != 0)
1322 		rte_panic("bad buffer: expected %u segments found %u\n",
1323 			  first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
1324 	if (pkt_len != first_buf->pkt_len)
1325 		rte_panic("bad buffer: expected length %u found %u\n",
1326 			  first_buf->pkt_len, pkt_len);
1327 }
1328 
1329 #define avp_dev_buffer_sanity_check(a, b) \
1330 	__avp_dev_buffer_sanity_check((a), (b))
1331 
1332 #else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */
1333 
1334 #define avp_dev_buffer_sanity_check(a, b) do {} while (0)
1335 
1336 #endif
1337 
1338 /*
1339  * Copy a host buffer chain to a set of mbufs.  This function assumes that
1340  * exactly the required number of mbufs is available to copy all source bytes.
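 *
 * For example (illustrative values): a 3000 byte host packet received into
 * 2048 byte guest mbufs requires ceil(3000 / 2048) = 2 mbufs, which matches
 * the 'required' calculation in avp_recv_scattered_pkts().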
1341  */
1342 static inline struct rte_mbuf *
1343 avp_dev_copy_from_buffers(struct avp_dev *avp,
1344 			  struct rte_avp_desc *buf,
1345 			  struct rte_mbuf **mbufs,
1346 			  unsigned int count)
1347 {
1348 	struct rte_mbuf *m_previous = NULL;
1349 	struct rte_avp_desc *pkt_buf;
1350 	unsigned int total_length = 0;
1351 	unsigned int copy_length;
1352 	unsigned int src_offset;
1353 	struct rte_mbuf *m;
1354 	uint16_t ol_flags;
1355 	uint16_t vlan_tci;
1356 	void *pkt_data;
1357 	unsigned int i;
1358 
1359 	avp_dev_buffer_sanity_check(avp, buf);
1360 
1361 	/* setup the first source buffer */
1362 	pkt_buf = avp_dev_translate_buffer(avp, buf);
1363 	pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1364 	total_length = pkt_buf->pkt_len;
1365 	src_offset = 0;
1366 
1367 	if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1368 		ol_flags = PKT_RX_VLAN_PKT;
1369 		vlan_tci = pkt_buf->vlan_tci;
1370 	} else {
1371 		ol_flags = 0;
1372 		vlan_tci = 0;
1373 	}
1374 
1375 	for (i = 0; (i < count) && (buf != NULL); i++) {
1376 		/* fill each destination buffer */
1377 		m = mbufs[i];
1378 
1379 		if (m_previous != NULL)
1380 			m_previous->next = m;
1381 
1382 		m_previous = m;
1383 
1384 		do {
1385 			/*
1386 			 * Copy as many source buffers as will fit in the
1387 			 * destination buffer.
1388 			 */
1389 			copy_length = RTE_MIN((avp->guest_mbuf_size -
1390 					       rte_pktmbuf_data_len(m)),
1391 					      (pkt_buf->data_len -
1392 					       src_offset));
1393 			rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1394 					       rte_pktmbuf_data_len(m)),
1395 				   RTE_PTR_ADD(pkt_data, src_offset),
1396 				   copy_length);
1397 			rte_pktmbuf_data_len(m) += copy_length;
1398 			src_offset += copy_length;
1399 
1400 			if (likely(src_offset == pkt_buf->data_len)) {
1401 				/* need a new source buffer */
1402 				buf = pkt_buf->next;
1403 				if (buf != NULL) {
1404 					pkt_buf = avp_dev_translate_buffer(
1405 						avp, buf);
1406 					pkt_data = avp_dev_translate_buffer(
1407 						avp, pkt_buf->data);
1408 					src_offset = 0;
1409 				}
1410 			}
1411 
1412 			if (unlikely(rte_pktmbuf_data_len(m) ==
1413 				     avp->guest_mbuf_size)) {
1414 				/* need a new destination mbuf */
1415 				break;
1416 			}
1417 
1418 		} while (buf != NULL);
1419 	}
1420 
1421 	m = mbufs[0];
1422 	m->ol_flags = ol_flags;
1423 	m->nb_segs = count;
1424 	rte_pktmbuf_pkt_len(m) = total_length;
1425 	m->vlan_tci = vlan_tci;
1426 
1427 	__rte_mbuf_sanity_check(m, 1);
1428 
1429 	return m;
1430 }
1431 
1432 static uint16_t
1433 avp_recv_scattered_pkts(void *rx_queue,
1434 			struct rte_mbuf **rx_pkts,
1435 			uint16_t nb_pkts)
1436 {
1437 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1438 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1439 	struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1440 	struct avp_dev *avp = rxq->avp;
1441 	struct rte_avp_desc *pkt_buf;
1442 	struct rte_avp_fifo *free_q;
1443 	struct rte_avp_fifo *rx_q;
1444 	struct rte_avp_desc *buf;
1445 	unsigned int count, avail, n;
1446 	unsigned int guest_mbuf_size;
1447 	struct rte_mbuf *m;
1448 	unsigned int required;
1449 	unsigned int buf_len;
1450 	unsigned int port_id;
1451 	unsigned int i;
1452 
1453 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1454 		/* VM live migration in progress */
1455 		return 0;
1456 	}
1457 
1458 	guest_mbuf_size = avp->guest_mbuf_size;
1459 	port_id = avp->port_id;
1460 	rx_q = avp->rx_q[rxq->queue_id];
1461 	free_q = avp->free_q[rxq->queue_id];
1462 
1463 	/* setup next queue to service */
1464 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1465 		(rxq->queue_id + 1) : rxq->queue_base;
1466 
1467 	/* determine how many slots are available in the free queue */
1468 	count = avp_fifo_free_count(free_q);
1469 
1470 	/* determine how many packets are available in the rx queue */
1471 	avail = avp_fifo_count(rx_q);
1472 
1473 	/* determine how many packets can be received */
1474 	count = RTE_MIN(count, avail);
1475 	count = RTE_MIN(count, nb_pkts);
1476 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1477 
1478 	if (unlikely(count == 0)) {
1479 		/* no free buffers, or no buffers on the rx queue */
1480 		return 0;
1481 	}
1482 
1483 	/* retrieve pending packets */
1484 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1485 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1486 		   count, rx_q);
1487 
1488 	count = 0;
1489 	for (i = 0; i < n; i++) {
1490 		/* prefetch next entry while processing current one */
1491 		if (i + 1 < n) {
1492 			pkt_buf = avp_dev_translate_buffer(avp,
1493 							   avp_bufs[i + 1]);
1494 			rte_prefetch0(pkt_buf);
1495 		}
1496 		buf = avp_bufs[i];
1497 
1498 		/* Peek into the first buffer to determine the total length */
1499 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1500 		buf_len = pkt_buf->pkt_len;
1501 
1502 		/* Allocate enough mbufs to receive the entire packet */
1503 		required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1504 		if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1505 			rxq->dev_data->rx_mbuf_alloc_failed++;
1506 			continue;
1507 		}
1508 
1509 		/* Copy the data from the buffers to our mbufs */
1510 		m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1511 
1512 		/* finalize mbuf */
1513 		m->port = port_id;
1514 
1515 		if (_avp_mac_filter(avp, m) != 0) {
1516 			/* silently discard packets not destined to our MAC */
1517 			rte_pktmbuf_free(m);
1518 			continue;
1519 		}
1520 
1521 		/* return new mbuf to caller */
1522 		rx_pkts[count++] = m;
1523 		rxq->bytes += buf_len;
1524 	}
1525 
1526 	rxq->packets += count;
1527 
1528 	/* return the buffers to the free queue */
1529 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1530 
1531 	return count;
1532 }
1533 
1534 
1535 static uint16_t
1536 avp_recv_pkts(void *rx_queue,
1537 	      struct rte_mbuf **rx_pkts,
1538 	      uint16_t nb_pkts)
1539 {
1540 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1541 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1542 	struct avp_dev *avp = rxq->avp;
1543 	struct rte_avp_desc *pkt_buf;
1544 	struct rte_avp_fifo *free_q;
1545 	struct rte_avp_fifo *rx_q;
1546 	unsigned int count, avail, n;
1547 	unsigned int pkt_len;
1548 	struct rte_mbuf *m;
1549 	char *pkt_data;
1550 	unsigned int i;
1551 
1552 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1553 		/* VM live migration in progress */
1554 		return 0;
1555 	}
1556 
1557 	rx_q = avp->rx_q[rxq->queue_id];
1558 	free_q = avp->free_q[rxq->queue_id];
1559 
1560 	/* setup next queue to service */
1561 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1562 		(rxq->queue_id + 1) : rxq->queue_base;
1563 
1564 	/* determine how many slots are available in the free queue */
1565 	count = avp_fifo_free_count(free_q);
1566 
1567 	/* determine how many packets are available in the rx queue */
1568 	avail = avp_fifo_count(rx_q);
1569 
1570 	/* determine how many packets can be received */
1571 	count = RTE_MIN(count, avail);
1572 	count = RTE_MIN(count, nb_pkts);
1573 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1574 
1575 	if (unlikely(count == 0)) {
1576 		/* no free buffers, or no buffers on the rx queue */
1577 		return 0;
1578 	}
1579 
1580 	/* retrieve pending packets */
1581 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1582 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1583 		   count, rx_q);
1584 
1585 	count = 0;
1586 	for (i = 0; i < n; i++) {
1587 		/* prefetch next entry while processing current one */
1588 		if (i < n - 1) {
1589 			pkt_buf = avp_dev_translate_buffer(avp,
1590 							   avp_bufs[i + 1]);
1591 			rte_prefetch0(pkt_buf);
1592 		}
1593 
1594 		/* Adjust host pointers for guest addressing */
1595 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1596 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1597 		pkt_len = pkt_buf->pkt_len;
1598 
1599 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1600 			     (pkt_buf->nb_segs > 1))) {
1601 			/*
1602 			 * application should be using the scattered receive
1603 			 * function
1604 			 */
1605 			rxq->errors++;
1606 			continue;
1607 		}
1608 
1609 		/* allocate a new mbuf for the received packet */
1610 		m = rte_pktmbuf_alloc(avp->pool);
1611 		if (unlikely(m == NULL)) {
1612 			rxq->dev_data->rx_mbuf_alloc_failed++;
1613 			continue;
1614 		}
1615 
1616 		/* copy data out of the host buffer to our buffer */
1617 		m->data_off = RTE_PKTMBUF_HEADROOM;
1618 		rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1619 
1620 		/* initialize the local mbuf */
1621 		rte_pktmbuf_data_len(m) = pkt_len;
1622 		rte_pktmbuf_pkt_len(m) = pkt_len;
1623 		m->port = avp->port_id;
1624 
1625 		if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1626 			m->ol_flags = PKT_RX_VLAN_PKT;
1627 			m->vlan_tci = pkt_buf->vlan_tci;
1628 		}
1629 
1630 		if (_avp_mac_filter(avp, m) != 0) {
1631 			/* silently discard packets not destined to our MAC */
1632 			rte_pktmbuf_free(m);
1633 			continue;
1634 		}
1635 
1636 		/* return new mbuf to caller */
1637 		rx_pkts[count++] = m;
1638 		rxq->bytes += pkt_len;
1639 	}
1640 
1641 	rxq->packets += count;
1642 
1643 	/* return the buffers to the free queue */
1644 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1645 
1646 	return count;
1647 }
1648 
1649 /*
1650  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1651  * there are sufficient destination buffers to contain the entire source
1652  * packet.
1653  */
1654 static inline uint16_t
1655 avp_dev_copy_to_buffers(struct avp_dev *avp,
1656 			struct rte_mbuf *mbuf,
1657 			struct rte_avp_desc **buffers,
1658 			unsigned int count)
1659 {
1660 	struct rte_avp_desc *previous_buf = NULL;
1661 	struct rte_avp_desc *first_buf = NULL;
1662 	struct rte_avp_desc *pkt_buf;
1663 	struct rte_avp_desc *buf;
1664 	size_t total_length;
1665 	struct rte_mbuf *m;
1666 	size_t copy_length;
1667 	size_t src_offset;
1668 	char *pkt_data;
1669 	unsigned int i;
1670 
1671 	__rte_mbuf_sanity_check(mbuf, 1);
1672 
1673 	m = mbuf;
1674 	src_offset = 0;
1675 	total_length = rte_pktmbuf_pkt_len(m);
1676 	for (i = 0; (i < count) && (m != NULL); i++) {
1677 		/* fill each destination buffer */
1678 		buf = buffers[i];
1679 
1680 		if (i < count - 1) {
1681 			/* prefetch next entry while processing this one */
1682 			pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1683 			rte_prefetch0(pkt_buf);
1684 		}
1685 
1686 		/* Adjust pointers for guest addressing */
1687 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1688 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1689 
1690 		/* setup the buffer chain */
1691 		if (previous_buf != NULL)
1692 			previous_buf->next = buf;
1693 		else
1694 			first_buf = pkt_buf;
1695 
1696 		previous_buf = pkt_buf;
1697 
1698 		do {
1699 			/*
1700 			 * copy as many source mbuf segments as will fit in the
1701 			 * destination buffer.
1702 			 */
1703 			copy_length = RTE_MIN((avp->host_mbuf_size -
1704 					       pkt_buf->data_len),
1705 					      (rte_pktmbuf_data_len(m) -
1706 					       src_offset));
1707 			rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1708 				   RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1709 					       src_offset),
1710 				   copy_length);
1711 			pkt_buf->data_len += copy_length;
1712 			src_offset += copy_length;
1713 
1714 			if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1715 				/* need a new source buffer */
1716 				m = m->next;
1717 				src_offset = 0;
1718 			}
1719 
1720 			if (unlikely(pkt_buf->data_len ==
1721 				     avp->host_mbuf_size)) {
1722 				/* need a new destination buffer */
1723 				break;
1724 			}
1725 
1726 		} while (m != NULL);
1727 	}
1728 
1729 	first_buf->nb_segs = count;
1730 	first_buf->pkt_len = total_length;
1731 
1732 	if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1733 		first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1734 		first_buf->vlan_tci = mbuf->vlan_tci;
1735 	}
1736 
1737 	avp_dev_buffer_sanity_check(avp, buffers[0]);
1738 
1739 	return total_length;
1740 }
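
/*
 * Worked example (illustrative sizes, not taken from a real host): with
 * avp->host_mbuf_size == 2048 and a 5000-byte chained mbuf, the caller
 * computes
 *
 *	required = (5000 + 2048 - 1) / 2048 = 3
 *
 * host descriptors and this routine fills them back to back: the first two
 * carry 2048 bytes each and the third carries the remaining 904 bytes.  Only
 * the first descriptor receives the aggregate nb_segs/pkt_len and the VLAN
 * metadata, matching the assignments above.
 */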
1741 
1742 
1743 static uint16_t
1744 avp_xmit_scattered_pkts(void *tx_queue,
1745 			struct rte_mbuf **tx_pkts,
1746 			uint16_t nb_pkts)
1747 {
1748 	struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1749 				       RTE_AVP_MAX_MBUF_SEGMENTS)];
1750 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1751 	struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1752 	struct avp_dev *avp = txq->avp;
1753 	struct rte_avp_fifo *alloc_q;
1754 	struct rte_avp_fifo *tx_q;
1755 	unsigned int count, avail, n;
1756 	unsigned int orig_nb_pkts;
1757 	struct rte_mbuf *m;
1758 	unsigned int required;
1759 	unsigned int segments;
1760 	unsigned int tx_bytes;
1761 	unsigned int i;
1762 
1763 	orig_nb_pkts = nb_pkts;
1764 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1765 		/* VM live migration in progress */
1766 		/* TODO ... buffer for X packets then drop? */
1767 		txq->errors += nb_pkts;
1768 		return 0;
1769 	}
1770 
1771 	tx_q = avp->tx_q[txq->queue_id];
1772 	alloc_q = avp->alloc_q[txq->queue_id];
1773 
1774 	/* limit the number of transmitted packets to the max burst size */
1775 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1776 		nb_pkts = AVP_MAX_TX_BURST;
1777 
1778 	/* determine how many buffers are available to copy into */
1779 	avail = avp_fifo_count(alloc_q);
1780 	if (unlikely(avail > (AVP_MAX_TX_BURST *
1781 			      RTE_AVP_MAX_MBUF_SEGMENTS)))
1782 		avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1783 
1784 	/* determine how many slots are available in the transmit queue */
1785 	count = avp_fifo_free_count(tx_q);
1786 
1787 	/* determine how many packets can be sent */
1788 	nb_pkts = RTE_MIN(count, nb_pkts);
1789 
1790 	/* determine how many packets will fit in the available buffers */
1791 	count = 0;
1792 	segments = 0;
1793 	for (i = 0; i < nb_pkts; i++) {
1794 		m = tx_pkts[i];
1795 		if (likely(i < (unsigned int)nb_pkts - 1)) {
1796 			/* prefetch next entry while processing this one */
1797 			rte_prefetch0(tx_pkts[i + 1]);
1798 		}
1799 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1800 			avp->host_mbuf_size;
1801 
1802 		if (unlikely((required == 0) ||
1803 			     (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1804 			break;
1805 		else if (unlikely(required + segments > avail))
1806 			break;
1807 		segments += required;
1808 		count++;
1809 	}
1810 	nb_pkts = count;
1811 
1812 	if (unlikely(nb_pkts == 0)) {
1813 		/* no available buffers, or no space on the tx queue */
1814 		txq->errors += orig_nb_pkts;
1815 		return 0;
1816 	}
1817 
1818 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1819 		   nb_pkts, tx_q);
1820 
1821 	/* retrieve sufficient send buffers */
1822 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1823 	if (unlikely(n != segments)) {
1824 		PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1825 			   "n=%u, segments=%u, orig=%u\n",
1826 			   n, segments, orig_nb_pkts);
1827 		txq->errors += orig_nb_pkts;
1828 		return 0;
1829 	}
1830 
1831 	tx_bytes = 0;
1832 	count = 0;
1833 	for (i = 0; i < nb_pkts; i++) {
1834 		/* process each packet to be transmitted */
1835 		m = tx_pkts[i];
1836 
1837 		/* determine how many buffers are required for this packet */
1838 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1839 			avp->host_mbuf_size;
1840 
1841 		tx_bytes += avp_dev_copy_to_buffers(avp, m,
1842 						    &avp_bufs[count], required);
1843 		tx_bufs[i] = avp_bufs[count];
1844 		count += required;
1845 
1846 		/* free the original mbuf */
1847 		rte_pktmbuf_free(m);
1848 	}
1849 
1850 	txq->packets += nb_pkts;
1851 	txq->bytes += tx_bytes;
1852 
1853 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1854 	for (i = 0; i < nb_pkts; i++)
1855 		avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1856 #endif
1857 
1858 	/* send the packets */
1859 	n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1860 	if (unlikely(n != orig_nb_pkts))
1861 		txq->errors += (orig_nb_pkts - n);
1862 
1863 	return n;
1864 }
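
/*
 * Illustrative application-side usage (assumes port 0, queue 0 and a burst
 * already assembled in pkts[]): both transmit handlers are reached through
 * the same generic call; mbufs accepted here are copied into host buffers
 * and freed, so the caller only owns whatever was not accepted:
 *
 *	uint16_t sent = rte_eth_tx_burst(0, 0, pkts, nb);
 *
 *	while (sent < nb)
 *		rte_pktmbuf_free(pkts[sent++]);	// or retry later
 */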
1865 
1866 
1867 static uint16_t
1868 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1869 {
1870 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1871 	struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1872 	struct avp_dev *avp = txq->avp;
1873 	struct rte_avp_desc *pkt_buf;
1874 	struct rte_avp_fifo *alloc_q;
1875 	struct rte_avp_fifo *tx_q;
1876 	unsigned int count, avail, n;
1877 	struct rte_mbuf *m;
1878 	unsigned int pkt_len;
1879 	unsigned int tx_bytes;
1880 	char *pkt_data;
1881 	unsigned int i;
1882 
1883 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1884 		/* VM live migration in progress */
1885 		/* TODO ... buffer for X packets then drop?! */
1886 		txq->errors++;
1887 		return 0;
1888 	}
1889 
1890 	tx_q = avp->tx_q[txq->queue_id];
1891 	alloc_q = avp->alloc_q[txq->queue_id];
1892 
1893 	/* limit the number of transmitted packets to the max burst size */
1894 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1895 		nb_pkts = AVP_MAX_TX_BURST;
1896 
1897 	/* determine how many buffers are available to copy into */
1898 	avail = avp_fifo_count(alloc_q);
1899 
1900 	/* determine how many slots are available in the transmit queue */
1901 	count = avp_fifo_free_count(tx_q);
1902 
1903 	/* determine how many packets can be sent */
1904 	count = RTE_MIN(count, avail);
1905 	count = RTE_MIN(count, nb_pkts);
1906 
1907 	if (unlikely(count == 0)) {
1908 		/* no available buffers, or no space on the tx queue */
1909 		txq->errors += nb_pkts;
1910 		return 0;
1911 	}
1912 
1913 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1914 		   count, tx_q);
1915 
1916 	/* retrieve sufficient send buffers */
1917 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1918 	if (unlikely(n != count)) {
1919 		txq->errors++;
1920 		return 0;
1921 	}
1922 
1923 	tx_bytes = 0;
1924 	for (i = 0; i < count; i++) {
1925 		/* prefetch next entry while processing the current one */
1926 		if (i < count - 1) {
1927 			pkt_buf = avp_dev_translate_buffer(avp,
1928 							   avp_bufs[i + 1]);
1929 			rte_prefetch0(pkt_buf);
1930 		}
1931 
1932 		/* process each packet to be transmitted */
1933 		m = tx_pkts[i];
1934 
1935 		/* Adjust pointers for guest addressing */
1936 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1937 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1938 		pkt_len = rte_pktmbuf_pkt_len(m);
1939 
1940 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1941 			     (pkt_len > avp->host_mbuf_size))) {
1942 			/*
1943 			 * The application should be using the scattered
1944 			 * transmit function; send the packet truncated to
1945 			 * avoid the cost of returning the already allocated
1946 			 * buffer to the free list.  This should not happen in
1947 			 * practice since the application should have set
1948 			 * max_rx_pkt_len based on its MTU and should be
1949 			 * policing its own packet sizes.
1950 			 */
1951 			txq->errors++;
1952 			pkt_len = RTE_MIN(avp->guest_mbuf_size,
1953 					  avp->host_mbuf_size);
1954 		}
1955 
1956 		/* copy data out of our mbuf and into the AVP buffer */
1957 		rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1958 		pkt_buf->pkt_len = pkt_len;
1959 		pkt_buf->data_len = pkt_len;
1960 		pkt_buf->nb_segs = 1;
1961 		pkt_buf->next = NULL;
1962 
1963 		if (m->ol_flags & PKT_TX_VLAN_PKT) {
1964 			pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1965 			pkt_buf->vlan_tci = m->vlan_tci;
1966 		}
1967 
1968 		tx_bytes += pkt_len;
1969 
1970 		/* free the original mbuf */
1971 		rte_pktmbuf_free(m);
1972 	}
1973 
1974 	txq->packets += count;
1975 	txq->bytes += tx_bytes;
1976 
1977 	/* send the packets */
1978 	n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1979 
1980 	return n;
1981 }
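
/*
 * Note on the truncation path above: this handler copies each packet into a
 * single host buffer, so the usable frame size is bounded by
 * RTE_MIN(avp->guest_mbuf_size, avp->host_mbuf_size).  As an example
 * (assuming both buffer sizes are 2048 bytes), a 3000-byte frame would be
 * sent truncated to 2048 bytes and counted in txq->errors.  Applications
 * that need larger frames are expected to end up on the scattered handler
 * above instead.
 */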
1982 
1983 static void
1984 avp_dev_rx_queue_release(void *rx_queue)
1985 {
1986 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1987 	struct avp_dev *avp = rxq->avp;
1988 	struct rte_eth_dev_data *data = avp->dev_data;
1989 	unsigned int i;
1990 
1991 	for (i = 0; i < avp->num_rx_queues; i++) {
1992 		if (data->rx_queues[i] == rxq)
1993 			data->rx_queues[i] = NULL;
1994 	}
1995 }
1996 
1997 static void
1998 avp_dev_tx_queue_release(void *tx_queue)
1999 {
2000 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
2001 	struct avp_dev *avp = txq->avp;
2002 	struct rte_eth_dev_data *data = avp->dev_data;
2003 	unsigned int i;
2004 
2005 	for (i = 0; i < avp->num_tx_queues; i++) {
2006 		if (data->tx_queues[i] == txq)
2007 			data->tx_queues[i] = NULL;
2008 	}
2009 }
2010 
2011 static int
2012 avp_dev_configure(struct rte_eth_dev *eth_dev)
2013 {
2014 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
2015 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2016 	struct rte_avp_device_info *host_info;
2017 	struct rte_avp_device_config config;
2018 	int mask = 0;
2019 	void *addr;
2020 	int ret;
2021 
2022 	rte_spinlock_lock(&avp->lock);
2023 	if (avp->flags & AVP_F_DETACHED) {
2024 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2025 		ret = -ENOTSUP;
2026 		goto unlock;
2027 	}
2028 
2029 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2030 	host_info = (struct rte_avp_device_info *)addr;
2031 
2032 	/* Setup required number of queues */
2033 	/* Set up the required number of queues */
2034 
2035 	mask = (ETH_VLAN_STRIP_MASK |
2036 		ETH_VLAN_FILTER_MASK |
2037 		ETH_VLAN_EXTEND_MASK);
2038 	avp_vlan_offload_set(eth_dev, mask);
2039 
2040 	/* update device config */
2041 	memset(&config, 0, sizeof(config));
2042 	config.device_id = host_info->device_id;
2043 	config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2044 	config.driver_version = AVP_DPDK_DRIVER_VERSION;
2045 	config.features = avp->features;
2046 	config.num_tx_queues = avp->num_tx_queues;
2047 	config.num_rx_queues = avp->num_rx_queues;
2048 
2049 	ret = avp_dev_ctrl_set_config(eth_dev, &config);
2050 	if (ret < 0) {
2051 		PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2052 			    ret);
2053 		goto unlock;
2054 	}
2055 
2056 	avp->flags |= AVP_F_CONFIGURED;
2057 	ret = 0;
2058 
2059 unlock:
2060 	rte_spinlock_unlock(&avp->lock);
2061 	return ret;
2062 }
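
/*
 * Illustrative application-side counterpart (assumes port 0 with one queue
 * pair; a minimal sketch, not the only valid configuration): this op runs in
 * response to rte_eth_dev_configure(), and the rxmode flags chosen there are
 * what avp_vlan_offload_set() inspects below:
 *
 *	struct rte_eth_conf conf;
 *
 *	memset(&conf, 0, sizeof(conf));
 *	conf.rxmode.hw_vlan_strip = 1;	// request VLAN strip offload
 *	rte_eth_dev_configure(0, 1, 1, &conf);
 */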
2063 
2064 static int
2065 avp_dev_start(struct rte_eth_dev *eth_dev)
2066 {
2067 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2068 	int ret;
2069 
2070 	rte_spinlock_lock(&avp->lock);
2071 	if (avp->flags & AVP_F_DETACHED) {
2072 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2073 		ret = -ENOTSUP;
2074 		goto unlock;
2075 	}
2076 
2077 	/* disable features that we do not support */
2078 	eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
2079 	eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
2080 	eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
2081 	eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
2082 
2083 	/* update link state */
2084 	ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2085 	if (ret < 0) {
2086 		PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2087 			    ret);
2088 		goto unlock;
2089 	}
2090 
2091 	/* remember current link state */
2092 	avp->flags |= AVP_F_LINKUP;
2093 
2094 	ret = 0;
2095 
2096 unlock:
2097 	rte_spinlock_unlock(&avp->lock);
2098 	return ret;
2099 }
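
/*
 * Illustrative bring-up sequence (application side; assumes port 0, one
 * queue pair, 512 descriptors per ring, a populated struct rte_eth_conf
 * "conf" and an existing mempool "mp"):
 *
 *	rte_eth_dev_configure(0, 1, 1, &conf);
 *	rte_eth_rx_queue_setup(0, 0, 512, rte_socket_id(), NULL, mp);
 *	rte_eth_tx_queue_setup(0, 0, 512, rte_socket_id(), NULL);
 *	rte_eth_dev_start(0);
 *
 * A successful start reports link-up to the host via
 * avp_dev_ctrl_set_link_state() and makes the burst handlers usable.
 */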
2100 
2101 static void
2102 avp_dev_stop(struct rte_eth_dev *eth_dev)
2103 {
2104 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2105 	int ret;
2106 
2107 	rte_spinlock_lock(&avp->lock);
2108 	if (avp->flags & AVP_F_DETACHED) {
2109 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2110 		goto unlock;
2111 	}
2112 
2113 	/* remember current link state */
2114 	avp->flags &= ~AVP_F_LINKUP;
2115 
2116 	/* update link state */
2117 	ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2118 	if (ret < 0) {
2119 		PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2120 			    ret);
2121 	}
2122 
2123 unlock:
2124 	rte_spinlock_unlock(&avp->lock);
2125 }
2126 
2127 static void
2128 avp_dev_close(struct rte_eth_dev *eth_dev)
2129 {
2130 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2131 	int ret;
2132 
2133 	rte_spinlock_lock(&avp->lock);
2134 	if (avp->flags & AVP_F_DETACHED) {
2135 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2136 		goto unlock;
2137 	}
2138 
2139 	/* remember current link state */
2140 	avp->flags &= ~AVP_F_LINKUP;
2141 	avp->flags &= ~AVP_F_CONFIGURED;
2142 
2143 	ret = avp_dev_disable_interrupts(eth_dev);
2144 	if (ret < 0) {
2145 		PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2146 		/* continue */
2147 	}
2148 
2149 	/* update device state */
2150 	ret = avp_dev_ctrl_shutdown(eth_dev);
2151 	if (ret < 0) {
2152 		PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2153 			    ret);
2154 		/* continue */
2155 	}
2156 
2157 unlock:
2158 	rte_spinlock_unlock(&avp->lock);
2159 }
2160 
2161 static int
2162 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2163 		    __rte_unused int wait_to_complete)
2164 {
2165 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2166 	struct rte_eth_link *link = &eth_dev->data->dev_link;
2167 
2168 	link->link_speed = ETH_SPEED_NUM_10G;
2169 	link->link_duplex = ETH_LINK_FULL_DUPLEX;
2170 	link->link_status = !!(avp->flags & AVP_F_LINKUP);
2171 
2172 	return -1;
2173 }
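
/*
 * The reported link is synthetic: speed and duplex are fixed at 10G/full and
 * only the status bit tracks AVP_F_LINKUP.  Illustrative read-out from the
 * application (assumes port 0):
 *
 *	struct rte_eth_link link;
 *
 *	rte_eth_link_get_nowait(0, &link);
 *	if (link.link_status == ETH_LINK_UP)
 *		printf("link up at %u Mbps\n", link.link_speed);
 */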
2174 
2175 static void
2176 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2177 {
2178 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2179 
2180 	rte_spinlock_lock(&avp->lock);
2181 	if ((avp->flags & AVP_F_PROMISC) == 0) {
2182 		avp->flags |= AVP_F_PROMISC;
2183 		PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2184 			    eth_dev->data->port_id);
2185 	}
2186 	rte_spinlock_unlock(&avp->lock);
2187 }
2188 
2189 static void
2190 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2191 {
2192 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2193 
2194 	rte_spinlock_lock(&avp->lock);
2195 	if ((avp->flags & AVP_F_PROMISC) != 0) {
2196 		avp->flags &= ~AVP_F_PROMISC;
2197 		PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2198 			    eth_dev->data->port_id);
2199 	}
2200 	rte_spinlock_unlock(&avp->lock);
2201 }
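
/*
 * Promiscuous mode is a software toggle in this driver; the AVP_F_PROMISC
 * flag set here is expected to be honoured by the MAC filter applied in the
 * receive path (_avp_mac_filter()).  Illustrative application calls,
 * assuming port 0:
 *
 *	rte_eth_promiscuous_enable(0);
 *	rte_eth_promiscuous_disable(0);
 */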
2202 
2203 static void
2204 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2205 		 struct rte_eth_dev_info *dev_info)
2206 {
2207 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2208 
2209 	dev_info->driver_name = "rte_avp_pmd";
2210 	dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
2211 	dev_info->max_rx_queues = avp->max_rx_queues;
2212 	dev_info->max_tx_queues = avp->max_tx_queues;
2213 	dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2214 	dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2215 	dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2216 	if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2217 		dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2218 		dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2219 	}
2220 }
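
/*
 * Illustrative query (application side, assumes port 0 and an rte_eth_conf
 * named "conf"): the capabilities filled in above can be used to decide
 * whether VLAN stripping should be requested before rte_eth_dev_configure():
 *
 *	struct rte_eth_dev_info info;
 *
 *	rte_eth_dev_info_get(0, &info);
 *	if (info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP)
 *		conf.rxmode.hw_vlan_strip = 1;
 */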
2221 
2222 static void
2223 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2224 {
2225 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2226 
2227 	if (mask & ETH_VLAN_STRIP_MASK) {
2228 		if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2229 			if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
2230 				avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2231 			else
2232 				avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2233 		} else {
2234 			PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2235 		}
2236 	}
2237 
2238 	if (mask & ETH_VLAN_FILTER_MASK) {
2239 		if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
2240 			PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2241 	}
2242 
2243 	if (mask & ETH_VLAN_EXTEND_MASK) {
2244 		if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
2245 			PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2246 	}
2247 }
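
/*
 * Runtime VLAN offload changes can also reach this op through the generic
 * rte_eth_dev_set_vlan_offload() call; only stripping is honoured, and only
 * when the host advertises RTE_AVP_FEATURE_VLAN_OFFLOAD.  Illustrative
 * toggle (assumes port 0):
 *
 *	rte_eth_dev_set_vlan_offload(0, ETH_VLAN_STRIP_OFFLOAD);
 */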
2248 
2249 static void
2250 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2251 {
2252 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2253 	unsigned int i;
2254 
2255 	for (i = 0; i < avp->num_rx_queues; i++) {
2256 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2257 
2258 		if (rxq) {
2259 			stats->ipackets += rxq->packets;
2260 			stats->ibytes += rxq->bytes;
2261 			stats->ierrors += rxq->errors;
2262 
2263 			stats->q_ipackets[i] += rxq->packets;
2264 			stats->q_ibytes[i] += rxq->bytes;
2265 			stats->q_errors[i] += rxq->errors;
2266 		}
2267 	}
2268 
2269 	for (i = 0; i < avp->num_tx_queues; i++) {
2270 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2271 
2272 		if (txq) {
2273 			stats->opackets += txq->packets;
2274 			stats->obytes += txq->bytes;
2275 			stats->oerrors += txq->errors;
2276 
2277 			stats->q_opackets[i] += txq->packets;
2278 			stats->q_obytes[i] += txq->bytes;
2279 			stats->q_errors[i] += txq->errors;
2280 		}
2281 	}
2282 }
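
/*
 * Illustrative read-out (assumes port 0): the per-queue counters accumulated
 * in the burst handlers are exposed through the standard stats API, and
 * rte_eth_stats_reset() clears them again via avp_dev_stats_reset() below:
 *
 *	struct rte_eth_stats stats;
 *
 *	rte_eth_stats_get(0, &stats);
 *	printf("rx %" PRIu64 ", tx %" PRIu64 ", tx errors %" PRIu64 "\n",
 *	       stats.ipackets, stats.opackets, stats.oerrors);
 */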
2283 
2284 static void
2285 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2286 {
2287 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2288 	unsigned int i;
2289 
2290 	for (i = 0; i < avp->num_rx_queues; i++) {
2291 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2292 
2293 		if (rxq) {
2294 			rxq->bytes = 0;
2295 			rxq->packets = 0;
2296 			rxq->errors = 0;
2297 		}
2298 	}
2299 
2300 	for (i = 0; i < avp->num_tx_queues; i++) {
2301 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2302 
2303 		if (txq) {
2304 			txq->bytes = 0;
2305 			txq->packets = 0;
2306 			txq->errors = 0;
2307 		}
2308 	}
2309 }
2310 
2311 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
2312 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);
2313