xref: /dpdk/drivers/net/avp/avp_ethdev.c (revision a83d9f26a005003877c85df8b766098b98c54f93)
1 /*
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2013-2017, Wind River Systems, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * 2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * 3) Neither the name of Wind River Systems nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <stdint.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38 
39 #include <rte_ethdev.h>
40 #include <rte_memcpy.h>
41 #include <rte_string_fns.h>
42 #include <rte_memzone.h>
43 #include <rte_malloc.h>
44 #include <rte_atomic.h>
45 #include <rte_branch_prediction.h>
46 #include <rte_pci.h>
47 #include <rte_ether.h>
48 #include <rte_common.h>
49 #include <rte_cycles.h>
50 #include <rte_spinlock.h>
51 #include <rte_byteorder.h>
52 #include <rte_dev.h>
53 #include <rte_memory.h>
54 #include <rte_eal.h>
55 #include <rte_io.h>
56 
57 #include "rte_avp_common.h"
58 #include "rte_avp_fifo.h"
59 
60 #include "avp_logs.h"
61 
62 
63 static int avp_dev_create(struct rte_pci_device *pci_dev,
64 			  struct rte_eth_dev *eth_dev);
65 
66 static int avp_dev_configure(struct rte_eth_dev *dev);
67 static int avp_dev_start(struct rte_eth_dev *dev);
68 static void avp_dev_stop(struct rte_eth_dev *dev);
69 static void avp_dev_close(struct rte_eth_dev *dev);
70 static void avp_dev_info_get(struct rte_eth_dev *dev,
71 			     struct rte_eth_dev_info *dev_info);
72 static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
73 static int avp_dev_link_update(struct rte_eth_dev *dev,
74 			       __rte_unused int wait_to_complete);
75 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
76 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
77 
78 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
79 				  uint16_t rx_queue_id,
80 				  uint16_t nb_rx_desc,
81 				  unsigned int socket_id,
82 				  const struct rte_eth_rxconf *rx_conf,
83 				  struct rte_mempool *pool);
84 
85 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
86 				  uint16_t tx_queue_id,
87 				  uint16_t nb_tx_desc,
88 				  unsigned int socket_id,
89 				  const struct rte_eth_txconf *tx_conf);
90 
91 static uint16_t avp_recv_scattered_pkts(void *rx_queue,
92 					struct rte_mbuf **rx_pkts,
93 					uint16_t nb_pkts);
94 
95 static uint16_t avp_recv_pkts(void *rx_queue,
96 			      struct rte_mbuf **rx_pkts,
97 			      uint16_t nb_pkts);
98 
99 static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
100 					struct rte_mbuf **tx_pkts,
101 					uint16_t nb_pkts);
102 
103 static uint16_t avp_xmit_pkts(void *tx_queue,
104 			      struct rte_mbuf **tx_pkts,
105 			      uint16_t nb_pkts);
106 
107 static void avp_dev_rx_queue_release(void *rxq);
108 static void avp_dev_tx_queue_release(void *txq);
109 
110 static void avp_dev_stats_get(struct rte_eth_dev *dev,
111 			      struct rte_eth_stats *stats);
112 static void avp_dev_stats_reset(struct rte_eth_dev *dev);
113 
114 
115 #define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)
116 
117 
118 #define AVP_MAX_RX_BURST 64
119 #define AVP_MAX_TX_BURST 64
120 #define AVP_MAX_MAC_ADDRS 1
121 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN
122 
123 
124 /*
125  * Defines the number of microseconds to wait before checking the response
126  * queue for completion.
127  */
128 #define AVP_REQUEST_DELAY_USECS (5000)
129 
130 /*
131  * Defines the number of times to check the response queue for completion before
132  * declaring a timeout.
133  */
134 #define AVP_MAX_REQUEST_RETRY (100)
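
/*
 * Taken together, the two values above bound the wait for a host response:
 * AVP_MAX_REQUEST_RETRY * AVP_REQUEST_DELAY_USECS = 100 * 5000us, i.e. a
 * worst case of roughly 500ms before avp_dev_process_request() returns -ETIME.
 */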
135 
136 /* Defines the current PCI driver version number */
137 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
138 
139 /*
140  * The set of PCI devices this driver supports
141  */
142 static const struct rte_pci_id pci_id_avp_map[] = {
143 	{ .vendor_id = RTE_AVP_PCI_VENDOR_ID,
144 	  .device_id = RTE_AVP_PCI_DEVICE_ID,
145 	  .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
146 	  .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
147 	  .class_id = RTE_CLASS_ANY_ID,
148 	},
149 
150 	{ .vendor_id = 0, /* sentinel */
151 	},
152 };
153 
154 /*
155  * dev_ops for avp, bare necessities for basic operation
156  */
157 static const struct eth_dev_ops avp_eth_dev_ops = {
158 	.dev_configure       = avp_dev_configure,
159 	.dev_start           = avp_dev_start,
160 	.dev_stop            = avp_dev_stop,
161 	.dev_close           = avp_dev_close,
162 	.dev_infos_get       = avp_dev_info_get,
163 	.vlan_offload_set    = avp_vlan_offload_set,
164 	.stats_get           = avp_dev_stats_get,
165 	.stats_reset         = avp_dev_stats_reset,
166 	.link_update         = avp_dev_link_update,
167 	.promiscuous_enable  = avp_dev_promiscuous_enable,
168 	.promiscuous_disable = avp_dev_promiscuous_disable,
169 	.rx_queue_setup      = avp_dev_rx_queue_setup,
170 	.rx_queue_release    = avp_dev_rx_queue_release,
171 	.tx_queue_setup      = avp_dev_tx_queue_setup,
172 	.tx_queue_release    = avp_dev_tx_queue_release,
173 };
174 
175 /**@{ AVP device flags */
176 #define AVP_F_PROMISC (1 << 1)
177 #define AVP_F_CONFIGURED (1 << 2)
178 #define AVP_F_LINKUP (1 << 3)
179 #define AVP_F_DETACHED (1 << 4)
180 /**@} */
181 
182 /* Ethernet device validation marker */
183 #define AVP_ETHDEV_MAGIC 0x92972862
184 
185 /*
186  * Defines the AVP device attributes which are attached to an RTE ethernet
187  * device
188  */
189 struct avp_dev {
190 	uint32_t magic; /**< Memory validation marker */
191 	uint64_t device_id; /**< Unique system identifier */
192 	struct ether_addr ethaddr; /**< Host specified MAC address */
193 	struct rte_eth_dev_data *dev_data;
194 	/**< Back pointer to ethernet device data */
195 	volatile uint32_t flags; /**< Device operational flags */
196 	uint8_t port_id; /**< Ethernet port identifier */
197 	struct rte_mempool *pool; /**< pkt mbuf mempool */
198 	unsigned int guest_mbuf_size; /**< local pool mbuf size */
199 	unsigned int host_mbuf_size; /**< host mbuf size */
200 	unsigned int max_rx_pkt_len; /**< maximum receive unit */
201 	uint32_t host_features; /**< Supported feature bitmap */
202 	uint32_t features; /**< Enabled feature bitmap */
203 	unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
204 	unsigned int max_tx_queues; /**< Maximum number of transmit queues */
205 	unsigned int num_rx_queues; /**< Negotiated number of receive queues */
206 	unsigned int max_rx_queues; /**< Maximum number of receive queues */
207 
208 	struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
209 	struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
210 	struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
211 	/**< Allocated mbufs queue */
212 	struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
213 	/**< To be freed mbufs queue */
214 
215 	/* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
216 	rte_spinlock_t lock;
217 
218 	/* For request & response */
219 	struct rte_avp_fifo *req_q; /**< Request queue */
220 	struct rte_avp_fifo *resp_q; /**< Response queue */
221 	void *host_sync_addr; /**< (host) Req/Resp Mem address */
222 	void *sync_addr; /**< Req/Resp Mem address */
223 	void *host_mbuf_addr; /**< (host) MBUF pool start address */
224 	void *mbuf_addr; /**< MBUF pool start address */
225 } __rte_cache_aligned;
226 
227 /* RTE ethernet private data */
228 struct avp_adapter {
229 	struct avp_dev avp;
230 } __rte_cache_aligned;
231 
232 
233 /* 32-bit MMIO register write */
234 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
235 
236 /* 32-bit MMIO register read */
237 #define AVP_READ32(_addr) rte_read32_relaxed((_addr))
238 
239 /* Macro to cast the ethernet device private data to an AVP object */
240 #define AVP_DEV_PRIVATE_TO_HW(adapter) \
241 	(&((struct avp_adapter *)adapter)->avp)
242 
243 /*
244  * Defines the structure of an AVP device queue for the purpose of handling the
245  * receive and transmit burst callback functions
246  */
247 struct avp_queue {
248 	struct rte_eth_dev_data *dev_data;
249 	/**< Backpointer to ethernet device data */
250 	struct avp_dev *avp; /**< Backpointer to AVP device */
251 	uint16_t queue_id;
252 	/**< Queue identifier used for indexing current queue */
253 	uint16_t queue_base;
254 	/**< Base queue identifier for queue servicing */
255 	uint16_t queue_limit;
256 	/**< Maximum queue identifier for queue servicing */
257 
258 	uint64_t packets;
259 	uint64_t bytes;
260 	uint64_t errors;
261 };
262 
263 /* send a request and wait for a response
264  *
265  * @warning must be called while holding the avp->lock spinlock.
266  */
267 static int
268 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
269 {
270 	unsigned int retry = AVP_MAX_REQUEST_RETRY;
271 	void *resp_addr = NULL;
272 	unsigned int count;
273 	int ret;
274 
275 	PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
276 
277 	request->result = -ENOTSUP;
278 
279 	/* Discard any stale responses before starting a new request */
280 	while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
281 		PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
282 
283 	rte_memcpy(avp->sync_addr, request, sizeof(*request));
284 	count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
285 	if (count < 1) {
286 		PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
287 			    request->req_id);
288 		ret = -EBUSY;
289 		goto done;
290 	}
291 
292 	while (retry--) {
293 		/* wait for a response */
294 		usleep(AVP_REQUEST_DELAY_USECS);
295 
296 		count = avp_fifo_count(avp->resp_q);
297 		if (count >= 1) {
298 			/* response received */
299 			break;
300 		}
301 
302 		if ((count < 1) && (retry == 0)) {
303 			PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
304 				    request->req_id);
305 			ret = -ETIME;
306 			goto done;
307 		}
308 	}
309 
310 	/* retrieve the response */
311 	count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
312 	if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
313 		PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
314 			    count, resp_addr, avp->host_sync_addr);
315 		ret = -ENODATA;
316 		goto done;
317 	}
318 
319 	/* copy to user buffer */
320 	rte_memcpy(request, avp->sync_addr, sizeof(*request));
321 	ret = 0;
322 
323 	PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
324 		    request->result, request->req_id);
325 
326 done:
327 	return ret;
328 }
329 
330 static int
331 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
332 {
333 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
334 	struct rte_avp_request request;
335 	int ret;
336 
337 	/* setup a link state change request */
338 	memset(&request, 0, sizeof(request));
339 	request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
340 	request.if_up = state;
341 
342 	ret = avp_dev_process_request(avp, &request);
343 
344 	return ret == 0 ? request.result : ret;
345 }
346 
347 static int
348 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
349 			struct rte_avp_device_config *config)
350 {
351 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
352 	struct rte_avp_request request;
353 	int ret;
354 
355 	/* setup a configure request */
356 	memset(&request, 0, sizeof(request));
357 	request.req_id = RTE_AVP_REQ_CFG_DEVICE;
358 	memcpy(&request.config, config, sizeof(request.config));
359 
360 	ret = avp_dev_process_request(avp, &request);
361 
362 	return ret == 0 ? request.result : ret;
363 }
364 
365 static int
366 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
367 {
368 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
369 	struct rte_avp_request request;
370 	int ret;
371 
372 	/* setup a shutdown request */
373 	memset(&request, 0, sizeof(request));
374 	request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
375 
376 	ret = avp_dev_process_request(avp, &request);
377 
378 	return ret == 0 ? request.result : ret;
379 }
380 
381 /* translate from host mbuf virtual address to guest virtual address */
382 static inline void *
383 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
384 {
385 	return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
386 				       (uintptr_t)avp->host_mbuf_addr),
387 			   (uintptr_t)avp->mbuf_addr);
388 }
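
/*
 * The translation above is plain pointer arithmetic: the guest address is
 * mbuf_addr + (host_mbuf_address - host_mbuf_addr).  For example, with a
 * hypothetical host pool at 0x7f0000000000 that is mapped locally at
 * 0x100000000, a host buffer at 0x7f0000001000 resolves to 0x100001000.
 */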
389 
390 /* translate from host physical address to guest virtual address */
391 static void *
392 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
393 			  phys_addr_t host_phys_addr)
394 {
395 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
396 	struct rte_mem_resource *resource;
397 	struct rte_avp_memmap_info *info;
398 	struct rte_avp_memmap *map;
399 	off_t offset;
400 	void *addr;
401 	unsigned int i;
402 
403 	addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
404 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
405 	info = (struct rte_avp_memmap_info *)resource->addr;
406 
407 	offset = 0;
408 	for (i = 0; i < info->nb_maps; i++) {
409 		/* search all segments looking for a matching address */
410 		map = &info->maps[i];
411 
412 		if ((host_phys_addr >= map->phys_addr) &&
413 			(host_phys_addr < (map->phys_addr + map->length))) {
414 			/* address is within this segment */
415 			offset += (host_phys_addr - map->phys_addr);
416 			addr = RTE_PTR_ADD(addr, offset);
417 
418 			PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
419 				    host_phys_addr, addr);
420 
421 			return addr;
422 		}
423 		offset += map->length;
424 	}
425 
426 	return NULL;
427 }
428 
429 /* verify that the incoming device version is compatible with our version */
430 static int
431 avp_dev_version_check(uint32_t version)
432 {
433 	uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
434 	uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
435 
436 	if (device <= driver) {
437 		/* the host driver version is less than or equal to ours */
438 		return 0;
439 	}
440 
441 	return 1;
442 }
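
/*
 * In other words, minor version differences are tolerated: once the minor
 * number is stripped, the host version must be less than or equal to
 * AVP_DPDK_DRIVER_VERSION.  A return of 0 means compatible; non-zero means
 * the host is newer than this driver and is rejected.
 */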
443 
444 /* verify that memory regions have expected version and validation markers */
445 static int
446 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
447 {
448 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
449 	struct rte_avp_memmap_info *memmap;
450 	struct rte_avp_device_info *info;
451 	struct rte_mem_resource *resource;
452 	unsigned int i;
453 
454 	/* Dump resource info for debug */
455 	for (i = 0; i < PCI_MAX_RESOURCE; i++) {
456 		resource = &pci_dev->mem_resource[i];
457 		if ((resource->phys_addr == 0) || (resource->len == 0))
458 			continue;
459 
460 		PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
461 			    i, resource->phys_addr,
462 			    resource->len, resource->addr);
463 
464 		switch (i) {
465 		case RTE_AVP_PCI_MEMMAP_BAR:
466 			memmap = (struct rte_avp_memmap_info *)resource->addr;
467 			if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
468 			    (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
469 				PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
470 					    memmap->magic, memmap->version);
471 				return -EINVAL;
472 			}
473 			break;
474 
475 		case RTE_AVP_PCI_DEVICE_BAR:
476 			info = (struct rte_avp_device_info *)resource->addr;
477 			if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
478 			    avp_dev_version_check(info->version)) {
479 				PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
480 					    info->magic, info->version,
481 					    AVP_DPDK_DRIVER_VERSION);
482 				return -EINVAL;
483 			}
484 			break;
485 
486 		case RTE_AVP_PCI_MEMORY_BAR:
487 		case RTE_AVP_PCI_MMIO_BAR:
488 			if (resource->addr == NULL) {
489 				PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
490 					    i);
491 				return -EINVAL;
492 			}
493 			break;
494 
495 		case RTE_AVP_PCI_MSIX_BAR:
496 		default:
497 			/* no validation required */
498 			break;
499 		}
500 	}
501 
502 	return 0;
503 }
504 
505 static int
506 avp_dev_detach(struct rte_eth_dev *eth_dev)
507 {
508 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
509 	int ret;
510 
511 	PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
512 		    eth_dev->data->port_id, avp->device_id);
513 
514 	rte_spinlock_lock(&avp->lock);
515 
516 	if (avp->flags & AVP_F_DETACHED) {
517 		PMD_DRV_LOG(NOTICE, "port %u already detached\n",
518 			    eth_dev->data->port_id);
519 		ret = 0;
520 		goto unlock;
521 	}
522 
523 	/* shutdown the device first so the host stops sending us packets. */
524 	ret = avp_dev_ctrl_shutdown(eth_dev);
525 	if (ret < 0) {
526 		PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
527 			    ret);
528 		avp->flags &= ~AVP_F_DETACHED;
529 		goto unlock;
530 	}
531 
532 	avp->flags |= AVP_F_DETACHED;
533 	rte_wmb();
534 
535 	/* wait for queues to acknowledge the presence of the detach flag */
536 	rte_delay_ms(1);
537 
538 	ret = 0;
539 
540 unlock:
541 	rte_spinlock_unlock(&avp->lock);
542 	return ret;
543 }
544 
545 static void
546 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
547 {
548 	struct avp_dev *avp =
549 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
550 	struct avp_queue *rxq;
551 	uint16_t queue_count;
552 	uint16_t remainder;
553 
554 	rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
555 
556 	/*
557 	 * Must map all AVP fifos as evenly as possible between the configured
558 	 * device queues.  Each device queue will service a subset of the AVP
559 	 * fifos.  If the AVP fifo count does not divide evenly among the device
560 	 * queues, the first 'remainder' device queues each service one extra fifo.
561 	 */
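	/*
	 * Worked example with hypothetical values: 5 AVP fifos shared by two
	 * device queues gives queue_count=2 and remainder=1, so rxq 0 services
	 * fifos 0-2 and rxq 1 services fifos 3-4.
	 */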
562 	queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
563 	remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
564 	if (rx_queue_id < remainder) {
565 		/* these queues must service one extra FIFO */
566 		rxq->queue_base = rx_queue_id * (queue_count + 1);
567 		rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
568 	} else {
569 		/* these queues service the regular number of FIFOs */
570 		rxq->queue_base = ((remainder * (queue_count + 1)) +
571 				   ((rx_queue_id - remainder) * queue_count));
572 		rxq->queue_limit = rxq->queue_base + queue_count - 1;
573 	}
574 
575 	PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
576 		    rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
577 
578 	rxq->queue_id = rxq->queue_base;
579 }
580 
581 static void
582 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
583 {
584 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
585 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
586 	struct rte_avp_device_info *host_info;
587 	void *addr;
588 
589 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
590 	host_info = (struct rte_avp_device_info *)addr;
591 
592 	/*
593 	 * the transmit direction is not negotiated beyond respecting the max
594 	 * number of queues because the host can handle arbitrary guest tx
595 	 * queues (host rx queues).
596 	 */
597 	avp->num_tx_queues = eth_dev->data->nb_tx_queues;
598 
599 	/*
600 	 * the receive direction is more restrictive.  The host requires a
601 	 * minimum number of guest rx queues (host tx queues) therefore
602 	 * negotiate a value that is at least as large as the host minimum
603 	 * requirement.  If the host and guest values are not identical then a
604 	 * mapping will be established in the receive_queue_setup function.
605 	 */
606 	avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
607 				     eth_dev->data->nb_rx_queues);
608 
609 	PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
610 		    avp->num_tx_queues, avp->num_rx_queues);
611 }
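
/*
 * For example (hypothetical values): if the host reports min_rx_queues=8 but
 * the application configured only 4 Rx queues, num_rx_queues is negotiated up
 * to 8 and _avp_set_rx_queue_mappings() spreads those 8 fifos across the 4
 * device queues, two per queue.
 */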
612 
613 static int
614 avp_dev_attach(struct rte_eth_dev *eth_dev)
615 {
616 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
617 	struct rte_avp_device_config config;
618 	unsigned int i;
619 	int ret;
620 
621 	PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
622 		    eth_dev->data->port_id, avp->device_id);
623 
624 	rte_spinlock_lock(&avp->lock);
625 
626 	if (!(avp->flags & AVP_F_DETACHED)) {
627 		PMD_DRV_LOG(NOTICE, "port %u already attached\n",
628 			    eth_dev->data->port_id);
629 		ret = 0;
630 		goto unlock;
631 	}
632 
633 	/*
634 	 * make sure that the detached flag is set prior to reconfiguring the
635 	 * queues.
636 	 */
637 	avp->flags |= AVP_F_DETACHED;
638 	rte_wmb();
639 
640 	/*
641 	 * re-run the device create utility which will parse the new host info
642 	 * and setup the AVP device queue pointers.
643 	 */
644 	ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev);
645 	if (ret < 0) {
646 		PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
647 			    ret);
648 		goto unlock;
649 	}
650 
651 	if (avp->flags & AVP_F_CONFIGURED) {
652 		/*
653 		 * Update the receive queue mapping to handle cases where the
654 		 * source and destination hosts have different queue
655 		 * requirements.  As long as the DETACHED flag is asserted the
656 		 * queue table should not be referenced so it should be safe to
657 		 * update it.
658 		 */
659 		_avp_set_queue_counts(eth_dev);
660 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
661 			_avp_set_rx_queue_mappings(eth_dev, i);
662 
663 		/*
664 		 * Update the host with our config details so that it knows the
665 		 * device is active.
666 		 */
667 		memset(&config, 0, sizeof(config));
668 		config.device_id = avp->device_id;
669 		config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
670 		config.driver_version = AVP_DPDK_DRIVER_VERSION;
671 		config.features = avp->features;
672 		config.num_tx_queues = avp->num_tx_queues;
673 		config.num_rx_queues = avp->num_rx_queues;
674 		config.if_up = !!(avp->flags & AVP_F_LINKUP);
675 
676 		ret = avp_dev_ctrl_set_config(eth_dev, &config);
677 		if (ret < 0) {
678 			PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
679 				    ret);
680 			goto unlock;
681 		}
682 	}
683 
684 	rte_wmb();
685 	avp->flags &= ~AVP_F_DETACHED;
686 
687 	ret = 0;
688 
689 unlock:
690 	rte_spinlock_unlock(&avp->lock);
691 	return ret;
692 }
693 
694 static void
695 avp_dev_interrupt_handler(void *data)
696 {
697 	struct rte_eth_dev *eth_dev = data;
698 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
699 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
700 	uint32_t status, value;
701 	int ret;
702 
703 	if (registers == NULL)
704 		rte_panic("no mapped MMIO register space\n");
705 
706 	/* read the interrupt status register
707 	 * note: this register clears on read so all raised interrupts must be
708 	 *    handled or remembered for later processing
709 	 */
710 	status = AVP_READ32(
711 		RTE_PTR_ADD(registers,
712 			    RTE_AVP_INTERRUPT_STATUS_OFFSET));
713 
714 	if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
715 		/* handle interrupt based on current status */
716 		value = AVP_READ32(
717 			RTE_PTR_ADD(registers,
718 				    RTE_AVP_MIGRATION_STATUS_OFFSET));
719 		switch (value) {
720 		case RTE_AVP_MIGRATION_DETACHED:
721 			ret = avp_dev_detach(eth_dev);
722 			break;
723 		case RTE_AVP_MIGRATION_ATTACHED:
724 			ret = avp_dev_attach(eth_dev);
725 			break;
726 		default:
727 			PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
728 				    value);
729 			ret = -EINVAL;
730 		}
731 
732 		/* acknowledge the request by writing out our current status */
733 		value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
734 		AVP_WRITE32(value,
735 			    RTE_PTR_ADD(registers,
736 					RTE_AVP_MIGRATION_ACK_OFFSET));
737 
738 		PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
739 	}
740 
741 	if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
742 		PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
743 			    status);
744 
745 	/* re-enable UIO interrupt handling */
746 	ret = rte_intr_enable(&pci_dev->intr_handle);
747 	if (ret < 0) {
748 		PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
749 			    ret);
750 		/* continue */
751 	}
752 }
753 
754 static int
755 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
756 {
757 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
758 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
759 	int ret;
760 
761 	if (registers == NULL)
762 		return -EINVAL;
763 
764 	/* enable UIO interrupt handling */
765 	ret = rte_intr_enable(&pci_dev->intr_handle);
766 	if (ret < 0) {
767 		PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
768 			    ret);
769 		return ret;
770 	}
771 
772 	/* inform the device that all interrupts are enabled */
773 	AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
774 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
775 
776 	return 0;
777 }
778 
779 static int
780 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
781 {
782 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
783 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
784 	int ret;
785 
786 	if (registers == NULL)
787 		return 0;
788 
789 	/* inform the device that all interrupts are disabled */
790 	AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
791 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
792 
793 	/* disable UIO interrupt handling */
794 	ret = rte_intr_disable(&pci_dev->intr_handle);
795 	if (ret < 0) {
796 		PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
797 			    ret);
798 		return ret;
799 	}
800 
801 	return 0;
802 }
803 
804 static int
805 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
806 {
807 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
808 	int ret;
809 
810 	/* register a callback handler with UIO for interrupt notifications */
811 	ret = rte_intr_callback_register(&pci_dev->intr_handle,
812 					 avp_dev_interrupt_handler,
813 					 (void *)eth_dev);
814 	if (ret < 0) {
815 		PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
816 			    ret);
817 		return ret;
818 	}
819 
820 	/* enable interrupt processing */
821 	return avp_dev_enable_interrupts(eth_dev);
822 }
823 
824 static int
825 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
826 {
827 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
828 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
829 	uint32_t value;
830 
831 	if (registers == NULL)
832 		return 0;
833 
834 	value = AVP_READ32(RTE_PTR_ADD(registers,
835 				       RTE_AVP_MIGRATION_STATUS_OFFSET));
836 	if (value == RTE_AVP_MIGRATION_DETACHED) {
837 		/* migration is in progress; ack it if we have not already */
838 		AVP_WRITE32(value,
839 			    RTE_PTR_ADD(registers,
840 					RTE_AVP_MIGRATION_ACK_OFFSET));
841 		return 1;
842 	}
843 	return 0;
844 }
845 
846 /*
847  * create an AVP device using the supplied device info by first translating it
848  * to guest address space(s).
849  */
850 static int
851 avp_dev_create(struct rte_pci_device *pci_dev,
852 	       struct rte_eth_dev *eth_dev)
853 {
854 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
855 	struct rte_avp_device_info *host_info;
856 	struct rte_mem_resource *resource;
857 	unsigned int i;
858 
859 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
860 	if (resource->addr == NULL) {
861 		PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
862 			    RTE_AVP_PCI_DEVICE_BAR);
863 		return -EFAULT;
864 	}
865 	host_info = (struct rte_avp_device_info *)resource->addr;
866 
867 	if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
868 		avp_dev_version_check(host_info->version)) {
869 		PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
870 			    host_info->magic, host_info->version,
871 			    AVP_DPDK_DRIVER_VERSION);
872 		return -EINVAL;
873 	}
874 
875 	PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
876 		    RTE_AVP_GET_RELEASE_VERSION(host_info->version),
877 		    RTE_AVP_GET_MAJOR_VERSION(host_info->version),
878 		    RTE_AVP_GET_MINOR_VERSION(host_info->version));
879 
880 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
881 		    host_info->min_tx_queues, host_info->max_tx_queues);
882 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
883 		    host_info->min_rx_queues, host_info->max_rx_queues);
884 	PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
885 		    host_info->features);
886 
887 	if (avp->magic != AVP_ETHDEV_MAGIC) {
888 		/*
889 		 * First time initialization (i.e., not during a VM
890 		 * migration)
891 		 */
892 		memset(avp, 0, sizeof(*avp));
893 		avp->magic = AVP_ETHDEV_MAGIC;
894 		avp->dev_data = eth_dev->data;
895 		avp->port_id = eth_dev->data->port_id;
896 		avp->host_mbuf_size = host_info->mbuf_size;
897 		avp->host_features = host_info->features;
898 		rte_spinlock_init(&avp->lock);
899 		memcpy(&avp->ethaddr.addr_bytes[0],
900 		       host_info->ethaddr, ETHER_ADDR_LEN);
901 		/* adjust max values to not exceed our max */
902 		avp->max_tx_queues =
903 			RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
904 		avp->max_rx_queues =
905 			RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
906 	} else {
907 		/* Re-attaching during migration */
908 
909 		/* TODO... requires validation of host values */
910 		if ((host_info->features & avp->features) != avp->features) {
911 			PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
912 				    avp->features, host_info->features);
913 			/* this should not be possible; continue for now */
914 		}
915 	}
916 
917 	/* the device id is allowed to change over migrations */
918 	avp->device_id = host_info->device_id;
919 
920 	/* translate incoming host addresses to guest address space */
921 	PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
922 		    host_info->tx_phys);
923 	PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
924 		    host_info->alloc_phys);
925 	for (i = 0; i < avp->max_tx_queues; i++) {
926 		avp->tx_q[i] = avp_dev_translate_address(eth_dev,
927 			host_info->tx_phys + (i * host_info->tx_size));
928 
929 		avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
930 			host_info->alloc_phys + (i * host_info->alloc_size));
931 	}
932 
933 	PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
934 		    host_info->rx_phys);
935 	PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
936 		    host_info->free_phys);
937 	for (i = 0; i < avp->max_rx_queues; i++) {
938 		avp->rx_q[i] = avp_dev_translate_address(eth_dev,
939 			host_info->rx_phys + (i * host_info->rx_size));
940 		avp->free_q[i] = avp_dev_translate_address(eth_dev,
941 			host_info->free_phys + (i * host_info->free_size));
942 	}
943 
944 	PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
945 		    host_info->req_phys);
946 	PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
947 		    host_info->resp_phys);
948 	PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
949 		    host_info->sync_phys);
950 	PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
951 		    host_info->mbuf_phys);
952 	avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
953 	avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
954 	avp->sync_addr =
955 		avp_dev_translate_address(eth_dev, host_info->sync_phys);
956 	avp->mbuf_addr =
957 		avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
958 
959 	/*
960 	 * store the host mbuf virtual address so that we can calculate
961 	 * relative offsets for each mbuf as they are processed
962 	 */
963 	avp->host_mbuf_addr = host_info->mbuf_va;
964 	avp->host_sync_addr = host_info->sync_va;
965 
966 	/*
967 	 * store the maximum packet length that is supported by the host.
968 	 */
969 	avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
970 	PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
971 				host_info->max_rx_pkt_len);
972 
973 	return 0;
974 }
975 
976 /*
977  * This function is based on the probe() function in avp_pci.c
978  * It returns 0 on success.
979  */
980 static int
981 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
982 {
983 	struct avp_dev *avp =
984 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
985 	struct rte_pci_device *pci_dev;
986 	int ret;
987 
988 	pci_dev = AVP_DEV_TO_PCI(eth_dev);
989 	eth_dev->dev_ops = &avp_eth_dev_ops;
990 	eth_dev->rx_pkt_burst = &avp_recv_pkts;
991 	eth_dev->tx_pkt_burst = &avp_xmit_pkts;
992 
993 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
994 		/*
995 		 * no setup required on secondary processes.  All data is saved
996 		 * in dev_private by the primary process. All resources should
997 		 * be mapped to the same virtual address so all pointers should
998 		 * be valid.
999 		 */
1000 		if (eth_dev->data->scattered_rx) {
1001 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1002 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1003 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1004 		}
1005 		return 0;
1006 	}
1007 
1008 	rte_eth_copy_pci_info(eth_dev, pci_dev);
1009 
1010 	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
1011 
1012 	/* Check current migration status */
1013 	if (avp_dev_migration_pending(eth_dev)) {
1014 		PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
1015 		return -EBUSY;
1016 	}
1017 
1018 	/* Check BAR resources */
1019 	ret = avp_dev_check_regions(eth_dev);
1020 	if (ret < 0) {
1021 		PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
1022 			    ret);
1023 		return ret;
1024 	}
1025 
1026 	/* Enable interrupts */
1027 	ret = avp_dev_setup_interrupts(eth_dev);
1028 	if (ret < 0) {
1029 		PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1030 		return ret;
1031 	}
1032 
1033 	/* Create the AVP device by parsing the host provided device info */
1034 	ret = avp_dev_create(pci_dev, eth_dev);
1035 	if (ret < 0) {
1036 		PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1037 		return ret;
1038 	}
1039 
1040 	/* Allocate memory for storing MAC addresses */
1041 	eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
1042 	if (eth_dev->data->mac_addrs == NULL) {
1043 		PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1044 			    ETHER_ADDR_LEN);
1045 		return -ENOMEM;
1046 	}
1047 
1048 	/* Get a mac from device config */
1049 	ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1050 
1051 	return 0;
1052 }
1053 
1054 static int
1055 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1056 {
1057 	int ret;
1058 
1059 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1060 		return -EPERM;
1061 
1062 	if (eth_dev->data == NULL)
1063 		return 0;
1064 
1065 	ret = avp_dev_disable_interrupts(eth_dev);
1066 	if (ret != 0) {
1067 		PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
1068 		return ret;
1069 	}
1070 
1071 	if (eth_dev->data->mac_addrs != NULL) {
1072 		rte_free(eth_dev->data->mac_addrs);
1073 		eth_dev->data->mac_addrs = NULL;
1074 	}
1075 
1076 	return 0;
1077 }
1078 
1079 
1080 static struct eth_driver rte_avp_pmd = {
1081 	{
1082 		.id_table = pci_id_avp_map,
1083 		.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1084 		.probe = rte_eth_dev_pci_probe,
1085 		.remove = rte_eth_dev_pci_remove,
1086 	},
1087 	.eth_dev_init = eth_avp_dev_init,
1088 	.eth_dev_uninit = eth_avp_dev_uninit,
1089 	.dev_private_size = sizeof(struct avp_adapter),
1090 };
1091 
1092 static int
1093 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1094 			 struct avp_dev *avp)
1095 {
1096 	unsigned int max_rx_pkt_len;
1097 
1098 	max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1099 
1100 	if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1101 	    (max_rx_pkt_len > avp->host_mbuf_size)) {
1102 		/*
1103 		 * If the guest MTU is greater than either the host or guest
1104 		 * buffers then chained mbufs have to be enabled in the TX
1105 		 * direction.  It is assumed that the application will not need
1106 		 * to send packets larger than their max_rx_pkt_len (MRU).
1107 		 */
1108 		return 1;
1109 	}
1110 
1111 	if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1112 	    (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1113 		/*
1114 		 * If the host MRU is greater than its own mbuf size or the
1115 		 * guest mbuf size then chained mbufs have to be enabled in the
1116 		 * RX direction.
1117 		 */
1118 		return 1;
1119 	}
1120 
1121 	return 0;
1122 }
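
/*
 * For example (hypothetical values): with 2048 byte guest and host mbuf data
 * areas and a configured max_rx_pkt_len of 9000 bytes, the first test above
 * fires and avp_dev_rx_queue_setup() switches the burst handlers to the
 * scattered variants.
 */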
1123 
1124 static int
1125 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1126 		       uint16_t rx_queue_id,
1127 		       uint16_t nb_rx_desc,
1128 		       unsigned int socket_id,
1129 		       const struct rte_eth_rxconf *rx_conf,
1130 		       struct rte_mempool *pool)
1131 {
1132 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1133 	struct rte_pktmbuf_pool_private *mbp_priv;
1134 	struct avp_queue *rxq;
1135 
1136 	if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1137 		PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1138 			    rx_queue_id, eth_dev->data->nb_rx_queues);
1139 		return -EINVAL;
1140 	}
1141 
1142 	/* Save mbuf pool pointer */
1143 	avp->pool = pool;
1144 
1145 	/* Save the local mbuf size */
1146 	mbp_priv = rte_mempool_get_priv(pool);
1147 	avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1148 	avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1149 
1150 	if (avp_dev_enable_scattered(eth_dev, avp)) {
1151 		if (!eth_dev->data->scattered_rx) {
1152 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1153 			eth_dev->data->scattered_rx = 1;
1154 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1155 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1156 		}
1157 	}
1158 
1159 	PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1160 		    avp->max_rx_pkt_len,
1161 		    eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1162 		    avp->host_mbuf_size,
1163 		    avp->guest_mbuf_size);
1164 
1165 	/* allocate a queue object */
1166 	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1167 				 RTE_CACHE_LINE_SIZE, socket_id);
1168 	if (rxq == NULL) {
1169 		PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1170 		return -ENOMEM;
1171 	}
1172 
1173 	/* save back pointers to AVP and Ethernet devices */
1174 	rxq->avp = avp;
1175 	rxq->dev_data = eth_dev->data;
1176 	eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1177 
1178 	/* setup the queue receive mapping for the current queue. */
1179 	_avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1180 
1181 	PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1182 
1183 	(void)nb_rx_desc;
1184 	(void)rx_conf;
1185 	return 0;
1186 }
1187 
1188 static int
1189 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1190 		       uint16_t tx_queue_id,
1191 		       uint16_t nb_tx_desc,
1192 		       unsigned int socket_id,
1193 		       const struct rte_eth_txconf *tx_conf)
1194 {
1195 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1196 	struct avp_queue *txq;
1197 
1198 	if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1199 		PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1200 			    tx_queue_id, eth_dev->data->nb_tx_queues);
1201 		return -EINVAL;
1202 	}
1203 
1204 	/* allocate a queue object */
1205 	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1206 				 RTE_CACHE_LINE_SIZE, socket_id);
1207 	if (txq == NULL) {
1208 		PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1209 		return -ENOMEM;
1210 	}
1211 
1212 	/* only the configured transmit queues are used */
1213 	txq->queue_id = tx_queue_id;
1214 	txq->queue_base = tx_queue_id;
1215 	txq->queue_limit = tx_queue_id;
1216 
1217 	/* save back pointers to AVP and Ethernet devices */
1218 	txq->avp = avp;
1219 	txq->dev_data = eth_dev->data;
1220 	eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1221 
1222 	PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1223 
1224 	(void)nb_tx_desc;
1225 	(void)tx_conf;
1226 	return 0;
1227 }
1228 
1229 static inline int
1230 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
1231 {
1232 	uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1233 	uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1234 	return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1235 }
1236 
1237 static inline int
1238 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1239 {
1240 	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
1241 
1242 	if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1243 		/* allow all packets destined to our address */
1244 		return 0;
1245 	}
1246 
1247 	if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
1248 		/* allow all broadcast packets */
1249 		return 0;
1250 	}
1251 
1252 	if (likely(is_multicast_ether_addr(&eth->d_addr))) {
1253 		/* allow all multicast packets */
1254 		return 0;
1255 	}
1256 
1257 	if (avp->flags & AVP_F_PROMISC) {
1258 		/* allow all packets when in promiscuous mode */
1259 		return 0;
1260 	}
1261 
1262 	return -1;
1263 }
1264 
1265 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1266 static inline void
1267 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
1268 {
1269 	struct rte_avp_desc *first_buf;
1270 	struct rte_avp_desc *pkt_buf;
1271 	unsigned int pkt_len;
1272 	unsigned int nb_segs;
1273 	void *pkt_data;
1274 	unsigned int i;
1275 
1276 	first_buf = avp_dev_translate_buffer(avp, buf);
1277 
1278 	i = 0;
1279 	pkt_len = 0;
1280 	nb_segs = first_buf->nb_segs;
1281 	do {
1282 		/* Adjust pointers for guest addressing */
1283 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1284 		if (pkt_buf == NULL)
1285 			rte_panic("bad buffer: segment %u has an invalid address %p\n",
1286 				  i, buf);
1287 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1288 		if (pkt_data == NULL)
1289 			rte_panic("bad buffer: segment %u has a NULL data pointer\n",
1290 				  i);
1291 		if (pkt_buf->data_len == 0)
1292 			rte_panic("bad buffer: segment %u has 0 data length\n",
1293 				  i);
1294 		pkt_len += pkt_buf->data_len;
1295 		nb_segs--;
1296 		i++;
1297 
1298 	} while (nb_segs && (buf = pkt_buf->next) != NULL);
1299 
1300 	if (nb_segs != 0)
1301 		rte_panic("bad buffer: expected %u segments found %u\n",
1302 			  first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
1303 	if (pkt_len != first_buf->pkt_len)
1304 		rte_panic("bad buffer: expected length %u found %u\n",
1305 			  first_buf->pkt_len, pkt_len);
1306 }
1307 
1308 #define avp_dev_buffer_sanity_check(a, b) \
1309 	__avp_dev_buffer_sanity_check((a), (b))
1310 
1311 #else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */
1312 
1313 #define avp_dev_buffer_sanity_check(a, b) do {} while (0)
1314 
1315 #endif
1316 
1317 /*
1318  * Copy a host buffer chain to a set of mbufs.	This function assumes that
1319  * there are exactly the required number of mbufs to copy all source bytes.
1320  */
1321 static inline struct rte_mbuf *
1322 avp_dev_copy_from_buffers(struct avp_dev *avp,
1323 			  struct rte_avp_desc *buf,
1324 			  struct rte_mbuf **mbufs,
1325 			  unsigned int count)
1326 {
1327 	struct rte_mbuf *m_previous = NULL;
1328 	struct rte_avp_desc *pkt_buf;
1329 	unsigned int total_length = 0;
1330 	unsigned int copy_length;
1331 	unsigned int src_offset;
1332 	struct rte_mbuf *m;
1333 	uint16_t ol_flags;
1334 	uint16_t vlan_tci;
1335 	void *pkt_data;
1336 	unsigned int i;
1337 
1338 	avp_dev_buffer_sanity_check(avp, buf);
1339 
1340 	/* setup the first source buffer */
1341 	pkt_buf = avp_dev_translate_buffer(avp, buf);
1342 	pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1343 	total_length = pkt_buf->pkt_len;
1344 	src_offset = 0;
1345 
1346 	if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1347 		ol_flags = PKT_RX_VLAN_PKT;
1348 		vlan_tci = pkt_buf->vlan_tci;
1349 	} else {
1350 		ol_flags = 0;
1351 		vlan_tci = 0;
1352 	}
1353 
1354 	for (i = 0; (i < count) && (buf != NULL); i++) {
1355 		/* fill each destination buffer */
1356 		m = mbufs[i];
1357 
1358 		if (m_previous != NULL)
1359 			m_previous->next = m;
1360 
1361 		m_previous = m;
1362 
1363 		do {
1364 			/*
1365 			 * Copy as many source buffers as will fit in the
1366 			 * destination buffer.
1367 			 */
1368 			copy_length = RTE_MIN((avp->guest_mbuf_size -
1369 					       rte_pktmbuf_data_len(m)),
1370 					      (pkt_buf->data_len -
1371 					       src_offset));
1372 			rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1373 					       rte_pktmbuf_data_len(m)),
1374 				   RTE_PTR_ADD(pkt_data, src_offset),
1375 				   copy_length);
1376 			rte_pktmbuf_data_len(m) += copy_length;
1377 			src_offset += copy_length;
1378 
1379 			if (likely(src_offset == pkt_buf->data_len)) {
1380 				/* need a new source buffer */
1381 				buf = pkt_buf->next;
1382 				if (buf != NULL) {
1383 					pkt_buf = avp_dev_translate_buffer(
1384 						avp, buf);
1385 					pkt_data = avp_dev_translate_buffer(
1386 						avp, pkt_buf->data);
1387 					src_offset = 0;
1388 				}
1389 			}
1390 
1391 			if (unlikely(rte_pktmbuf_data_len(m) ==
1392 				     avp->guest_mbuf_size)) {
1393 				/* need a new destination mbuf */
1394 				break;
1395 			}
1396 
1397 		} while (buf != NULL);
1398 	}
1399 
1400 	m = mbufs[0];
1401 	m->ol_flags = ol_flags;
1402 	m->nb_segs = count;
1403 	rte_pktmbuf_pkt_len(m) = total_length;
1404 	m->vlan_tci = vlan_tci;
1405 
1406 	__rte_mbuf_sanity_check(m, 1);
1407 
1408 	return m;
1409 }
1410 
1411 static uint16_t
1412 avp_recv_scattered_pkts(void *rx_queue,
1413 			struct rte_mbuf **rx_pkts,
1414 			uint16_t nb_pkts)
1415 {
1416 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1417 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1418 	struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1419 	struct avp_dev *avp = rxq->avp;
1420 	struct rte_avp_desc *pkt_buf;
1421 	struct rte_avp_fifo *free_q;
1422 	struct rte_avp_fifo *rx_q;
1423 	struct rte_avp_desc *buf;
1424 	unsigned int count, avail, n;
1425 	unsigned int guest_mbuf_size;
1426 	struct rte_mbuf *m;
1427 	unsigned int required;
1428 	unsigned int buf_len;
1429 	unsigned int port_id;
1430 	unsigned int i;
1431 
1432 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1433 		/* VM live migration in progress */
1434 		return 0;
1435 	}
1436 
1437 	guest_mbuf_size = avp->guest_mbuf_size;
1438 	port_id = avp->port_id;
1439 	rx_q = avp->rx_q[rxq->queue_id];
1440 	free_q = avp->free_q[rxq->queue_id];
1441 
1442 	/* setup next queue to service */
1443 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1444 		(rxq->queue_id + 1) : rxq->queue_base;
1445 
1446 	/* determine how many slots are available in the free queue */
1447 	count = avp_fifo_free_count(free_q);
1448 
1449 	/* determine how many packets are available in the rx queue */
1450 	avail = avp_fifo_count(rx_q);
1451 
1452 	/* determine how many packets can be received */
1453 	count = RTE_MIN(count, avail);
1454 	count = RTE_MIN(count, nb_pkts);
1455 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1456 
1457 	if (unlikely(count == 0)) {
1458 		/* no free buffers, or no buffers on the rx queue */
1459 		return 0;
1460 	}
1461 
1462 	/* retrieve pending packets */
1463 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1464 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1465 		   count, rx_q);
1466 
1467 	count = 0;
1468 	for (i = 0; i < n; i++) {
1469 		/* prefetch next entry while processing current one */
1470 		if (i + 1 < n) {
1471 			pkt_buf = avp_dev_translate_buffer(avp,
1472 							   avp_bufs[i + 1]);
1473 			rte_prefetch0(pkt_buf);
1474 		}
1475 		buf = avp_bufs[i];
1476 
1477 		/* Peek into the first buffer to determine the total length */
1478 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1479 		buf_len = pkt_buf->pkt_len;
1480 
1481 		/* Allocate enough mbufs to receive the entire packet */
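		/*
		 * ceiling division: e.g. a hypothetical 9000 byte packet with a
		 * guest_mbuf_size of 2048 bytes needs (9000 + 2047) / 2048 = 5 mbufs.
		 */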
1482 		required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1483 		if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1484 			rxq->dev_data->rx_mbuf_alloc_failed++;
1485 			continue;
1486 		}
1487 
1488 		/* Copy the data from the buffers to our mbufs */
1489 		m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1490 
1491 		/* finalize mbuf */
1492 		m->port = port_id;
1493 
1494 		if (_avp_mac_filter(avp, m) != 0) {
1495 			/* silently discard packets not destined to our MAC */
1496 			rte_pktmbuf_free(m);
1497 			continue;
1498 		}
1499 
1500 		/* return new mbuf to caller */
1501 		rx_pkts[count++] = m;
1502 		rxq->bytes += buf_len;
1503 	}
1504 
1505 	rxq->packets += count;
1506 
1507 	/* return the buffers to the free queue */
1508 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1509 
1510 	return count;
1511 }
1512 
1513 
1514 static uint16_t
1515 avp_recv_pkts(void *rx_queue,
1516 	      struct rte_mbuf **rx_pkts,
1517 	      uint16_t nb_pkts)
1518 {
1519 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1520 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1521 	struct avp_dev *avp = rxq->avp;
1522 	struct rte_avp_desc *pkt_buf;
1523 	struct rte_avp_fifo *free_q;
1524 	struct rte_avp_fifo *rx_q;
1525 	unsigned int count, avail, n;
1526 	unsigned int pkt_len;
1527 	struct rte_mbuf *m;
1528 	char *pkt_data;
1529 	unsigned int i;
1530 
1531 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1532 		/* VM live migration in progress */
1533 		return 0;
1534 	}
1535 
1536 	rx_q = avp->rx_q[rxq->queue_id];
1537 	free_q = avp->free_q[rxq->queue_id];
1538 
1539 	/* setup next queue to service */
1540 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1541 		(rxq->queue_id + 1) : rxq->queue_base;
1542 
1543 	/* determine how many slots are available in the free queue */
1544 	count = avp_fifo_free_count(free_q);
1545 
1546 	/* determine how many packets are available in the rx queue */
1547 	avail = avp_fifo_count(rx_q);
1548 
1549 	/* determine how many packets can be received */
1550 	count = RTE_MIN(count, avail);
1551 	count = RTE_MIN(count, nb_pkts);
1552 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1553 
1554 	if (unlikely(count == 0)) {
1555 		/* no free buffers, or no buffers on the rx queue */
1556 		return 0;
1557 	}
1558 
1559 	/* retrieve pending packets */
1560 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1561 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1562 		   count, rx_q);
1563 
1564 	count = 0;
1565 	for (i = 0; i < n; i++) {
1566 		/* prefetch next entry while processing current one */
1567 		if (i < n - 1) {
1568 			pkt_buf = avp_dev_translate_buffer(avp,
1569 							   avp_bufs[i + 1]);
1570 			rte_prefetch0(pkt_buf);
1571 		}
1572 
1573 		/* Adjust host pointers for guest addressing */
1574 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1575 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1576 		pkt_len = pkt_buf->pkt_len;
1577 
1578 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1579 			     (pkt_buf->nb_segs > 1))) {
1580 			/*
1581 			 * application should be using the scattered receive
1582 			 * function
1583 			 */
1584 			rxq->errors++;
1585 			continue;
1586 		}
1587 
1588 		/* allocate a local mbuf for the received packet */
1589 		m = rte_pktmbuf_alloc(avp->pool);
1590 		if (unlikely(m == NULL)) {
1591 			rxq->dev_data->rx_mbuf_alloc_failed++;
1592 			continue;
1593 		}
1594 
1595 		/* copy data out of the host buffer to our buffer */
1596 		m->data_off = RTE_PKTMBUF_HEADROOM;
1597 		rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1598 
1599 		/* initialize the local mbuf */
1600 		rte_pktmbuf_data_len(m) = pkt_len;
1601 		rte_pktmbuf_pkt_len(m) = pkt_len;
1602 		m->port = avp->port_id;
1603 
1604 		if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1605 			m->ol_flags = PKT_RX_VLAN_PKT;
1606 			m->vlan_tci = pkt_buf->vlan_tci;
1607 		}
1608 
1609 		if (_avp_mac_filter(avp, m) != 0) {
1610 			/* silently discard packets not destined to our MAC */
1611 			rte_pktmbuf_free(m);
1612 			continue;
1613 		}
1614 
1615 		/* return new mbuf to caller */
1616 		rx_pkts[count++] = m;
1617 		rxq->bytes += pkt_len;
1618 	}
1619 
1620 	rxq->packets += count;
1621 
1622 	/* return the buffers to the free queue */
1623 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1624 
1625 	return count;
1626 }
1627 
1628 /*
1629  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1630  * there are sufficient destination buffers to contain the entire source
1631  * packet.
1632  */
1633 static inline uint16_t
1634 avp_dev_copy_to_buffers(struct avp_dev *avp,
1635 			struct rte_mbuf *mbuf,
1636 			struct rte_avp_desc **buffers,
1637 			unsigned int count)
1638 {
1639 	struct rte_avp_desc *previous_buf = NULL;
1640 	struct rte_avp_desc *first_buf = NULL;
1641 	struct rte_avp_desc *pkt_buf;
1642 	struct rte_avp_desc *buf;
1643 	size_t total_length;
1644 	struct rte_mbuf *m;
1645 	size_t copy_length;
1646 	size_t src_offset;
1647 	char *pkt_data;
1648 	unsigned int i;
1649 
1650 	__rte_mbuf_sanity_check(mbuf, 1);
1651 
1652 	m = mbuf;
1653 	src_offset = 0;
1654 	total_length = rte_pktmbuf_pkt_len(m);
1655 	for (i = 0; (i < count) && (m != NULL); i++) {
1656 		/* fill each destination buffer */
1657 		buf = buffers[i];
1658 
1659 		if (i < count - 1) {
1660 			/* prefetch next entry while processing this one */
1661 			pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1662 			rte_prefetch0(pkt_buf);
1663 		}
1664 
1665 		/* Adjust pointers for guest addressing */
1666 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1667 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1668 
1669 		/* setup the buffer chain */
1670 		if (previous_buf != NULL)
1671 			previous_buf->next = buf;
1672 		else
1673 			first_buf = pkt_buf;
1674 
1675 		previous_buf = pkt_buf;
1676 
1677 		do {
1678 			/*
1679 			 * copy as many source mbuf segments as will fit in the
1680 			 * destination buffer.
1681 			 */
1682 			copy_length = RTE_MIN((avp->host_mbuf_size -
1683 					       pkt_buf->data_len),
1684 					      (rte_pktmbuf_data_len(m) -
1685 					       src_offset));
1686 			rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1687 				   RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1688 					       src_offset),
1689 				   copy_length);
1690 			pkt_buf->data_len += copy_length;
1691 			src_offset += copy_length;
1692 
1693 			if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1694 				/* need a new source buffer */
1695 				m = m->next;
1696 				src_offset = 0;
1697 			}
1698 
1699 			if (unlikely(pkt_buf->data_len ==
1700 				     avp->host_mbuf_size)) {
1701 				/* need a new destination buffer */
1702 				break;
1703 			}
1704 
1705 		} while (m != NULL);
1706 	}
1707 
1708 	first_buf->nb_segs = count;
1709 	first_buf->pkt_len = total_length;
1710 
1711 	if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1712 		first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1713 		first_buf->vlan_tci = mbuf->vlan_tci;
1714 	}
1715 
1716 	avp_dev_buffer_sanity_check(avp, buffers[0]);
1717 
1718 	return total_length;
1719 }
1720 
1721 
1722 static uint16_t
1723 avp_xmit_scattered_pkts(void *tx_queue,
1724 			struct rte_mbuf **tx_pkts,
1725 			uint16_t nb_pkts)
1726 {
1727 	struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1728 				       RTE_AVP_MAX_MBUF_SEGMENTS)];
1729 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1730 	struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1731 	struct avp_dev *avp = txq->avp;
1732 	struct rte_avp_fifo *alloc_q;
1733 	struct rte_avp_fifo *tx_q;
1734 	unsigned int count, avail, n;
1735 	unsigned int orig_nb_pkts;
1736 	struct rte_mbuf *m;
1737 	unsigned int required;
1738 	unsigned int segments;
1739 	unsigned int tx_bytes;
1740 	unsigned int i;
1741 
1742 	orig_nb_pkts = nb_pkts;
1743 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1744 		/* VM live migration in progress */
1745 		/* TODO ... buffer for X packets then drop? */
1746 		txq->errors += nb_pkts;
1747 		return 0;
1748 	}
1749 
1750 	tx_q = avp->tx_q[txq->queue_id];
1751 	alloc_q = avp->alloc_q[txq->queue_id];
1752 
1753 	/* limit the number of transmitted packets to the max burst size */
1754 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1755 		nb_pkts = AVP_MAX_TX_BURST;
1756 
1757 	/* determine how many buffers are available to copy into */
1758 	avail = avp_fifo_count(alloc_q);
1759 	if (unlikely(avail > (AVP_MAX_TX_BURST *
1760 			      RTE_AVP_MAX_MBUF_SEGMENTS)))
1761 		avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
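	/*
	 * the clamp keeps the later avp_fifo_get() request within the
	 * bounds of the avp_bufs[] array
	 */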
1762 
1763 	/* determine how many slots are available in the transmit queue */
1764 	count = avp_fifo_free_count(tx_q);
1765 
1766 	/* determine how many packets can be sent */
1767 	nb_pkts = RTE_MIN(count, nb_pkts);
1768 
1769 	/* determine how many packets will fit in the available buffers */
1770 	count = 0;
1771 	segments = 0;
1772 	for (i = 0; i < nb_pkts; i++) {
1773 		m = tx_pkts[i];
1774 		if (likely(i < (unsigned int)nb_pkts - 1)) {
1775 			/* prefetch next entry while processing this one */
1776 			rte_prefetch0(tx_pkts[i + 1]);
1777 		}
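		/*
		 * host buffers needed for this packet, i.e.
		 * ceil(pkt_len / host_mbuf_size); e.g. a 3000 byte packet
		 * with 2048 byte host buffers requires two buffers.
		 */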
1778 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1779 			avp->host_mbuf_size;
1780 
1781 		if (unlikely((required == 0) ||
1782 			     (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1783 			break;
1784 		else if (unlikely(required + segments > avail))
1785 			break;
1786 		segments += required;
1787 		count++;
1788 	}
1789 	nb_pkts = count;
1790 
1791 	if (unlikely(nb_pkts == 0)) {
1792 		/* no available buffers, or no space on the tx queue */
1793 		txq->errors += orig_nb_pkts;
1794 		return 0;
1795 	}
1796 
1797 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1798 		   nb_pkts, tx_q);
1799 
1800 	/* retrieve sufficient send buffers */
1801 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1802 	if (unlikely(n != segments)) {
1803 		PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1804 			   "n=%u, segments=%u, orig=%u\n",
1805 			   n, segments, orig_nb_pkts);
1806 		txq->errors += orig_nb_pkts;
1807 		return 0;
1808 	}
1809 
1810 	tx_bytes = 0;
1811 	count = 0;
1812 	for (i = 0; i < nb_pkts; i++) {
1813 		/* process each packet to be transmitted */
1814 		m = tx_pkts[i];
1815 
1816 		/* determine how many buffers are required for this packet */
1817 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1818 			avp->host_mbuf_size;
1819 
1820 		tx_bytes += avp_dev_copy_to_buffers(avp, m,
1821 						    &avp_bufs[count], required);
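		/* only the head descriptor of each chain goes on the tx queue */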
1822 		tx_bufs[i] = avp_bufs[count];
1823 		count += required;
1824 
1825 		/* free the original mbuf */
1826 		rte_pktmbuf_free(m);
1827 	}
1828 
1829 	txq->packets += nb_pkts;
1830 	txq->bytes += tx_bytes;
1831 
1832 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1833 	for (i = 0; i < nb_pkts; i++)
1834 		avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1835 #endif
1836 
1837 	/* send the packets */
1838 	n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1839 	if (unlikely(n != orig_nb_pkts))
1840 		txq->errors += (orig_nb_pkts - n);
1841 
1842 	return n;
1843 }
1844 
1845 
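/*
 * Transmit a burst of packets that each fit within a single host buffer.
 * Oversized packets are truncated and counted as errors; they should have
 * been sent with the scattered transmit function instead.
 */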
1846 static uint16_t
1847 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1848 {
1849 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1850 	struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1851 	struct avp_dev *avp = txq->avp;
1852 	struct rte_avp_desc *pkt_buf;
1853 	struct rte_avp_fifo *alloc_q;
1854 	struct rte_avp_fifo *tx_q;
1855 	unsigned int count, avail, n;
1856 	struct rte_mbuf *m;
1857 	unsigned int pkt_len;
1858 	unsigned int tx_bytes;
1859 	char *pkt_data;
1860 	unsigned int i;
1861 
1862 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1863 		/* VM live migration in progress */
1864 		/* TODO ... buffer for X packets then drop? */
1865 		txq->errors++;
1866 		return 0;
1867 	}
1868 
1869 	tx_q = avp->tx_q[txq->queue_id];
1870 	alloc_q = avp->alloc_q[txq->queue_id];
1871 
1872 	/* limit the number of transmitted packets to the max burst size */
1873 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1874 		nb_pkts = AVP_MAX_TX_BURST;
1875 
1876 	/* determine how many buffers are available to copy into */
1877 	avail = avp_fifo_count(alloc_q);
1878 
1879 	/* determine how many slots are available in the transmit queue */
1880 	count = avp_fifo_free_count(tx_q);
1881 
1882 	/* determine how many packets can be sent */
1883 	count = RTE_MIN(count, avail);
1884 	count = RTE_MIN(count, nb_pkts);
1885 
1886 	if (unlikely(count == 0)) {
1887 		/* no available buffers, or no space on the tx queue */
1888 		txq->errors += nb_pkts;
1889 		return 0;
1890 	}
1891 
1892 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1893 		   count, tx_q);
1894 
1895 	/* retrieve sufficient send buffers */
1896 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1897 	if (unlikely(n != count)) {
1898 		txq->errors++;
1899 		return 0;
1900 	}
1901 
1902 	tx_bytes = 0;
1903 	for (i = 0; i < count; i++) {
1904 		/* prefetch next entry while processing the current one */
1905 		if (i < count - 1) {
1906 			pkt_buf = avp_dev_translate_buffer(avp,
1907 							   avp_bufs[i + 1]);
1908 			rte_prefetch0(pkt_buf);
1909 		}
1910 
1911 		/* process each packet to be transmitted */
1912 		m = tx_pkts[i];
1913 
1914 		/* Adjust pointers for guest addressing */
1915 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1916 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1917 		pkt_len = rte_pktmbuf_pkt_len(m);
1918 
1919 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1920 					 (pkt_len > avp->host_mbuf_size))) {
1921 			/*
1922 			 * The application should be using the scattered
1923 			 * transmit function for a packet this large; send it
1924 			 * truncated rather than pay the cost of returning the
1925 			 * already allocated buffer to the free list.  This
1926 			 * should not happen since the application should have
1927 			 * set max_rx_pkt_len based on its MTU and should be
1928 			 * policing its own packet sizes.
1929 			 */
1930 			txq->errors++;
1931 			pkt_len = RTE_MIN(avp->guest_mbuf_size,
1932 					  avp->host_mbuf_size);
1933 		}
1934 
1935 		/* copy data out of our mbuf and into the AVP buffer */
1936 		rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1937 		pkt_buf->pkt_len = pkt_len;
1938 		pkt_buf->data_len = pkt_len;
1939 		pkt_buf->nb_segs = 1;
1940 		pkt_buf->next = NULL;
1941 
1942 		if (m->ol_flags & PKT_TX_VLAN_PKT) {
1943 			pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1944 			pkt_buf->vlan_tci = m->vlan_tci;
1945 		}
1946 
1947 		tx_bytes += pkt_len;
1948 
1949 		/* free the original mbuf */
1950 		rte_pktmbuf_free(m);
1951 	}
1952 
1953 	txq->packets += count;
1954 	txq->bytes += tx_bytes;
1955 
1956 	/* send the packets */
1957 	n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1958 
1959 	return n;
1960 }
1961 
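/* Remove all references to this Rx queue from the device data. */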
1962 static void
1963 avp_dev_rx_queue_release(void *rx_queue)
1964 {
1965 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1966 	struct avp_dev *avp = rxq->avp;
1967 	struct rte_eth_dev_data *data = avp->dev_data;
1968 	unsigned int i;
1969 
1970 	for (i = 0; i < avp->num_rx_queues; i++) {
1971 		if (data->rx_queues[i] == rxq)
1972 			data->rx_queues[i] = NULL;
1973 	}
1974 }
1975 
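/* Remove all references to this Tx queue from the device data. */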
1976 static void
1977 avp_dev_tx_queue_release(void *tx_queue)
1978 {
1979 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1980 	struct avp_dev *avp = txq->avp;
1981 	struct rte_eth_dev_data *data = avp->dev_data;
1982 	unsigned int i;
1983 
1984 	for (i = 0; i < avp->num_tx_queues; i++) {
1985 		if (data->tx_queues[i] == txq)
1986 			data->tx_queues[i] = NULL;
1987 	}
1988 }
1989 
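/*
 * Configure the device: set up the requested queue counts and VLAN offloads
 * and then send the resulting configuration to the host.
 */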
1990 static int
1991 avp_dev_configure(struct rte_eth_dev *eth_dev)
1992 {
1993 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
1994 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1995 	struct rte_avp_device_info *host_info;
1996 	struct rte_avp_device_config config;
1997 	int mask = 0;
1998 	void *addr;
1999 	int ret;
2000 
2001 	rte_spinlock_lock(&avp->lock);
2002 	if (avp->flags & AVP_F_DETACHED) {
2003 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2004 		ret = -ENOTSUP;
2005 		goto unlock;
2006 	}
2007 
2008 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2009 	host_info = (struct rte_avp_device_info *)addr;
2010 
2011 	/* Set up the required number of queues */
2012 	_avp_set_queue_counts(eth_dev);
2013 
2014 	mask = (ETH_VLAN_STRIP_MASK |
2015 		ETH_VLAN_FILTER_MASK |
2016 		ETH_VLAN_EXTEND_MASK);
2017 	avp_vlan_offload_set(eth_dev, mask);
2018 
2019 	/* update device config */
2020 	memset(&config, 0, sizeof(config));
2021 	config.device_id = host_info->device_id;
2022 	config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2023 	config.driver_version = AVP_DPDK_DRIVER_VERSION;
2024 	config.features = avp->features;
2025 	config.num_tx_queues = avp->num_tx_queues;
2026 	config.num_rx_queues = avp->num_rx_queues;
2027 
2028 	ret = avp_dev_ctrl_set_config(eth_dev, &config);
2029 	if (ret < 0) {
2030 		PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2031 			    ret);
2032 		goto unlock;
2033 	}
2034 
2035 	avp->flags |= AVP_F_CONFIGURED;
2036 	ret = 0;
2037 
2038 unlock:
2039 	rte_spinlock_unlock(&avp->lock);
2040 	return ret;
2041 }
2042 
2043 static int
2044 avp_dev_start(struct rte_eth_dev *eth_dev)
2045 {
2046 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2047 	int ret;
2048 
2049 	rte_spinlock_lock(&avp->lock);
2050 	if (avp->flags & AVP_F_DETACHED) {
2051 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2052 		ret = -ENOTSUP;
2053 		goto unlock;
2054 	}
2055 
2056 	/* disable features that we do not support */
2057 	eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
2058 	eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
2059 	eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
2060 	eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
2061 
2062 	/* update link state */
2063 	ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2064 	if (ret < 0) {
2065 		PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2066 			    ret);
2067 		goto unlock;
2068 	}
2069 
2070 	/* remember current link state */
2071 	avp->flags |= AVP_F_LINKUP;
2072 
2073 	ret = 0;
2074 
2075 unlock:
2076 	rte_spinlock_unlock(&avp->lock);
2077 	return ret;
2078 }
2079 
2080 static void
2081 avp_dev_stop(struct rte_eth_dev *eth_dev)
2082 {
2083 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2084 	int ret;
2085 
2086 	rte_spinlock_lock(&avp->lock);
2087 	if (avp->flags & AVP_F_DETACHED) {
2088 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2089 		goto unlock;
2090 	}
2091 
2092 	/* remember current link state */
2093 	avp->flags &= ~AVP_F_LINKUP;
2094 
2095 	/* update link state */
2096 	ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2097 	if (ret < 0) {
2098 		PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2099 			    ret);
2100 	}
2101 
2102 unlock:
2103 	rte_spinlock_unlock(&avp->lock);
2104 }
2105 
2106 static void
2107 avp_dev_close(struct rte_eth_dev *eth_dev)
2108 {
2109 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2110 	int ret;
2111 
2112 	rte_spinlock_lock(&avp->lock);
2113 	if (avp->flags & AVP_F_DETACHED) {
2114 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2115 		goto unlock;
2116 	}
2117 
2118 	/* remember current link state */
2119 	avp->flags &= ~AVP_F_LINKUP;
2120 	avp->flags &= ~AVP_F_CONFIGURED;
2121 
2122 	ret = avp_dev_disable_interrupts(eth_dev);
2123 	if (ret < 0) {
2124 		PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2125 		/* continue */
2126 	}
2127 
2128 	/* update device state */
2129 	ret = avp_dev_ctrl_shutdown(eth_dev);
2130 	if (ret < 0) {
2131 		PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2132 			    ret);
2133 		/* continue */
2134 	}
2135 
2136 unlock:
2137 	rte_spinlock_unlock(&avp->lock);
2138 }
2139 
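/*
 * Report a fixed 10G full-duplex link whose status tracks the AVP_F_LINKUP
 * flag maintained by the start/stop/close handlers.
 */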
2140 static int
2141 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2142 					__rte_unused int wait_to_complete)
2143 {
2144 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2145 	struct rte_eth_link *link = &eth_dev->data->dev_link;
2146 
2147 	link->link_speed = ETH_SPEED_NUM_10G;
2148 	link->link_duplex = ETH_LINK_FULL_DUPLEX;
2149 	link->link_status = !!(avp->flags & AVP_F_LINKUP);
2150 
2151 	return -1;
2152 }
2153 
2154 static void
2155 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2156 {
2157 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2158 
2159 	rte_spinlock_lock(&avp->lock);
2160 	if ((avp->flags & AVP_F_PROMISC) == 0) {
2161 		avp->flags |= AVP_F_PROMISC;
2162 		PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2163 			    eth_dev->data->port_id);
2164 	}
2165 	rte_spinlock_unlock(&avp->lock);
2166 }
2167 
2168 static void
2169 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2170 {
2171 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2172 
2173 	rte_spinlock_lock(&avp->lock);
2174 	if ((avp->flags & AVP_F_PROMISC) != 0) {
2175 		avp->flags &= ~AVP_F_PROMISC;
2176 		PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2177 			    eth_dev->data->port_id);
2178 	}
2179 	rte_spinlock_unlock(&avp->lock);
2180 }
2181 
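/*
 * Report device limits and capabilities, advertising VLAN offloads only
 * when they are supported by the host.
 */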
2182 static void
2183 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2184 		 struct rte_eth_dev_info *dev_info)
2185 {
2186 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2187 
2188 	dev_info->driver_name = "rte_avp_pmd";
2189 	dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
2190 	dev_info->max_rx_queues = avp->max_rx_queues;
2191 	dev_info->max_tx_queues = avp->max_tx_queues;
2192 	dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2193 	dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2194 	dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2195 	if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2196 		dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2197 		dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2198 	}
2199 }
2200 
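/*
 * Apply VLAN offload settings.  Only VLAN stripping is configurable, and
 * only when the host supports it; filter and extend requests are reported
 * as unsupported.
 */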
2201 static void
2202 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2203 {
2204 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2205 
2206 	if (mask & ETH_VLAN_STRIP_MASK) {
2207 		if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2208 			if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
2209 				avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2210 			else
2211 				avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2212 		} else {
2213 			PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2214 		}
2215 	}
2216 
2217 	if (mask & ETH_VLAN_FILTER_MASK) {
2218 		if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
2219 			PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2220 	}
2221 
2222 	if (mask & ETH_VLAN_EXTEND_MASK) {
2223 		if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
2224 			PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2225 	}
2226 }
2227 
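/*
 * Aggregate the software counters maintained by the receive and transmit
 * handlers into the standard ethdev statistics.
 */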
2228 static void
2229 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2230 {
2231 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2232 	unsigned int i;
2233 
2234 	for (i = 0; i < avp->num_rx_queues; i++) {
2235 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2236 
2237 		if (rxq) {
2238 			stats->ipackets += rxq->packets;
2239 			stats->ibytes += rxq->bytes;
2240 			stats->ierrors += rxq->errors;
2241 
2242 			stats->q_ipackets[i] += rxq->packets;
2243 			stats->q_ibytes[i] += rxq->bytes;
2244 			stats->q_errors[i] += rxq->errors;
2245 		}
2246 	}
2247 
2248 	for (i = 0; i < avp->num_tx_queues; i++) {
2249 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2250 
2251 		if (txq) {
2252 			stats->opackets += txq->packets;
2253 			stats->obytes += txq->bytes;
2254 			stats->oerrors += txq->errors;
2255 
2256 			stats->q_opackets[i] += txq->packets;
2257 			stats->q_obytes[i] += txq->bytes;
2258 			stats->q_errors[i] += txq->errors;
2259 		}
2260 	}
2261 }
2262 
2263 static void
2264 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2265 {
2266 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2267 	unsigned int i;
2268 
2269 	for (i = 0; i < avp->num_rx_queues; i++) {
2270 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2271 
2272 		if (rxq) {
2273 			rxq->bytes = 0;
2274 			rxq->packets = 0;
2275 			rxq->errors = 0;
2276 		}
2277 	}
2278 
2279 	for (i = 0; i < avp->num_tx_queues; i++) {
2280 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2281 
2282 		if (txq) {
2283 			txq->bytes = 0;
2284 			txq->packets = 0;
2285 			txq->errors = 0;
2286 		}
2287 	}
2288 }
2289 
2290 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd.pci_drv);
2291 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);
2292