xref: /dpdk/drivers/net/avp/avp_ethdev.c (revision e99981af34632ecce3bac82d05db97b08308f9b5)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2013-2017 Wind River Systems, Inc.
3  */
4 
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10 
11 #include <ethdev_driver.h>
12 #include <ethdev_pci.h>
13 #include <rte_memcpy.h>
14 #include <rte_string_fns.h>
15 #include <rte_malloc.h>
16 #include <rte_atomic.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_pci.h>
19 #include <bus_pci_driver.h>
20 #include <rte_ether.h>
21 #include <rte_common.h>
22 #include <rte_cycles.h>
23 #include <rte_spinlock.h>
24 #include <rte_byteorder.h>
25 #include <dev_driver.h>
26 #include <rte_memory.h>
27 #include <rte_eal.h>
28 #include <rte_io.h>
29 
30 #include "rte_avp_common.h"
31 #include "rte_avp_fifo.h"
32 
33 #include "avp_logs.h"
34 
35 static int avp_dev_create(struct rte_pci_device *pci_dev,
36 			  struct rte_eth_dev *eth_dev);
37 
38 static int avp_dev_configure(struct rte_eth_dev *dev);
39 static int avp_dev_start(struct rte_eth_dev *dev);
40 static int avp_dev_stop(struct rte_eth_dev *dev);
41 static int avp_dev_close(struct rte_eth_dev *dev);
42 static int avp_dev_info_get(struct rte_eth_dev *dev,
43 			    struct rte_eth_dev_info *dev_info);
44 static int avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
45 static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
46 static int avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
47 static int avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
48 
49 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
50 				  uint16_t rx_queue_id,
51 				  uint16_t nb_rx_desc,
52 				  unsigned int socket_id,
53 				  const struct rte_eth_rxconf *rx_conf,
54 				  struct rte_mempool *pool);
55 
56 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
57 				  uint16_t tx_queue_id,
58 				  uint16_t nb_tx_desc,
59 				  unsigned int socket_id,
60 				  const struct rte_eth_txconf *tx_conf);
61 
62 static uint16_t avp_recv_scattered_pkts(void *rx_queue,
63 					struct rte_mbuf **rx_pkts,
64 					uint16_t nb_pkts);
65 
66 static uint16_t avp_recv_pkts(void *rx_queue,
67 			      struct rte_mbuf **rx_pkts,
68 			      uint16_t nb_pkts);
69 
70 static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
71 					struct rte_mbuf **tx_pkts,
72 					uint16_t nb_pkts);
73 
74 static uint16_t avp_xmit_pkts(void *tx_queue,
75 			      struct rte_mbuf **tx_pkts,
76 			      uint16_t nb_pkts);
77 
78 static void avp_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
79 static void avp_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
80 
81 static int avp_dev_stats_get(struct rte_eth_dev *dev,
82 			      struct rte_eth_stats *stats);
83 static int avp_dev_stats_reset(struct rte_eth_dev *dev);
84 
85 
86 #define AVP_MAX_RX_BURST 64
87 #define AVP_MAX_TX_BURST 64
88 #define AVP_MAX_MAC_ADDRS 1
89 #define AVP_MIN_RX_BUFSIZE RTE_ETHER_MIN_LEN
90 
91 
92 /*
93  * Defines the number of microseconds to wait before checking the response
94  * queue for completion.
95  */
96 #define AVP_REQUEST_DELAY_USECS (5000)
97 
/*
 * Defines the number of times to check the response queue for completion
 * before declaring a timeout.
 */
102 #define AVP_MAX_REQUEST_RETRY (100)
103 
104 /* Defines the current PCI driver version number */
105 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
106 
107 /*
108  * The set of PCI devices this driver supports
109  */
110 static const struct rte_pci_id pci_id_avp_map[] = {
111 	{ .vendor_id = RTE_AVP_PCI_VENDOR_ID,
112 	  .device_id = RTE_AVP_PCI_DEVICE_ID,
113 	  .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
114 	  .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
115 	  .class_id = RTE_CLASS_ANY_ID,
116 	},
117 
118 	{ .vendor_id = 0, /* sentinel */
119 	},
120 };
121 
122 /*
123  * dev_ops for avp, bare necessities for basic operation
124  */
125 static const struct eth_dev_ops avp_eth_dev_ops = {
126 	.dev_configure       = avp_dev_configure,
127 	.dev_start           = avp_dev_start,
128 	.dev_stop            = avp_dev_stop,
129 	.dev_close           = avp_dev_close,
130 	.dev_infos_get       = avp_dev_info_get,
131 	.vlan_offload_set    = avp_vlan_offload_set,
132 	.stats_get           = avp_dev_stats_get,
133 	.stats_reset         = avp_dev_stats_reset,
134 	.link_update         = avp_dev_link_update,
135 	.promiscuous_enable  = avp_dev_promiscuous_enable,
136 	.promiscuous_disable = avp_dev_promiscuous_disable,
137 	.rx_queue_setup      = avp_dev_rx_queue_setup,
138 	.rx_queue_release    = avp_dev_rx_queue_release,
139 	.tx_queue_setup      = avp_dev_tx_queue_setup,
140 	.tx_queue_release    = avp_dev_tx_queue_release,
141 };
142 
143 /**@{ AVP device flags */
144 #define AVP_F_PROMISC (1 << 1)
145 #define AVP_F_CONFIGURED (1 << 2)
146 #define AVP_F_LINKUP (1 << 3)
147 #define AVP_F_DETACHED (1 << 4)
148 /**@} */
149 
150 /* Ethernet device validation marker */
151 #define AVP_ETHDEV_MAGIC 0x92972862
152 
153 /*
154  * Defines the AVP device attributes which are attached to an RTE ethernet
155  * device
156  */
157 struct __rte_cache_aligned avp_dev {
158 	uint32_t magic; /**< Memory validation marker */
159 	uint64_t device_id; /**< Unique system identifier */
160 	struct rte_ether_addr ethaddr; /**< Host specified MAC address */
161 	struct rte_eth_dev_data *dev_data;
162 	/**< Back pointer to ethernet device data */
163 	volatile uint32_t flags; /**< Device operational flags */
164 	uint16_t port_id; /**< Ethernet port identifier */
165 	struct rte_mempool *pool; /**< pkt mbuf mempool */
166 	unsigned int guest_mbuf_size; /**< local pool mbuf size */
167 	unsigned int host_mbuf_size; /**< host mbuf size */
168 	unsigned int max_rx_pkt_len; /**< maximum receive unit */
169 	uint32_t host_features; /**< Supported feature bitmap */
170 	uint32_t features; /**< Enabled feature bitmap */
171 	unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
172 	unsigned int max_tx_queues; /**< Maximum number of transmit queues */
173 	unsigned int num_rx_queues; /**< Negotiated number of receive queues */
174 	unsigned int max_rx_queues; /**< Maximum number of receive queues */
175 
176 	struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
177 	struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
178 	struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
179 	/**< Allocated mbufs queue */
180 	struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
181 	/**< To be freed mbufs queue */
182 
183 	/* mutual exclusion over the 'flag' and 'resp_q/req_q' fields */
184 	rte_spinlock_t lock;
185 
186 	/* For request & response */
187 	struct rte_avp_fifo *req_q; /**< Request queue */
188 	struct rte_avp_fifo *resp_q; /**< Response queue */
189 	void *host_sync_addr; /**< (host) Req/Resp Mem address */
190 	void *sync_addr; /**< Req/Resp Mem address */
191 	void *host_mbuf_addr; /**< (host) MBUF pool start address */
192 	void *mbuf_addr; /**< MBUF pool start address */
193 };
194 
195 /* RTE ethernet private data */
196 struct __rte_cache_aligned avp_adapter {
197 	struct avp_dev avp;
198 };
199 
200 
/* Write a 32-bit value to an MMIO register (relaxed ordering variant) */
#define AVP_WRITE32(_value, _addr) rte_write32_relaxed(_value, _addr)

/* Read a 32-bit value from an MMIO register (relaxed ordering variant) */
#define AVP_READ32(_addr) rte_read32_relaxed(_addr)
206 
/*
 * Cast the ethernet device private data pointer to its AVP device object.
 *
 * The argument is parenthesized so that an expression argument (for example
 * "base + offset") is cast as a whole rather than only its first operand.
 */
#define AVP_DEV_PRIVATE_TO_HW(adapter) \
	(&((struct avp_adapter *)(adapter))->avp)
210 
/*
 * Per-queue context handed to the receive and transmit burst callbacks of an
 * AVP device.
 */
struct avp_queue {
	/** Backpointer to ethernet device data */
	struct rte_eth_dev_data *dev_data;
	/** Backpointer to AVP device */
	struct avp_dev *avp;
	/** Queue identifier used for indexing current queue */
	uint16_t queue_id;
	/** Base queue identifier for queue servicing */
	uint16_t queue_base;
	/** Maximum queue identifier for queue servicing */
	uint16_t queue_limit;

	/* counters */
	uint64_t packets;
	uint64_t bytes;
	uint64_t errors;
};
230 
231 /* send a request and wait for a response
232  *
233  * @warning must be called while holding the avp->lock spinlock.
234  */
235 static int
236 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
237 {
238 	unsigned int retry = AVP_MAX_REQUEST_RETRY;
239 	void *resp_addr = NULL;
240 	unsigned int count;
241 	int ret;
242 
243 	PMD_DRV_LOG_LINE(DEBUG, "Sending request %u to host", request->req_id);
244 
245 	request->result = -ENOTSUP;
246 
247 	/* Discard any stale responses before starting a new request */
248 	while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
249 		PMD_DRV_LOG_LINE(DEBUG, "Discarding stale response");
250 
251 	rte_memcpy(avp->sync_addr, request, sizeof(*request));
252 	count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
253 	if (count < 1) {
254 		PMD_DRV_LOG_LINE(ERR, "Cannot send request %u to host",
255 			    request->req_id);
256 		ret = -EBUSY;
257 		goto done;
258 	}
259 
260 	while (retry--) {
261 		/* wait for a response */
262 		usleep(AVP_REQUEST_DELAY_USECS);
263 
264 		count = avp_fifo_count(avp->resp_q);
265 		if (count >= 1) {
266 			/* response received */
267 			break;
268 		}
269 
270 		if (retry == 0) {
271 			PMD_DRV_LOG_LINE(ERR, "Timeout while waiting for a response for %u",
272 				    request->req_id);
273 			ret = -ETIME;
274 			goto done;
275 		}
276 	}
277 
278 	/* retrieve the response */
279 	count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
280 	if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
281 		PMD_DRV_LOG_LINE(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p",
282 			    count, resp_addr, avp->host_sync_addr);
283 		ret = -ENODATA;
284 		goto done;
285 	}
286 
287 	/* copy to user buffer */
288 	rte_memcpy(request, avp->sync_addr, sizeof(*request));
289 	ret = 0;
290 
291 	PMD_DRV_LOG_LINE(DEBUG, "Result %d received for request %u",
292 		    request->result, request->req_id);
293 
294 done:
295 	return ret;
296 }
297 
298 static int
299 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
300 {
301 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
302 	struct rte_avp_request request;
303 	int ret;
304 
305 	/* setup a link state change request */
306 	memset(&request, 0, sizeof(request));
307 	request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
308 	request.if_up = state;
309 
310 	ret = avp_dev_process_request(avp, &request);
311 
312 	return ret == 0 ? request.result : ret;
313 }
314 
315 static int
316 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
317 			struct rte_avp_device_config *config)
318 {
319 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
320 	struct rte_avp_request request;
321 	int ret;
322 
323 	/* setup a configure request */
324 	memset(&request, 0, sizeof(request));
325 	request.req_id = RTE_AVP_REQ_CFG_DEVICE;
326 	memcpy(&request.config, config, sizeof(request.config));
327 
328 	ret = avp_dev_process_request(avp, &request);
329 
330 	return ret == 0 ? request.result : ret;
331 }
332 
333 static int
334 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
335 {
336 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
337 	struct rte_avp_request request;
338 	int ret;
339 
340 	/* setup a shutdown request */
341 	memset(&request, 0, sizeof(request));
342 	request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
343 
344 	ret = avp_dev_process_request(avp, &request);
345 
346 	return ret == 0 ? request.result : ret;
347 }
348 
349 /* translate from host mbuf virtual address to guest virtual address */
350 static inline void *
351 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
352 {
353 	return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
354 				       (uintptr_t)avp->host_mbuf_addr),
355 			   (uintptr_t)avp->mbuf_addr);
356 }
357 
358 /* translate from host physical address to guest virtual address */
359 static void *
360 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
361 			  rte_iova_t host_phys_addr)
362 {
363 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
364 	struct rte_mem_resource *resource;
365 	struct rte_avp_memmap_info *info;
366 	struct rte_avp_memmap *map;
367 	off_t offset;
368 	void *addr;
369 	unsigned int i;
370 
371 	addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
372 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
373 	info = (struct rte_avp_memmap_info *)resource->addr;
374 
375 	offset = 0;
376 	for (i = 0; i < info->nb_maps; i++) {
377 		/* search all segments looking for a matching address */
378 		map = &info->maps[i];
379 
380 		if ((host_phys_addr >= map->phys_addr) &&
381 			(host_phys_addr < (map->phys_addr + map->length))) {
382 			/* address is within this segment */
383 			offset += (host_phys_addr - map->phys_addr);
384 			addr = RTE_PTR_ADD(addr, (uintptr_t)offset);
385 
386 			PMD_DRV_LOG_LINE(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p",
387 				    host_phys_addr, addr);
388 
389 			return addr;
390 		}
391 		offset += map->length;
392 	}
393 
394 	return NULL;
395 }
396 
397 /* verify that the incoming device version is compatible with our version */
398 static int
399 avp_dev_version_check(uint32_t version)
400 {
401 	uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
402 	uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
403 
404 	if (device <= driver) {
405 		/* the host driver version is less than or equal to ours */
406 		return 0;
407 	}
408 
409 	return 1;
410 }
411 
412 /* verify that memory regions have expected version and validation markers */
413 static int
414 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
415 {
416 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
417 	struct rte_avp_memmap_info *memmap;
418 	struct rte_avp_device_info *info;
419 	struct rte_mem_resource *resource;
420 	unsigned int i;
421 
422 	/* Dump resource info for debug */
423 	for (i = 0; i < PCI_MAX_RESOURCE; i++) {
424 		resource = &pci_dev->mem_resource[i];
425 		if ((resource->phys_addr == 0) || (resource->len == 0))
426 			continue;
427 
428 		PMD_DRV_LOG_LINE(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p",
429 			    i, resource->phys_addr,
430 			    resource->len, resource->addr);
431 
432 		switch (i) {
433 		case RTE_AVP_PCI_MEMMAP_BAR:
434 			memmap = (struct rte_avp_memmap_info *)resource->addr;
435 			if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
436 			    (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
437 				PMD_DRV_LOG_LINE(ERR, "Invalid memmap magic 0x%08x and version %u",
438 					    memmap->magic, memmap->version);
439 				return -EINVAL;
440 			}
441 			break;
442 
443 		case RTE_AVP_PCI_DEVICE_BAR:
444 			info = (struct rte_avp_device_info *)resource->addr;
445 			if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
446 			    avp_dev_version_check(info->version)) {
447 				PMD_DRV_LOG_LINE(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x",
448 					    info->magic, info->version,
449 					    AVP_DPDK_DRIVER_VERSION);
450 				return -EINVAL;
451 			}
452 			break;
453 
454 		case RTE_AVP_PCI_MEMORY_BAR:
455 		case RTE_AVP_PCI_MMIO_BAR:
456 			if (resource->addr == NULL) {
457 				PMD_DRV_LOG_LINE(ERR, "Missing address space for BAR%u",
458 					    i);
459 				return -EINVAL;
460 			}
461 			break;
462 
463 		case RTE_AVP_PCI_MSIX_BAR:
464 		default:
465 			/* no validation required */
466 			break;
467 		}
468 	}
469 
470 	return 0;
471 }
472 
473 static int
474 avp_dev_detach(struct rte_eth_dev *eth_dev)
475 {
476 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
477 	int ret;
478 
479 	PMD_DRV_LOG_LINE(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "",
480 		    eth_dev->data->port_id, avp->device_id);
481 
482 	rte_spinlock_lock(&avp->lock);
483 
484 	if (avp->flags & AVP_F_DETACHED) {
485 		PMD_DRV_LOG_LINE(NOTICE, "port %u already detached",
486 			    eth_dev->data->port_id);
487 		ret = 0;
488 		goto unlock;
489 	}
490 
491 	/* shutdown the device first so the host stops sending us packets. */
492 	ret = avp_dev_ctrl_shutdown(eth_dev);
493 	if (ret < 0) {
494 		PMD_DRV_LOG_LINE(ERR, "Failed to send/recv shutdown to host, ret=%d",
495 			    ret);
496 		avp->flags &= ~AVP_F_DETACHED;
497 		goto unlock;
498 	}
499 
500 	avp->flags |= AVP_F_DETACHED;
501 	rte_wmb();
502 
503 	/* wait for queues to acknowledge the presence of the detach flag */
504 	rte_delay_ms(1);
505 
506 	ret = 0;
507 
508 unlock:
509 	rte_spinlock_unlock(&avp->lock);
510 	return ret;
511 }
512 
513 static void
514 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
515 {
516 	struct avp_dev *avp =
517 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
518 	struct avp_queue *rxq;
519 	uint16_t queue_count;
520 	uint16_t remainder;
521 
522 	rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
523 
524 	/*
525 	 * Must map all AVP fifos as evenly as possible between the configured
526 	 * device queues.  Each device queue will service a subset of the AVP
527 	 * fifos. If there is an odd number of device queues the first set of
528 	 * device queues will get the extra AVP fifos.
529 	 */
530 	queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
531 	remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
532 	if (rx_queue_id < remainder) {
533 		/* these queues must service one extra FIFO */
534 		rxq->queue_base = rx_queue_id * (queue_count + 1);
535 		rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
536 	} else {
537 		/* these queues service the regular number of FIFO */
538 		rxq->queue_base = ((remainder * (queue_count + 1)) +
539 				   ((rx_queue_id - remainder) * queue_count));
540 		rxq->queue_limit = rxq->queue_base + queue_count - 1;
541 	}
542 
543 	PMD_DRV_LOG_LINE(DEBUG, "rxq %u at %p base %u limit %u",
544 		    rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
545 
546 	rxq->queue_id = rxq->queue_base;
547 }
548 
549 static void
550 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
551 {
552 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
553 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
554 	struct rte_avp_device_info *host_info;
555 	void *addr;
556 
557 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
558 	host_info = (struct rte_avp_device_info *)addr;
559 
560 	/*
561 	 * the transmit direction is not negotiated beyond respecting the max
562 	 * number of queues because the host can handle arbitrary guest tx
563 	 * queues (host rx queues).
564 	 */
565 	avp->num_tx_queues = eth_dev->data->nb_tx_queues;
566 
567 	/*
568 	 * the receive direction is more restrictive.  The host requires a
569 	 * minimum number of guest rx queues (host tx queues) therefore
570 	 * negotiate a value that is at least as large as the host minimum
571 	 * requirement.  If the host and guest values are not identical then a
572 	 * mapping will be established in the receive_queue_setup function.
573 	 */
574 	avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
575 				     eth_dev->data->nb_rx_queues);
576 
577 	PMD_DRV_LOG_LINE(DEBUG, "Requesting %u Tx and %u Rx queues from host",
578 		    avp->num_tx_queues, avp->num_rx_queues);
579 }
580 
581 static int
582 avp_dev_attach(struct rte_eth_dev *eth_dev)
583 {
584 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
585 	struct rte_avp_device_config config;
586 	unsigned int i;
587 	int ret;
588 
589 	PMD_DRV_LOG_LINE(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "",
590 		    eth_dev->data->port_id, avp->device_id);
591 
592 	rte_spinlock_lock(&avp->lock);
593 
594 	if (!(avp->flags & AVP_F_DETACHED)) {
595 		PMD_DRV_LOG_LINE(NOTICE, "port %u already attached",
596 			    eth_dev->data->port_id);
597 		ret = 0;
598 		goto unlock;
599 	}
600 
601 	/*
602 	 * make sure that the detached flag is set prior to reconfiguring the
603 	 * queues.
604 	 */
605 	avp->flags |= AVP_F_DETACHED;
606 	rte_wmb();
607 
608 	/*
609 	 * re-run the device create utility which will parse the new host info
610 	 * and setup the AVP device queue pointers.
611 	 */
612 	ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev);
613 	if (ret < 0) {
614 		PMD_DRV_LOG_LINE(ERR, "Failed to re-create AVP device, ret=%d",
615 			    ret);
616 		goto unlock;
617 	}
618 
619 	if (avp->flags & AVP_F_CONFIGURED) {
620 		/*
621 		 * Update the receive queue mapping to handle cases where the
622 		 * source and destination hosts have different queue
623 		 * requirements.  As long as the DETACHED flag is asserted the
624 		 * queue table should not be referenced so it should be safe to
625 		 * update it.
626 		 */
627 		_avp_set_queue_counts(eth_dev);
628 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
629 			_avp_set_rx_queue_mappings(eth_dev, i);
630 
631 		/*
632 		 * Update the host with our config details so that it knows the
633 		 * device is active.
634 		 */
635 		memset(&config, 0, sizeof(config));
636 		config.device_id = avp->device_id;
637 		config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
638 		config.driver_version = AVP_DPDK_DRIVER_VERSION;
639 		config.features = avp->features;
640 		config.num_tx_queues = avp->num_tx_queues;
641 		config.num_rx_queues = avp->num_rx_queues;
642 		config.if_up = !!(avp->flags & AVP_F_LINKUP);
643 
644 		ret = avp_dev_ctrl_set_config(eth_dev, &config);
645 		if (ret < 0) {
646 			PMD_DRV_LOG_LINE(ERR, "Config request failed by host, ret=%d",
647 				    ret);
648 			goto unlock;
649 		}
650 	}
651 
652 	rte_wmb();
653 	avp->flags &= ~AVP_F_DETACHED;
654 
655 	ret = 0;
656 
657 unlock:
658 	rte_spinlock_unlock(&avp->lock);
659 	return ret;
660 }
661 
662 static void
663 avp_dev_interrupt_handler(void *data)
664 {
665 	struct rte_eth_dev *eth_dev = data;
666 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
667 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
668 	uint32_t status, value;
669 	int ret;
670 
671 	if (registers == NULL)
672 		rte_panic("no mapped MMIO register space\n");
673 
674 	/* read the interrupt status register
675 	 * note: this register clears on read so all raised interrupts must be
676 	 *    handled or remembered for later processing
677 	 */
678 	status = AVP_READ32(
679 		RTE_PTR_ADD(registers,
680 			    RTE_AVP_INTERRUPT_STATUS_OFFSET));
681 
682 	if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
683 		/* handle interrupt based on current status */
684 		value = AVP_READ32(
685 			RTE_PTR_ADD(registers,
686 				    RTE_AVP_MIGRATION_STATUS_OFFSET));
687 		switch (value) {
688 		case RTE_AVP_MIGRATION_DETACHED:
689 			ret = avp_dev_detach(eth_dev);
690 			break;
691 		case RTE_AVP_MIGRATION_ATTACHED:
692 			ret = avp_dev_attach(eth_dev);
693 			break;
694 		default:
695 			PMD_DRV_LOG_LINE(ERR, "unexpected migration status, status=%u",
696 				    value);
697 			ret = -EINVAL;
698 		}
699 
700 		/* acknowledge the request by writing out our current status */
701 		value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
702 		AVP_WRITE32(value,
703 			    RTE_PTR_ADD(registers,
704 					RTE_AVP_MIGRATION_ACK_OFFSET));
705 
706 		PMD_DRV_LOG_LINE(NOTICE, "AVP migration interrupt handled");
707 	}
708 
709 	if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
710 		PMD_DRV_LOG_LINE(WARNING, "AVP unexpected interrupt, status=0x%08x",
711 			    status);
712 
713 	/* re-enable UIO interrupt handling */
714 	ret = rte_intr_ack(pci_dev->intr_handle);
715 	if (ret < 0) {
716 		PMD_DRV_LOG_LINE(ERR, "Failed to re-enable UIO interrupts, ret=%d",
717 			    ret);
718 		/* continue */
719 	}
720 }
721 
722 static int
723 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
724 {
725 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
726 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
727 	int ret;
728 
729 	if (registers == NULL)
730 		return -EINVAL;
731 
732 	/* enable UIO interrupt handling */
733 	ret = rte_intr_enable(pci_dev->intr_handle);
734 	if (ret < 0) {
735 		PMD_DRV_LOG_LINE(ERR, "Failed to enable UIO interrupts, ret=%d",
736 			    ret);
737 		return ret;
738 	}
739 
740 	/* inform the device that all interrupts are enabled */
741 	AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
742 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
743 
744 	return 0;
745 }
746 
747 static int
748 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
749 {
750 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
751 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
752 	int ret;
753 
754 	if (registers == NULL)
755 		return 0;
756 
757 	/* inform the device that all interrupts are disabled */
758 	AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
759 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
760 
761 	/* enable UIO interrupt handling */
762 	ret = rte_intr_disable(pci_dev->intr_handle);
763 	if (ret < 0) {
764 		PMD_DRV_LOG_LINE(ERR, "Failed to disable UIO interrupts, ret=%d",
765 			    ret);
766 		return ret;
767 	}
768 
769 	return 0;
770 }
771 
772 static int
773 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
774 {
775 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
776 	int ret;
777 
778 	/* register a callback handler with UIO for interrupt notifications */
779 	ret = rte_intr_callback_register(pci_dev->intr_handle,
780 					 avp_dev_interrupt_handler,
781 					 (void *)eth_dev);
782 	if (ret < 0) {
783 		PMD_DRV_LOG_LINE(ERR, "Failed to register UIO interrupt callback, ret=%d",
784 			    ret);
785 		return ret;
786 	}
787 
788 	/* enable interrupt processing */
789 	return avp_dev_enable_interrupts(eth_dev);
790 }
791 
792 static int
793 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
794 {
795 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
796 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
797 	uint32_t value;
798 
799 	if (registers == NULL)
800 		return 0;
801 
802 	value = AVP_READ32(RTE_PTR_ADD(registers,
803 				       RTE_AVP_MIGRATION_STATUS_OFFSET));
804 	if (value == RTE_AVP_MIGRATION_DETACHED) {
805 		/* migration is in progress; ack it if we have not already */
806 		AVP_WRITE32(value,
807 			    RTE_PTR_ADD(registers,
808 					RTE_AVP_MIGRATION_ACK_OFFSET));
809 		return 1;
810 	}
811 	return 0;
812 }
813 
814 /*
815  * create a AVP device using the supplied device info by first translating it
816  * to guest address space(s).
817  */
818 static int
819 avp_dev_create(struct rte_pci_device *pci_dev,
820 	       struct rte_eth_dev *eth_dev)
821 {
822 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
823 	struct rte_avp_device_info *host_info;
824 	struct rte_mem_resource *resource;
825 	unsigned int i;
826 
827 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
828 	if (resource->addr == NULL) {
829 		PMD_DRV_LOG_LINE(ERR, "BAR%u is not mapped",
830 			    RTE_AVP_PCI_DEVICE_BAR);
831 		return -EFAULT;
832 	}
833 	host_info = (struct rte_avp_device_info *)resource->addr;
834 
835 	if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
836 		avp_dev_version_check(host_info->version)) {
837 		PMD_DRV_LOG_LINE(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x",
838 			    host_info->magic, host_info->version,
839 			    AVP_DPDK_DRIVER_VERSION);
840 		return -EINVAL;
841 	}
842 
843 	PMD_DRV_LOG_LINE(DEBUG, "AVP host device is v%u.%u.%u",
844 		    RTE_AVP_GET_RELEASE_VERSION(host_info->version),
845 		    RTE_AVP_GET_MAJOR_VERSION(host_info->version),
846 		    RTE_AVP_GET_MINOR_VERSION(host_info->version));
847 
848 	PMD_DRV_LOG_LINE(DEBUG, "AVP host supports %u to %u TX queue(s)",
849 		    host_info->min_tx_queues, host_info->max_tx_queues);
850 	PMD_DRV_LOG_LINE(DEBUG, "AVP host supports %u to %u RX queue(s)",
851 		    host_info->min_rx_queues, host_info->max_rx_queues);
852 	PMD_DRV_LOG_LINE(DEBUG, "AVP host supports features 0x%08x",
853 		    host_info->features);
854 
855 	if (avp->magic != AVP_ETHDEV_MAGIC) {
856 		/*
857 		 * First time initialization (i.e., not during a VM
858 		 * migration)
859 		 */
860 		memset(avp, 0, sizeof(*avp));
861 		avp->magic = AVP_ETHDEV_MAGIC;
862 		avp->dev_data = eth_dev->data;
863 		avp->port_id = eth_dev->data->port_id;
864 		avp->host_mbuf_size = host_info->mbuf_size;
865 		avp->host_features = host_info->features;
866 		rte_spinlock_init(&avp->lock);
867 		memcpy(&avp->ethaddr.addr_bytes[0],
868 		       host_info->ethaddr, RTE_ETHER_ADDR_LEN);
869 		/* adjust max values to not exceed our max */
870 		avp->max_tx_queues =
871 			RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
872 		avp->max_rx_queues =
873 			RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
874 	} else {
875 		/* Re-attaching during migration */
876 
877 		/* TODO... requires validation of host values */
878 		if ((host_info->features & avp->features) != avp->features) {
879 			PMD_DRV_LOG_LINE(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x",
880 				    avp->features, host_info->features);
881 			/* this should not be possible; continue for now */
882 		}
883 	}
884 
885 	/* the device id is allowed to change over migrations */
886 	avp->device_id = host_info->device_id;
887 
888 	/* translate incoming host addresses to guest address space */
889 	PMD_DRV_LOG_LINE(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "",
890 		    host_info->tx_phys);
891 	PMD_DRV_LOG_LINE(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "",
892 		    host_info->alloc_phys);
893 	for (i = 0; i < avp->max_tx_queues; i++) {
894 		avp->tx_q[i] = avp_dev_translate_address(eth_dev,
895 			host_info->tx_phys + (i * host_info->tx_size));
896 
897 		avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
898 			host_info->alloc_phys + (i * host_info->alloc_size));
899 	}
900 
901 	PMD_DRV_LOG_LINE(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "",
902 		    host_info->rx_phys);
903 	PMD_DRV_LOG_LINE(DEBUG, "AVP first host free queue at 0x%" PRIx64 "",
904 		    host_info->free_phys);
905 	for (i = 0; i < avp->max_rx_queues; i++) {
906 		avp->rx_q[i] = avp_dev_translate_address(eth_dev,
907 			host_info->rx_phys + (i * host_info->rx_size));
908 		avp->free_q[i] = avp_dev_translate_address(eth_dev,
909 			host_info->free_phys + (i * host_info->free_size));
910 	}
911 
912 	PMD_DRV_LOG_LINE(DEBUG, "AVP host request queue at 0x%" PRIx64 "",
913 		    host_info->req_phys);
914 	PMD_DRV_LOG_LINE(DEBUG, "AVP host response queue at 0x%" PRIx64 "",
915 		    host_info->resp_phys);
916 	PMD_DRV_LOG_LINE(DEBUG, "AVP host sync address at 0x%" PRIx64 "",
917 		    host_info->sync_phys);
918 	PMD_DRV_LOG_LINE(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "",
919 		    host_info->mbuf_phys);
920 	avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
921 	avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
922 	avp->sync_addr =
923 		avp_dev_translate_address(eth_dev, host_info->sync_phys);
924 	avp->mbuf_addr =
925 		avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
926 
927 	/*
928 	 * store the host mbuf virtual address so that we can calculate
929 	 * relative offsets for each mbuf as they are processed
930 	 */
931 	avp->host_mbuf_addr = host_info->mbuf_va;
932 	avp->host_sync_addr = host_info->sync_va;
933 
934 	/*
935 	 * store the maximum packet length that is supported by the host.
936 	 */
937 	avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
938 	PMD_DRV_LOG_LINE(DEBUG, "AVP host max receive packet length is %u",
939 				host_info->max_rx_pkt_len);
940 
941 	return 0;
942 }
943 
944 /*
945  * This function is based on probe() function in avp_pci.c
946  * It returns 0 on success.
947  */
948 static int
949 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
950 {
951 	struct avp_dev *avp =
952 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
953 	struct rte_pci_device *pci_dev;
954 	int ret;
955 
956 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
957 	eth_dev->dev_ops = &avp_eth_dev_ops;
958 	eth_dev->rx_pkt_burst = &avp_recv_pkts;
959 	eth_dev->tx_pkt_burst = &avp_xmit_pkts;
960 
961 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
962 		/*
963 		 * no setup required on secondary processes.  All data is saved
964 		 * in dev_private by the primary process. All resource should
965 		 * be mapped to the same virtual address so all pointers should
966 		 * be valid.
967 		 */
968 		if (eth_dev->data->scattered_rx) {
969 			PMD_DRV_LOG_LINE(NOTICE, "AVP device configured for chained mbufs");
970 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
971 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
972 		}
973 		return 0;
974 	}
975 
976 	rte_eth_copy_pci_info(eth_dev, pci_dev);
977 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
978 
979 	/* Check current migration status */
980 	if (avp_dev_migration_pending(eth_dev)) {
981 		PMD_DRV_LOG_LINE(ERR, "VM live migration operation in progress");
982 		return -EBUSY;
983 	}
984 
985 	/* Check BAR resources */
986 	ret = avp_dev_check_regions(eth_dev);
987 	if (ret < 0) {
988 		PMD_DRV_LOG_LINE(ERR, "Failed to validate BAR resources, ret=%d",
989 			    ret);
990 		return ret;
991 	}
992 
993 	/* Enable interrupts */
994 	ret = avp_dev_setup_interrupts(eth_dev);
995 	if (ret < 0) {
996 		PMD_DRV_LOG_LINE(ERR, "Failed to enable interrupts, ret=%d", ret);
997 		return ret;
998 	}
999 
1000 	/* Handle each subtype */
1001 	ret = avp_dev_create(pci_dev, eth_dev);
1002 	if (ret < 0) {
1003 		PMD_DRV_LOG_LINE(ERR, "Failed to create device, ret=%d", ret);
1004 		return ret;
1005 	}
1006 
1007 	/* Allocate memory for storing MAC addresses */
1008 	eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev",
1009 					RTE_ETHER_ADDR_LEN, 0);
1010 	if (eth_dev->data->mac_addrs == NULL) {
1011 		PMD_DRV_LOG_LINE(ERR, "Failed to allocate %d bytes needed to store MAC addresses",
1012 			    RTE_ETHER_ADDR_LEN);
1013 		return -ENOMEM;
1014 	}
1015 
1016 	/* Get a mac from device config */
1017 	rte_ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1018 
1019 	return 0;
1020 }
1021 
1022 static int
1023 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1024 {
1025 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1026 		return -EPERM;
1027 
1028 	if (eth_dev->data == NULL)
1029 		return 0;
1030 
1031 	avp_dev_close(eth_dev);
1032 
1033 	return 0;
1034 }
1035 
1036 static int
1037 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1038 		  struct rte_pci_device *pci_dev)
1039 {
1040 	return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct avp_adapter),
1041 			eth_avp_dev_init);
1042 }
1043 
1044 static int
1045 eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1046 {
1047 	return rte_eth_dev_pci_generic_remove(pci_dev,
1048 					      eth_avp_dev_uninit);
1049 }
1050 
1051 static struct rte_pci_driver rte_avp_pmd = {
1052 	.id_table = pci_id_avp_map,
1053 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1054 	.probe = eth_avp_pci_probe,
1055 	.remove = eth_avp_pci_remove,
1056 };
1057 
1058 static int
1059 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1060 			 struct avp_dev *avp)
1061 {
1062 	unsigned int max_rx_pktlen;
1063 
1064 	max_rx_pktlen = eth_dev->data->mtu + RTE_ETHER_HDR_LEN +
1065 		RTE_ETHER_CRC_LEN;
1066 
1067 	if (max_rx_pktlen > avp->guest_mbuf_size ||
1068 	    max_rx_pktlen > avp->host_mbuf_size) {
1069 		/*
1070 		 * If the guest MTU is greater than either the host or guest
1071 		 * buffers then chained mbufs have to be enabled in the TX
1072 		 * direction.  It is assumed that the application will not need
1073 		 * to send packets larger than their MTU.
1074 		 */
1075 		return 1;
1076 	}
1077 
1078 	if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1079 	    (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1080 		/*
1081 		 * If the host MRU is greater than its own mbuf size or the
1082 		 * guest mbuf size then chained mbufs have to be enabled in the
1083 		 * RX direction.
1084 		 */
1085 		return 1;
1086 	}
1087 
1088 	return 0;
1089 }
1090 
1091 static int
1092 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1093 		       uint16_t rx_queue_id,
1094 		       uint16_t nb_rx_desc,
1095 		       unsigned int socket_id,
1096 		       const struct rte_eth_rxconf *rx_conf,
1097 		       struct rte_mempool *pool)
1098 {
1099 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1100 	struct rte_pktmbuf_pool_private *mbp_priv;
1101 	struct avp_queue *rxq;
1102 
1103 	if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1104 		PMD_DRV_LOG_LINE(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u",
1105 			    rx_queue_id, eth_dev->data->nb_rx_queues);
1106 		return -EINVAL;
1107 	}
1108 
1109 	/* Save mbuf pool pointer */
1110 	avp->pool = pool;
1111 
1112 	/* Save the local mbuf size */
1113 	mbp_priv = rte_mempool_get_priv(pool);
1114 	avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1115 	avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1116 
1117 	if (avp_dev_enable_scattered(eth_dev, avp)) {
1118 		if (!eth_dev->data->scattered_rx) {
1119 			PMD_DRV_LOG_LINE(NOTICE, "AVP device configured for chained mbufs");
1120 			eth_dev->data->scattered_rx = 1;
1121 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1122 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1123 		}
1124 	}
1125 
1126 	PMD_DRV_LOG_LINE(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)",
1127 		    avp->max_rx_pkt_len,
1128 		    eth_dev->data->mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN,
1129 		    avp->host_mbuf_size,
1130 		    avp->guest_mbuf_size);
1131 
1132 	/* allocate a queue object */
1133 	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1134 				 RTE_CACHE_LINE_SIZE, socket_id);
1135 	if (rxq == NULL) {
1136 		PMD_DRV_LOG_LINE(ERR, "Failed to allocate new Rx queue object");
1137 		return -ENOMEM;
1138 	}
1139 
1140 	/* save back pointers to AVP and Ethernet devices */
1141 	rxq->avp = avp;
1142 	rxq->dev_data = eth_dev->data;
1143 	eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1144 
1145 	/* setup the queue receive mapping for the current queue. */
1146 	_avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1147 
1148 	PMD_DRV_LOG_LINE(DEBUG, "Rx queue %u setup at %p", rx_queue_id, rxq);
1149 
1150 	(void)nb_rx_desc;
1151 	(void)rx_conf;
1152 	return 0;
1153 }
1154 
1155 static int
1156 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1157 		       uint16_t tx_queue_id,
1158 		       uint16_t nb_tx_desc,
1159 		       unsigned int socket_id,
1160 		       const struct rte_eth_txconf *tx_conf)
1161 {
1162 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1163 	struct avp_queue *txq;
1164 
1165 	if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1166 		PMD_DRV_LOG_LINE(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u",
1167 			    tx_queue_id, eth_dev->data->nb_tx_queues);
1168 		return -EINVAL;
1169 	}
1170 
1171 	/* allocate a queue object */
1172 	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1173 				 RTE_CACHE_LINE_SIZE, socket_id);
1174 	if (txq == NULL) {
1175 		PMD_DRV_LOG_LINE(ERR, "Failed to allocate new Tx queue object");
1176 		return -ENOMEM;
1177 	}
1178 
1179 	/* only the configured set of transmit queues are used */
1180 	txq->queue_id = tx_queue_id;
1181 	txq->queue_base = tx_queue_id;
1182 	txq->queue_limit = tx_queue_id;
1183 
1184 	/* save back pointers to AVP and Ethernet devices */
1185 	txq->avp = avp;
1186 	txq->dev_data = eth_dev->data;
1187 	eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1188 
1189 	PMD_DRV_LOG_LINE(DEBUG, "Tx queue %u setup at %p", tx_queue_id, txq);
1190 
1191 	(void)nb_tx_desc;
1192 	(void)tx_conf;
1193 	return 0;
1194 }
1195 
1196 static inline int
1197 _avp_cmp_ether_addr(struct rte_ether_addr *a, struct rte_ether_addr *b)
1198 {
1199 	uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1200 	uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1201 	return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1202 }
1203 
1204 static inline int
1205 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1206 {
1207 	struct rte_ether_hdr *eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
1208 
1209 	if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->dst_addr) == 0)) {
1210 		/* allow all packets destined to our address */
1211 		return 0;
1212 	}
1213 
1214 	if (likely(rte_is_broadcast_ether_addr(&eth->dst_addr))) {
1215 		/* allow all broadcast packets */
1216 		return 0;
1217 	}
1218 
1219 	if (likely(rte_is_multicast_ether_addr(&eth->dst_addr))) {
1220 		/* allow all multicast packets */
1221 		return 0;
1222 	}
1223 
1224 	if (avp->flags & AVP_F_PROMISC) {
1225 		/* allow all packets when in promiscuous mode */
1226 		return 0;
1227 	}
1228 
1229 	return -1;
1230 }
1231 
#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
/*
 * Debug-only consistency check of a host buffer chain.
 *
 * Walks the chain starting at host address buf and panics when a segment
 * translates to NULL, has a NULL data pointer after translation or has a
 * zero data length, when the number of segments walked does not match the
 * nb_segs of the first buffer, or when the sum of the segment data lengths
 * does not match the pkt_len of the first buffer.
 */
static inline void
__avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
{
	struct rte_avp_desc *first_buf;
	struct rte_avp_desc *pkt_buf;
	unsigned int pkt_len;
	unsigned int nb_segs;
	void *pkt_data;
	unsigned int i;

	i = 0;
	pkt_len = 0;

	/*
	 * Validate the first buffer before reading its segment count; the
	 * per-segment check in the loop below would otherwise only run after
	 * the pointer has already been dereferenced.
	 */
	first_buf = avp_dev_translate_buffer(avp, buf);
	if (first_buf == NULL)
		rte_panic("bad buffer: segment %u has an invalid address %p\n",
			  i, (void *)buf);

	nb_segs = first_buf->nb_segs;
	do {
		/* Adjust pointers for guest addressing */
		pkt_buf = avp_dev_translate_buffer(avp, buf);
		if (pkt_buf == NULL)
			rte_panic("bad buffer: segment %u has an invalid address %p\n",
				  i, (void *)buf);
		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
		if (pkt_data == NULL)
			rte_panic("bad buffer: segment %u has a NULL data pointer\n",
				  i);
		if (pkt_buf->data_len == 0)
			rte_panic("bad buffer: segment %u has 0 data length\n",
				  i);
		pkt_len += pkt_buf->data_len;
		nb_segs--;
		i++;

	} while (nb_segs && (buf = pkt_buf->next) != NULL);

	/* the chain must end exactly when the announced count is used up */
	if (nb_segs != 0)
		rte_panic("bad buffer: expected %u segments found %u\n",
			  first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
	if (pkt_len != first_buf->pkt_len)
		rte_panic("bad buffer: expected length %u found %u\n",
			  first_buf->pkt_len, pkt_len);
}

#define avp_dev_buffer_sanity_check(a, b) \
	__avp_dev_buffer_sanity_check((a), (b))

#else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */

/* buffer checking compiles to nothing unless the debug option is defined */
#define avp_dev_buffer_sanity_check(a, b) do {} while (0)

#endif
1283 
1284 /*
1285  * Copy a host buffer chain to a set of mbufs.	This function assumes that
1286  * there exactly the required number of mbufs to copy all source bytes.
1287  */
1288 static inline struct rte_mbuf *
1289 avp_dev_copy_from_buffers(struct avp_dev *avp,
1290 			  struct rte_avp_desc *buf,
1291 			  struct rte_mbuf **mbufs,
1292 			  unsigned int count)
1293 {
1294 	struct rte_mbuf *m_previous = NULL;
1295 	struct rte_avp_desc *pkt_buf;
1296 	unsigned int total_length = 0;
1297 	unsigned int copy_length;
1298 	unsigned int src_offset;
1299 	struct rte_mbuf *m;
1300 	uint16_t ol_flags;
1301 	uint16_t vlan_tci;
1302 	void *pkt_data;
1303 	unsigned int i;
1304 
1305 	avp_dev_buffer_sanity_check(avp, buf);
1306 
1307 	/* setup the first source buffer */
1308 	pkt_buf = avp_dev_translate_buffer(avp, buf);
1309 	pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1310 	total_length = pkt_buf->pkt_len;
1311 	src_offset = 0;
1312 
1313 	if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1314 		ol_flags = RTE_MBUF_F_RX_VLAN;
1315 		vlan_tci = pkt_buf->vlan_tci;
1316 	} else {
1317 		ol_flags = 0;
1318 		vlan_tci = 0;
1319 	}
1320 
1321 	for (i = 0; (i < count) && (buf != NULL); i++) {
1322 		/* fill each destination buffer */
1323 		m = mbufs[i];
1324 
1325 		if (m_previous != NULL)
1326 			m_previous->next = m;
1327 
1328 		m_previous = m;
1329 
1330 		do {
1331 			/*
1332 			 * Copy as many source buffers as will fit in the
1333 			 * destination buffer.
1334 			 */
1335 			copy_length = RTE_MIN((avp->guest_mbuf_size -
1336 					       rte_pktmbuf_data_len(m)),
1337 					      (pkt_buf->data_len -
1338 					       src_offset));
1339 			rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1340 					       rte_pktmbuf_data_len(m)),
1341 				   RTE_PTR_ADD(pkt_data, src_offset),
1342 				   copy_length);
1343 			rte_pktmbuf_data_len(m) += copy_length;
1344 			src_offset += copy_length;
1345 
1346 			if (likely(src_offset == pkt_buf->data_len)) {
1347 				/* need a new source buffer */
1348 				buf = pkt_buf->next;
1349 				if (buf != NULL) {
1350 					pkt_buf = avp_dev_translate_buffer(
1351 						avp, buf);
1352 					pkt_data = avp_dev_translate_buffer(
1353 						avp, pkt_buf->data);
1354 					src_offset = 0;
1355 				}
1356 			}
1357 
1358 			if (unlikely(rte_pktmbuf_data_len(m) ==
1359 				     avp->guest_mbuf_size)) {
1360 				/* need a new destination mbuf */
1361 				break;
1362 			}
1363 
1364 		} while (buf != NULL);
1365 	}
1366 
1367 	m = mbufs[0];
1368 	m->ol_flags = ol_flags;
1369 	m->nb_segs = count;
1370 	rte_pktmbuf_pkt_len(m) = total_length;
1371 	m->vlan_tci = vlan_tci;
1372 
1373 	__rte_mbuf_sanity_check(m, 1);
1374 
1375 	return m;
1376 }
1377 
1378 static uint16_t
1379 avp_recv_scattered_pkts(void *rx_queue,
1380 			struct rte_mbuf **rx_pkts,
1381 			uint16_t nb_pkts)
1382 {
1383 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1384 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1385 	struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1386 	struct avp_dev *avp = rxq->avp;
1387 	struct rte_avp_desc *pkt_buf;
1388 	struct rte_avp_fifo *free_q;
1389 	struct rte_avp_fifo *rx_q;
1390 	struct rte_avp_desc *buf;
1391 	unsigned int count, avail, n;
1392 	unsigned int guest_mbuf_size;
1393 	struct rte_mbuf *m;
1394 	unsigned int required;
1395 	unsigned int buf_len;
1396 	unsigned int port_id;
1397 	unsigned int i;
1398 
1399 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1400 		/* VM live migration in progress */
1401 		return 0;
1402 	}
1403 
1404 	guest_mbuf_size = avp->guest_mbuf_size;
1405 	port_id = avp->port_id;
1406 	rx_q = avp->rx_q[rxq->queue_id];
1407 	free_q = avp->free_q[rxq->queue_id];
1408 
1409 	/* setup next queue to service */
1410 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1411 		(rxq->queue_id + 1) : rxq->queue_base;
1412 
1413 	/* determine how many slots are available in the free queue */
1414 	count = avp_fifo_free_count(free_q);
1415 
1416 	/* determine how many packets are available in the rx queue */
1417 	avail = avp_fifo_count(rx_q);
1418 
1419 	/* determine how many packets can be received */
1420 	count = RTE_MIN(count, avail);
1421 	count = RTE_MIN(count, nb_pkts);
1422 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1423 
1424 	if (unlikely(count == 0)) {
1425 		/* no free buffers, or no buffers on the rx queue */
1426 		return 0;
1427 	}
1428 
1429 	/* retrieve pending packets */
1430 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1431 	PMD_RX_LOG_LINE(DEBUG, "Receiving %u packets from Rx queue at %p",
1432 		   count, rx_q);
1433 
1434 	count = 0;
1435 	for (i = 0; i < n; i++) {
1436 		/* prefetch next entry while processing current one */
1437 		if (i + 1 < n) {
1438 			pkt_buf = avp_dev_translate_buffer(avp,
1439 							   avp_bufs[i + 1]);
1440 			rte_prefetch0(pkt_buf);
1441 		}
1442 		buf = avp_bufs[i];
1443 
1444 		/* Peek into the first buffer to determine the total length */
1445 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1446 		buf_len = pkt_buf->pkt_len;
1447 
1448 		/* Allocate enough mbufs to receive the entire packet */
1449 		required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1450 		if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1451 			rxq->dev_data->rx_mbuf_alloc_failed++;
1452 			continue;
1453 		}
1454 
1455 		/* Copy the data from the buffers to our mbufs */
1456 		m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1457 
1458 		/* finalize mbuf */
1459 		m->port = port_id;
1460 
1461 		if (_avp_mac_filter(avp, m) != 0) {
1462 			/* silently discard packets not destined to our MAC */
1463 			rte_pktmbuf_free(m);
1464 			continue;
1465 		}
1466 
1467 		/* return new mbuf to caller */
1468 		rx_pkts[count++] = m;
1469 		rxq->bytes += buf_len;
1470 	}
1471 
1472 	rxq->packets += count;
1473 
1474 	/* return the buffers to the free queue */
1475 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1476 
1477 	return count;
1478 }
1479 
1480 
1481 static uint16_t
1482 avp_recv_pkts(void *rx_queue,
1483 	      struct rte_mbuf **rx_pkts,
1484 	      uint16_t nb_pkts)
1485 {
1486 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1487 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1488 	struct avp_dev *avp = rxq->avp;
1489 	struct rte_avp_desc *pkt_buf;
1490 	struct rte_avp_fifo *free_q;
1491 	struct rte_avp_fifo *rx_q;
1492 	unsigned int count, avail, n;
1493 	unsigned int pkt_len;
1494 	struct rte_mbuf *m;
1495 	char *pkt_data;
1496 	unsigned int i;
1497 
1498 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1499 		/* VM live migration in progress */
1500 		return 0;
1501 	}
1502 
1503 	rx_q = avp->rx_q[rxq->queue_id];
1504 	free_q = avp->free_q[rxq->queue_id];
1505 
1506 	/* setup next queue to service */
1507 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1508 		(rxq->queue_id + 1) : rxq->queue_base;
1509 
1510 	/* determine how many slots are available in the free queue */
1511 	count = avp_fifo_free_count(free_q);
1512 
1513 	/* determine how many packets are available in the rx queue */
1514 	avail = avp_fifo_count(rx_q);
1515 
1516 	/* determine how many packets can be received */
1517 	count = RTE_MIN(count, avail);
1518 	count = RTE_MIN(count, nb_pkts);
1519 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1520 
1521 	if (unlikely(count == 0)) {
1522 		/* no free buffers, or no buffers on the rx queue */
1523 		return 0;
1524 	}
1525 
1526 	/* retrieve pending packets */
1527 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1528 	PMD_RX_LOG_LINE(DEBUG, "Receiving %u packets from Rx queue at %p",
1529 		   count, rx_q);
1530 
1531 	count = 0;
1532 	for (i = 0; i < n; i++) {
1533 		/* prefetch next entry while processing current one */
1534 		if (i < n - 1) {
1535 			pkt_buf = avp_dev_translate_buffer(avp,
1536 							   avp_bufs[i + 1]);
1537 			rte_prefetch0(pkt_buf);
1538 		}
1539 
1540 		/* Adjust host pointers for guest addressing */
1541 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1542 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1543 		pkt_len = pkt_buf->pkt_len;
1544 
1545 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1546 			     (pkt_buf->nb_segs > 1))) {
1547 			/*
1548 			 * application should be using the scattered receive
1549 			 * function
1550 			 */
1551 			rxq->errors++;
1552 			continue;
1553 		}
1554 
1555 		/* process each packet to be transmitted */
1556 		m = rte_pktmbuf_alloc(avp->pool);
1557 		if (unlikely(m == NULL)) {
1558 			rxq->dev_data->rx_mbuf_alloc_failed++;
1559 			continue;
1560 		}
1561 
1562 		/* copy data out of the host buffer to our buffer */
1563 		m->data_off = RTE_PKTMBUF_HEADROOM;
1564 		rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1565 
1566 		/* initialize the local mbuf */
1567 		rte_pktmbuf_data_len(m) = pkt_len;
1568 		rte_pktmbuf_pkt_len(m) = pkt_len;
1569 		m->port = avp->port_id;
1570 
1571 		if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1572 			m->ol_flags = RTE_MBUF_F_RX_VLAN;
1573 			m->vlan_tci = pkt_buf->vlan_tci;
1574 		}
1575 
1576 		if (_avp_mac_filter(avp, m) != 0) {
1577 			/* silently discard packets not destined to our MAC */
1578 			rte_pktmbuf_free(m);
1579 			continue;
1580 		}
1581 
1582 		/* return new mbuf to caller */
1583 		rx_pkts[count++] = m;
1584 		rxq->bytes += pkt_len;
1585 	}
1586 
1587 	rxq->packets += count;
1588 
1589 	/* return the buffers to the free queue */
1590 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1591 
1592 	return count;
1593 }
1594 
1595 /*
1596  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1597  * there are sufficient destination buffers to contain the entire source
1598  * packet.
1599  */
1600 static inline uint16_t
1601 avp_dev_copy_to_buffers(struct avp_dev *avp,
1602 			struct rte_mbuf *mbuf,
1603 			struct rte_avp_desc **buffers,
1604 			unsigned int count)
1605 {
1606 	struct rte_avp_desc *previous_buf = NULL;
1607 	struct rte_avp_desc *first_buf = NULL;
1608 	struct rte_avp_desc *pkt_buf;
1609 	struct rte_avp_desc *buf;
1610 	size_t total_length;
1611 	struct rte_mbuf *m;
1612 	size_t copy_length;
1613 	size_t src_offset;
1614 	char *pkt_data;
1615 	unsigned int i;
1616 
1617 	__rte_mbuf_sanity_check(mbuf, 1);
1618 
1619 	m = mbuf;
1620 	src_offset = 0;
1621 	total_length = rte_pktmbuf_pkt_len(m);
1622 	for (i = 0; (i < count) && (m != NULL); i++) {
1623 		/* fill each destination buffer */
1624 		buf = buffers[i];
1625 
1626 		if (i < count - 1) {
1627 			/* prefetch next entry while processing this one */
1628 			pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1629 			rte_prefetch0(pkt_buf);
1630 		}
1631 
1632 		/* Adjust pointers for guest addressing */
1633 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1634 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1635 
1636 		/* setup the buffer chain */
1637 		if (previous_buf != NULL)
1638 			previous_buf->next = buf;
1639 		else
1640 			first_buf = pkt_buf;
1641 
1642 		previous_buf = pkt_buf;
1643 
1644 		do {
1645 			/*
1646 			 * copy as many source mbuf segments as will fit in the
1647 			 * destination buffer.
1648 			 */
1649 			copy_length = RTE_MIN((avp->host_mbuf_size -
1650 					       pkt_buf->data_len),
1651 					      (rte_pktmbuf_data_len(m) -
1652 					       src_offset));
1653 			rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1654 				   RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1655 					       src_offset),
1656 				   copy_length);
1657 			pkt_buf->data_len += copy_length;
1658 			src_offset += copy_length;
1659 
1660 			if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1661 				/* need a new source buffer */
1662 				m = m->next;
1663 				src_offset = 0;
1664 			}
1665 
1666 			if (unlikely(pkt_buf->data_len ==
1667 				     avp->host_mbuf_size)) {
1668 				/* need a new destination buffer */
1669 				break;
1670 			}
1671 
1672 		} while (m != NULL);
1673 	}
1674 
1675 	first_buf->nb_segs = count;
1676 	first_buf->pkt_len = total_length;
1677 
1678 	if (mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) {
1679 		first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1680 		first_buf->vlan_tci = mbuf->vlan_tci;
1681 	}
1682 
1683 	avp_dev_buffer_sanity_check(avp, buffers[0]);
1684 
1685 	return total_length;
1686 }
1687 
1688 
1689 static uint16_t
1690 avp_xmit_scattered_pkts(void *tx_queue,
1691 			struct rte_mbuf **tx_pkts,
1692 			uint16_t nb_pkts)
1693 {
1694 	struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1695 				       RTE_AVP_MAX_MBUF_SEGMENTS)] = {};
1696 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1697 	struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1698 	struct avp_dev *avp = txq->avp;
1699 	struct rte_avp_fifo *alloc_q;
1700 	struct rte_avp_fifo *tx_q;
1701 	unsigned int count, avail, n;
1702 	unsigned int orig_nb_pkts;
1703 	struct rte_mbuf *m;
1704 	unsigned int required;
1705 	unsigned int segments;
1706 	unsigned int tx_bytes;
1707 	unsigned int i;
1708 
1709 	orig_nb_pkts = nb_pkts;
1710 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1711 		/* VM live migration in progress */
1712 		/* TODO ... buffer for X packets then drop? */
1713 		txq->errors += nb_pkts;
1714 		return 0;
1715 	}
1716 
1717 	tx_q = avp->tx_q[txq->queue_id];
1718 	alloc_q = avp->alloc_q[txq->queue_id];
1719 
1720 	/* limit the number of transmitted packets to the max burst size */
1721 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1722 		nb_pkts = AVP_MAX_TX_BURST;
1723 
1724 	/* determine how many buffers are available to copy into */
1725 	avail = avp_fifo_count(alloc_q);
1726 	if (unlikely(avail > (AVP_MAX_TX_BURST *
1727 			      RTE_AVP_MAX_MBUF_SEGMENTS)))
1728 		avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1729 
1730 	/* determine how many slots are available in the transmit queue */
1731 	count = avp_fifo_free_count(tx_q);
1732 
1733 	/* determine how many packets can be sent */
1734 	nb_pkts = RTE_MIN(count, nb_pkts);
1735 
1736 	/* determine how many packets will fit in the available buffers */
1737 	count = 0;
1738 	segments = 0;
1739 	for (i = 0; i < nb_pkts; i++) {
1740 		m = tx_pkts[i];
1741 		if (likely(i < (unsigned int)nb_pkts - 1)) {
1742 			/* prefetch next entry while processing this one */
1743 			rte_prefetch0(tx_pkts[i + 1]);
1744 		}
1745 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1746 			avp->host_mbuf_size;
1747 
1748 		if (unlikely((required == 0) ||
1749 			     (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1750 			break;
1751 		else if (unlikely(required + segments > avail))
1752 			break;
1753 		segments += required;
1754 		count++;
1755 	}
1756 	nb_pkts = count;
1757 
1758 	if (unlikely(nb_pkts == 0)) {
1759 		/* no available buffers, or no space on the tx queue */
1760 		txq->errors += orig_nb_pkts;
1761 		return 0;
1762 	}
1763 
1764 	PMD_TX_LOG_LINE(DEBUG, "Sending %u packets on Tx queue at %p",
1765 		   nb_pkts, tx_q);
1766 
1767 	/* retrieve sufficient send buffers */
1768 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1769 	if (unlikely(n != segments)) {
1770 		PMD_TX_LOG_LINE(DEBUG, "Failed to allocate buffers n=%u, segments=%u, orig=%u",
1771 			   n, segments, orig_nb_pkts);
1772 		txq->errors += orig_nb_pkts;
1773 		return 0;
1774 	}
1775 
1776 	tx_bytes = 0;
1777 	count = 0;
1778 	for (i = 0; i < nb_pkts; i++) {
1779 		/* process each packet to be transmitted */
1780 		m = tx_pkts[i];
1781 
1782 		/* determine how many buffers are required for this packet */
1783 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1784 			avp->host_mbuf_size;
1785 
1786 		tx_bytes += avp_dev_copy_to_buffers(avp, m,
1787 						    &avp_bufs[count], required);
1788 		tx_bufs[i] = avp_bufs[count];
1789 		count += required;
1790 
1791 		/* free the original mbuf */
1792 		rte_pktmbuf_free(m);
1793 	}
1794 
1795 	txq->packets += nb_pkts;
1796 	txq->bytes += tx_bytes;
1797 
1798 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1799 	for (i = 0; i < nb_pkts; i++)
1800 		avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1801 #endif
1802 
1803 	/* send the packets */
1804 	n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1805 	if (unlikely(n != orig_nb_pkts))
1806 		txq->errors += (orig_nb_pkts - n);
1807 
1808 	return n;
1809 }
1810 
1811 
1812 static uint16_t
1813 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1814 {
1815 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1816 	struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1817 	struct avp_dev *avp = txq->avp;
1818 	struct rte_avp_desc *pkt_buf;
1819 	struct rte_avp_fifo *alloc_q;
1820 	struct rte_avp_fifo *tx_q;
1821 	unsigned int count, avail, n;
1822 	struct rte_mbuf *m;
1823 	unsigned int pkt_len;
1824 	unsigned int tx_bytes;
1825 	char *pkt_data;
1826 	unsigned int i;
1827 
1828 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1829 		/* VM live migration in progress */
1830 		/* TODO ... buffer for X packets then drop?! */
1831 		txq->errors++;
1832 		return 0;
1833 	}
1834 
1835 	tx_q = avp->tx_q[txq->queue_id];
1836 	alloc_q = avp->alloc_q[txq->queue_id];
1837 
1838 	/* limit the number of transmitted packets to the max burst size */
1839 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1840 		nb_pkts = AVP_MAX_TX_BURST;
1841 
1842 	/* determine how many buffers are available to copy into */
1843 	avail = avp_fifo_count(alloc_q);
1844 
1845 	/* determine how many slots are available in the transmit queue */
1846 	count = avp_fifo_free_count(tx_q);
1847 
1848 	/* determine how many packets can be sent */
1849 	count = RTE_MIN(count, avail);
1850 	count = RTE_MIN(count, nb_pkts);
1851 
1852 	if (unlikely(count == 0)) {
1853 		/* no available buffers, or no space on the tx queue */
1854 		txq->errors += nb_pkts;
1855 		return 0;
1856 	}
1857 
1858 	PMD_TX_LOG_LINE(DEBUG, "Sending %u packets on Tx queue at %p",
1859 		   count, tx_q);
1860 
1861 	/* retrieve sufficient send buffers */
1862 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1863 	if (unlikely(n != count)) {
1864 		txq->errors++;
1865 		return 0;
1866 	}
1867 
1868 	tx_bytes = 0;
1869 	for (i = 0; i < count; i++) {
1870 		/* prefetch next entry while processing the current one */
1871 		if (i < count - 1) {
1872 			pkt_buf = avp_dev_translate_buffer(avp,
1873 							   avp_bufs[i + 1]);
1874 			rte_prefetch0(pkt_buf);
1875 		}
1876 
1877 		/* process each packet to be transmitted */
1878 		m = tx_pkts[i];
1879 
1880 		/* Adjust pointers for guest addressing */
1881 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1882 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1883 		pkt_len = rte_pktmbuf_pkt_len(m);
1884 
1885 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1886 					 (pkt_len > avp->host_mbuf_size))) {
1887 			/*
1888 			 * application should be using the scattered transmit
1889 			 * function; send it truncated to avoid the performance
1890 			 * hit of having to manage returning the already
1891 			 * allocated buffer to the free list.  This should not
1892 			 * happen since the application should have not send
1893 			 * packages larger than its MTU and it should be
1894 			 * policing its own packet sizes.
1895 			 */
1896 			txq->errors++;
1897 			pkt_len = RTE_MIN(avp->guest_mbuf_size,
1898 					  avp->host_mbuf_size);
1899 		}
1900 
1901 		/* copy data out of our mbuf and into the AVP buffer */
1902 		rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1903 		pkt_buf->pkt_len = pkt_len;
1904 		pkt_buf->data_len = pkt_len;
1905 		pkt_buf->nb_segs = 1;
1906 		pkt_buf->next = NULL;
1907 
1908 		if (m->ol_flags & RTE_MBUF_F_TX_VLAN) {
1909 			pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1910 			pkt_buf->vlan_tci = m->vlan_tci;
1911 		}
1912 
1913 		tx_bytes += pkt_len;
1914 
1915 		/* free the original mbuf */
1916 		rte_pktmbuf_free(m);
1917 	}
1918 
1919 	txq->packets += count;
1920 	txq->bytes += tx_bytes;
1921 
1922 	/* send the packets */
1923 	n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1924 
1925 	return n;
1926 }
1927 
1928 static void
1929 avp_dev_rx_queue_release(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
1930 {
1931 	if (eth_dev->data->rx_queues[rx_queue_id] != NULL) {
1932 		rte_free(eth_dev->data->rx_queues[rx_queue_id]);
1933 		eth_dev->data->rx_queues[rx_queue_id] = NULL;
1934 	}
1935 }
1936 
1937 static void
1938 avp_dev_rx_queue_release_all(struct rte_eth_dev *eth_dev)
1939 {
1940 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1941 	struct rte_eth_dev_data *data = avp->dev_data;
1942 	unsigned int i;
1943 
1944 	for (i = 0; i < avp->num_rx_queues; i++) {
1945 		if (data->rx_queues[i]) {
1946 			rte_free(data->rx_queues[i]);
1947 			data->rx_queues[i] = NULL;
1948 		}
1949 	}
1950 }
1951 
1952 static void
1953 avp_dev_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id)
1954 {
1955 	if (eth_dev->data->tx_queues[tx_queue_id] != NULL) {
1956 		rte_free(eth_dev->data->tx_queues[tx_queue_id]);
1957 		eth_dev->data->tx_queues[tx_queue_id] = NULL;
1958 	}
1959 }
1960 
1961 static void
1962 avp_dev_tx_queue_release_all(struct rte_eth_dev *eth_dev)
1963 {
1964 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1965 	struct rte_eth_dev_data *data = avp->dev_data;
1966 	unsigned int i;
1967 
1968 	for (i = 0; i < avp->num_tx_queues; i++) {
1969 		if (data->tx_queues[i]) {
1970 			rte_free(data->tx_queues[i]);
1971 			data->tx_queues[i] = NULL;
1972 		}
1973 	}
1974 }
1975 
1976 static int
1977 avp_dev_configure(struct rte_eth_dev *eth_dev)
1978 {
1979 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1980 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1981 	struct rte_avp_device_info *host_info;
1982 	struct rte_avp_device_config config;
1983 	int mask = 0;
1984 	void *addr;
1985 	int ret;
1986 
1987 	rte_spinlock_lock(&avp->lock);
1988 	if (avp->flags & AVP_F_DETACHED) {
1989 		PMD_DRV_LOG_LINE(ERR, "Operation not supported during VM live migration");
1990 		ret = -ENOTSUP;
1991 		goto unlock;
1992 	}
1993 
1994 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
1995 	host_info = (struct rte_avp_device_info *)addr;
1996 
1997 	/* Setup required number of queues */
1998 	_avp_set_queue_counts(eth_dev);
1999 
2000 	mask = (RTE_ETH_VLAN_STRIP_MASK |
2001 		RTE_ETH_VLAN_FILTER_MASK |
2002 		RTE_ETH_VLAN_EXTEND_MASK);
2003 	ret = avp_vlan_offload_set(eth_dev, mask);
2004 	if (ret < 0) {
2005 		PMD_DRV_LOG_LINE(ERR, "VLAN offload set failed by host, ret=%d",
2006 			    ret);
2007 		goto unlock;
2008 	}
2009 
2010 	/* update device config */
2011 	memset(&config, 0, sizeof(config));
2012 	config.device_id = host_info->device_id;
2013 	config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2014 	config.driver_version = AVP_DPDK_DRIVER_VERSION;
2015 	config.features = avp->features;
2016 	config.num_tx_queues = avp->num_tx_queues;
2017 	config.num_rx_queues = avp->num_rx_queues;
2018 
2019 	ret = avp_dev_ctrl_set_config(eth_dev, &config);
2020 	if (ret < 0) {
2021 		PMD_DRV_LOG_LINE(ERR, "Config request failed by host, ret=%d",
2022 			    ret);
2023 		goto unlock;
2024 	}
2025 
2026 	avp->flags |= AVP_F_CONFIGURED;
2027 	ret = 0;
2028 
2029 unlock:
2030 	rte_spinlock_unlock(&avp->lock);
2031 	return ret;
2032 }
2033 
2034 static int
2035 avp_dev_start(struct rte_eth_dev *eth_dev)
2036 {
2037 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2038 	uint16_t i;
2039 	int ret;
2040 
2041 	rte_spinlock_lock(&avp->lock);
2042 	if (avp->flags & AVP_F_DETACHED) {
2043 		PMD_DRV_LOG_LINE(ERR, "Operation not supported during VM live migration");
2044 		ret = -ENOTSUP;
2045 		goto unlock;
2046 	}
2047 
2048 	/* update link state */
2049 	ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2050 	if (ret < 0) {
2051 		PMD_DRV_LOG_LINE(ERR, "Link state change failed by host, ret=%d",
2052 			    ret);
2053 		goto unlock;
2054 	}
2055 
2056 	/* remember current link state */
2057 	avp->flags |= AVP_F_LINKUP;
2058 
2059 	for (i = 0; i < avp->num_rx_queues; i++)
2060 		eth_dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
2061 	for (i = 0; i < avp->num_tx_queues; i++)
2062 		eth_dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
2063 
2064 	ret = 0;
2065 
2066 unlock:
2067 	rte_spinlock_unlock(&avp->lock);
2068 	return ret;
2069 }
2070 
2071 static int
2072 avp_dev_stop(struct rte_eth_dev *eth_dev)
2073 {
2074 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2075 	uint16_t i;
2076 	int ret;
2077 
2078 	rte_spinlock_lock(&avp->lock);
2079 	if (avp->flags & AVP_F_DETACHED) {
2080 		PMD_DRV_LOG_LINE(ERR, "Operation not supported during VM live migration");
2081 		ret = -ENOTSUP;
2082 		goto unlock;
2083 	}
2084 
2085 	/* remember current link state */
2086 	avp->flags &= ~AVP_F_LINKUP;
2087 
2088 	/* update link state */
2089 	ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2090 	if (ret < 0) {
2091 		PMD_DRV_LOG_LINE(ERR, "Link state change failed by host, ret=%d",
2092 			    ret);
2093 	}
2094 
2095 	for (i = 0; i < avp->num_rx_queues; i++)
2096 		eth_dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
2097 	for (i = 0; i < avp->num_tx_queues; i++)
2098 		eth_dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
2099 
2100 unlock:
2101 	rte_spinlock_unlock(&avp->lock);
2102 	return ret;
2103 }
2104 
2105 static int
2106 avp_dev_close(struct rte_eth_dev *eth_dev)
2107 {
2108 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2109 	int ret;
2110 
2111 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2112 		return 0;
2113 
2114 	rte_spinlock_lock(&avp->lock);
2115 	if (avp->flags & AVP_F_DETACHED) {
2116 		PMD_DRV_LOG_LINE(ERR, "Operation not supported during VM live migration");
2117 		goto unlock;
2118 	}
2119 
2120 	/* remember current link state */
2121 	avp->flags &= ~AVP_F_LINKUP;
2122 	avp->flags &= ~AVP_F_CONFIGURED;
2123 
2124 	ret = avp_dev_disable_interrupts(eth_dev);
2125 	if (ret < 0) {
2126 		PMD_DRV_LOG_LINE(ERR, "Failed to disable interrupts");
2127 		/* continue */
2128 	}
2129 
2130 	/* update device state */
2131 	ret = avp_dev_ctrl_shutdown(eth_dev);
2132 	if (ret < 0) {
2133 		PMD_DRV_LOG_LINE(ERR, "Device shutdown failed by host, ret=%d",
2134 			    ret);
2135 		/* continue */
2136 	}
2137 
2138 	/* release dynamic storage for rx/tx queues */
2139 	avp_dev_rx_queue_release_all(eth_dev);
2140 	avp_dev_tx_queue_release_all(eth_dev);
2141 
2142 unlock:
2143 	rte_spinlock_unlock(&avp->lock);
2144 	return 0;
2145 }
2146 
2147 static int
2148 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2149 					__rte_unused int wait_to_complete)
2150 {
2151 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2152 	struct rte_eth_link *link = &eth_dev->data->dev_link;
2153 
2154 	link->link_speed = RTE_ETH_SPEED_NUM_10G;
2155 	link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
2156 	link->link_status = !!(avp->flags & AVP_F_LINKUP);
2157 
2158 	return -1;
2159 }
2160 
2161 static int
2162 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2163 {
2164 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2165 
2166 	rte_spinlock_lock(&avp->lock);
2167 	if ((avp->flags & AVP_F_PROMISC) == 0) {
2168 		avp->flags |= AVP_F_PROMISC;
2169 		PMD_DRV_LOG_LINE(DEBUG, "Promiscuous mode enabled on %u",
2170 			    eth_dev->data->port_id);
2171 	}
2172 	rte_spinlock_unlock(&avp->lock);
2173 
2174 	return 0;
2175 }
2176 
2177 static int
2178 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2179 {
2180 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2181 
2182 	rte_spinlock_lock(&avp->lock);
2183 	if ((avp->flags & AVP_F_PROMISC) != 0) {
2184 		avp->flags &= ~AVP_F_PROMISC;
2185 		PMD_DRV_LOG_LINE(DEBUG, "Promiscuous mode disabled on %u",
2186 			    eth_dev->data->port_id);
2187 	}
2188 	rte_spinlock_unlock(&avp->lock);
2189 
2190 	return 0;
2191 }
2192 
2193 static int
2194 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2195 		 struct rte_eth_dev_info *dev_info)
2196 {
2197 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2198 
2199 	dev_info->max_rx_queues = avp->max_rx_queues;
2200 	dev_info->max_tx_queues = avp->max_tx_queues;
2201 	dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2202 	dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2203 	dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2204 	if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2205 		dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
2206 		dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
2207 	}
2208 
2209 	return 0;
2210 }
2211 
2212 static int
2213 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2214 {
2215 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2216 	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
2217 	uint64_t offloads = dev_conf->rxmode.offloads;
2218 
2219 	if (mask & RTE_ETH_VLAN_STRIP_MASK) {
2220 		if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2221 			if (offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
2222 				avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2223 			else
2224 				avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2225 		} else {
2226 			PMD_DRV_LOG_LINE(ERR, "VLAN strip offload not supported");
2227 		}
2228 	}
2229 
2230 	if (mask & RTE_ETH_VLAN_FILTER_MASK) {
2231 		if (offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER)
2232 			PMD_DRV_LOG_LINE(ERR, "VLAN filter offload not supported");
2233 	}
2234 
2235 	if (mask & RTE_ETH_VLAN_EXTEND_MASK) {
2236 		if (offloads & RTE_ETH_RX_OFFLOAD_VLAN_EXTEND)
2237 			PMD_DRV_LOG_LINE(ERR, "VLAN extend offload not supported");
2238 	}
2239 
2240 	return 0;
2241 }
2242 
2243 static int
2244 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2245 {
2246 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2247 	unsigned int i;
2248 
2249 	for (i = 0; i < avp->num_rx_queues; i++) {
2250 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2251 
2252 		if (rxq) {
2253 			stats->ipackets += rxq->packets;
2254 			stats->ibytes += rxq->bytes;
2255 			stats->ierrors += rxq->errors;
2256 
2257 			stats->q_ipackets[i] += rxq->packets;
2258 			stats->q_ibytes[i] += rxq->bytes;
2259 			stats->q_errors[i] += rxq->errors;
2260 		}
2261 	}
2262 
2263 	for (i = 0; i < avp->num_tx_queues; i++) {
2264 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2265 
2266 		if (txq) {
2267 			stats->opackets += txq->packets;
2268 			stats->obytes += txq->bytes;
2269 			stats->oerrors += txq->errors;
2270 
2271 			stats->q_opackets[i] += txq->packets;
2272 			stats->q_obytes[i] += txq->bytes;
2273 		}
2274 	}
2275 
2276 	return 0;
2277 }
2278 
2279 static int
2280 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2281 {
2282 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2283 	unsigned int i;
2284 
2285 	for (i = 0; i < avp->num_rx_queues; i++) {
2286 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2287 
2288 		if (rxq) {
2289 			rxq->bytes = 0;
2290 			rxq->packets = 0;
2291 			rxq->errors = 0;
2292 		}
2293 	}
2294 
2295 	for (i = 0; i < avp->num_tx_queues; i++) {
2296 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2297 
2298 		if (txq) {
2299 			txq->bytes = 0;
2300 			txq->packets = 0;
2301 			txq->errors = 0;
2302 		}
2303 	}
2304 
2305 	return 0;
2306 }
2307 
/*
 * Driver registration: register rte_avp_pmd as a PCI driver under the name
 * net_avp, associate the pci_id_avp_map device ID table with that name, and
 * register the avp_logtype_driver log type ("driver" suffix, NOTICE level).
 */
RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);
RTE_LOG_REGISTER_SUFFIX(avp_logtype_driver, driver, NOTICE);
2311