xref: /dpdk/drivers/net/avp/avp_ethdev.c (revision 1315219a22a32aa438de3483eaacc104890ee71f)
1 /*
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2013-2017, Wind River Systems, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * 2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * 3) Neither the name of Wind River Systems nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <stdint.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38 
39 #include <rte_ethdev.h>
40 #include <rte_memcpy.h>
41 #include <rte_string_fns.h>
42 #include <rte_memzone.h>
43 #include <rte_malloc.h>
44 #include <rte_atomic.h>
45 #include <rte_branch_prediction.h>
46 #include <rte_pci.h>
47 #include <rte_ether.h>
48 #include <rte_common.h>
49 #include <rte_cycles.h>
50 #include <rte_spinlock.h>
51 #include <rte_byteorder.h>
52 #include <rte_dev.h>
53 #include <rte_memory.h>
54 #include <rte_eal.h>
55 #include <rte_io.h>
56 
57 #include "rte_avp_common.h"
58 #include "rte_avp_fifo.h"
59 
60 #include "avp_logs.h"
61 
62 
63 static int avp_dev_create(struct rte_pci_device *pci_dev,
64 			  struct rte_eth_dev *eth_dev);
65 
66 static int avp_dev_configure(struct rte_eth_dev *dev);
67 static int avp_dev_start(struct rte_eth_dev *dev);
68 static void avp_dev_stop(struct rte_eth_dev *dev);
69 static void avp_dev_close(struct rte_eth_dev *dev);
70 static void avp_dev_info_get(struct rte_eth_dev *dev,
71 			     struct rte_eth_dev_info *dev_info);
72 static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
73 static int avp_dev_link_update(struct rte_eth_dev *dev,
74 			       __rte_unused int wait_to_complete);
75 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
76 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
77 
78 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
79 				  uint16_t rx_queue_id,
80 				  uint16_t nb_rx_desc,
81 				  unsigned int socket_id,
82 				  const struct rte_eth_rxconf *rx_conf,
83 				  struct rte_mempool *pool);
84 
85 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
86 				  uint16_t tx_queue_id,
87 				  uint16_t nb_tx_desc,
88 				  unsigned int socket_id,
89 				  const struct rte_eth_txconf *tx_conf);
90 
91 static uint16_t avp_recv_scattered_pkts(void *rx_queue,
92 					struct rte_mbuf **rx_pkts,
93 					uint16_t nb_pkts);
94 
95 static uint16_t avp_recv_pkts(void *rx_queue,
96 			      struct rte_mbuf **rx_pkts,
97 			      uint16_t nb_pkts);
98 
99 static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
100 					struct rte_mbuf **tx_pkts,
101 					uint16_t nb_pkts);
102 
103 static uint16_t avp_xmit_pkts(void *tx_queue,
104 			      struct rte_mbuf **tx_pkts,
105 			      uint16_t nb_pkts);
106 
107 static void avp_dev_rx_queue_release(void *rxq);
108 static void avp_dev_tx_queue_release(void *txq);
109 
110 static void avp_dev_stats_get(struct rte_eth_dev *dev,
111 			      struct rte_eth_stats *stats);
112 static void avp_dev_stats_reset(struct rte_eth_dev *dev);
113 
114 
115 #define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)
116 
117 
118 #define AVP_MAX_RX_BURST 64
119 #define AVP_MAX_TX_BURST 64
120 #define AVP_MAX_MAC_ADDRS 1
121 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN
122 
123 
124 /*
125  * Defines the number of microseconds to wait before checking the response
126  * queue for completion.
127  */
128 #define AVP_REQUEST_DELAY_USECS (5000)
129 
130 /*
131  * Defines the number of times to check the response queue for completion before
132  * declaring a timeout.
133  */
134 #define AVP_MAX_REQUEST_RETRY (100)
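/*
 * Together these give a worst-case wait of AVP_MAX_REQUEST_RETRY *
 * AVP_REQUEST_DELAY_USECS = 100 * 5000 us = 500 ms per request.
 */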
135 
136 /* Defines the current PCI driver version number */
137 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
138 
139 /*
140  * The set of PCI devices this driver supports
141  */
142 static const struct rte_pci_id pci_id_avp_map[] = {
143 	{ .vendor_id = RTE_AVP_PCI_VENDOR_ID,
144 	  .device_id = RTE_AVP_PCI_DEVICE_ID,
145 	  .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
146 	  .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
147 	  .class_id = RTE_CLASS_ANY_ID,
148 	},
149 
150 	{ .vendor_id = 0, /* sentinel */
151 	},
152 };
153 
154 /*
155  * dev_ops for avp, bare necessities for basic operation
156  */
157 static const struct eth_dev_ops avp_eth_dev_ops = {
158 	.dev_configure       = avp_dev_configure,
159 	.dev_start           = avp_dev_start,
160 	.dev_stop            = avp_dev_stop,
161 	.dev_close           = avp_dev_close,
162 	.dev_infos_get       = avp_dev_info_get,
163 	.vlan_offload_set    = avp_vlan_offload_set,
164 	.stats_get           = avp_dev_stats_get,
165 	.stats_reset         = avp_dev_stats_reset,
166 	.link_update         = avp_dev_link_update,
167 	.promiscuous_enable  = avp_dev_promiscuous_enable,
168 	.promiscuous_disable = avp_dev_promiscuous_disable,
169 	.rx_queue_setup      = avp_dev_rx_queue_setup,
170 	.rx_queue_release    = avp_dev_rx_queue_release,
171 	.tx_queue_setup      = avp_dev_tx_queue_setup,
172 	.tx_queue_release    = avp_dev_tx_queue_release,
173 };
174 
175 /**@{ AVP device flags */
176 #define AVP_F_PROMISC (1 << 1)
177 #define AVP_F_CONFIGURED (1 << 2)
178 #define AVP_F_LINKUP (1 << 3)
179 #define AVP_F_DETACHED (1 << 4)
180 /**@} */
181 
182 /* Ethernet device validation marker */
183 #define AVP_ETHDEV_MAGIC 0x92972862
184 
185 /*
186  * Defines the AVP device attributes which are attached to an RTE ethernet
187  * device
188  */
189 struct avp_dev {
190 	uint32_t magic; /**< Memory validation marker */
191 	uint64_t device_id; /**< Unique system identifier */
192 	struct ether_addr ethaddr; /**< Host specified MAC address */
193 	struct rte_eth_dev_data *dev_data;
194 	/**< Back pointer to ethernet device data */
195 	volatile uint32_t flags; /**< Device operational flags */
196 	uint8_t port_id; /**< Ethernet port identifier */
197 	struct rte_mempool *pool; /**< pkt mbuf mempool */
198 	unsigned int guest_mbuf_size; /**< local pool mbuf size */
199 	unsigned int host_mbuf_size; /**< host mbuf size */
200 	unsigned int max_rx_pkt_len; /**< maximum receive unit */
201 	uint32_t host_features; /**< Supported feature bitmap */
202 	uint32_t features; /**< Enabled feature bitmap */
203 	unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
204 	unsigned int max_tx_queues; /**< Maximum number of transmit queues */
205 	unsigned int num_rx_queues; /**< Negotiated number of receive queues */
206 	unsigned int max_rx_queues; /**< Maximum number of receive queues */
207 
208 	struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
209 	struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
210 	struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
211 	/**< Allocated mbufs queue */
212 	struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
213 	/**< To be freed mbufs queue */
214 
215 	/* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
216 	rte_spinlock_t lock;
217 
218 	/* For request & response */
219 	struct rte_avp_fifo *req_q; /**< Request queue */
220 	struct rte_avp_fifo *resp_q; /**< Response queue */
221 	void *host_sync_addr; /**< (host) Req/Resp Mem address */
222 	void *sync_addr; /**< Req/Resp Mem address */
223 	void *host_mbuf_addr; /**< (host) MBUF pool start address */
224 	void *mbuf_addr; /**< MBUF pool start address */
225 } __rte_cache_aligned;
226 
227 /* RTE ethernet private data */
228 struct avp_adapter {
229 	struct avp_dev avp;
230 } __rte_cache_aligned;
231 
232 
233 /* 32-bit MMIO register write */
234 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
235 
236 /* 32-bit MMIO register read */
237 #define AVP_READ32(_addr) rte_read32_relaxed((_addr))
238 
239 /* Macro to cast the ethernet device private data to an AVP object */
240 #define AVP_DEV_PRIVATE_TO_HW(adapter) \
241 	(&((struct avp_adapter *)adapter)->avp)
242 
243 /*
244  * Defines the structure of an AVP device queue for the purpose of handling the
245  * receive and transmit burst callback functions
246  */
247 struct avp_queue {
248 	struct rte_eth_dev_data *dev_data;
249 	/**< Backpointer to ethernet device data */
250 	struct avp_dev *avp; /**< Backpointer to AVP device */
251 	uint16_t queue_id;
252 	/**< Queue identifier used for indexing current queue */
253 	uint16_t queue_base;
254 	/**< Base queue identifier for queue servicing */
255 	uint16_t queue_limit;
256 	/**< Maximum queue identifier for queue servicing */
257 
258 	uint64_t packets;
259 	uint64_t bytes;
260 	uint64_t errors;
261 };
262 
263 /* send a request and wait for a response
264  *
265  * @warning must be called while holding the avp->lock spinlock.
266  */
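/*
 * The request is copied into the guest's view of the shared sync area and
 * the host-side address of that area is pushed onto req_q.  Completion is
 * detected when the host echoes the same address back on resp_q, at which
 * point the result is copied back out of the sync area into 'request'.
 */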
267 static int
268 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
269 {
270 	unsigned int retry = AVP_MAX_REQUEST_RETRY;
271 	void *resp_addr = NULL;
272 	unsigned int count;
273 	int ret;
274 
275 	PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
276 
277 	request->result = -ENOTSUP;
278 
279 	/* Discard any stale responses before starting a new request */
280 	while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
281 		PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
282 
283 	rte_memcpy(avp->sync_addr, request, sizeof(*request));
284 	count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
285 	if (count < 1) {
286 		PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
287 			    request->req_id);
288 		ret = -EBUSY;
289 		goto done;
290 	}
291 
292 	while (retry--) {
293 		/* wait for a response */
294 		usleep(AVP_REQUEST_DELAY_USECS);
295 
296 		count = avp_fifo_count(avp->resp_q);
297 		if (count >= 1) {
298 			/* response received */
299 			break;
300 		}
301 
302 		if ((count < 1) && (retry == 0)) {
303 			PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
304 				    request->req_id);
305 			ret = -ETIME;
306 			goto done;
307 		}
308 	}
309 
310 	/* retrieve the response */
311 	count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
312 	if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
313 		PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
314 			    count, resp_addr, avp->host_sync_addr);
315 		ret = -ENODATA;
316 		goto done;
317 	}
318 
319 	/* copy to user buffer */
320 	rte_memcpy(request, avp->sync_addr, sizeof(*request));
321 	ret = 0;
322 
323 	PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
324 		    request->result, request->req_id);
325 
326 done:
327 	return ret;
328 }
329 
330 static int
331 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
332 {
333 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
334 	struct rte_avp_request request;
335 	int ret;
336 
337 	/* setup a link state change request */
338 	memset(&request, 0, sizeof(request));
339 	request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
340 	request.if_up = state;
341 
342 	ret = avp_dev_process_request(avp, &request);
343 
344 	return ret == 0 ? request.result : ret;
345 }
346 
347 static int
348 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
349 			struct rte_avp_device_config *config)
350 {
351 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
352 	struct rte_avp_request request;
353 	int ret;
354 
355 	/* setup a configure request */
356 	memset(&request, 0, sizeof(request));
357 	request.req_id = RTE_AVP_REQ_CFG_DEVICE;
358 	memcpy(&request.config, config, sizeof(request.config));
359 
360 	ret = avp_dev_process_request(avp, &request);
361 
362 	return ret == 0 ? request.result : ret;
363 }
364 
365 static int
366 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
367 {
368 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
369 	struct rte_avp_request request;
370 	int ret;
371 
372 	/* setup a shutdown request */
373 	memset(&request, 0, sizeof(request));
374 	request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
375 
376 	ret = avp_dev_process_request(avp, &request);
377 
378 	return ret == 0 ? request.result : ret;
379 }
380 
381 /* translate from host mbuf virtual address to guest virtual address */
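/*
 * Equivalent to: guest_va = avp->mbuf_addr + (host_va - avp->host_mbuf_addr).
 * This relies on the host and guest views of the mbuf pool being laid out
 * identically and differing only by a constant base offset.
 */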
382 static inline void *
383 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
384 {
385 	return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
386 				       (uintptr_t)avp->host_mbuf_addr),
387 			   (uintptr_t)avp->mbuf_addr);
388 }
389 
390 /* translate from host physical address to guest virtual address */
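/*
 * The MEMMAP BAR lists the host physical segments that back the MEMORY BAR.
 * The segments are walked while accumulating their lengths so that a matching
 * physical address can be converted into an offset from the start of the
 * guest's MEMORY BAR mapping.
 */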
391 static void *
392 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
393 			  phys_addr_t host_phys_addr)
394 {
395 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
396 	struct rte_mem_resource *resource;
397 	struct rte_avp_memmap_info *info;
398 	struct rte_avp_memmap *map;
399 	off_t offset;
400 	void *addr;
401 	unsigned int i;
402 
403 	addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
404 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
405 	info = (struct rte_avp_memmap_info *)resource->addr;
406 
407 	offset = 0;
408 	for (i = 0; i < info->nb_maps; i++) {
409 		/* search all segments looking for a matching address */
410 		map = &info->maps[i];
411 
412 		if ((host_phys_addr >= map->phys_addr) &&
413 			(host_phys_addr < (map->phys_addr + map->length))) {
414 			/* address is within this segment */
415 			offset += (host_phys_addr - map->phys_addr);
416 			addr = RTE_PTR_ADD(addr, offset);
417 
418 			PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
419 				    host_phys_addr, addr);
420 
421 			return addr;
422 		}
423 		offset += map->length;
424 	}
425 
426 	return NULL;
427 }
428 
429 /* verify that the incoming device version is compatible with our version */
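/*
 * Only the release/major portion of each version is compared; a difference
 * in minor version alone is treated as compatible.
 */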
430 static int
431 avp_dev_version_check(uint32_t version)
432 {
433 	uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
434 	uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
435 
436 	if (device <= driver) {
437 		/* the host driver version is less than or equal to ours */
438 		return 0;
439 	}
440 
441 	return 1;
442 }
443 
444 /* verify that memory regions have expected version and validation markers */
445 static int
446 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
447 {
448 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
449 	struct rte_avp_memmap_info *memmap;
450 	struct rte_avp_device_info *info;
451 	struct rte_mem_resource *resource;
452 	unsigned int i;
453 
454 	/* Dump resource info for debug */
455 	for (i = 0; i < PCI_MAX_RESOURCE; i++) {
456 		resource = &pci_dev->mem_resource[i];
457 		if ((resource->phys_addr == 0) || (resource->len == 0))
458 			continue;
459 
460 		PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
461 			    i, resource->phys_addr,
462 			    resource->len, resource->addr);
463 
464 		switch (i) {
465 		case RTE_AVP_PCI_MEMMAP_BAR:
466 			memmap = (struct rte_avp_memmap_info *)resource->addr;
467 			if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
468 			    (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
469 				PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
470 					    memmap->magic, memmap->version);
471 				return -EINVAL;
472 			}
473 			break;
474 
475 		case RTE_AVP_PCI_DEVICE_BAR:
476 			info = (struct rte_avp_device_info *)resource->addr;
477 			if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
478 			    avp_dev_version_check(info->version)) {
479 				PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
480 					    info->magic, info->version,
481 					    AVP_DPDK_DRIVER_VERSION);
482 				return -EINVAL;
483 			}
484 			break;
485 
486 		case RTE_AVP_PCI_MEMORY_BAR:
487 		case RTE_AVP_PCI_MMIO_BAR:
488 			if (resource->addr == NULL) {
489 				PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
490 					    i);
491 				return -EINVAL;
492 			}
493 			break;
494 
495 		case RTE_AVP_PCI_MSIX_BAR:
496 		default:
497 			/* no validation required */
498 			break;
499 		}
500 	}
501 
502 	return 0;
503 }
504 
505 static int
506 avp_dev_detach(struct rte_eth_dev *eth_dev)
507 {
508 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
509 	int ret;
510 
511 	PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
512 		    eth_dev->data->port_id, avp->device_id);
513 
514 	rte_spinlock_lock(&avp->lock);
515 
516 	if (avp->flags & AVP_F_DETACHED) {
517 		PMD_DRV_LOG(NOTICE, "port %u already detached\n",
518 			    eth_dev->data->port_id);
519 		ret = 0;
520 		goto unlock;
521 	}
522 
523 	/* shutdown the device first so the host stops sending us packets. */
524 	ret = avp_dev_ctrl_shutdown(eth_dev);
525 	if (ret < 0) {
526 		PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
527 			    ret);
528 		avp->flags &= ~AVP_F_DETACHED;
529 		goto unlock;
530 	}
531 
532 	avp->flags |= AVP_F_DETACHED;
533 	rte_wmb();
534 
535 	/* wait for queues to acknowledge the presence of the detach flag */
536 	rte_delay_ms(1);
537 
538 	ret = 0;
539 
540 unlock:
541 	rte_spinlock_unlock(&avp->lock);
542 	return ret;
543 }
544 
545 static void
546 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
547 {
548 	struct avp_dev *avp =
549 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
550 	struct avp_queue *rxq;
551 	uint16_t queue_count;
552 	uint16_t remainder;
553 
554 	rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
555 
556 	/*
557 	 * Must map all AVP fifos as evenly as possible between the configured
558 	 * device queues.  Each device queue will service a subset of the AVP
559 	 * fifos.  If the AVP fifos do not divide evenly across the device
560 	 * queues, the first device queues each service one extra AVP fifo.
561 	 */
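	/*
	 * For example, with 6 AVP fifos and 4 device queues: queue_count = 1
	 * and remainder = 2, so device queues 0 and 1 service fifos 0-1 and
	 * 2-3 respectively, while device queues 2 and 3 service fifos 4 and 5.
	 */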
562 	queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
563 	remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
564 	if (rx_queue_id < remainder) {
565 		/* these queues must service one extra FIFO */
566 		rxq->queue_base = rx_queue_id * (queue_count + 1);
567 		rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
568 	} else {
569 		/* these queues service the regular number of FIFOs */
570 		rxq->queue_base = ((remainder * (queue_count + 1)) +
571 				   ((rx_queue_id - remainder) * queue_count));
572 		rxq->queue_limit = rxq->queue_base + queue_count - 1;
573 	}
574 
575 	PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
576 		    rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
577 
578 	rxq->queue_id = rxq->queue_base;
579 }
580 
581 static void
582 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
583 {
584 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
585 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
586 	struct rte_avp_device_info *host_info;
587 	void *addr;
588 
589 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
590 	host_info = (struct rte_avp_device_info *)addr;
591 
592 	/*
593 	 * the transmit direction is not negotiated beyond respecting the max
594 	 * number of queues because the host can handle arbitrary guest tx
595 	 * queues (host rx queues).
596 	 */
597 	avp->num_tx_queues = eth_dev->data->nb_tx_queues;
598 
599 	/*
600 	 * the receive direction is more restrictive.  The host requires a
601 	 * minimum number of guest rx queues (host tx queues) therefore
602 	 * negotiate a value that is at least as large as the host minimum
603 	 * requirement.  If the host and guest values are not identical then a
604 	 * mapping will be established in the receive_queue_setup function.
605 	 */
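	/*
	 * For example, if the host requires a minimum of 8 receive queues but
	 * only 4 are configured, 8 AVP fifos are negotiated here and each
	 * device queue later services 2 of them via the receive queue
	 * mappings.
	 */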
606 	avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
607 				     eth_dev->data->nb_rx_queues);
608 
609 	PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
610 		    avp->num_tx_queues, avp->num_rx_queues);
611 }
612 
613 static int
614 avp_dev_attach(struct rte_eth_dev *eth_dev)
615 {
616 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
617 	struct rte_avp_device_config config;
618 	unsigned int i;
619 	int ret;
620 
621 	PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
622 		    eth_dev->data->port_id, avp->device_id);
623 
624 	rte_spinlock_lock(&avp->lock);
625 
626 	if (!(avp->flags & AVP_F_DETACHED)) {
627 		PMD_DRV_LOG(NOTICE, "port %u already attached\n",
628 			    eth_dev->data->port_id);
629 		ret = 0;
630 		goto unlock;
631 	}
632 
633 	/*
634 	 * make sure that the detached flag is set prior to reconfiguring the
635 	 * queues.
636 	 */
637 	avp->flags |= AVP_F_DETACHED;
638 	rte_wmb();
639 
640 	/*
641 	 * re-run the device create utility which will parse the new host info
642 	 * and setup the AVP device queue pointers.
643 	 */
644 	ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev);
645 	if (ret < 0) {
646 		PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
647 			    ret);
648 		goto unlock;
649 	}
650 
651 	if (avp->flags & AVP_F_CONFIGURED) {
652 		/*
653 		 * Update the receive queue mapping to handle cases where the
654 		 * source and destination hosts have different queue
655 		 * requirements.  As long as the DETACHED flag is asserted the
656 		 * queue table should not be referenced so it should be safe to
657 		 * update it.
658 		 */
659 		_avp_set_queue_counts(eth_dev);
660 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
661 			_avp_set_rx_queue_mappings(eth_dev, i);
662 
663 		/*
664 		 * Update the host with our config details so that it knows the
665 		 * device is active.
666 		 */
667 		memset(&config, 0, sizeof(config));
668 		config.device_id = avp->device_id;
669 		config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
670 		config.driver_version = AVP_DPDK_DRIVER_VERSION;
671 		config.features = avp->features;
672 		config.num_tx_queues = avp->num_tx_queues;
673 		config.num_rx_queues = avp->num_rx_queues;
674 		config.if_up = !!(avp->flags & AVP_F_LINKUP);
675 
676 		ret = avp_dev_ctrl_set_config(eth_dev, &config);
677 		if (ret < 0) {
678 			PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
679 				    ret);
680 			goto unlock;
681 		}
682 	}
683 
684 	rte_wmb();
685 	avp->flags &= ~AVP_F_DETACHED;
686 
687 	ret = 0;
688 
689 unlock:
690 	rte_spinlock_unlock(&avp->lock);
691 	return ret;
692 }
693 
694 static void
695 avp_dev_interrupt_handler(struct rte_intr_handle *intr_handle,
696 						  void *data)
697 {
698 	struct rte_eth_dev *eth_dev = data;
699 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
700 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
701 	uint32_t status, value;
702 	int ret;
703 
704 	if (registers == NULL)
705 		rte_panic("no mapped MMIO register space\n");
706 
707 	/* read the interrupt status register
708 	 * note: this register clears on read so all raised interrupts must be
709 	 *    handled or remembered for later processing
710 	 */
711 	status = AVP_READ32(
712 		RTE_PTR_ADD(registers,
713 			    RTE_AVP_INTERRUPT_STATUS_OFFSET));
714 
715 	if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
716 		/* handle interrupt based on current status */
717 		value = AVP_READ32(
718 			RTE_PTR_ADD(registers,
719 				    RTE_AVP_MIGRATION_STATUS_OFFSET));
720 		switch (value) {
721 		case RTE_AVP_MIGRATION_DETACHED:
722 			ret = avp_dev_detach(eth_dev);
723 			break;
724 		case RTE_AVP_MIGRATION_ATTACHED:
725 			ret = avp_dev_attach(eth_dev);
726 			break;
727 		default:
728 			PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
729 				    value);
730 			ret = -EINVAL;
731 		}
732 
733 		/* acknowledge the request by writing out our current status */
734 		value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
735 		AVP_WRITE32(value,
736 			    RTE_PTR_ADD(registers,
737 					RTE_AVP_MIGRATION_ACK_OFFSET));
738 
739 		PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
740 	}
741 
742 	if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
743 		PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
744 			    status);
745 
746 	/* re-enable UIO interrupt handling */
747 	ret = rte_intr_enable(intr_handle);
748 	if (ret < 0) {
749 		PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
750 			    ret);
751 		/* continue */
752 	}
753 }
754 
755 static int
756 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
757 {
758 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
759 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
760 	int ret;
761 
762 	if (registers == NULL)
763 		return -EINVAL;
764 
765 	/* enable UIO interrupt handling */
766 	ret = rte_intr_enable(&pci_dev->intr_handle);
767 	if (ret < 0) {
768 		PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
769 			    ret);
770 		return ret;
771 	}
772 
773 	/* inform the device that all interrupts are enabled */
774 	AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
775 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
776 
777 	return 0;
778 }
779 
780 static int
781 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
782 {
783 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
784 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
785 	int ret;
786 
787 	if (registers == NULL)
788 		return 0;
789 
790 	/* inform the device that all interrupts are disabled */
791 	AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
792 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
793 
794 	/* disable UIO interrupt handling */
795 	ret = rte_intr_disable(&pci_dev->intr_handle);
796 	if (ret < 0) {
797 		PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
798 			    ret);
799 		return ret;
800 	}
801 
802 	return 0;
803 }
804 
805 static int
806 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
807 {
808 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
809 	int ret;
810 
811 	/* register a callback handler with UIO for interrupt notifications */
812 	ret = rte_intr_callback_register(&pci_dev->intr_handle,
813 					 avp_dev_interrupt_handler,
814 					 (void *)eth_dev);
815 	if (ret < 0) {
816 		PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
817 			    ret);
818 		return ret;
819 	}
820 
821 	/* enable interrupt processing */
822 	return avp_dev_enable_interrupts(eth_dev);
823 }
824 
825 static int
826 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
827 {
828 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
829 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
830 	uint32_t value;
831 
832 	if (registers == NULL)
833 		return 0;
834 
835 	value = AVP_READ32(RTE_PTR_ADD(registers,
836 				       RTE_AVP_MIGRATION_STATUS_OFFSET));
837 	if (value == RTE_AVP_MIGRATION_DETACHED) {
838 		/* migration is in progress; ack it if we have not already */
839 		AVP_WRITE32(value,
840 			    RTE_PTR_ADD(registers,
841 					RTE_AVP_MIGRATION_ACK_OFFSET));
842 		return 1;
843 	}
844 	return 0;
845 }
846 
847 /*
848  * create an AVP device using the supplied device info by first translating it
849  * to guest address space(s).
850  */
851 static int
852 avp_dev_create(struct rte_pci_device *pci_dev,
853 	       struct rte_eth_dev *eth_dev)
854 {
855 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
856 	struct rte_avp_device_info *host_info;
857 	struct rte_mem_resource *resource;
858 	unsigned int i;
859 
860 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
861 	if (resource->addr == NULL) {
862 		PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
863 			    RTE_AVP_PCI_DEVICE_BAR);
864 		return -EFAULT;
865 	}
866 	host_info = (struct rte_avp_device_info *)resource->addr;
867 
868 	if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
869 		avp_dev_version_check(host_info->version)) {
870 		PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
871 			    host_info->magic, host_info->version,
872 			    AVP_DPDK_DRIVER_VERSION);
873 		return -EINVAL;
874 	}
875 
876 	PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
877 		    RTE_AVP_GET_RELEASE_VERSION(host_info->version),
878 		    RTE_AVP_GET_MAJOR_VERSION(host_info->version),
879 		    RTE_AVP_GET_MINOR_VERSION(host_info->version));
880 
881 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
882 		    host_info->min_tx_queues, host_info->max_tx_queues);
883 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
884 		    host_info->min_rx_queues, host_info->max_rx_queues);
885 	PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
886 		    host_info->features);
887 
888 	if (avp->magic != AVP_ETHDEV_MAGIC) {
889 		/*
890 		 * First time initialization (i.e., not during a VM
891 		 * migration)
892 		 */
893 		memset(avp, 0, sizeof(*avp));
894 		avp->magic = AVP_ETHDEV_MAGIC;
895 		avp->dev_data = eth_dev->data;
896 		avp->port_id = eth_dev->data->port_id;
897 		avp->host_mbuf_size = host_info->mbuf_size;
898 		avp->host_features = host_info->features;
899 		rte_spinlock_init(&avp->lock);
900 		memcpy(&avp->ethaddr.addr_bytes[0],
901 		       host_info->ethaddr, ETHER_ADDR_LEN);
902 		/* adjust max values to not exceed our max */
903 		avp->max_tx_queues =
904 			RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
905 		avp->max_rx_queues =
906 			RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
907 	} else {
908 		/* Re-attaching during migration */
909 
910 		/* TODO... requires validation of host values */
911 		if ((host_info->features & avp->features) != avp->features) {
912 			PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
913 				    avp->features, host_info->features);
914 			/* this should not be possible; continue for now */
915 		}
916 	}
917 
918 	/* the device id is allowed to change over migrations */
919 	avp->device_id = host_info->device_id;
920 
921 	/* translate incoming host addresses to guest address space */
922 	PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
923 		    host_info->tx_phys);
924 	PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
925 		    host_info->alloc_phys);
926 	for (i = 0; i < avp->max_tx_queues; i++) {
927 		avp->tx_q[i] = avp_dev_translate_address(eth_dev,
928 			host_info->tx_phys + (i * host_info->tx_size));
929 
930 		avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
931 			host_info->alloc_phys + (i * host_info->alloc_size));
932 	}
933 
934 	PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
935 		    host_info->rx_phys);
936 	PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
937 		    host_info->free_phys);
938 	for (i = 0; i < avp->max_rx_queues; i++) {
939 		avp->rx_q[i] = avp_dev_translate_address(eth_dev,
940 			host_info->rx_phys + (i * host_info->rx_size));
941 		avp->free_q[i] = avp_dev_translate_address(eth_dev,
942 			host_info->free_phys + (i * host_info->free_size));
943 	}
944 
945 	PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
946 		    host_info->req_phys);
947 	PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
948 		    host_info->resp_phys);
949 	PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
950 		    host_info->sync_phys);
951 	PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
952 		    host_info->mbuf_phys);
953 	avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
954 	avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
955 	avp->sync_addr =
956 		avp_dev_translate_address(eth_dev, host_info->sync_phys);
957 	avp->mbuf_addr =
958 		avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
959 
960 	/*
961 	 * store the host mbuf virtual address so that we can calculate
962 	 * relative offsets for each mbuf as they are processed
963 	 */
964 	avp->host_mbuf_addr = host_info->mbuf_va;
965 	avp->host_sync_addr = host_info->sync_va;
966 
967 	/*
968 	 * store the maximum packet length that is supported by the host.
969 	 */
970 	avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
971 	PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
972 				host_info->max_rx_pkt_len);
973 
974 	return 0;
975 }
976 
977 /*
978  * This function is based on probe() function in avp_pci.c
979  * It returns 0 on success.
980  */
981 static int
982 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
983 {
984 	struct avp_dev *avp =
985 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
986 	struct rte_pci_device *pci_dev;
987 	int ret;
988 
989 	pci_dev = AVP_DEV_TO_PCI(eth_dev);
990 	eth_dev->dev_ops = &avp_eth_dev_ops;
991 	eth_dev->rx_pkt_burst = &avp_recv_pkts;
992 	eth_dev->tx_pkt_burst = &avp_xmit_pkts;
993 
994 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
995 		/*
996 		 * no setup required on secondary processes.  All data is saved
997 		 * in dev_private by the primary process. All resource should
998 		 * be mapped to the same virtual address so all pointers should
999 		 * be valid.
1000 		 */
1001 		if (eth_dev->data->scattered_rx) {
1002 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1003 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1004 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1005 		}
1006 		return 0;
1007 	}
1008 
1009 	rte_eth_copy_pci_info(eth_dev, pci_dev);
1010 
1011 	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
1012 
1013 	/* Check current migration status */
1014 	if (avp_dev_migration_pending(eth_dev)) {
1015 		PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
1016 		return -EBUSY;
1017 	}
1018 
1019 	/* Check BAR resources */
1020 	ret = avp_dev_check_regions(eth_dev);
1021 	if (ret < 0) {
1022 		PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
1023 			    ret);
1024 		return ret;
1025 	}
1026 
1027 	/* Enable interrupts */
1028 	ret = avp_dev_setup_interrupts(eth_dev);
1029 	if (ret < 0) {
1030 		PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1031 		return ret;
1032 	}
1033 
1034 	/* Create the AVP device from the host device info */
1035 	ret = avp_dev_create(pci_dev, eth_dev);
1036 	if (ret < 0) {
1037 		PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1038 		return ret;
1039 	}
1040 
1041 	/* Allocate memory for storing MAC addresses */
1042 	eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
1043 	if (eth_dev->data->mac_addrs == NULL) {
1044 		PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1045 			    ETHER_ADDR_LEN);
1046 		return -ENOMEM;
1047 	}
1048 
1049 	/* Get a mac from device config */
1050 	ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1051 
1052 	return 0;
1053 }
1054 
1055 static int
1056 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1057 {
1058 	int ret;
1059 
1060 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1061 		return -EPERM;
1062 
1063 	if (eth_dev->data == NULL)
1064 		return 0;
1065 
1066 	ret = avp_dev_disable_interrupts(eth_dev);
1067 	if (ret != 0) {
1068 		PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
1069 		return ret;
1070 	}
1071 
1072 	if (eth_dev->data->mac_addrs != NULL) {
1073 		rte_free(eth_dev->data->mac_addrs);
1074 		eth_dev->data->mac_addrs = NULL;
1075 	}
1076 
1077 	return 0;
1078 }
1079 
1080 
1081 static struct eth_driver rte_avp_pmd = {
1082 	{
1083 		.id_table = pci_id_avp_map,
1084 		.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1085 		.probe = rte_eth_dev_pci_probe,
1086 		.remove = rte_eth_dev_pci_remove,
1087 	},
1088 	.eth_dev_init = eth_avp_dev_init,
1089 	.eth_dev_uninit = eth_avp_dev_uninit,
1090 	.dev_private_size = sizeof(struct avp_adapter),
1091 };
1092 
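/*
 * Determine whether chained (scattered) mbufs are required: returns nonzero
 * if either the configured max_rx_pkt_len or the host's maximum packet
 * length exceeds the guest or host mbuf size, in which case the scattered
 * receive/transmit handlers must be used.
 */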
1093 static int
1094 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1095 			 struct avp_dev *avp)
1096 {
1097 	unsigned int max_rx_pkt_len;
1098 
1099 	max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1100 
1101 	if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1102 	    (max_rx_pkt_len > avp->host_mbuf_size)) {
1103 		/*
1104 		 * If the guest MTU is greater than either the host or guest
1105 		 * buffers then chained mbufs have to be enabled in the TX
1106 		 * direction.  It is assumed that the application will not need
1107 		 * to send packets larger than their max_rx_pkt_len (MRU).
1108 		 */
1109 		return 1;
1110 	}
1111 
1112 	if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1113 	    (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1114 		/*
1115 		 * If the host MRU is greater than its own mbuf size or the
1116 		 * guest mbuf size then chained mbufs have to be enabled in the
1117 		 * RX direction.
1118 		 */
1119 		return 1;
1120 	}
1121 
1122 	return 0;
1123 }
1124 
1125 static int
1126 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1127 		       uint16_t rx_queue_id,
1128 		       uint16_t nb_rx_desc,
1129 		       unsigned int socket_id,
1130 		       const struct rte_eth_rxconf *rx_conf,
1131 		       struct rte_mempool *pool)
1132 {
1133 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1134 	struct rte_pktmbuf_pool_private *mbp_priv;
1135 	struct avp_queue *rxq;
1136 
1137 	if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1138 		PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1139 			    rx_queue_id, eth_dev->data->nb_rx_queues);
1140 		return -EINVAL;
1141 	}
1142 
1143 	/* Save mbuf pool pointer */
1144 	avp->pool = pool;
1145 
1146 	/* Save the local mbuf size */
1147 	mbp_priv = rte_mempool_get_priv(pool);
1148 	avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1149 	avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1150 
1151 	if (avp_dev_enable_scattered(eth_dev, avp)) {
1152 		if (!eth_dev->data->scattered_rx) {
1153 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1154 			eth_dev->data->scattered_rx = 1;
1155 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1156 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1157 		}
1158 	}
1159 
1160 	PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1161 		    avp->max_rx_pkt_len,
1162 		    eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1163 		    avp->host_mbuf_size,
1164 		    avp->guest_mbuf_size);
1165 
1166 	/* allocate a queue object */
1167 	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1168 				 RTE_CACHE_LINE_SIZE, socket_id);
1169 	if (rxq == NULL) {
1170 		PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1171 		return -ENOMEM;
1172 	}
1173 
1174 	/* save back pointers to AVP and Ethernet devices */
1175 	rxq->avp = avp;
1176 	rxq->dev_data = eth_dev->data;
1177 	eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1178 
1179 	/* setup the queue receive mapping for the current queue. */
1180 	_avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1181 
1182 	PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1183 
1184 	(void)nb_rx_desc;
1185 	(void)rx_conf;
1186 	return 0;
1187 }
1188 
1189 static int
1190 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1191 		       uint16_t tx_queue_id,
1192 		       uint16_t nb_tx_desc,
1193 		       unsigned int socket_id,
1194 		       const struct rte_eth_txconf *tx_conf)
1195 {
1196 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1197 	struct avp_queue *txq;
1198 
1199 	if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1200 		PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1201 			    tx_queue_id, eth_dev->data->nb_tx_queues);
1202 		return -EINVAL;
1203 	}
1204 
1205 	/* allocate a queue object */
1206 	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1207 				 RTE_CACHE_LINE_SIZE, socket_id);
1208 	if (txq == NULL) {
1209 		PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1210 		return -ENOMEM;
1211 	}
1212 
1213 	/* only the configured set of transmit queues are used */
1214 	txq->queue_id = tx_queue_id;
1215 	txq->queue_base = tx_queue_id;
1216 	txq->queue_limit = tx_queue_id;
1217 
1218 	/* save back pointers to AVP and Ethernet devices */
1219 	txq->avp = avp;
1220 	txq->dev_data = eth_dev->data;
1221 	eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1222 
1223 	PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1224 
1225 	(void)nb_tx_desc;
1226 	(void)tx_conf;
1227 	return 0;
1228 }
1229 
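/*
 * Compare two Ethernet addresses as three 16-bit words; returns zero only
 * when the addresses are identical.  Both addresses are assumed to be at
 * least 2-byte aligned.
 */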
1230 static inline int
1231 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
1232 {
1233 	uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1234 	uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1235 	return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1236 }
1237 
1238 static inline int
1239 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1240 {
1241 	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
1242 
1243 	if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1244 		/* allow all packets destined to our address */
1245 		return 0;
1246 	}
1247 
1248 	if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
1249 		/* allow all broadcast packets */
1250 		return 0;
1251 	}
1252 
1253 	if (likely(is_multicast_ether_addr(&eth->d_addr))) {
1254 		/* allow all multicast packets */
1255 		return 0;
1256 	}
1257 
1258 	if (avp->flags & AVP_F_PROMISC) {
1259 		/* allow all packets when in promiscuous mode */
1260 		return 0;
1261 	}
1262 
1263 	return -1;
1264 }
1265 
1266 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1267 static inline void
1268 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
1269 {
1270 	struct rte_avp_desc *first_buf;
1271 	struct rte_avp_desc *pkt_buf;
1272 	unsigned int pkt_len;
1273 	unsigned int nb_segs;
1274 	void *pkt_data;
1275 	unsigned int i;
1276 
1277 	first_buf = avp_dev_translate_buffer(avp, buf);
1278 
1279 	i = 0;
1280 	pkt_len = 0;
1281 	nb_segs = first_buf->nb_segs;
1282 	do {
1283 		/* Adjust pointers for guest addressing */
1284 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1285 		if (pkt_buf == NULL)
1286 			rte_panic("bad buffer: segment %u has an invalid address %p\n",
1287 				  i, buf);
1288 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1289 		if (pkt_data == NULL)
1290 			rte_panic("bad buffer: segment %u has a NULL data pointer\n",
1291 				  i);
1292 		if (pkt_buf->data_len == 0)
1293 			rte_panic("bad buffer: segment %u has 0 data length\n",
1294 				  i);
1295 		pkt_len += pkt_buf->data_len;
1296 		nb_segs--;
1297 		i++;
1298 
1299 	} while (nb_segs && (buf = pkt_buf->next) != NULL);
1300 
1301 	if (nb_segs != 0)
1302 		rte_panic("bad buffer: expected %u segments found %u\n",
1303 			  first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
1304 	if (pkt_len != first_buf->pkt_len)
1305 		rte_panic("bad buffer: expected length %u found %u\n",
1306 			  first_buf->pkt_len, pkt_len);
1307 }
1308 
1309 #define avp_dev_buffer_sanity_check(a, b) \
1310 	__avp_dev_buffer_sanity_check((a), (b))
1311 
1312 #else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */
1313 
1314 #define avp_dev_buffer_sanity_check(a, b) do {} while (0)
1315 
1316 #endif
1317 
1318 /*
1319  * Copy a host buffer chain to a set of mbufs.  This function assumes that
1320  * there are exactly the required number of mbufs to copy all source bytes.
1321  */
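/*
 * The caller computes 'count' as the ceiling of the host packet length
 * divided by the guest mbuf size, so the chain built here always has enough
 * room for the full packet.
 */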
1322 static inline struct rte_mbuf *
1323 avp_dev_copy_from_buffers(struct avp_dev *avp,
1324 			  struct rte_avp_desc *buf,
1325 			  struct rte_mbuf **mbufs,
1326 			  unsigned int count)
1327 {
1328 	struct rte_mbuf *m_previous = NULL;
1329 	struct rte_avp_desc *pkt_buf;
1330 	unsigned int total_length = 0;
1331 	unsigned int copy_length;
1332 	unsigned int src_offset;
1333 	struct rte_mbuf *m;
1334 	uint16_t ol_flags;
1335 	uint16_t vlan_tci;
1336 	void *pkt_data;
1337 	unsigned int i;
1338 
1339 	avp_dev_buffer_sanity_check(avp, buf);
1340 
1341 	/* setup the first source buffer */
1342 	pkt_buf = avp_dev_translate_buffer(avp, buf);
1343 	pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1344 	total_length = pkt_buf->pkt_len;
1345 	src_offset = 0;
1346 
1347 	if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1348 		ol_flags = PKT_RX_VLAN_PKT;
1349 		vlan_tci = pkt_buf->vlan_tci;
1350 	} else {
1351 		ol_flags = 0;
1352 		vlan_tci = 0;
1353 	}
1354 
1355 	for (i = 0; (i < count) && (buf != NULL); i++) {
1356 		/* fill each destination buffer */
1357 		m = mbufs[i];
1358 
1359 		if (m_previous != NULL)
1360 			m_previous->next = m;
1361 
1362 		m_previous = m;
1363 
1364 		do {
1365 			/*
1366 			 * Copy as many source buffers as will fit in the
1367 			 * destination buffer.
1368 			 */
1369 			copy_length = RTE_MIN((avp->guest_mbuf_size -
1370 					       rte_pktmbuf_data_len(m)),
1371 					      (pkt_buf->data_len -
1372 					       src_offset));
1373 			rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1374 					       rte_pktmbuf_data_len(m)),
1375 				   RTE_PTR_ADD(pkt_data, src_offset),
1376 				   copy_length);
1377 			rte_pktmbuf_data_len(m) += copy_length;
1378 			src_offset += copy_length;
1379 
1380 			if (likely(src_offset == pkt_buf->data_len)) {
1381 				/* need a new source buffer */
1382 				buf = pkt_buf->next;
1383 				if (buf != NULL) {
1384 					pkt_buf = avp_dev_translate_buffer(
1385 						avp, buf);
1386 					pkt_data = avp_dev_translate_buffer(
1387 						avp, pkt_buf->data);
1388 					src_offset = 0;
1389 				}
1390 			}
1391 
1392 			if (unlikely(rte_pktmbuf_data_len(m) ==
1393 				     avp->guest_mbuf_size)) {
1394 				/* need a new destination mbuf */
1395 				break;
1396 			}
1397 
1398 		} while (buf != NULL);
1399 	}
1400 
1401 	m = mbufs[0];
1402 	m->ol_flags = ol_flags;
1403 	m->nb_segs = count;
1404 	rte_pktmbuf_pkt_len(m) = total_length;
1405 	m->vlan_tci = vlan_tci;
1406 
1407 	__rte_mbuf_sanity_check(m, 1);
1408 
1409 	return m;
1410 }
1411 
1412 static uint16_t
1413 avp_recv_scattered_pkts(void *rx_queue,
1414 			struct rte_mbuf **rx_pkts,
1415 			uint16_t nb_pkts)
1416 {
1417 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1418 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1419 	struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1420 	struct avp_dev *avp = rxq->avp;
1421 	struct rte_avp_desc *pkt_buf;
1422 	struct rte_avp_fifo *free_q;
1423 	struct rte_avp_fifo *rx_q;
1424 	struct rte_avp_desc *buf;
1425 	unsigned int count, avail, n;
1426 	unsigned int guest_mbuf_size;
1427 	struct rte_mbuf *m;
1428 	unsigned int required;
1429 	unsigned int buf_len;
1430 	unsigned int port_id;
1431 	unsigned int i;
1432 
1433 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1434 		/* VM live migration in progress */
1435 		return 0;
1436 	}
1437 
1438 	guest_mbuf_size = avp->guest_mbuf_size;
1439 	port_id = avp->port_id;
1440 	rx_q = avp->rx_q[rxq->queue_id];
1441 	free_q = avp->free_q[rxq->queue_id];
1442 
1443 	/* set up the next fifo to service (round-robin within [queue_base, queue_limit]) */
1444 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1445 		(rxq->queue_id + 1) : rxq->queue_base;
1446 
1447 	/* determine how many slots are available in the free queue */
1448 	count = avp_fifo_free_count(free_q);
1449 
1450 	/* determine how many packets are available in the rx queue */
1451 	avail = avp_fifo_count(rx_q);
1452 
1453 	/* determine how many packets can be received */
1454 	count = RTE_MIN(count, avail);
1455 	count = RTE_MIN(count, nb_pkts);
1456 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1457 
1458 	if (unlikely(count == 0)) {
1459 		/* no free buffers, or no buffers on the rx queue */
1460 		return 0;
1461 	}
1462 
1463 	/* retrieve pending packets */
1464 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1465 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1466 		   count, rx_q);
1467 
1468 	count = 0;
1469 	for (i = 0; i < n; i++) {
1470 		/* prefetch next entry while processing current one */
1471 		if (i + 1 < n) {
1472 			pkt_buf = avp_dev_translate_buffer(avp,
1473 							   avp_bufs[i + 1]);
1474 			rte_prefetch0(pkt_buf);
1475 		}
1476 		buf = avp_bufs[i];
1477 
1478 		/* Peek into the first buffer to determine the total length */
1479 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1480 		buf_len = pkt_buf->pkt_len;
1481 
1482 		/* Allocate enough mbufs to receive the entire packet */
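		/* i.e. the ceiling of buf_len / guest_mbuf_size */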
1483 		required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1484 		if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1485 			rxq->dev_data->rx_mbuf_alloc_failed++;
1486 			continue;
1487 		}
1488 
1489 		/* Copy the data from the buffers to our mbufs */
1490 		m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1491 
1492 		/* finalize mbuf */
1493 		m->port = port_id;
1494 
1495 		if (_avp_mac_filter(avp, m) != 0) {
1496 			/* silently discard packets not destined to our MAC */
1497 			rte_pktmbuf_free(m);
1498 			continue;
1499 		}
1500 
1501 		/* return new mbuf to caller */
1502 		rx_pkts[count++] = m;
1503 		rxq->bytes += buf_len;
1504 	}
1505 
1506 	rxq->packets += count;
1507 
1508 	/* return the buffers to the free queue */
1509 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1510 
1511 	return count;
1512 }
1513 
1514 
1515 static uint16_t
1516 avp_recv_pkts(void *rx_queue,
1517 	      struct rte_mbuf **rx_pkts,
1518 	      uint16_t nb_pkts)
1519 {
1520 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1521 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1522 	struct avp_dev *avp = rxq->avp;
1523 	struct rte_avp_desc *pkt_buf;
1524 	struct rte_avp_fifo *free_q;
1525 	struct rte_avp_fifo *rx_q;
1526 	unsigned int count, avail, n;
1527 	unsigned int pkt_len;
1528 	struct rte_mbuf *m;
1529 	char *pkt_data;
1530 	unsigned int i;
1531 
1532 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1533 		/* VM live migration in progress */
1534 		return 0;
1535 	}
1536 
1537 	rx_q = avp->rx_q[rxq->queue_id];
1538 	free_q = avp->free_q[rxq->queue_id];
1539 
1540 	/* setup next queue to service */
1541 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1542 		(rxq->queue_id + 1) : rxq->queue_base;
1543 
1544 	/* determine how many slots are available in the free queue */
1545 	count = avp_fifo_free_count(free_q);
1546 
1547 	/* determine how many packets are available in the rx queue */
1548 	avail = avp_fifo_count(rx_q);
1549 
1550 	/* determine how many packets can be received */
1551 	count = RTE_MIN(count, avail);
1552 	count = RTE_MIN(count, nb_pkts);
1553 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1554 
1555 	if (unlikely(count == 0)) {
1556 		/* no free buffers, or no buffers on the rx queue */
1557 		return 0;
1558 	}
1559 
1560 	/* retrieve pending packets */
1561 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1562 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1563 		   count, rx_q);
1564 
1565 	count = 0;
1566 	for (i = 0; i < n; i++) {
1567 		/* prefetch next entry while processing current one */
1568 		if (i < n - 1) {
1569 			pkt_buf = avp_dev_translate_buffer(avp,
1570 							   avp_bufs[i + 1]);
1571 			rte_prefetch0(pkt_buf);
1572 		}
1573 
1574 		/* Adjust host pointers for guest addressing */
1575 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1576 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1577 		pkt_len = pkt_buf->pkt_len;
1578 
1579 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1580 			     (pkt_buf->nb_segs > 1))) {
1581 			/*
1582 			 * application should be using the scattered receive
1583 			 * function
1584 			 */
1585 			rxq->errors++;
1586 			continue;
1587 		}
1588 
1589 		/* allocate a new mbuf for the received packet */
1590 		m = rte_pktmbuf_alloc(avp->pool);
1591 		if (unlikely(m == NULL)) {
1592 			rxq->dev_data->rx_mbuf_alloc_failed++;
1593 			continue;
1594 		}
1595 
1596 		/* copy data out of the host buffer to our buffer */
1597 		m->data_off = RTE_PKTMBUF_HEADROOM;
1598 		rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1599 
1600 		/* initialize the local mbuf */
1601 		rte_pktmbuf_data_len(m) = pkt_len;
1602 		rte_pktmbuf_pkt_len(m) = pkt_len;
1603 		m->port = avp->port_id;
1604 
1605 		if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1606 			m->ol_flags = PKT_RX_VLAN_PKT;
1607 			m->vlan_tci = pkt_buf->vlan_tci;
1608 		}
1609 
1610 		if (_avp_mac_filter(avp, m) != 0) {
1611 			/* silently discard packets not destined to our MAC */
1612 			rte_pktmbuf_free(m);
1613 			continue;
1614 		}
1615 
1616 		/* return new mbuf to caller */
1617 		rx_pkts[count++] = m;
1618 		rxq->bytes += pkt_len;
1619 	}
1620 
1621 	rxq->packets += count;
1622 
1623 	/* return the buffers to the free queue */
1624 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1625 
1626 	return count;
1627 }
1628 
1629 /*
1630  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1631  * there are sufficient destination buffers to contain the entire source
1632  * packet.
1633  */
1634 static inline uint16_t
1635 avp_dev_copy_to_buffers(struct avp_dev *avp,
1636 			struct rte_mbuf *mbuf,
1637 			struct rte_avp_desc **buffers,
1638 			unsigned int count)
1639 {
1640 	struct rte_avp_desc *previous_buf = NULL;
1641 	struct rte_avp_desc *first_buf = NULL;
1642 	struct rte_avp_desc *pkt_buf;
1643 	struct rte_avp_desc *buf;
1644 	size_t total_length;
1645 	struct rte_mbuf *m;
1646 	size_t copy_length;
1647 	size_t src_offset;
1648 	char *pkt_data;
1649 	unsigned int i;
1650 
1651 	__rte_mbuf_sanity_check(mbuf, 1);
1652 
1653 	m = mbuf;
1654 	src_offset = 0;
1655 	total_length = rte_pktmbuf_pkt_len(m);
1656 	for (i = 0; (i < count) && (m != NULL); i++) {
1657 		/* fill each destination buffer */
1658 		buf = buffers[i];
1659 
1660 		if (i < count - 1) {
1661 			/* prefetch next entry while processing this one */
1662 			pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1663 			rte_prefetch0(pkt_buf);
1664 		}
1665 
1666 		/* Adjust pointers for guest addressing */
1667 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1668 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1669 
1670 		/* setup the buffer chain */
1671 		if (previous_buf != NULL)
1672 			previous_buf->next = buf;
1673 		else
1674 			first_buf = pkt_buf;
1675 
1676 		previous_buf = pkt_buf;
1677 
1678 		do {
1679 			/*
1680 			 * copy as many source mbuf segments as will fit in the
1681 			 * destination buffer.
1682 			 */
1683 			copy_length = RTE_MIN((avp->host_mbuf_size -
1684 					       pkt_buf->data_len),
1685 					      (rte_pktmbuf_data_len(m) -
1686 					       src_offset));
1687 			rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1688 				   RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1689 					       src_offset),
1690 				   copy_length);
1691 			pkt_buf->data_len += copy_length;
1692 			src_offset += copy_length;
1693 
1694 			if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1695 				/* need a new source buffer */
1696 				m = m->next;
1697 				src_offset = 0;
1698 			}
1699 
1700 			if (unlikely(pkt_buf->data_len ==
1701 				     avp->host_mbuf_size)) {
1702 				/* need a new destination buffer */
1703 				break;
1704 			}
1705 
1706 		} while (m != NULL);
1707 	}
1708 
1709 	first_buf->nb_segs = count;
1710 	first_buf->pkt_len = total_length;
1711 
1712 	if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1713 		first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1714 		first_buf->vlan_tci = mbuf->vlan_tci;
1715 	}
1716 
1717 	avp_dev_buffer_sanity_check(avp, buffers[0]);
1718 
1719 	return total_length;
1720 }
1721 
1722 
1723 static uint16_t
1724 avp_xmit_scattered_pkts(void *tx_queue,
1725 			struct rte_mbuf **tx_pkts,
1726 			uint16_t nb_pkts)
1727 {
1728 	struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1729 				       RTE_AVP_MAX_MBUF_SEGMENTS)];
1730 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1731 	struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1732 	struct avp_dev *avp = txq->avp;
1733 	struct rte_avp_fifo *alloc_q;
1734 	struct rte_avp_fifo *tx_q;
1735 	unsigned int count, avail, n;
1736 	unsigned int orig_nb_pkts;
1737 	struct rte_mbuf *m;
1738 	unsigned int required;
1739 	unsigned int segments;
1740 	unsigned int tx_bytes;
1741 	unsigned int i;
1742 
1743 	orig_nb_pkts = nb_pkts;
1744 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1745 		/* VM live migration in progress */
1746 		/* TODO ... buffer for X packets then drop? */
1747 		txq->errors += nb_pkts;
1748 		return 0;
1749 	}
1750 
1751 	tx_q = avp->tx_q[txq->queue_id];
1752 	alloc_q = avp->alloc_q[txq->queue_id];
1753 
1754 	/* limit the number of transmitted packets to the max burst size */
1755 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1756 		nb_pkts = AVP_MAX_TX_BURST;
1757 
1758 	/* determine how many buffers are available to copy into */
1759 	avail = avp_fifo_count(alloc_q);
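	/* clamp to what the local avp_bufs[] staging array can hold */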
1760 	if (unlikely(avail > (AVP_MAX_TX_BURST *
1761 			      RTE_AVP_MAX_MBUF_SEGMENTS)))
1762 		avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1763 
1764 	/* determine how many slots are available in the transmit queue */
1765 	count = avp_fifo_free_count(tx_q);
1766 
1767 	/* determine how many packets can be sent */
1768 	nb_pkts = RTE_MIN(count, nb_pkts);
1769 
1770 	/* determine how many packets will fit in the available buffers */
1771 	count = 0;
1772 	segments = 0;
1773 	for (i = 0; i < nb_pkts; i++) {
1774 		m = tx_pkts[i];
1775 		if (likely(i < (unsigned int)nb_pkts - 1)) {
1776 			/* prefetch next entry while processing this one */
1777 			rte_prefetch0(tx_pkts[i + 1]);
1778 		}
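		/*
		 * ceiling division: e.g. a 3000 byte packet with 2048 byte
		 * host buffers needs two destination buffers.
		 */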
1779 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1780 			avp->host_mbuf_size;
1781 
1782 		if (unlikely((required == 0) ||
1783 			     (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1784 			break;
1785 		else if (unlikely(required + segments > avail))
1786 			break;
1787 		segments += required;
1788 		count++;
1789 	}
1790 	nb_pkts = count;
1791 
1792 	if (unlikely(nb_pkts == 0)) {
1793 		/* no available buffers, or no space on the tx queue */
1794 		txq->errors += orig_nb_pkts;
1795 		return 0;
1796 	}
1797 
1798 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1799 		   nb_pkts, tx_q);
1800 
1801 	/* retrieve sufficient send buffers */
1802 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1803 	if (unlikely(n != segments)) {
1804 		PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1805 			   "n=%u, segments=%u, orig=%u\n",
1806 			   n, segments, orig_nb_pkts);
1807 		txq->errors += orig_nb_pkts;
1808 		return 0;
1809 	}
1810 
1811 	tx_bytes = 0;
1812 	count = 0;
1813 	for (i = 0; i < nb_pkts; i++) {
1814 		/* process each packet to be transmitted */
1815 		m = tx_pkts[i];
1816 
1817 		/* determine how many buffers are required for this packet */
1818 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1819 			avp->host_mbuf_size;
1820 
1821 		tx_bytes += avp_dev_copy_to_buffers(avp, m,
1822 						    &avp_bufs[count], required);
1823 		tx_bufs[i] = avp_bufs[count];
1824 		count += required;
1825 
1826 		/* free the original mbuf */
1827 		rte_pktmbuf_free(m);
1828 	}
1829 
1830 	txq->packets += nb_pkts;
1831 	txq->bytes += tx_bytes;
1832 
1833 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1834 	for (i = 0; i < nb_pkts; i++)
1835 		avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1836 #endif
1837 
1838 	/* send the packets */
1839 	n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
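	/*
	 * count every packet that was requested but not queued in this call,
	 * whether it was dropped by the sizing pass above or not accepted by
	 * the transmit FIFO
	 */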
1840 	if (unlikely(n != orig_nb_pkts))
1841 		txq->errors += (orig_nb_pkts - n);
1842 
1843 	return n;
1844 }
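
/*
 * Illustrative sketch only (application side, not part of this driver): the
 * burst functions above may accept fewer packets than requested, e.g. when
 * the alloc or transmit FIFOs are short of space, so callers of
 * rte_eth_tx_burst() must deal with the unsent remainder.  The helper name
 * and the drop policy are hypothetical.
 */
static inline void
avp_example_app_tx_burst(uint8_t port_id, uint16_t queue_id,
			 struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);

	/* drop (free) whatever the driver did not accept */
	while (sent < nb_pkts)
		rte_pktmbuf_free(pkts[sent++]);
}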
1845 
1846 
1847 static uint16_t
1848 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1849 {
1850 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1851 	struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1852 	struct avp_dev *avp = txq->avp;
1853 	struct rte_avp_desc *pkt_buf;
1854 	struct rte_avp_fifo *alloc_q;
1855 	struct rte_avp_fifo *tx_q;
1856 	unsigned int count, avail, n;
1857 	struct rte_mbuf *m;
1858 	unsigned int pkt_len;
1859 	unsigned int tx_bytes;
1860 	char *pkt_data;
1861 	unsigned int i;
1862 
1863 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1864 		/* VM live migration in progress */
1865 		/* TODO ... buffer for X packets then drop?! */
1866 		txq->errors++;
1867 		return 0;
1868 	}
1869 
1870 	tx_q = avp->tx_q[txq->queue_id];
1871 	alloc_q = avp->alloc_q[txq->queue_id];
1872 
1873 	/* limit the number of transmitted packets to the max burst size */
1874 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1875 		nb_pkts = AVP_MAX_TX_BURST;
1876 
1877 	/* determine how many buffers are available to copy into */
1878 	avail = avp_fifo_count(alloc_q);
1879 
1880 	/* determine how many slots are available in the transmit queue */
1881 	count = avp_fifo_free_count(tx_q);
1882 
1883 	/* determine how many packets can be sent */
1884 	count = RTE_MIN(count, avail);
1885 	count = RTE_MIN(count, nb_pkts);
1886 
1887 	if (unlikely(count == 0)) {
1888 		/* no available buffers, or no space on the tx queue */
1889 		txq->errors += nb_pkts;
1890 		return 0;
1891 	}
1892 
1893 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1894 		   count, tx_q);
1895 
1896 	/* retrieve sufficient send buffers */
1897 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1898 	if (unlikely(n != count)) {
1899 		txq->errors++;
1900 		return 0;
1901 	}
1902 
1903 	tx_bytes = 0;
1904 	for (i = 0; i < count; i++) {
1905 		/* prefetch next entry while processing the current one */
1906 		if (i < count - 1) {
1907 			pkt_buf = avp_dev_translate_buffer(avp,
1908 							   avp_bufs[i + 1]);
1909 			rte_prefetch0(pkt_buf);
1910 		}
1911 
1912 		/* process each packet to be transmitted */
1913 		m = tx_pkts[i];
1914 
1915 		/* Adjust pointers for guest addressing */
1916 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1917 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1918 		pkt_len = rte_pktmbuf_pkt_len(m);
1919 
1920 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1921 			     (pkt_len > avp->host_mbuf_size))) {
1922 			/*
1923 			 * application should be using the scattered transmit
1924 			 * function; send it truncated to avoid the performance
1925 			 * hit of having to manage returning the already
1926 			 * allocated buffer to the free list.  This should not
1927 			 * happen since the application should have set the
1928 			 * max_rx_pkt_len based on its MTU and it should be
1929 			 * policing its own packet sizes.
1930 			 */
1931 			txq->errors++;
1932 			pkt_len = RTE_MIN(avp->guest_mbuf_size,
1933 					  avp->host_mbuf_size);
1934 		}
1935 
1936 		/* copy data out of our mbuf and into the AVP buffer */
1937 		rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1938 		pkt_buf->pkt_len = pkt_len;
1939 		pkt_buf->data_len = pkt_len;
1940 		pkt_buf->nb_segs = 1;
1941 		pkt_buf->next = NULL;
1942 
1943 		if (m->ol_flags & PKT_TX_VLAN_PKT) {
1944 			pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1945 			pkt_buf->vlan_tci = m->vlan_tci;
1946 		}
1947 
1948 		tx_bytes += pkt_len;
1949 
1950 		/* free the original mbuf */
1951 		rte_pktmbuf_free(m);
1952 	}
1953 
1954 	txq->packets += count;
1955 	txq->bytes += tx_bytes;
1956 
1957 	/* send the packets */
1958 	n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1959 
1960 	return n;
1961 }
1962 
1963 static void
1964 avp_dev_rx_queue_release(void *rx_queue)
1965 {
1966 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1967 	struct avp_dev *avp = rxq->avp;
1968 	struct rte_eth_dev_data *data = avp->dev_data;
1969 	unsigned int i;
1970 
1971 	for (i = 0; i < avp->num_rx_queues; i++) {
1972 		if (data->rx_queues[i] == rxq)
1973 			data->rx_queues[i] = NULL;
1974 	}
1975 }
1976 
1977 static void
1978 avp_dev_tx_queue_release(void *tx_queue)
1979 {
1980 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1981 	struct avp_dev *avp = txq->avp;
1982 	struct rte_eth_dev_data *data = avp->dev_data;
1983 	unsigned int i;
1984 
1985 	for (i = 0; i < avp->num_tx_queues; i++) {
1986 		if (data->tx_queues[i] == txq)
1987 			data->tx_queues[i] = NULL;
1988 	}
1989 }
1990 
1991 static int
1992 avp_dev_configure(struct rte_eth_dev *eth_dev)
1993 {
1994 	struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
1995 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1996 	struct rte_avp_device_info *host_info;
1997 	struct rte_avp_device_config config;
1998 	int mask = 0;
1999 	void *addr;
2000 	int ret;
2001 
2002 	rte_spinlock_lock(&avp->lock);
2003 	if (avp->flags & AVP_F_DETACHED) {
2004 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2005 		ret = -ENOTSUP;
2006 		goto unlock;
2007 	}
2008 
2009 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2010 	host_info = (struct rte_avp_device_info *)addr;
2011 
2012 	/* Setup required number of queues */
2013 	_avp_set_queue_counts(eth_dev);
2014 
2015 	mask = (ETH_VLAN_STRIP_MASK |
2016 		ETH_VLAN_FILTER_MASK |
2017 		ETH_VLAN_EXTEND_MASK);
2018 	avp_vlan_offload_set(eth_dev, mask);
2019 
2020 	/* update device config */
2021 	memset(&config, 0, sizeof(config));
2022 	config.device_id = host_info->device_id;
2023 	config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2024 	config.driver_version = AVP_DPDK_DRIVER_VERSION;
2025 	config.features = avp->features;
2026 	config.num_tx_queues = avp->num_tx_queues;
2027 	config.num_rx_queues = avp->num_rx_queues;
2028 
2029 	ret = avp_dev_ctrl_set_config(eth_dev, &config);
2030 	if (ret < 0) {
2031 		PMD_DRV_LOG(ERR, "Config request failed on host, ret=%d\n",
2032 			    ret);
2033 		goto unlock;
2034 	}
2035 
2036 	avp->flags |= AVP_F_CONFIGURED;
2037 	ret = 0;
2038 
2039 unlock:
2040 	rte_spinlock_unlock(&avp->lock);
2041 	return ret;
2042 }
2043 
2044 static int
2045 avp_dev_start(struct rte_eth_dev *eth_dev)
2046 {
2047 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2048 	int ret;
2049 
2050 	rte_spinlock_lock(&avp->lock);
2051 	if (avp->flags & AVP_F_DETACHED) {
2052 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2053 		ret = -ENOTSUP;
2054 		goto unlock;
2055 	}
2056 
2057 	/* disable features that we do not support */
2058 	eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
2059 	eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
2060 	eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
2061 	eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
2062 
2063 	/* update link state */
2064 	ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2065 	if (ret < 0) {
2066 		PMD_DRV_LOG(ERR, "Link state change failed on host, ret=%d\n",
2067 			    ret);
2068 		goto unlock;
2069 	}
2070 
2071 	/* remember current link state */
2072 	avp->flags |= AVP_F_LINKUP;
2073 
2074 	ret = 0;
2075 
2076 unlock:
2077 	rte_spinlock_unlock(&avp->lock);
2078 	return ret;
2079 }
2080 
2081 static void
2082 avp_dev_stop(struct rte_eth_dev *eth_dev)
2083 {
2084 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2085 	int ret;
2086 
2087 	rte_spinlock_lock(&avp->lock);
2088 	if (avp->flags & AVP_F_DETACHED) {
2089 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2090 		goto unlock;
2091 	}
2092 
2093 	/* remember current link state */
2094 	avp->flags &= ~AVP_F_LINKUP;
2095 
2096 	/* update link state */
2097 	ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2098 	if (ret < 0) {
2099 		PMD_DRV_LOG(ERR, "Link state change failed on host, ret=%d\n",
2100 			    ret);
2101 	}
2102 
2103 unlock:
2104 	rte_spinlock_unlock(&avp->lock);
2105 }
2106 
2107 static void
2108 avp_dev_close(struct rte_eth_dev *eth_dev)
2109 {
2110 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2111 	int ret;
2112 
2113 	rte_spinlock_lock(&avp->lock);
2114 	if (avp->flags & AVP_F_DETACHED) {
2115 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2116 		goto unlock;
2117 	}
2118 
2119 	/* remember current link state */
2120 	avp->flags &= ~AVP_F_LINKUP;
2121 	avp->flags &= ~AVP_F_CONFIGURED;
2122 
2123 	ret = avp_dev_disable_interrupts(eth_dev);
2124 	if (ret < 0) {
2125 		PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2126 		/* continue */
2127 	}
2128 
2129 	/* update device state */
2130 	ret = avp_dev_ctrl_shutdown(eth_dev);
2131 	if (ret < 0) {
2132 		PMD_DRV_LOG(ERR, "Device shutdown failed on host, ret=%d\n",
2133 			    ret);
2134 		/* continue */
2135 	}
2136 
2137 unlock:
2138 	rte_spinlock_unlock(&avp->lock);
2139 }
2140 
2141 static int
2142 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2143 		    __rte_unused int wait_to_complete)
2144 {
2145 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2146 	struct rte_eth_link *link = &eth_dev->data->dev_link;
2147 
2148 	link->link_speed = ETH_SPEED_NUM_10G;
2149 	link->link_duplex = ETH_LINK_FULL_DUPLEX;
2150 	link->link_status = !!(avp->flags & AVP_F_LINKUP);
2151 
2152 	return -1;
2153 }
2154 
2155 static void
2156 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2157 {
2158 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2159 
2160 	rte_spinlock_lock(&avp->lock);
2161 	if ((avp->flags & AVP_F_PROMISC) == 0) {
2162 		avp->flags |= AVP_F_PROMISC;
2163 		PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2164 			    eth_dev->data->port_id);
2165 	}
2166 	rte_spinlock_unlock(&avp->lock);
2167 }
2168 
2169 static void
2170 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2171 {
2172 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2173 
2174 	rte_spinlock_lock(&avp->lock);
2175 	if ((avp->flags & AVP_F_PROMISC) != 0) {
2176 		avp->flags &= ~AVP_F_PROMISC;
2177 		PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2178 			    eth_dev->data->port_id);
2179 	}
2180 	rte_spinlock_unlock(&avp->lock);
2181 }
2182 
2183 static void
2184 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2185 		 struct rte_eth_dev_info *dev_info)
2186 {
2187 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2188 
2189 	dev_info->driver_name = "rte_avp_pmd";
2190 	dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
2191 	dev_info->max_rx_queues = avp->max_rx_queues;
2192 	dev_info->max_tx_queues = avp->max_tx_queues;
2193 	dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2194 	dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2195 	dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2196 	if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2197 		dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2198 		dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2199 	}
2200 }
2201 
2202 static void
2203 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2204 {
2205 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2206 
2207 	if (mask & ETH_VLAN_STRIP_MASK) {
2208 		if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2209 			if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
2210 				avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2211 			else
2212 				avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2213 		} else {
2214 			PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2215 		}
2216 	}
2217 
2218 	if (mask & ETH_VLAN_FILTER_MASK) {
2219 		if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
2220 			PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2221 	}
2222 
2223 	if (mask & ETH_VLAN_EXTEND_MASK) {
2224 		if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
2225 			PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2226 	}
2227 }
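
/*
 * Illustrative sketch only (application side, not part of this driver): the
 * hw_vlan_strip flag set below is the configuration that
 * avp_vlan_offload_set() examines when avp_dev_configure() runs.  Variable
 * names are hypothetical and the rest of the configuration is left at
 * defaults.
 */
static inline int
avp_example_app_enable_vlan_strip(uint8_t port_id)
{
	struct rte_eth_conf conf;

	memset(&conf, 0, sizeof(conf));
	conf.rxmode.hw_vlan_strip = 1;

	/* one rx queue and one tx queue, default settings otherwise */
	return rte_eth_dev_configure(port_id, 1, 1, &conf);
}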
2228 
2229 static void
2230 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2231 {
2232 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2233 	unsigned int i;
2234 
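	/*
	 * Accumulate aggregate and per-queue counters.  The per-queue arrays
	 * hold RTE_ETHDEV_QUEUE_STAT_CNTRS entries, so this assumes the
	 * configured queue count never exceeds that limit.
	 */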
2235 	for (i = 0; i < avp->num_rx_queues; i++) {
2236 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2237 
2238 		if (rxq) {
2239 			stats->ipackets += rxq->packets;
2240 			stats->ibytes += rxq->bytes;
2241 			stats->ierrors += rxq->errors;
2242 
2243 			stats->q_ipackets[i] += rxq->packets;
2244 			stats->q_ibytes[i] += rxq->bytes;
2245 			stats->q_errors[i] += rxq->errors;
2246 		}
2247 	}
2248 
2249 	for (i = 0; i < avp->num_tx_queues; i++) {
2250 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2251 
2252 		if (txq) {
2253 			stats->opackets += txq->packets;
2254 			stats->obytes += txq->bytes;
2255 			stats->oerrors += txq->errors;
2256 
2257 			stats->q_opackets[i] += txq->packets;
2258 			stats->q_obytes[i] += txq->bytes;
2259 			stats->q_errors[i] += txq->errors;
2260 		}
2261 	}
2262 }
2263 
2264 static void
2265 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2266 {
2267 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2268 	unsigned int i;
2269 
2270 	for (i = 0; i < avp->num_rx_queues; i++) {
2271 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2272 
2273 		if (rxq) {
2274 			rxq->bytes = 0;
2275 			rxq->packets = 0;
2276 			rxq->errors = 0;
2277 		}
2278 	}
2279 
2280 	for (i = 0; i < avp->num_tx_queues; i++) {
2281 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2282 
2283 		if (txq) {
2284 			txq->bytes = 0;
2285 			txq->packets = 0;
2286 			txq->errors = 0;
2287 		}
2288 	}
2289 }
2290 
2291 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd.pci_drv);
2292 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);
2293