xref: /dpdk/drivers/net/avp/avp_ethdev.c (revision 3998e2a07220844d3f3c17f76a781ced3efe0de0)
1 /*
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2013-2017, Wind River Systems, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * 2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * 3) Neither the name of Wind River Systems nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <stdint.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38 
39 #include <rte_ethdev.h>
40 #include <rte_ethdev_pci.h>
41 #include <rte_memcpy.h>
42 #include <rte_string_fns.h>
43 #include <rte_malloc.h>
44 #include <rte_atomic.h>
45 #include <rte_branch_prediction.h>
46 #include <rte_pci.h>
47 #include <rte_bus_pci.h>
48 #include <rte_ether.h>
49 #include <rte_common.h>
50 #include <rte_cycles.h>
51 #include <rte_spinlock.h>
52 #include <rte_byteorder.h>
53 #include <rte_dev.h>
54 #include <rte_memory.h>
55 #include <rte_eal.h>
56 #include <rte_io.h>
57 
58 #include "rte_avp_common.h"
59 #include "rte_avp_fifo.h"
60 
61 #include "avp_logs.h"
62 
63 
64 static int avp_dev_create(struct rte_pci_device *pci_dev,
65 			  struct rte_eth_dev *eth_dev);
66 
67 static int avp_dev_configure(struct rte_eth_dev *dev);
68 static int avp_dev_start(struct rte_eth_dev *dev);
69 static void avp_dev_stop(struct rte_eth_dev *dev);
70 static void avp_dev_close(struct rte_eth_dev *dev);
71 static void avp_dev_info_get(struct rte_eth_dev *dev,
72 			     struct rte_eth_dev_info *dev_info);
73 static int avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
74 static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
75 static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
76 static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
77 
78 static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
79 				  uint16_t rx_queue_id,
80 				  uint16_t nb_rx_desc,
81 				  unsigned int socket_id,
82 				  const struct rte_eth_rxconf *rx_conf,
83 				  struct rte_mempool *pool);
84 
85 static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
86 				  uint16_t tx_queue_id,
87 				  uint16_t nb_tx_desc,
88 				  unsigned int socket_id,
89 				  const struct rte_eth_txconf *tx_conf);
90 
91 static uint16_t avp_recv_scattered_pkts(void *rx_queue,
92 					struct rte_mbuf **rx_pkts,
93 					uint16_t nb_pkts);
94 
95 static uint16_t avp_recv_pkts(void *rx_queue,
96 			      struct rte_mbuf **rx_pkts,
97 			      uint16_t nb_pkts);
98 
99 static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
100 					struct rte_mbuf **tx_pkts,
101 					uint16_t nb_pkts);
102 
103 static uint16_t avp_xmit_pkts(void *tx_queue,
104 			      struct rte_mbuf **tx_pkts,
105 			      uint16_t nb_pkts);
106 
107 static void avp_dev_rx_queue_release(void *rxq);
108 static void avp_dev_tx_queue_release(void *txq);
109 
110 static int avp_dev_stats_get(struct rte_eth_dev *dev,
111 			      struct rte_eth_stats *stats);
112 static void avp_dev_stats_reset(struct rte_eth_dev *dev);
113 
114 
115 #define AVP_MAX_RX_BURST 64
116 #define AVP_MAX_TX_BURST 64
117 #define AVP_MAX_MAC_ADDRS 1
118 #define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN
119 
120 
121 /*
122  * Defines the number of microseconds to wait before checking the response
123  * queue for completion.
124  */
125 #define AVP_REQUEST_DELAY_USECS (5000)
126 
127 /*
128  * Defines the number of times to check the response queue for completion before
129  * declaring a timeout.
130  */
131 #define AVP_MAX_REQUEST_RETRY (100)
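
/*
 * Combined, the two values above allow roughly 500 ms (100 * 5000 us) for a
 * host response before a request is declared to have timed out.
 */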
132 
133 /* Defines the current PCI driver version number */
134 #define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
135 
136 /*
137  * The set of PCI devices this driver supports
138  */
139 static const struct rte_pci_id pci_id_avp_map[] = {
140 	{ .vendor_id = RTE_AVP_PCI_VENDOR_ID,
141 	  .device_id = RTE_AVP_PCI_DEVICE_ID,
142 	  .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
143 	  .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
144 	  .class_id = RTE_CLASS_ANY_ID,
145 	},
146 
147 	{ .vendor_id = 0, /* sentinel */
148 	},
149 };
150 
151 /*
152  * dev_ops for avp, bare necessities for basic operation
153  */
154 static const struct eth_dev_ops avp_eth_dev_ops = {
155 	.dev_configure       = avp_dev_configure,
156 	.dev_start           = avp_dev_start,
157 	.dev_stop            = avp_dev_stop,
158 	.dev_close           = avp_dev_close,
159 	.dev_infos_get       = avp_dev_info_get,
160 	.vlan_offload_set    = avp_vlan_offload_set,
161 	.stats_get           = avp_dev_stats_get,
162 	.stats_reset         = avp_dev_stats_reset,
163 	.link_update         = avp_dev_link_update,
164 	.promiscuous_enable  = avp_dev_promiscuous_enable,
165 	.promiscuous_disable = avp_dev_promiscuous_disable,
166 	.rx_queue_setup      = avp_dev_rx_queue_setup,
167 	.rx_queue_release    = avp_dev_rx_queue_release,
168 	.tx_queue_setup      = avp_dev_tx_queue_setup,
169 	.tx_queue_release    = avp_dev_tx_queue_release,
170 };
171 
172 /**@{ AVP device flags */
173 #define AVP_F_PROMISC (1 << 1)
174 #define AVP_F_CONFIGURED (1 << 2)
175 #define AVP_F_LINKUP (1 << 3)
176 #define AVP_F_DETACHED (1 << 4)
177 /**@} */
178 
179 /* Ethernet device validation marker */
180 #define AVP_ETHDEV_MAGIC 0x92972862
181 
182 /*
183  * Defines the AVP device attributes which are attached to an RTE ethernet
184  * device
185  */
186 struct avp_dev {
187 	uint32_t magic; /**< Memory validation marker */
188 	uint64_t device_id; /**< Unique system identifier */
189 	struct ether_addr ethaddr; /**< Host specified MAC address */
190 	struct rte_eth_dev_data *dev_data;
191 	/**< Back pointer to ethernet device data */
192 	volatile uint32_t flags; /**< Device operational flags */
193 	uint16_t port_id; /**< Ethernet port identifier */
194 	struct rte_mempool *pool; /**< pkt mbuf mempool */
195 	unsigned int guest_mbuf_size; /**< local pool mbuf size */
196 	unsigned int host_mbuf_size; /**< host mbuf size */
197 	unsigned int max_rx_pkt_len; /**< maximum receive unit */
198 	uint32_t host_features; /**< Supported feature bitmap */
199 	uint32_t features; /**< Enabled feature bitmap */
200 	unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
201 	unsigned int max_tx_queues; /**< Maximum number of transmit queues */
202 	unsigned int num_rx_queues; /**< Negotiated number of receive queues */
203 	unsigned int max_rx_queues; /**< Maximum number of receive queues */
204 
205 	struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
206 	struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
207 	struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
208 	/**< Allocated mbufs queue */
209 	struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
210 	/**< To be freed mbufs queue */
211 
212 	/* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */
213 	rte_spinlock_t lock;
214 
215 	/* For request & response */
216 	struct rte_avp_fifo *req_q; /**< Request queue */
217 	struct rte_avp_fifo *resp_q; /**< Response queue */
218 	void *host_sync_addr; /**< (host) Req/Resp Mem address */
219 	void *sync_addr; /**< Req/Resp Mem address */
220 	void *host_mbuf_addr; /**< (host) MBUF pool start address */
221 	void *mbuf_addr; /**< MBUF pool start address */
222 } __rte_cache_aligned;
223 
224 /* RTE ethernet private data */
225 struct avp_adapter {
226 	struct avp_dev avp;
227 } __rte_cache_aligned;
228 
229 
230 /* 32-bit MMIO register write */
231 #define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
232 
233 /* 32-bit MMIO register read */
234 #define AVP_READ32(_addr) rte_read32_relaxed((_addr))
235 
236 /* Macro to cast the ethernet device private data to an AVP object */
237 #define AVP_DEV_PRIVATE_TO_HW(adapter) \
238 	(&((struct avp_adapter *)adapter)->avp)
239 
240 /*
241  * Defines the structure of an AVP device queue for the purpose of handling the
242  * receive and transmit burst callback functions
243  */
244 struct avp_queue {
245 	struct rte_eth_dev_data *dev_data;
246 	/**< Backpointer to ethernet device data */
247 	struct avp_dev *avp; /**< Backpointer to AVP device */
248 	uint16_t queue_id;
249 	/**< Queue identifier used for indexing current queue */
250 	uint16_t queue_base;
251 	/**< Base queue identifier for queue servicing */
252 	uint16_t queue_limit;
253 	/**< Maximum queue identifier for queue servicing */
254 
255 	uint64_t packets;
256 	uint64_t bytes;
257 	uint64_t errors;
258 };
259 
260 /* send a request and wait for a response
261  *
262  * @warning must be called while holding the avp->lock spinlock.
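 *
 * The request is copied into the shared sync area and the host's pointer to
 * that area (host_sync_addr) is pushed onto req_q; the response queue is then
 * polled, up to AVP_MAX_REQUEST_RETRY times at AVP_REQUEST_DELAY_USECS
 * intervals, and the host's reply is copied back into the caller's request.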
263  */
264 static int
265 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
266 {
267 	unsigned int retry = AVP_MAX_REQUEST_RETRY;
268 	void *resp_addr = NULL;
269 	unsigned int count;
270 	int ret;
271 
272 	PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
273 
274 	request->result = -ENOTSUP;
275 
276 	/* Discard any stale responses before starting a new request */
277 	while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
278 		PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
279 
280 	rte_memcpy(avp->sync_addr, request, sizeof(*request));
281 	count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
282 	if (count < 1) {
283 		PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
284 			    request->req_id);
285 		ret = -EBUSY;
286 		goto done;
287 	}
288 
289 	while (retry--) {
290 		/* wait for a response */
291 		usleep(AVP_REQUEST_DELAY_USECS);
292 
293 		count = avp_fifo_count(avp->resp_q);
294 		if (count >= 1) {
295 			/* response received */
296 			break;
297 		}
298 
299 		if ((count < 1) && (retry == 0)) {
300 			PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
301 				    request->req_id);
302 			ret = -ETIME;
303 			goto done;
304 		}
305 	}
306 
307 	/* retrieve the response */
308 	count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
309 	if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
310 		PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
311 			    count, resp_addr, avp->host_sync_addr);
312 		ret = -ENODATA;
313 		goto done;
314 	}
315 
316 	/* copy to user buffer */
317 	rte_memcpy(request, avp->sync_addr, sizeof(*request));
318 	ret = 0;
319 
320 	PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
321 		    request->result, request->req_id);
322 
323 done:
324 	return ret;
325 }
326 
327 static int
328 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
329 {
330 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
331 	struct rte_avp_request request;
332 	int ret;
333 
334 	/* setup a link state change request */
335 	memset(&request, 0, sizeof(request));
336 	request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
337 	request.if_up = state;
338 
339 	ret = avp_dev_process_request(avp, &request);
340 
341 	return ret == 0 ? request.result : ret;
342 }
343 
344 static int
345 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
346 			struct rte_avp_device_config *config)
347 {
348 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
349 	struct rte_avp_request request;
350 	int ret;
351 
352 	/* setup a configure request */
353 	memset(&request, 0, sizeof(request));
354 	request.req_id = RTE_AVP_REQ_CFG_DEVICE;
355 	memcpy(&request.config, config, sizeof(request.config));
356 
357 	ret = avp_dev_process_request(avp, &request);
358 
359 	return ret == 0 ? request.result : ret;
360 }
361 
362 static int
363 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
364 {
365 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
366 	struct rte_avp_request request;
367 	int ret;
368 
369 	/* setup a shutdown request */
370 	memset(&request, 0, sizeof(request));
371 	request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
372 
373 	ret = avp_dev_process_request(avp, &request);
374 
375 	return ret == 0 ? request.result : ret;
376 }
377 
378 /* translate from host mbuf virtual address to guest virtual address */
379 static inline void *
380 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
381 {
382 	return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
383 				       (uintptr_t)avp->host_mbuf_addr),
384 			   (uintptr_t)avp->mbuf_addr);
385 }
386 
387 /* translate from host physical address to guest virtual address */
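/*
 * The translation below assumes the host packs its memory segments back to
 * back in the AVP memory BAR: the guest address is the BAR base plus the
 * cumulative length of the preceding segments plus the offset within the
 * matching segment.
 */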
388 static void *
389 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
390 			  rte_iova_t host_phys_addr)
391 {
392 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
393 	struct rte_mem_resource *resource;
394 	struct rte_avp_memmap_info *info;
395 	struct rte_avp_memmap *map;
396 	off_t offset;
397 	void *addr;
398 	unsigned int i;
399 
400 	addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
401 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
402 	info = (struct rte_avp_memmap_info *)resource->addr;
403 
404 	offset = 0;
405 	for (i = 0; i < info->nb_maps; i++) {
406 		/* search all segments looking for a matching address */
407 		map = &info->maps[i];
408 
409 		if ((host_phys_addr >= map->phys_addr) &&
410 			(host_phys_addr < (map->phys_addr + map->length))) {
411 			/* address is within this segment */
412 			offset += (host_phys_addr - map->phys_addr);
413 			addr = RTE_PTR_ADD(addr, offset);
414 
415 			PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
416 				    host_phys_addr, addr);
417 
418 			return addr;
419 		}
420 		offset += map->length;
421 	}
422 
423 	return NULL;
424 }
425 
426 /* verify that the incoming device version is compatible with our version */
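/* Returns zero when the host version, minor version ignored, is not newer than ours. */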
427 static int
428 avp_dev_version_check(uint32_t version)
429 {
430 	uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
431 	uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
432 
433 	if (device <= driver) {
434 		/* the host driver version is less than or equal to ours */
435 		return 0;
436 	}
437 
438 	return 1;
439 }
440 
441 /* verify that memory regions have expected version and validation markers */
442 static int
443 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
444 {
445 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
446 	struct rte_avp_memmap_info *memmap;
447 	struct rte_avp_device_info *info;
448 	struct rte_mem_resource *resource;
449 	unsigned int i;
450 
451 	/* Dump resource info for debug */
452 	for (i = 0; i < PCI_MAX_RESOURCE; i++) {
453 		resource = &pci_dev->mem_resource[i];
454 		if ((resource->phys_addr == 0) || (resource->len == 0))
455 			continue;
456 
457 		PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
458 			    i, resource->phys_addr,
459 			    resource->len, resource->addr);
460 
461 		switch (i) {
462 		case RTE_AVP_PCI_MEMMAP_BAR:
463 			memmap = (struct rte_avp_memmap_info *)resource->addr;
464 			if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
465 			    (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
466 				PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
467 					    memmap->magic, memmap->version);
468 				return -EINVAL;
469 			}
470 			break;
471 
472 		case RTE_AVP_PCI_DEVICE_BAR:
473 			info = (struct rte_avp_device_info *)resource->addr;
474 			if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
475 			    avp_dev_version_check(info->version)) {
476 				PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
477 					    info->magic, info->version,
478 					    AVP_DPDK_DRIVER_VERSION);
479 				return -EINVAL;
480 			}
481 			break;
482 
483 		case RTE_AVP_PCI_MEMORY_BAR:
484 		case RTE_AVP_PCI_MMIO_BAR:
485 			if (resource->addr == NULL) {
486 				PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
487 					    i);
488 				return -EINVAL;
489 			}
490 			break;
491 
492 		case RTE_AVP_PCI_MSIX_BAR:
493 		default:
494 			/* no validation required */
495 			break;
496 		}
497 	}
498 
499 	return 0;
500 }
501 
502 static int
503 avp_dev_detach(struct rte_eth_dev *eth_dev)
504 {
505 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
506 	int ret;
507 
508 	PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
509 		    eth_dev->data->port_id, avp->device_id);
510 
511 	rte_spinlock_lock(&avp->lock);
512 
513 	if (avp->flags & AVP_F_DETACHED) {
514 		PMD_DRV_LOG(NOTICE, "port %u already detached\n",
515 			    eth_dev->data->port_id);
516 		ret = 0;
517 		goto unlock;
518 	}
519 
520 	/* shutdown the device first so the host stops sending us packets. */
521 	ret = avp_dev_ctrl_shutdown(eth_dev);
522 	if (ret < 0) {
523 		PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
524 			    ret);
525 		avp->flags &= ~AVP_F_DETACHED;
526 		goto unlock;
527 	}
528 
529 	avp->flags |= AVP_F_DETACHED;
530 	rte_wmb();
531 
532 	/* wait for queues to acknowledge the presence of the detach flag */
533 	rte_delay_ms(1);
534 
535 	ret = 0;
536 
537 unlock:
538 	rte_spinlock_unlock(&avp->lock);
539 	return ret;
540 }
541 
542 static void
543 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
544 {
545 	struct avp_dev *avp =
546 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
547 	struct avp_queue *rxq;
548 	uint16_t queue_count;
549 	uint16_t remainder;
550 
551 	rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
552 
553 	/*
554 	 * Must map all AVP fifos as evenly as possible between the configured
555 	 * device queues.  Each device queue will service a subset of the AVP
556 	 * fifos. If the fifos do not divide evenly among the device queues, the
557 	 * first 'remainder' device queues each service one extra AVP fifo.
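	 *
	 * For example, with 5 AVP fifos and 2 device queues, queue_count = 2
	 * and remainder = 1, so rx queue 0 services fifos 0-2 and rx queue 1
	 * services fifos 3-4.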
558 	 */
559 	queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
560 	remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
561 	if (rx_queue_id < remainder) {
562 		/* these queues must service one extra FIFO */
563 		rxq->queue_base = rx_queue_id * (queue_count + 1);
564 		rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
565 	} else {
566 		/* these queues service the regular number of FIFOs */
567 		rxq->queue_base = ((remainder * (queue_count + 1)) +
568 				   ((rx_queue_id - remainder) * queue_count));
569 		rxq->queue_limit = rxq->queue_base + queue_count - 1;
570 	}
571 
572 	PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
573 		    rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
574 
575 	rxq->queue_id = rxq->queue_base;
576 }
577 
578 static void
579 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
580 {
581 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
582 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
583 	struct rte_avp_device_info *host_info;
584 	void *addr;
585 
586 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
587 	host_info = (struct rte_avp_device_info *)addr;
588 
589 	/*
590 	 * the transmit direction is not negotiated beyond respecting the max
591 	 * number of queues because the host can handle arbitrary guest tx
592 	 * queues (host rx queues).
593 	 */
594 	avp->num_tx_queues = eth_dev->data->nb_tx_queues;
595 
596 	/*
597 	 * the receive direction is more restrictive.  The host requires a
598 	 * minimum number of guest rx queues (host tx queues) therefore
599 	 * negotiate a value that is at least as large as the host minimum
600 	 * requirement.  If the host and guest values are not identical then a
601 	 * mapping will be established in the receive_queue_setup function.
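	 *
	 * For example, if the host advertises min_rx_queues = 8 while only 2
	 * guest rx queues are configured, num_rx_queues becomes 8 and each
	 * configured rx queue ends up servicing 4 AVP fifos.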
602 	 */
603 	avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
604 				     eth_dev->data->nb_rx_queues);
605 
606 	PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
607 		    avp->num_tx_queues, avp->num_rx_queues);
608 }
609 
610 static int
611 avp_dev_attach(struct rte_eth_dev *eth_dev)
612 {
613 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
614 	struct rte_avp_device_config config;
615 	unsigned int i;
616 	int ret;
617 
618 	PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
619 		    eth_dev->data->port_id, avp->device_id);
620 
621 	rte_spinlock_lock(&avp->lock);
622 
623 	if (!(avp->flags & AVP_F_DETACHED)) {
624 		PMD_DRV_LOG(NOTICE, "port %u already attached\n",
625 			    eth_dev->data->port_id);
626 		ret = 0;
627 		goto unlock;
628 	}
629 
630 	/*
631 	 * make sure that the detached flag is set prior to reconfiguring the
632 	 * queues.
633 	 */
634 	avp->flags |= AVP_F_DETACHED;
635 	rte_wmb();
636 
637 	/*
638 	 * re-run the device create utility which will parse the new host info
639 	 * and setup the AVP device queue pointers.
640 	 */
641 	ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev);
642 	if (ret < 0) {
643 		PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
644 			    ret);
645 		goto unlock;
646 	}
647 
648 	if (avp->flags & AVP_F_CONFIGURED) {
649 		/*
650 		 * Update the receive queue mapping to handle cases where the
651 		 * source and destination hosts have different queue
652 		 * requirements.  As long as the DETACHED flag is asserted the
653 		 * queue table should not be referenced so it should be safe to
654 		 * update it.
655 		 */
656 		_avp_set_queue_counts(eth_dev);
657 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
658 			_avp_set_rx_queue_mappings(eth_dev, i);
659 
660 		/*
661 		 * Update the host with our config details so that it knows the
662 		 * device is active.
663 		 */
664 		memset(&config, 0, sizeof(config));
665 		config.device_id = avp->device_id;
666 		config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
667 		config.driver_version = AVP_DPDK_DRIVER_VERSION;
668 		config.features = avp->features;
669 		config.num_tx_queues = avp->num_tx_queues;
670 		config.num_rx_queues = avp->num_rx_queues;
671 		config.if_up = !!(avp->flags & AVP_F_LINKUP);
672 
673 		ret = avp_dev_ctrl_set_config(eth_dev, &config);
674 		if (ret < 0) {
675 			PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
676 				    ret);
677 			goto unlock;
678 		}
679 	}
680 
681 	rte_wmb();
682 	avp->flags &= ~AVP_F_DETACHED;
683 
684 	ret = 0;
685 
686 unlock:
687 	rte_spinlock_unlock(&avp->lock);
688 	return ret;
689 }
690 
691 static void
692 avp_dev_interrupt_handler(void *data)
693 {
694 	struct rte_eth_dev *eth_dev = data;
695 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
696 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
697 	uint32_t status, value;
698 	int ret;
699 
700 	if (registers == NULL)
701 		rte_panic("no mapped MMIO register space\n");
702 
703 	/* read the interrupt status register
704 	 * note: this register clears on read so all raised interrupts must be
705 	 *    handled or remembered for later processing
706 	 */
707 	status = AVP_READ32(
708 		RTE_PTR_ADD(registers,
709 			    RTE_AVP_INTERRUPT_STATUS_OFFSET));
710 
711 	if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
712 		/* handle interrupt based on current status */
713 		value = AVP_READ32(
714 			RTE_PTR_ADD(registers,
715 				    RTE_AVP_MIGRATION_STATUS_OFFSET));
716 		switch (value) {
717 		case RTE_AVP_MIGRATION_DETACHED:
718 			ret = avp_dev_detach(eth_dev);
719 			break;
720 		case RTE_AVP_MIGRATION_ATTACHED:
721 			ret = avp_dev_attach(eth_dev);
722 			break;
723 		default:
724 			PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
725 				    value);
726 			ret = -EINVAL;
727 		}
728 
729 		/* acknowledge the request by writing out our current status */
730 		value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
731 		AVP_WRITE32(value,
732 			    RTE_PTR_ADD(registers,
733 					RTE_AVP_MIGRATION_ACK_OFFSET));
734 
735 		PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
736 	}
737 
738 	if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
739 		PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
740 			    status);
741 
742 	/* re-enable UIO interrupt handling */
743 	ret = rte_intr_enable(&pci_dev->intr_handle);
744 	if (ret < 0) {
745 		PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
746 			    ret);
747 		/* continue */
748 	}
749 }
750 
751 static int
752 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
753 {
754 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
755 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
756 	int ret;
757 
758 	if (registers == NULL)
759 		return -EINVAL;
760 
761 	/* enable UIO interrupt handling */
762 	ret = rte_intr_enable(&pci_dev->intr_handle);
763 	if (ret < 0) {
764 		PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
765 			    ret);
766 		return ret;
767 	}
768 
769 	/* inform the device that all interrupts are enabled */
770 	AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
771 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
772 
773 	return 0;
774 }
775 
776 static int
777 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
778 {
779 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
780 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
781 	int ret;
782 
783 	if (registers == NULL)
784 		return 0;
785 
786 	/* inform the device that all interrupts are disabled */
787 	AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
788 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
789 
790 	/* disable UIO interrupt handling */
791 	ret = rte_intr_disable(&pci_dev->intr_handle);
792 	if (ret < 0) {
793 		PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
794 			    ret);
795 		return ret;
796 	}
797 
798 	return 0;
799 }
800 
801 static int
802 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
803 {
804 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
805 	int ret;
806 
807 	/* register a callback handler with UIO for interrupt notifications */
808 	ret = rte_intr_callback_register(&pci_dev->intr_handle,
809 					 avp_dev_interrupt_handler,
810 					 (void *)eth_dev);
811 	if (ret < 0) {
812 		PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
813 			    ret);
814 		return ret;
815 	}
816 
817 	/* enable interrupt processing */
818 	return avp_dev_enable_interrupts(eth_dev);
819 }
820 
821 static int
822 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
823 {
824 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
825 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
826 	uint32_t value;
827 
828 	if (registers == NULL)
829 		return 0;
830 
831 	value = AVP_READ32(RTE_PTR_ADD(registers,
832 				       RTE_AVP_MIGRATION_STATUS_OFFSET));
833 	if (value == RTE_AVP_MIGRATION_DETACHED) {
834 		/* migration is in progress; ack it if we have not already */
835 		AVP_WRITE32(value,
836 			    RTE_PTR_ADD(registers,
837 					RTE_AVP_MIGRATION_ACK_OFFSET));
838 		return 1;
839 	}
840 	return 0;
841 }
842 
843 /*
844  * create an AVP device using the supplied device info by first translating it
845  * to guest address space(s).
846  */
847 static int
848 avp_dev_create(struct rte_pci_device *pci_dev,
849 	       struct rte_eth_dev *eth_dev)
850 {
851 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
852 	struct rte_avp_device_info *host_info;
853 	struct rte_mem_resource *resource;
854 	unsigned int i;
855 
856 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
857 	if (resource->addr == NULL) {
858 		PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
859 			    RTE_AVP_PCI_DEVICE_BAR);
860 		return -EFAULT;
861 	}
862 	host_info = (struct rte_avp_device_info *)resource->addr;
863 
864 	if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
865 		avp_dev_version_check(host_info->version)) {
866 		PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
867 			    host_info->magic, host_info->version,
868 			    AVP_DPDK_DRIVER_VERSION);
869 		return -EINVAL;
870 	}
871 
872 	PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
873 		    RTE_AVP_GET_RELEASE_VERSION(host_info->version),
874 		    RTE_AVP_GET_MAJOR_VERSION(host_info->version),
875 		    RTE_AVP_GET_MINOR_VERSION(host_info->version));
876 
877 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
878 		    host_info->min_tx_queues, host_info->max_tx_queues);
879 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
880 		    host_info->min_rx_queues, host_info->max_rx_queues);
881 	PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
882 		    host_info->features);
883 
884 	if (avp->magic != AVP_ETHDEV_MAGIC) {
885 		/*
886 		 * First time initialization (i.e., not during a VM
887 		 * migration)
888 		 */
889 		memset(avp, 0, sizeof(*avp));
890 		avp->magic = AVP_ETHDEV_MAGIC;
891 		avp->dev_data = eth_dev->data;
892 		avp->port_id = eth_dev->data->port_id;
893 		avp->host_mbuf_size = host_info->mbuf_size;
894 		avp->host_features = host_info->features;
895 		rte_spinlock_init(&avp->lock);
896 		memcpy(&avp->ethaddr.addr_bytes[0],
897 		       host_info->ethaddr, ETHER_ADDR_LEN);
898 		/* adjust max values to not exceed our max */
899 		avp->max_tx_queues =
900 			RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
901 		avp->max_rx_queues =
902 			RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
903 	} else {
904 		/* Re-attaching during migration */
905 
906 		/* TODO... requires validation of host values */
907 		if ((host_info->features & avp->features) != avp->features) {
908 			PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
909 				    avp->features, host_info->features);
910 			/* this should not be possible; continue for now */
911 		}
912 	}
913 
914 	/* the device id is allowed to change over migrations */
915 	avp->device_id = host_info->device_id;
916 
917 	/* translate incoming host addresses to guest address space */
918 	PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
919 		    host_info->tx_phys);
920 	PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
921 		    host_info->alloc_phys);
922 	for (i = 0; i < avp->max_tx_queues; i++) {
923 		avp->tx_q[i] = avp_dev_translate_address(eth_dev,
924 			host_info->tx_phys + (i * host_info->tx_size));
925 
926 		avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
927 			host_info->alloc_phys + (i * host_info->alloc_size));
928 	}
929 
930 	PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
931 		    host_info->rx_phys);
932 	PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
933 		    host_info->free_phys);
934 	for (i = 0; i < avp->max_rx_queues; i++) {
935 		avp->rx_q[i] = avp_dev_translate_address(eth_dev,
936 			host_info->rx_phys + (i * host_info->rx_size));
937 		avp->free_q[i] = avp_dev_translate_address(eth_dev,
938 			host_info->free_phys + (i * host_info->free_size));
939 	}
940 
941 	PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
942 		    host_info->req_phys);
943 	PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
944 		    host_info->resp_phys);
945 	PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
946 		    host_info->sync_phys);
947 	PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
948 		    host_info->mbuf_phys);
949 	avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
950 	avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
951 	avp->sync_addr =
952 		avp_dev_translate_address(eth_dev, host_info->sync_phys);
953 	avp->mbuf_addr =
954 		avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
955 
956 	/*
957 	 * store the host mbuf virtual address so that we can calculate
958 	 * relative offsets for each mbuf as they are processed
959 	 */
960 	avp->host_mbuf_addr = host_info->mbuf_va;
961 	avp->host_sync_addr = host_info->sync_va;
962 
963 	/*
964 	 * store the maximum packet length that is supported by the host.
965 	 */
966 	avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
967 	PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
968 				host_info->max_rx_pkt_len);
969 
970 	return 0;
971 }
972 
973 /*
974  * This function is based on the probe() function in avp_pci.c
975  * It returns 0 on success.
976  */
977 static int
978 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
979 {
980 	struct avp_dev *avp =
981 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
982 	struct rte_pci_device *pci_dev;
983 	int ret;
984 
985 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
986 	eth_dev->dev_ops = &avp_eth_dev_ops;
987 	eth_dev->rx_pkt_burst = &avp_recv_pkts;
988 	eth_dev->tx_pkt_burst = &avp_xmit_pkts;
989 
990 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
991 		/*
992 		 * no setup required on secondary processes.  All data is saved
993 		 * in dev_private by the primary process. All resource should
994 		 * be mapped to the same virtual address so all pointers should
995 		 * be valid.
996 		 */
997 		if (eth_dev->data->scattered_rx) {
998 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
999 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1000 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1001 		}
1002 		return 0;
1003 	}
1004 
1005 	rte_eth_copy_pci_info(eth_dev, pci_dev);
1006 
1007 	/* Check current migration status */
1008 	if (avp_dev_migration_pending(eth_dev)) {
1009 		PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
1010 		return -EBUSY;
1011 	}
1012 
1013 	/* Check BAR resources */
1014 	ret = avp_dev_check_regions(eth_dev);
1015 	if (ret < 0) {
1016 		PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
1017 			    ret);
1018 		return ret;
1019 	}
1020 
1021 	/* Enable interrupts */
1022 	ret = avp_dev_setup_interrupts(eth_dev);
1023 	if (ret < 0) {
1024 		PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1025 		return ret;
1026 	}
1027 
1028 	/* Create the AVP device */
1029 	ret = avp_dev_create(pci_dev, eth_dev);
1030 	if (ret < 0) {
1031 		PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1032 		return ret;
1033 	}
1034 
1035 	/* Allocate memory for storing MAC addresses */
1036 	eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
1037 	if (eth_dev->data->mac_addrs == NULL) {
1038 		PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1039 			    ETHER_ADDR_LEN);
1040 		return -ENOMEM;
1041 	}
1042 
1043 	/* Get the MAC address from the device config */
1044 	ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1045 
1046 	return 0;
1047 }
1048 
1049 static int
1050 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1051 {
1052 	int ret;
1053 
1054 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1055 		return -EPERM;
1056 
1057 	if (eth_dev->data == NULL)
1058 		return 0;
1059 
1060 	ret = avp_dev_disable_interrupts(eth_dev);
1061 	if (ret != 0) {
1062 		PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
1063 		return ret;
1064 	}
1065 
1066 	if (eth_dev->data->mac_addrs != NULL) {
1067 		rte_free(eth_dev->data->mac_addrs);
1068 		eth_dev->data->mac_addrs = NULL;
1069 	}
1070 
1071 	return 0;
1072 }
1073 
1074 static int
1075 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1076 		  struct rte_pci_device *pci_dev)
1077 {
1078 	struct rte_eth_dev *eth_dev;
1079 	int ret;
1080 
1081 	eth_dev = rte_eth_dev_pci_allocate(pci_dev,
1082 					   sizeof(struct avp_adapter));
1083 	if (eth_dev == NULL)
1084 		return -ENOMEM;
1085 
1086 	ret = eth_avp_dev_init(eth_dev);
1087 	if (ret)
1088 		rte_eth_dev_pci_release(eth_dev);
1089 
1090 	return ret;
1091 }
1092 
1093 static int
1094 eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1095 {
1096 	return rte_eth_dev_pci_generic_remove(pci_dev,
1097 					      eth_avp_dev_uninit);
1098 }
1099 
1100 static struct rte_pci_driver rte_avp_pmd = {
1101 	.id_table = pci_id_avp_map,
1102 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1103 	.probe = eth_avp_pci_probe,
1104 	.remove = eth_avp_pci_remove,
1105 };
1106 
1107 static int
1108 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1109 			 struct avp_dev *avp)
1110 {
1111 	unsigned int max_rx_pkt_len;
1112 
1113 	max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1114 
1115 	if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1116 	    (max_rx_pkt_len > avp->host_mbuf_size)) {
1117 		/*
1118 		 * If the guest MTU is greater than either the host or guest
1119 		 * buffers then chained mbufs have to be enabled in the TX
1120 		 * direction.  It is assumed that the application will not need
1121 		 * to send packets larger than the configured max_rx_pkt_len (MRU).
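		 * For example, a 9000-byte max_rx_pkt_len with 2048-byte mbufs
		 * on either side requires chained mbufs.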
1122 		 */
1123 		return 1;
1124 	}
1125 
1126 	if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1127 	    (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1128 		/*
1129 		 * If the host MRU is greater than its own mbuf size or the
1130 		 * guest mbuf size then chained mbufs have to be enabled in the
1131 		 * RX direction.
1132 		 */
1133 		return 1;
1134 	}
1135 
1136 	return 0;
1137 }
1138 
1139 static int
1140 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1141 		       uint16_t rx_queue_id,
1142 		       uint16_t nb_rx_desc,
1143 		       unsigned int socket_id,
1144 		       const struct rte_eth_rxconf *rx_conf,
1145 		       struct rte_mempool *pool)
1146 {
1147 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1148 	struct rte_pktmbuf_pool_private *mbp_priv;
1149 	struct avp_queue *rxq;
1150 
1151 	if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1152 		PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1153 			    rx_queue_id, eth_dev->data->nb_rx_queues);
1154 		return -EINVAL;
1155 	}
1156 
1157 	/* Save mbuf pool pointer */
1158 	avp->pool = pool;
1159 
1160 	/* Save the local mbuf size */
1161 	mbp_priv = rte_mempool_get_priv(pool);
1162 	avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1163 	avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1164 
1165 	if (avp_dev_enable_scattered(eth_dev, avp)) {
1166 		if (!eth_dev->data->scattered_rx) {
1167 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1168 			eth_dev->data->scattered_rx = 1;
1169 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1170 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1171 		}
1172 	}
1173 
1174 	PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1175 		    avp->max_rx_pkt_len,
1176 		    eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1177 		    avp->host_mbuf_size,
1178 		    avp->guest_mbuf_size);
1179 
1180 	/* allocate a queue object */
1181 	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1182 				 RTE_CACHE_LINE_SIZE, socket_id);
1183 	if (rxq == NULL) {
1184 		PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1185 		return -ENOMEM;
1186 	}
1187 
1188 	/* save back pointers to AVP and Ethernet devices */
1189 	rxq->avp = avp;
1190 	rxq->dev_data = eth_dev->data;
1191 	eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1192 
1193 	/* setup the queue receive mapping for the current queue. */
1194 	_avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1195 
1196 	PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1197 
1198 	(void)nb_rx_desc;
1199 	(void)rx_conf;
1200 	return 0;
1201 }
1202 
1203 static int
1204 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1205 		       uint16_t tx_queue_id,
1206 		       uint16_t nb_tx_desc,
1207 		       unsigned int socket_id,
1208 		       const struct rte_eth_txconf *tx_conf)
1209 {
1210 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1211 	struct avp_queue *txq;
1212 
1213 	if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1214 		PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1215 			    tx_queue_id, eth_dev->data->nb_tx_queues);
1216 		return -EINVAL;
1217 	}
1218 
1219 	/* allocate a queue object */
1220 	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1221 				 RTE_CACHE_LINE_SIZE, socket_id);
1222 	if (txq == NULL) {
1223 		PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1224 		return -ENOMEM;
1225 	}
1226 
1227 	/* only the configured set of transmit queues is used */
1228 	txq->queue_id = tx_queue_id;
1229 	txq->queue_base = tx_queue_id;
1230 	txq->queue_limit = tx_queue_id;
1231 
1232 	/* save back pointers to AVP and Ethernet devices */
1233 	txq->avp = avp;
1234 	txq->dev_data = eth_dev->data;
1235 	eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1236 
1237 	PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1238 
1239 	(void)nb_tx_desc;
1240 	(void)tx_conf;
1241 	return 0;
1242 }
1243 
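/*
 * Compare two Ethernet addresses as three 16-bit words; returns zero when the
 * addresses are equal and non-zero otherwise.
 */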
1244 static inline int
1245 _avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
1246 {
1247 	uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1248 	uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1249 	return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1250 }
1251 
1252 static inline int
1253 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1254 {
1255 	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
1256 
1257 	if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1258 		/* allow all packets destined to our address */
1259 		return 0;
1260 	}
1261 
1262 	if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
1263 		/* allow all broadcast packets */
1264 		return 0;
1265 	}
1266 
1267 	if (likely(is_multicast_ether_addr(&eth->d_addr))) {
1268 		/* allow all multicast packets */
1269 		return 0;
1270 	}
1271 
1272 	if (avp->flags & AVP_F_PROMISC) {
1273 		/* allow all packets when in promiscuous mode */
1274 		return 0;
1275 	}
1276 
1277 	return -1;
1278 }
1279 
1280 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1281 static inline void
1282 __avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
1283 {
1284 	struct rte_avp_desc *first_buf;
1285 	struct rte_avp_desc *pkt_buf;
1286 	unsigned int pkt_len;
1287 	unsigned int nb_segs;
1288 	void *pkt_data;
1289 	unsigned int i;
1290 
1291 	first_buf = avp_dev_translate_buffer(avp, buf);
1292 
1293 	i = 0;
1294 	pkt_len = 0;
1295 	nb_segs = first_buf->nb_segs;
1296 	do {
1297 		/* Adjust pointers for guest addressing */
1298 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1299 		if (pkt_buf == NULL)
1300 			rte_panic("bad buffer: segment %u has an invalid address %p\n",
1301 				  i, buf);
1302 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1303 		if (pkt_data == NULL)
1304 			rte_panic("bad buffer: segment %u has a NULL data pointer\n",
1305 				  i);
1306 		if (pkt_buf->data_len == 0)
1307 			rte_panic("bad buffer: segment %u has 0 data length\n",
1308 				  i);
1309 		pkt_len += pkt_buf->data_len;
1310 		nb_segs--;
1311 		i++;
1312 
1313 	} while (nb_segs && (buf = pkt_buf->next) != NULL);
1314 
1315 	if (nb_segs != 0)
1316 		rte_panic("bad buffer: expected %u segments found %u\n",
1317 			  first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
1318 	if (pkt_len != first_buf->pkt_len)
1319 		rte_panic("bad buffer: expected length %u found %u\n",
1320 			  first_buf->pkt_len, pkt_len);
1321 }
1322 
1323 #define avp_dev_buffer_sanity_check(a, b) \
1324 	__avp_dev_buffer_sanity_check((a), (b))
1325 
1326 #else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */
1327 
1328 #define avp_dev_buffer_sanity_check(a, b) do {} while (0)
1329 
1330 #endif
1331 
1332 /*
1333  * Copy a host buffer chain to a set of mbufs.  This function assumes that
1334  * exactly the required number of mbufs is provided to copy all source bytes.
1335  */
1336 static inline struct rte_mbuf *
1337 avp_dev_copy_from_buffers(struct avp_dev *avp,
1338 			  struct rte_avp_desc *buf,
1339 			  struct rte_mbuf **mbufs,
1340 			  unsigned int count)
1341 {
1342 	struct rte_mbuf *m_previous = NULL;
1343 	struct rte_avp_desc *pkt_buf;
1344 	unsigned int total_length = 0;
1345 	unsigned int copy_length;
1346 	unsigned int src_offset;
1347 	struct rte_mbuf *m;
1348 	uint16_t ol_flags;
1349 	uint16_t vlan_tci;
1350 	void *pkt_data;
1351 	unsigned int i;
1352 
1353 	avp_dev_buffer_sanity_check(avp, buf);
1354 
1355 	/* setup the first source buffer */
1356 	pkt_buf = avp_dev_translate_buffer(avp, buf);
1357 	pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1358 	total_length = pkt_buf->pkt_len;
1359 	src_offset = 0;
1360 
1361 	if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1362 		ol_flags = PKT_RX_VLAN;
1363 		vlan_tci = pkt_buf->vlan_tci;
1364 	} else {
1365 		ol_flags = 0;
1366 		vlan_tci = 0;
1367 	}
1368 
1369 	for (i = 0; (i < count) && (buf != NULL); i++) {
1370 		/* fill each destination buffer */
1371 		m = mbufs[i];
1372 
1373 		if (m_previous != NULL)
1374 			m_previous->next = m;
1375 
1376 		m_previous = m;
1377 
1378 		do {
1379 			/*
1380 			 * Copy as many source buffers as will fit in the
1381 			 * destination buffer.
1382 			 */
1383 			copy_length = RTE_MIN((avp->guest_mbuf_size -
1384 					       rte_pktmbuf_data_len(m)),
1385 					      (pkt_buf->data_len -
1386 					       src_offset));
1387 			rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1388 					       rte_pktmbuf_data_len(m)),
1389 				   RTE_PTR_ADD(pkt_data, src_offset),
1390 				   copy_length);
1391 			rte_pktmbuf_data_len(m) += copy_length;
1392 			src_offset += copy_length;
1393 
1394 			if (likely(src_offset == pkt_buf->data_len)) {
1395 				/* need a new source buffer */
1396 				buf = pkt_buf->next;
1397 				if (buf != NULL) {
1398 					pkt_buf = avp_dev_translate_buffer(
1399 						avp, buf);
1400 					pkt_data = avp_dev_translate_buffer(
1401 						avp, pkt_buf->data);
1402 					src_offset = 0;
1403 				}
1404 			}
1405 
1406 			if (unlikely(rte_pktmbuf_data_len(m) ==
1407 				     avp->guest_mbuf_size)) {
1408 				/* need a new destination mbuf */
1409 				break;
1410 			}
1411 
1412 		} while (buf != NULL);
1413 	}
1414 
1415 	m = mbufs[0];
1416 	m->ol_flags = ol_flags;
1417 	m->nb_segs = count;
1418 	rte_pktmbuf_pkt_len(m) = total_length;
1419 	m->vlan_tci = vlan_tci;
1420 
1421 	__rte_mbuf_sanity_check(m, 1);
1422 
1423 	return m;
1424 }
1425 
1426 static uint16_t
1427 avp_recv_scattered_pkts(void *rx_queue,
1428 			struct rte_mbuf **rx_pkts,
1429 			uint16_t nb_pkts)
1430 {
1431 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1432 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1433 	struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1434 	struct avp_dev *avp = rxq->avp;
1435 	struct rte_avp_desc *pkt_buf;
1436 	struct rte_avp_fifo *free_q;
1437 	struct rte_avp_fifo *rx_q;
1438 	struct rte_avp_desc *buf;
1439 	unsigned int count, avail, n;
1440 	unsigned int guest_mbuf_size;
1441 	struct rte_mbuf *m;
1442 	unsigned int required;
1443 	unsigned int buf_len;
1444 	unsigned int port_id;
1445 	unsigned int i;
1446 
1447 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1448 		/* VM live migration in progress */
1449 		return 0;
1450 	}
1451 
1452 	guest_mbuf_size = avp->guest_mbuf_size;
1453 	port_id = avp->port_id;
1454 	rx_q = avp->rx_q[rxq->queue_id];
1455 	free_q = avp->free_q[rxq->queue_id];
1456 
1457 	/* setup next queue to service */
1458 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1459 		(rxq->queue_id + 1) : rxq->queue_base;
1460 
1461 	/* determine how many slots are available in the free queue */
1462 	count = avp_fifo_free_count(free_q);
1463 
1464 	/* determine how many packets are available in the rx queue */
1465 	avail = avp_fifo_count(rx_q);
1466 
1467 	/* determine how many packets can be received */
1468 	count = RTE_MIN(count, avail);
1469 	count = RTE_MIN(count, nb_pkts);
1470 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1471 
1472 	if (unlikely(count == 0)) {
1473 		/* no free buffers, or no buffers on the rx queue */
1474 		return 0;
1475 	}
1476 
1477 	/* retrieve pending packets */
1478 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1479 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1480 		   count, rx_q);
1481 
1482 	count = 0;
1483 	for (i = 0; i < n; i++) {
1484 		/* prefetch next entry while processing current one */
1485 		if (i + 1 < n) {
1486 			pkt_buf = avp_dev_translate_buffer(avp,
1487 							   avp_bufs[i + 1]);
1488 			rte_prefetch0(pkt_buf);
1489 		}
1490 		buf = avp_bufs[i];
1491 
1492 		/* Peek into the first buffer to determine the total length */
1493 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1494 		buf_len = pkt_buf->pkt_len;
1495 
1496 		/* Allocate enough mbufs to receive the entire packet */
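		/* e.g., a 5000-byte packet with 2048-byte guest mbufs needs 3 mbufs */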
1497 		required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1498 		if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1499 			rxq->dev_data->rx_mbuf_alloc_failed++;
1500 			continue;
1501 		}
1502 
1503 		/* Copy the data from the buffers to our mbufs */
1504 		m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1505 
1506 		/* finalize mbuf */
1507 		m->port = port_id;
1508 
1509 		if (_avp_mac_filter(avp, m) != 0) {
1510 			/* silently discard packets not destined to our MAC */
1511 			rte_pktmbuf_free(m);
1512 			continue;
1513 		}
1514 
1515 		/* return new mbuf to caller */
1516 		rx_pkts[count++] = m;
1517 		rxq->bytes += buf_len;
1518 	}
1519 
1520 	rxq->packets += count;
1521 
1522 	/* return the buffers to the free queue */
1523 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1524 
1525 	return count;
1526 }
1527 
1528 
1529 static uint16_t
1530 avp_recv_pkts(void *rx_queue,
1531 	      struct rte_mbuf **rx_pkts,
1532 	      uint16_t nb_pkts)
1533 {
1534 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1535 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1536 	struct avp_dev *avp = rxq->avp;
1537 	struct rte_avp_desc *pkt_buf;
1538 	struct rte_avp_fifo *free_q;
1539 	struct rte_avp_fifo *rx_q;
1540 	unsigned int count, avail, n;
1541 	unsigned int pkt_len;
1542 	struct rte_mbuf *m;
1543 	char *pkt_data;
1544 	unsigned int i;
1545 
1546 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1547 		/* VM live migration in progress */
1548 		return 0;
1549 	}
1550 
1551 	rx_q = avp->rx_q[rxq->queue_id];
1552 	free_q = avp->free_q[rxq->queue_id];
1553 
1554 	/* setup next queue to service */
1555 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1556 		(rxq->queue_id + 1) : rxq->queue_base;
1557 
1558 	/* determine how many slots are available in the free queue */
1559 	count = avp_fifo_free_count(free_q);
1560 
1561 	/* determine how many packets are available in the rx queue */
1562 	avail = avp_fifo_count(rx_q);
1563 
1564 	/* determine how many packets can be received */
1565 	count = RTE_MIN(count, avail);
1566 	count = RTE_MIN(count, nb_pkts);
1567 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1568 
1569 	if (unlikely(count == 0)) {
1570 		/* no free buffers, or no buffers on the rx queue */
1571 		return 0;
1572 	}
1573 
1574 	/* retrieve pending packets */
1575 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1576 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1577 		   count, rx_q);
1578 
1579 	count = 0;
1580 	for (i = 0; i < n; i++) {
1581 		/* prefetch next entry while processing current one */
1582 		if (i < n - 1) {
1583 			pkt_buf = avp_dev_translate_buffer(avp,
1584 							   avp_bufs[i + 1]);
1585 			rte_prefetch0(pkt_buf);
1586 		}
1587 
1588 		/* Adjust host pointers for guest addressing */
1589 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1590 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1591 		pkt_len = pkt_buf->pkt_len;
1592 
1593 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1594 			     (pkt_buf->nb_segs > 1))) {
1595 			/*
1596 			 * application should be using the scattered receive
1597 			 * function
1598 			 */
1599 			rxq->errors++;
1600 			continue;
1601 		}
1602 
1603 		/* allocate a new mbuf for the received packet */
1604 		m = rte_pktmbuf_alloc(avp->pool);
1605 		if (unlikely(m == NULL)) {
1606 			rxq->dev_data->rx_mbuf_alloc_failed++;
1607 			continue;
1608 		}
1609 
1610 		/* copy data out of the host buffer to our buffer */
1611 		m->data_off = RTE_PKTMBUF_HEADROOM;
1612 		rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1613 
1614 		/* initialize the local mbuf */
1615 		rte_pktmbuf_data_len(m) = pkt_len;
1616 		rte_pktmbuf_pkt_len(m) = pkt_len;
1617 		m->port = avp->port_id;
1618 
1619 		if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1620 			m->ol_flags = PKT_RX_VLAN;
1621 			m->vlan_tci = pkt_buf->vlan_tci;
1622 		}
1623 
1624 		if (_avp_mac_filter(avp, m) != 0) {
1625 			/* silently discard packets not destined to our MAC */
1626 			rte_pktmbuf_free(m);
1627 			continue;
1628 		}
1629 
1630 		/* return new mbuf to caller */
1631 		rx_pkts[count++] = m;
1632 		rxq->bytes += pkt_len;
1633 	}
1634 
1635 	rxq->packets += count;
1636 
1637 	/* return the buffers to the free queue */
1638 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1639 
1640 	return count;
1641 }
1642 
1643 /*
1644  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1645  * there are sufficient destination buffers to contain the entire source
1646  * packet.
1647  */
1648 static inline uint16_t
1649 avp_dev_copy_to_buffers(struct avp_dev *avp,
1650 			struct rte_mbuf *mbuf,
1651 			struct rte_avp_desc **buffers,
1652 			unsigned int count)
1653 {
1654 	struct rte_avp_desc *previous_buf = NULL;
1655 	struct rte_avp_desc *first_buf = NULL;
1656 	struct rte_avp_desc *pkt_buf;
1657 	struct rte_avp_desc *buf;
1658 	size_t total_length;
1659 	struct rte_mbuf *m;
1660 	size_t copy_length;
1661 	size_t src_offset;
1662 	char *pkt_data;
1663 	unsigned int i;
1664 
1665 	__rte_mbuf_sanity_check(mbuf, 1);
1666 
1667 	m = mbuf;
1668 	src_offset = 0;
1669 	total_length = rte_pktmbuf_pkt_len(m);
1670 	for (i = 0; (i < count) && (m != NULL); i++) {
1671 		/* fill each destination buffer */
1672 		buf = buffers[i];
1673 
1674 		if (i < count - 1) {
1675 			/* prefetch next entry while processing this one */
1676 			pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1677 			rte_prefetch0(pkt_buf);
1678 		}
1679 
1680 		/* Adjust pointers for guest addressing */
1681 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1682 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1683 
1684 		/* setup the buffer chain */
1685 		if (previous_buf != NULL)
1686 			previous_buf->next = buf;
1687 		else
1688 			first_buf = pkt_buf;
1689 
1690 		previous_buf = pkt_buf;
1691 
1692 		do {
1693 			/*
1694 			 * copy as many source mbuf segments as will fit in the
1695 			 * destination buffer.
1696 			 */
1697 			copy_length = RTE_MIN((avp->host_mbuf_size -
1698 					       pkt_buf->data_len),
1699 					      (rte_pktmbuf_data_len(m) -
1700 					       src_offset));
1701 			rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1702 				   RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1703 					       src_offset),
1704 				   copy_length);
1705 			pkt_buf->data_len += copy_length;
1706 			src_offset += copy_length;
1707 
1708 			if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1709 				/* need a new source buffer */
1710 				m = m->next;
1711 				src_offset = 0;
1712 			}
1713 
1714 			if (unlikely(pkt_buf->data_len ==
1715 				     avp->host_mbuf_size)) {
1716 				/* need a new destination buffer */
1717 				break;
1718 			}
1719 
1720 		} while (m != NULL);
1721 	}
1722 
1723 	first_buf->nb_segs = count;
1724 	first_buf->pkt_len = total_length;
1725 
1726 	if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1727 		first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1728 		first_buf->vlan_tci = mbuf->vlan_tci;
1729 	}
1730 
1731 	avp_dev_buffer_sanity_check(avp, buffers[0]);
1732 
1733 	return total_length;
1734 }
1735 
1736 
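/*
 * Transmit a burst of packets that may each require multiple host buffers.
 * Each mbuf chain is copied into buffers taken from the per-queue alloc_q
 * and the head buffer of every packet is then placed on tx_q for the host
 * to consume.  A packet needs ceil(pkt_len / host_mbuf_size) buffers; a
 * packet that would need more than RTE_AVP_MAX_MBUF_SEGMENTS buffers, or
 * more buffers than are currently available, ends the burst early.  While
 * the device is detached for VM live migration the entire burst is dropped
 * and counted as errors.
 */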
1737 static uint16_t
1738 avp_xmit_scattered_pkts(void *tx_queue,
1739 			struct rte_mbuf **tx_pkts,
1740 			uint16_t nb_pkts)
1741 {
1742 	struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1743 				       RTE_AVP_MAX_MBUF_SEGMENTS)];
1744 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1745 	struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1746 	struct avp_dev *avp = txq->avp;
1747 	struct rte_avp_fifo *alloc_q;
1748 	struct rte_avp_fifo *tx_q;
1749 	unsigned int count, avail, n;
1750 	unsigned int orig_nb_pkts;
1751 	struct rte_mbuf *m;
1752 	unsigned int required;
1753 	unsigned int segments;
1754 	unsigned int tx_bytes;
1755 	unsigned int i;
1756 
1757 	orig_nb_pkts = nb_pkts;
1758 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1759 		/* VM live migration in progress */
1760 		/* TODO ... buffer for X packets then drop? */
1761 		txq->errors += nb_pkts;
1762 		return 0;
1763 	}
1764 
1765 	tx_q = avp->tx_q[txq->queue_id];
1766 	alloc_q = avp->alloc_q[txq->queue_id];
1767 
1768 	/* limit the number of transmitted packets to the max burst size */
1769 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1770 		nb_pkts = AVP_MAX_TX_BURST;
1771 
1772 	/* determine how many buffers are available to copy into */
1773 	avail = avp_fifo_count(alloc_q);
1774 	if (unlikely(avail > (AVP_MAX_TX_BURST *
1775 			      RTE_AVP_MAX_MBUF_SEGMENTS)))
1776 		avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1777 
1778 	/* determine how many slots are available in the transmit queue */
1779 	count = avp_fifo_free_count(tx_q);
1780 
1781 	/* determine how many packets can be sent */
1782 	nb_pkts = RTE_MIN(count, nb_pkts);
1783 
1784 	/* determine how many packets will fit in the available buffers */
1785 	count = 0;
1786 	segments = 0;
1787 	for (i = 0; i < nb_pkts; i++) {
1788 		m = tx_pkts[i];
1789 		if (likely(i < (unsigned int)nb_pkts - 1)) {
1790 			/* prefetch next entry while processing this one */
1791 			rte_prefetch0(tx_pkts[i + 1]);
1792 		}
1793 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1794 			avp->host_mbuf_size;
1795 
1796 		if (unlikely((required == 0) ||
1797 			     (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1798 			break;
1799 		else if (unlikely(required + segments > avail))
1800 			break;
1801 		segments += required;
1802 		count++;
1803 	}
1804 	nb_pkts = count;
1805 
1806 	if (unlikely(nb_pkts == 0)) {
1807 		/* no available buffers, or no space on the tx queue */
1808 		txq->errors += orig_nb_pkts;
1809 		return 0;
1810 	}
1811 
1812 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1813 		   nb_pkts, tx_q);
1814 
1815 	/* retrieve sufficient send buffers */
1816 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1817 	if (unlikely(n != segments)) {
1818 		PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1819 			   "n=%u, segments=%u, orig=%u\n",
1820 			   n, segments, orig_nb_pkts);
1821 		txq->errors += orig_nb_pkts;
1822 		return 0;
1823 	}
1824 
1825 	tx_bytes = 0;
1826 	count = 0;
1827 	for (i = 0; i < nb_pkts; i++) {
1828 		/* process each packet to be transmitted */
1829 		m = tx_pkts[i];
1830 
1831 		/* determine how many buffers are required for this packet */
1832 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1833 			avp->host_mbuf_size;
1834 
1835 		tx_bytes += avp_dev_copy_to_buffers(avp, m,
1836 						    &avp_bufs[count], required);
1837 		tx_bufs[i] = avp_bufs[count];
1838 		count += required;
1839 
1840 		/* free the original mbuf */
1841 		rte_pktmbuf_free(m);
1842 	}
1843 
1844 	txq->packets += nb_pkts;
1845 	txq->bytes += tx_bytes;
1846 
1847 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1848 	for (i = 0; i < nb_pkts; i++)
1849 		avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1850 #endif
1851 
1852 	/* send the packets */
1853 	n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1854 	if (unlikely(n != orig_nb_pkts))
1855 		txq->errors += (orig_nb_pkts - n);
1856 
1857 	return n;
1858 }
1859 
1860 
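/*
 * Transmit a burst of single-segment packets.  Each mbuf is copied into
 * exactly one host buffer obtained from alloc_q and then handed to the
 * host via tx_q.  Packets larger than the negotiated buffer sizes are
 * truncated and counted as errors; applications that need larger frames
 * are expected to use the scattered transmit function instead.
 */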
1861 static uint16_t
1862 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1863 {
1864 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1865 	struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1866 	struct avp_dev *avp = txq->avp;
1867 	struct rte_avp_desc *pkt_buf;
1868 	struct rte_avp_fifo *alloc_q;
1869 	struct rte_avp_fifo *tx_q;
1870 	unsigned int count, avail, n;
1871 	struct rte_mbuf *m;
1872 	unsigned int pkt_len;
1873 	unsigned int tx_bytes;
1874 	char *pkt_data;
1875 	unsigned int i;
1876 
1877 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1878 		/* VM live migration in progress */
1879 		/* TODO ... buffer for X packets then drop?! */
1880 		txq->errors++;
1881 		return 0;
1882 	}
1883 
1884 	tx_q = avp->tx_q[txq->queue_id];
1885 	alloc_q = avp->alloc_q[txq->queue_id];
1886 
1887 	/* limit the number of transmitted packets to the max burst size */
1888 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1889 		nb_pkts = AVP_MAX_TX_BURST;
1890 
1891 	/* determine how many buffers are available to copy into */
1892 	avail = avp_fifo_count(alloc_q);
1893 
1894 	/* determine how many slots are available in the transmit queue */
1895 	count = avp_fifo_free_count(tx_q);
1896 
1897 	/* determine how many packets can be sent */
1898 	count = RTE_MIN(count, avail);
1899 	count = RTE_MIN(count, nb_pkts);
1900 
1901 	if (unlikely(count == 0)) {
1902 		/* no available buffers, or no space on the tx queue */
1903 		txq->errors += nb_pkts;
1904 		return 0;
1905 	}
1906 
1907 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1908 		   count, tx_q);
1909 
1910 	/* retrieve sufficient send buffers */
1911 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1912 	if (unlikely(n != count)) {
1913 		txq->errors++;
1914 		return 0;
1915 	}
1916 
1917 	tx_bytes = 0;
1918 	for (i = 0; i < count; i++) {
1919 		/* prefetch next entry while processing the current one */
1920 		if (i < count - 1) {
1921 			pkt_buf = avp_dev_translate_buffer(avp,
1922 							   avp_bufs[i + 1]);
1923 			rte_prefetch0(pkt_buf);
1924 		}
1925 
1926 		/* process each packet to be transmitted */
1927 		m = tx_pkts[i];
1928 
1929 		/* Adjust pointers for guest addressing */
1930 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1931 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1932 		pkt_len = rte_pktmbuf_pkt_len(m);
1933 
1934 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1935 					 (pkt_len > avp->host_mbuf_size))) {
1936 			/*
1937 			 * application should be using the scattered transmit
1938 			 * function; send it truncated to avoid the performance
1939 			 * hit of having to manage returning the already
1940 			 * allocated buffer to the free list.  This should not
1941 			 * happen since the application should have set the
1942 			 * max_rx_pkt_len based on its MTU and it should be
1943 			 * policing its own packet sizes.
1944 			 */
1945 			txq->errors++;
1946 			pkt_len = RTE_MIN(avp->guest_mbuf_size,
1947 					  avp->host_mbuf_size);
1948 		}
1949 
1950 		/* copy data out of our mbuf and into the AVP buffer */
1951 		rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1952 		pkt_buf->pkt_len = pkt_len;
1953 		pkt_buf->data_len = pkt_len;
1954 		pkt_buf->nb_segs = 1;
1955 		pkt_buf->next = NULL;
1956 
1957 		if (m->ol_flags & PKT_TX_VLAN_PKT) {
1958 			pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1959 			pkt_buf->vlan_tci = m->vlan_tci;
1960 		}
1961 
1962 		tx_bytes += pkt_len;
1963 
1964 		/* free the original mbuf */
1965 		rte_pktmbuf_free(m);
1966 	}
1967 
1968 	txq->packets += count;
1969 	txq->bytes += tx_bytes;
1970 
1971 	/* send the packets */
1972 	n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1973 
1974 	return n;
1975 }
1976 
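/*
 * Queue release handlers.  These only clear the rx_queues[]/tx_queues[]
 * references held in the shared device data; the queue structures
 * themselves are not freed here.
 */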
1977 static void
1978 avp_dev_rx_queue_release(void *rx_queue)
1979 {
1980 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1981 	struct avp_dev *avp = rxq->avp;
1982 	struct rte_eth_dev_data *data = avp->dev_data;
1983 	unsigned int i;
1984 
1985 	for (i = 0; i < avp->num_rx_queues; i++) {
1986 		if (data->rx_queues[i] == rxq)
1987 			data->rx_queues[i] = NULL;
1988 	}
1989 }
1990 
1991 static void
1992 avp_dev_tx_queue_release(void *tx_queue)
1993 {
1994 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1995 	struct avp_dev *avp = txq->avp;
1996 	struct rte_eth_dev_data *data = avp->dev_data;
1997 	unsigned int i;
1998 
1999 	for (i = 0; i < avp->num_tx_queues; i++) {
2000 		if (data->tx_queues[i] == txq)
2001 			data->tx_queues[i] = NULL;
2002 	}
2003 }
2004 
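/*
 * Configure the device.  The required queue counts are computed, VLAN
 * offloads are applied, and the negotiated feature set, driver type and
 * version are reported back to the host through a control request.
 * Configuration is refused with -ENOTSUP while the device is detached
 * for VM live migration.
 */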
2005 static int
2006 avp_dev_configure(struct rte_eth_dev *eth_dev)
2007 {
2008 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2009 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2010 	struct rte_avp_device_info *host_info;
2011 	struct rte_avp_device_config config;
2012 	int mask = 0;
2013 	void *addr;
2014 	int ret;
2015 
2016 	rte_spinlock_lock(&avp->lock);
2017 	if (avp->flags & AVP_F_DETACHED) {
2018 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2019 		ret = -ENOTSUP;
2020 		goto unlock;
2021 	}
2022 
2023 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2024 	host_info = (struct rte_avp_device_info *)addr;
2025 
2026 	/* Setup required number of queues */
2027 	_avp_set_queue_counts(eth_dev);
2028 
2029 	mask = (ETH_VLAN_STRIP_MASK |
2030 		ETH_VLAN_FILTER_MASK |
2031 		ETH_VLAN_EXTEND_MASK);
2032 	ret = avp_vlan_offload_set(eth_dev, mask);
2033 	if (ret < 0) {
2034 		PMD_DRV_LOG(ERR, "VLAN offload set failed by host, ret=%d\n",
2035 			    ret);
2036 		goto unlock;
2037 	}
2038 
2039 	/* update device config */
2040 	memset(&config, 0, sizeof(config));
2041 	config.device_id = host_info->device_id;
2042 	config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2043 	config.driver_version = AVP_DPDK_DRIVER_VERSION;
2044 	config.features = avp->features;
2045 	config.num_tx_queues = avp->num_tx_queues;
2046 	config.num_rx_queues = avp->num_rx_queues;
2047 
2048 	ret = avp_dev_ctrl_set_config(eth_dev, &config);
2049 	if (ret < 0) {
2050 		PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2051 			    ret);
2052 		goto unlock;
2053 	}
2054 
2055 	avp->flags |= AVP_F_CONFIGURED;
2056 	ret = 0;
2057 
2058 unlock:
2059 	rte_spinlock_unlock(&avp->lock);
2060 	return ret;
2061 }
2062 
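/*
 * Start the device.  Receive offloads that are not supported are cleared
 * from the current configuration and the host is asked to bring the link
 * up via a control request.  Fails with -ENOTSUP while a VM live
 * migration is in progress.
 */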
2063 static int
2064 avp_dev_start(struct rte_eth_dev *eth_dev)
2065 {
2066 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2067 	int ret;
2068 
2069 	rte_spinlock_lock(&avp->lock);
2070 	if (avp->flags & AVP_F_DETACHED) {
2071 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2072 		ret = -ENOTSUP;
2073 		goto unlock;
2074 	}
2075 
2076 	/* disable features that we do not support */
2077 	eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
2078 	eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
2079 	eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
2080 	eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
2081 
2082 	/* update link state */
2083 	ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2084 	if (ret < 0) {
2085 		PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2086 			    ret);
2087 		goto unlock;
2088 	}
2089 
2090 	/* remember current link state */
2091 	avp->flags |= AVP_F_LINKUP;
2092 
2093 	ret = 0;
2094 
2095 unlock:
2096 	rte_spinlock_unlock(&avp->lock);
2097 	return ret;
2098 }
2099 
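/*
 * Stop the device by clearing the local link state and asking the host
 * to bring the link down.  A failed control request is logged but
 * otherwise ignored.
 */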
2100 static void
2101 avp_dev_stop(struct rte_eth_dev *eth_dev)
2102 {
2103 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2104 	int ret;
2105 
2106 	rte_spinlock_lock(&avp->lock);
2107 	if (avp->flags & AVP_F_DETACHED) {
2108 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2109 		goto unlock;
2110 	}
2111 
2112 	/* remember current link state */
2113 	avp->flags &= ~AVP_F_LINKUP;
2114 
2115 	/* update link state */
2116 	ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2117 	if (ret < 0) {
2118 		PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2119 			    ret);
2120 	}
2121 
2122 unlock:
2123 	rte_spinlock_unlock(&avp->lock);
2124 }
2125 
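/*
 * Close the device.  The local link and configured state is cleared,
 * interrupts are disabled, and a shutdown control request is sent to the
 * host.  Errors are logged and ignored so that the close always runs to
 * completion.
 */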
2126 static void
2127 avp_dev_close(struct rte_eth_dev *eth_dev)
2128 {
2129 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2130 	int ret;
2131 
2132 	rte_spinlock_lock(&avp->lock);
2133 	if (avp->flags & AVP_F_DETACHED) {
2134 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2135 		goto unlock;
2136 	}
2137 
2138 	/* remember current link state */
2139 	avp->flags &= ~AVP_F_LINKUP;
2140 	avp->flags &= ~AVP_F_CONFIGURED;
2141 
2142 	ret = avp_dev_disable_interrupts(eth_dev);
2143 	if (ret < 0) {
2144 		PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2145 		/* continue */
2146 	}
2147 
2148 	/* update device state */
2149 	ret = avp_dev_ctrl_shutdown(eth_dev);
2150 	if (ret < 0) {
2151 		PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2152 			    ret);
2153 		/* continue */
2154 	}
2155 
2156 unlock:
2157 	rte_spinlock_unlock(&avp->lock);
2158 }
2159 
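/*
 * Report the link state.  AVP is a virtual device with no physical link,
 * so the speed and duplex values are fixed and the status simply reflects
 * the locally tracked AVP_F_LINKUP flag.
 */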
2160 static int
2161 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2162 					__rte_unused int wait_to_complete)
2163 {
2164 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2165 	struct rte_eth_link *link = &eth_dev->data->dev_link;
2166 
2167 	link->link_speed = ETH_SPEED_NUM_10G;
2168 	link->link_duplex = ETH_LINK_FULL_DUPLEX;
2169 	link->link_status = !!(avp->flags & AVP_F_LINKUP);
2170 
2171 	return -1;
2172 }
2173 
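/*
 * Promiscuous mode control.  The AVP_F_PROMISC flag is tracked locally
 * and is consulted by the software MAC filter on the receive path when
 * deciding whether to keep packets not addressed to this port.
 */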
2174 static void
2175 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2176 {
2177 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2178 
2179 	rte_spinlock_lock(&avp->lock);
2180 	if ((avp->flags & AVP_F_PROMISC) == 0) {
2181 		avp->flags |= AVP_F_PROMISC;
2182 		PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2183 			    eth_dev->data->port_id);
2184 	}
2185 	rte_spinlock_unlock(&avp->lock);
2186 }
2187 
2188 static void
2189 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2190 {
2191 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2192 
2193 	rte_spinlock_lock(&avp->lock);
2194 	if ((avp->flags & AVP_F_PROMISC) != 0) {
2195 		avp->flags &= ~AVP_F_PROMISC;
2196 		PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2197 			    eth_dev->data->port_id);
2198 	}
2199 	rte_spinlock_unlock(&avp->lock);
2200 }
2201 
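/*
 * Report device capabilities.  Queue limits and the maximum Rx packet
 * length come from values negotiated with the host; VLAN strip/insert
 * offloads are only advertised when the host supports
 * RTE_AVP_FEATURE_VLAN_OFFLOAD.
 */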
2202 static void
2203 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2204 		 struct rte_eth_dev_info *dev_info)
2205 {
2206 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2207 
2208 	dev_info->pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2209 	dev_info->max_rx_queues = avp->max_rx_queues;
2210 	dev_info->max_tx_queues = avp->max_tx_queues;
2211 	dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2212 	dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2213 	dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2214 	if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2215 		dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2216 		dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2217 	}
2218 }
2219 
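/*
 * Apply VLAN offload settings.  Only VLAN stripping is supported, and
 * only when the host advertises RTE_AVP_FEATURE_VLAN_OFFLOAD; requests to
 * enable VLAN filtering or extended VLAN are logged as unsupported.
 */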
2220 static int
2221 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2222 {
2223 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2224 
2225 	if (mask & ETH_VLAN_STRIP_MASK) {
2226 		if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2227 			if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
2228 				avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2229 			else
2230 				avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2231 		} else {
2232 			PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2233 		}
2234 	}
2235 
2236 	if (mask & ETH_VLAN_FILTER_MASK) {
2237 		if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
2238 			PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2239 	}
2240 
2241 	if (mask & ETH_VLAN_EXTEND_MASK) {
2242 		if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
2243 			PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2244 	}
2245 
2246 	return 0;
2247 }
2248 
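/*
 * Aggregate the software counters maintained by the receive and transmit
 * burst handlers.  Per-queue counters are only reported for queue ids
 * below RTE_ETHDEV_QUEUE_STAT_CNTRS.
 */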
2249 static int
2250 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2251 {
2252 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2253 	unsigned int i;
2254 
2255 	for (i = 0; i < avp->num_rx_queues; i++) {
2256 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2257 
2258 		if (rxq) {
2259 			stats->ipackets += rxq->packets;
2260 			stats->ibytes += rxq->bytes;
2261 			stats->ierrors += rxq->errors;
2262 
			if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
				stats->q_ipackets[i] += rxq->packets;
				stats->q_ibytes[i] += rxq->bytes;
				stats->q_errors[i] += rxq->errors;
			}
2266 		}
2267 	}
2268 
2269 	for (i = 0; i < avp->num_tx_queues; i++) {
2270 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2271 
2272 		if (txq) {
2273 			stats->opackets += txq->packets;
2274 			stats->obytes += txq->bytes;
2275 			stats->oerrors += txq->errors;
2276 
			if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
				stats->q_opackets[i] += txq->packets;
				stats->q_obytes[i] += txq->bytes;
				stats->q_errors[i] += txq->errors;
			}
2280 		}
2281 	}
2282 
2283 	return 0;
2284 }
2285 
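/*
 * Reset the software packet, byte and error counters on every configured
 * Rx and Tx queue.
 */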
2286 static void
2287 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2288 {
2289 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2290 	unsigned int i;
2291 
2292 	for (i = 0; i < avp->num_rx_queues; i++) {
2293 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2294 
2295 		if (rxq) {
2296 			rxq->bytes = 0;
2297 			rxq->packets = 0;
2298 			rxq->errors = 0;
2299 		}
2300 	}
2301 
2302 	for (i = 0; i < avp->num_tx_queues; i++) {
2303 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2304 
2305 		if (txq) {
2306 			txq->bytes = 0;
2307 			txq->packets = 0;
2308 			txq->errors = 0;
2309 		}
2310 	}
2311 }
2312 
2313 RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
2314 RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);
2315