xref: /dpdk/drivers/net/avp/avp_ethdev.c (revision 0964a95120fa024888fbc0ea5e34d1abef1b93dc)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2013-2017 Wind River Systems, Inc.
3  */
4 
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10 
11 #include <rte_ethdev_driver.h>
12 #include <rte_ethdev_pci.h>
13 #include <rte_memcpy.h>
14 #include <rte_string_fns.h>
15 #include <rte_malloc.h>
16 #include <rte_atomic.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_pci.h>
19 #include <rte_bus_pci.h>
20 #include <rte_ether.h>
21 #include <rte_common.h>
22 #include <rte_cycles.h>
23 #include <rte_spinlock.h>
24 #include <rte_byteorder.h>
25 #include <rte_dev.h>
26 #include <rte_memory.h>
27 #include <rte_eal.h>
28 #include <rte_io.h>
29 
30 #include "rte_avp_common.h"
31 #include "rte_avp_fifo.h"
32 
33 #include "avp_logs.h"
34 
/*
 * Log type identifier for this driver.
 * NOTE(review): presumably consumed by the macros in avp_logs.h - confirm.
 */
int avp_logtype_driver;

/* Device creation; also re-run by the attach path after a migration. */
static int avp_dev_create(struct rte_pci_device *pci_dev,
			  struct rte_eth_dev *eth_dev);

/* Device level operations referenced by the dev_ops table. */
static int avp_dev_configure(struct rte_eth_dev *dev);
static int avp_dev_start(struct rte_eth_dev *dev);
static void avp_dev_stop(struct rte_eth_dev *dev);
static void avp_dev_close(struct rte_eth_dev *dev);
static int avp_dev_info_get(struct rte_eth_dev *dev,
			    struct rte_eth_dev_info *dev_info);
static int avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
static int avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
static int avp_dev_promiscuous_disable(struct rte_eth_dev *dev);

/* Queue setup and release. */
static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
				  uint16_t rx_queue_id,
				  uint16_t nb_rx_desc,
				  unsigned int socket_id,
				  const struct rte_eth_rxconf *rx_conf,
				  struct rte_mempool *pool);
static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
				  uint16_t tx_queue_id,
				  uint16_t nb_tx_desc,
				  unsigned int socket_id,
				  const struct rte_eth_txconf *tx_conf);
static void avp_dev_rx_queue_release(void *rxq);
static void avp_dev_tx_queue_release(void *txq);

/* Burst handlers; the scattered variants are selected for chained mbufs. */
static uint16_t avp_recv_scattered_pkts(void *rx_queue,
					struct rte_mbuf **rx_pkts,
					uint16_t nb_pkts);
static uint16_t avp_recv_pkts(void *rx_queue,
			      struct rte_mbuf **rx_pkts,
			      uint16_t nb_pkts);
static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
					struct rte_mbuf **tx_pkts,
					uint16_t nb_pkts);
static uint16_t avp_xmit_pkts(void *tx_queue,
			      struct rte_mbuf **tx_pkts,
			      uint16_t nb_pkts);

/* Statistics. */
static int avp_dev_stats_get(struct rte_eth_dev *dev,
			      struct rte_eth_stats *stats);
static int avp_dev_stats_reset(struct rte_eth_dev *dev);
87 
/*
 * Burst size limits and device capabilities.
 * NOTE(review): the users of these values are outside this excerpt - confirm.
 */
#define AVP_MAX_RX_BURST 64
#define AVP_MAX_TX_BURST 64
#define AVP_MAX_MAC_ADDRS 1
/* The smallest receive buffer is aliased to the minimum Ethernet length. */
#define AVP_MIN_RX_BUFSIZE RTE_ETHER_MIN_LEN

/*
 * Sleep interval, in microseconds, between two polls of the response
 * queue while a request to the host is outstanding.
 */
#define AVP_REQUEST_DELAY_USECS (5000)

/*
 * Number of polls of the response queue that are attempted before an
 * outstanding request is reported as timed out.
 */
#define AVP_MAX_REQUEST_RETRY (100)

/* Version number advertised to the host by this PCI driver. */
#define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
108 
109 /*
110  * The set of PCI devices this driver supports
111  */
112 static const struct rte_pci_id pci_id_avp_map[] = {
113 	{ .vendor_id = RTE_AVP_PCI_VENDOR_ID,
114 	  .device_id = RTE_AVP_PCI_DEVICE_ID,
115 	  .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
116 	  .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
117 	  .class_id = RTE_CLASS_ANY_ID,
118 	},
119 
120 	{ .vendor_id = 0, /* sentinel */
121 	},
122 };
123 
124 /*
125  * dev_ops for avp, bare necessities for basic operation
126  */
127 static const struct eth_dev_ops avp_eth_dev_ops = {
128 	.dev_configure       = avp_dev_configure,
129 	.dev_start           = avp_dev_start,
130 	.dev_stop            = avp_dev_stop,
131 	.dev_close           = avp_dev_close,
132 	.dev_infos_get       = avp_dev_info_get,
133 	.vlan_offload_set    = avp_vlan_offload_set,
134 	.stats_get           = avp_dev_stats_get,
135 	.stats_reset         = avp_dev_stats_reset,
136 	.link_update         = avp_dev_link_update,
137 	.promiscuous_enable  = avp_dev_promiscuous_enable,
138 	.promiscuous_disable = avp_dev_promiscuous_disable,
139 	.rx_queue_setup      = avp_dev_rx_queue_setup,
140 	.rx_queue_release    = avp_dev_rx_queue_release,
141 	.tx_queue_setup      = avp_dev_tx_queue_setup,
142 	.tx_queue_release    = avp_dev_tx_queue_release,
143 };
144 
/**@{ AVP device flags (bits of avp_dev.flags) */
/* NOTE(review): presumably tracks promiscuous mode; users not in view. */
#define AVP_F_PROMISC (1 << 1)
/* Tested by the attach path before the queue mappings are refreshed. */
#define AVP_F_CONFIGURED (1 << 2)
/* Reported to the host as the if_up state when re-attaching. */
#define AVP_F_LINKUP (1 << 3)
/* Set by the detach path and cleared again by the attach path. */
#define AVP_F_DETACHED (1 << 4)
/**@} */

/* Marker stored in avp_dev.magic once the structure has been initialized */
#define AVP_ETHDEV_MAGIC 0x92972862
154 
155 /*
156  * Defines the AVP device attributes which are attached to an RTE ethernet
157  * device
158  */
159 struct avp_dev {
160 	uint32_t magic; /**< Memory validation marker */
161 	uint64_t device_id; /**< Unique system identifier */
162 	struct rte_ether_addr ethaddr; /**< Host specified MAC address */
163 	struct rte_eth_dev_data *dev_data;
164 	/**< Back pointer to ethernet device data */
165 	volatile uint32_t flags; /**< Device operational flags */
166 	uint16_t port_id; /**< Ethernet port identifier */
167 	struct rte_mempool *pool; /**< pkt mbuf mempool */
168 	unsigned int guest_mbuf_size; /**< local pool mbuf size */
169 	unsigned int host_mbuf_size; /**< host mbuf size */
170 	unsigned int max_rx_pkt_len; /**< maximum receive unit */
171 	uint32_t host_features; /**< Supported feature bitmap */
172 	uint32_t features; /**< Enabled feature bitmap */
173 	unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
174 	unsigned int max_tx_queues; /**< Maximum number of transmit queues */
175 	unsigned int num_rx_queues; /**< Negotiated number of receive queues */
176 	unsigned int max_rx_queues; /**< Maximum number of receive queues */
177 
178 	struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
179 	struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
180 	struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
181 	/**< Allocated mbufs queue */
182 	struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
183 	/**< To be freed mbufs queue */
184 
185 	/* mutual exclusion over the 'flag' and 'resp_q/req_q' fields */
186 	rte_spinlock_t lock;
187 
188 	/* For request & response */
189 	struct rte_avp_fifo *req_q; /**< Request queue */
190 	struct rte_avp_fifo *resp_q; /**< Response queue */
191 	void *host_sync_addr; /**< (host) Req/Resp Mem address */
192 	void *sync_addr; /**< Req/Resp Mem address */
193 	void *host_mbuf_addr; /**< (host) MBUF pool start address */
194 	void *mbuf_addr; /**< MBUF pool start address */
195 } __rte_cache_aligned;
196 
197 /* RTE ethernet private data */
198 struct avp_adapter {
199 	struct avp_dev avp;
200 } __rte_cache_aligned;
201 
202 
/* Write a 32-bit value to an MMIO register (relaxed ordering variant) */
#define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))

/* Read a 32-bit value from an MMIO register (relaxed ordering variant) */
#define AVP_READ32(_addr) rte_read32_relaxed((_addr))

/* Obtain the AVP device object embedded in the ethernet private data */
#define AVP_DEV_PRIVATE_TO_HW(adapter) \
	(&((struct avp_adapter *)adapter)->avp)
212 
/*
 * Per-queue context handed to the receive and transmit burst callbacks.
 */
struct avp_queue {
	/** Backpointer to ethernet device data */
	struct rte_eth_dev_data *dev_data;
	/** Backpointer to AVP device */
	struct avp_dev *avp;
	/** Queue identifier used for indexing current queue */
	uint16_t queue_id;
	/** Base queue identifier for queue servicing */
	uint16_t queue_base;
	/** Maximum queue identifier for queue servicing */
	uint16_t queue_limit;

	/* Counters (NOTE(review): updated outside this excerpt - confirm) */
	uint64_t packets;
	uint64_t bytes;
	uint64_t errors;
};
232 
233 /* send a request and wait for a response
234  *
235  * @warning must be called while holding the avp->lock spinlock.
236  */
237 static int
238 avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
239 {
240 	unsigned int retry = AVP_MAX_REQUEST_RETRY;
241 	void *resp_addr = NULL;
242 	unsigned int count;
243 	int ret;
244 
245 	PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
246 
247 	request->result = -ENOTSUP;
248 
249 	/* Discard any stale responses before starting a new request */
250 	while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
251 		PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
252 
253 	rte_memcpy(avp->sync_addr, request, sizeof(*request));
254 	count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
255 	if (count < 1) {
256 		PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
257 			    request->req_id);
258 		ret = -EBUSY;
259 		goto done;
260 	}
261 
262 	while (retry--) {
263 		/* wait for a response */
264 		usleep(AVP_REQUEST_DELAY_USECS);
265 
266 		count = avp_fifo_count(avp->resp_q);
267 		if (count >= 1) {
268 			/* response received */
269 			break;
270 		}
271 
272 		if ((count < 1) && (retry == 0)) {
273 			PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
274 				    request->req_id);
275 			ret = -ETIME;
276 			goto done;
277 		}
278 	}
279 
280 	/* retrieve the response */
281 	count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
282 	if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
283 		PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
284 			    count, resp_addr, avp->host_sync_addr);
285 		ret = -ENODATA;
286 		goto done;
287 	}
288 
289 	/* copy to user buffer */
290 	rte_memcpy(request, avp->sync_addr, sizeof(*request));
291 	ret = 0;
292 
293 	PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
294 		    request->result, request->req_id);
295 
296 done:
297 	return ret;
298 }
299 
300 static int
301 avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
302 {
303 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
304 	struct rte_avp_request request;
305 	int ret;
306 
307 	/* setup a link state change request */
308 	memset(&request, 0, sizeof(request));
309 	request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
310 	request.if_up = state;
311 
312 	ret = avp_dev_process_request(avp, &request);
313 
314 	return ret == 0 ? request.result : ret;
315 }
316 
317 static int
318 avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
319 			struct rte_avp_device_config *config)
320 {
321 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
322 	struct rte_avp_request request;
323 	int ret;
324 
325 	/* setup a configure request */
326 	memset(&request, 0, sizeof(request));
327 	request.req_id = RTE_AVP_REQ_CFG_DEVICE;
328 	memcpy(&request.config, config, sizeof(request.config));
329 
330 	ret = avp_dev_process_request(avp, &request);
331 
332 	return ret == 0 ? request.result : ret;
333 }
334 
335 static int
336 avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
337 {
338 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
339 	struct rte_avp_request request;
340 	int ret;
341 
342 	/* setup a shutdown request */
343 	memset(&request, 0, sizeof(request));
344 	request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
345 
346 	ret = avp_dev_process_request(avp, &request);
347 
348 	return ret == 0 ? request.result : ret;
349 }
350 
351 /* translate from host mbuf virtual address to guest virtual address */
352 static inline void *
353 avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
354 {
355 	return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
356 				       (uintptr_t)avp->host_mbuf_addr),
357 			   (uintptr_t)avp->mbuf_addr);
358 }
359 
360 /* translate from host physical address to guest virtual address */
361 static void *
362 avp_dev_translate_address(struct rte_eth_dev *eth_dev,
363 			  rte_iova_t host_phys_addr)
364 {
365 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
366 	struct rte_mem_resource *resource;
367 	struct rte_avp_memmap_info *info;
368 	struct rte_avp_memmap *map;
369 	off_t offset;
370 	void *addr;
371 	unsigned int i;
372 
373 	addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
374 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
375 	info = (struct rte_avp_memmap_info *)resource->addr;
376 
377 	offset = 0;
378 	for (i = 0; i < info->nb_maps; i++) {
379 		/* search all segments looking for a matching address */
380 		map = &info->maps[i];
381 
382 		if ((host_phys_addr >= map->phys_addr) &&
383 			(host_phys_addr < (map->phys_addr + map->length))) {
384 			/* address is within this segment */
385 			offset += (host_phys_addr - map->phys_addr);
386 			addr = RTE_PTR_ADD(addr, (uintptr_t)offset);
387 
388 			PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
389 				    host_phys_addr, addr);
390 
391 			return addr;
392 		}
393 		offset += map->length;
394 	}
395 
396 	return NULL;
397 }
398 
399 /* verify that the incoming device version is compatible with our version */
400 static int
401 avp_dev_version_check(uint32_t version)
402 {
403 	uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
404 	uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
405 
406 	if (device <= driver) {
407 		/* the host driver version is less than or equal to ours */
408 		return 0;
409 	}
410 
411 	return 1;
412 }
413 
414 /* verify that memory regions have expected version and validation markers */
415 static int
416 avp_dev_check_regions(struct rte_eth_dev *eth_dev)
417 {
418 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
419 	struct rte_avp_memmap_info *memmap;
420 	struct rte_avp_device_info *info;
421 	struct rte_mem_resource *resource;
422 	unsigned int i;
423 
424 	/* Dump resource info for debug */
425 	for (i = 0; i < PCI_MAX_RESOURCE; i++) {
426 		resource = &pci_dev->mem_resource[i];
427 		if ((resource->phys_addr == 0) || (resource->len == 0))
428 			continue;
429 
430 		PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
431 			    i, resource->phys_addr,
432 			    resource->len, resource->addr);
433 
434 		switch (i) {
435 		case RTE_AVP_PCI_MEMMAP_BAR:
436 			memmap = (struct rte_avp_memmap_info *)resource->addr;
437 			if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
438 			    (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
439 				PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
440 					    memmap->magic, memmap->version);
441 				return -EINVAL;
442 			}
443 			break;
444 
445 		case RTE_AVP_PCI_DEVICE_BAR:
446 			info = (struct rte_avp_device_info *)resource->addr;
447 			if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
448 			    avp_dev_version_check(info->version)) {
449 				PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
450 					    info->magic, info->version,
451 					    AVP_DPDK_DRIVER_VERSION);
452 				return -EINVAL;
453 			}
454 			break;
455 
456 		case RTE_AVP_PCI_MEMORY_BAR:
457 		case RTE_AVP_PCI_MMIO_BAR:
458 			if (resource->addr == NULL) {
459 				PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
460 					    i);
461 				return -EINVAL;
462 			}
463 			break;
464 
465 		case RTE_AVP_PCI_MSIX_BAR:
466 		default:
467 			/* no validation required */
468 			break;
469 		}
470 	}
471 
472 	return 0;
473 }
474 
475 static int
476 avp_dev_detach(struct rte_eth_dev *eth_dev)
477 {
478 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
479 	int ret;
480 
481 	PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
482 		    eth_dev->data->port_id, avp->device_id);
483 
484 	rte_spinlock_lock(&avp->lock);
485 
486 	if (avp->flags & AVP_F_DETACHED) {
487 		PMD_DRV_LOG(NOTICE, "port %u already detached\n",
488 			    eth_dev->data->port_id);
489 		ret = 0;
490 		goto unlock;
491 	}
492 
493 	/* shutdown the device first so the host stops sending us packets. */
494 	ret = avp_dev_ctrl_shutdown(eth_dev);
495 	if (ret < 0) {
496 		PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
497 			    ret);
498 		avp->flags &= ~AVP_F_DETACHED;
499 		goto unlock;
500 	}
501 
502 	avp->flags |= AVP_F_DETACHED;
503 	rte_wmb();
504 
505 	/* wait for queues to acknowledge the presence of the detach flag */
506 	rte_delay_ms(1);
507 
508 	ret = 0;
509 
510 unlock:
511 	rte_spinlock_unlock(&avp->lock);
512 	return ret;
513 }
514 
515 static void
516 _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
517 {
518 	struct avp_dev *avp =
519 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
520 	struct avp_queue *rxq;
521 	uint16_t queue_count;
522 	uint16_t remainder;
523 
524 	rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
525 
526 	/*
527 	 * Must map all AVP fifos as evenly as possible between the configured
528 	 * device queues.  Each device queue will service a subset of the AVP
529 	 * fifos. If there is an odd number of device queues the first set of
530 	 * device queues will get the extra AVP fifos.
531 	 */
532 	queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
533 	remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
534 	if (rx_queue_id < remainder) {
535 		/* these queues must service one extra FIFO */
536 		rxq->queue_base = rx_queue_id * (queue_count + 1);
537 		rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
538 	} else {
539 		/* these queues service the regular number of FIFO */
540 		rxq->queue_base = ((remainder * (queue_count + 1)) +
541 				   ((rx_queue_id - remainder) * queue_count));
542 		rxq->queue_limit = rxq->queue_base + queue_count - 1;
543 	}
544 
545 	PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
546 		    rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
547 
548 	rxq->queue_id = rxq->queue_base;
549 }
550 
551 static void
552 _avp_set_queue_counts(struct rte_eth_dev *eth_dev)
553 {
554 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
555 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
556 	struct rte_avp_device_info *host_info;
557 	void *addr;
558 
559 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
560 	host_info = (struct rte_avp_device_info *)addr;
561 
562 	/*
563 	 * the transmit direction is not negotiated beyond respecting the max
564 	 * number of queues because the host can handle arbitrary guest tx
565 	 * queues (host rx queues).
566 	 */
567 	avp->num_tx_queues = eth_dev->data->nb_tx_queues;
568 
569 	/*
570 	 * the receive direction is more restrictive.  The host requires a
571 	 * minimum number of guest rx queues (host tx queues) therefore
572 	 * negotiate a value that is at least as large as the host minimum
573 	 * requirement.  If the host and guest values are not identical then a
574 	 * mapping will be established in the receive_queue_setup function.
575 	 */
576 	avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
577 				     eth_dev->data->nb_rx_queues);
578 
579 	PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
580 		    avp->num_tx_queues, avp->num_rx_queues);
581 }
582 
583 static int
584 avp_dev_attach(struct rte_eth_dev *eth_dev)
585 {
586 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
587 	struct rte_avp_device_config config;
588 	unsigned int i;
589 	int ret;
590 
591 	PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
592 		    eth_dev->data->port_id, avp->device_id);
593 
594 	rte_spinlock_lock(&avp->lock);
595 
596 	if (!(avp->flags & AVP_F_DETACHED)) {
597 		PMD_DRV_LOG(NOTICE, "port %u already attached\n",
598 			    eth_dev->data->port_id);
599 		ret = 0;
600 		goto unlock;
601 	}
602 
603 	/*
604 	 * make sure that the detached flag is set prior to reconfiguring the
605 	 * queues.
606 	 */
607 	avp->flags |= AVP_F_DETACHED;
608 	rte_wmb();
609 
610 	/*
611 	 * re-run the device create utility which will parse the new host info
612 	 * and setup the AVP device queue pointers.
613 	 */
614 	ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev);
615 	if (ret < 0) {
616 		PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
617 			    ret);
618 		goto unlock;
619 	}
620 
621 	if (avp->flags & AVP_F_CONFIGURED) {
622 		/*
623 		 * Update the receive queue mapping to handle cases where the
624 		 * source and destination hosts have different queue
625 		 * requirements.  As long as the DETACHED flag is asserted the
626 		 * queue table should not be referenced so it should be safe to
627 		 * update it.
628 		 */
629 		_avp_set_queue_counts(eth_dev);
630 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
631 			_avp_set_rx_queue_mappings(eth_dev, i);
632 
633 		/*
634 		 * Update the host with our config details so that it knows the
635 		 * device is active.
636 		 */
637 		memset(&config, 0, sizeof(config));
638 		config.device_id = avp->device_id;
639 		config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
640 		config.driver_version = AVP_DPDK_DRIVER_VERSION;
641 		config.features = avp->features;
642 		config.num_tx_queues = avp->num_tx_queues;
643 		config.num_rx_queues = avp->num_rx_queues;
644 		config.if_up = !!(avp->flags & AVP_F_LINKUP);
645 
646 		ret = avp_dev_ctrl_set_config(eth_dev, &config);
647 		if (ret < 0) {
648 			PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
649 				    ret);
650 			goto unlock;
651 		}
652 	}
653 
654 	rte_wmb();
655 	avp->flags &= ~AVP_F_DETACHED;
656 
657 	ret = 0;
658 
659 unlock:
660 	rte_spinlock_unlock(&avp->lock);
661 	return ret;
662 }
663 
664 static void
665 avp_dev_interrupt_handler(void *data)
666 {
667 	struct rte_eth_dev *eth_dev = data;
668 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
669 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
670 	uint32_t status, value;
671 	int ret;
672 
673 	if (registers == NULL)
674 		rte_panic("no mapped MMIO register space\n");
675 
676 	/* read the interrupt status register
677 	 * note: this register clears on read so all raised interrupts must be
678 	 *    handled or remembered for later processing
679 	 */
680 	status = AVP_READ32(
681 		RTE_PTR_ADD(registers,
682 			    RTE_AVP_INTERRUPT_STATUS_OFFSET));
683 
684 	if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
685 		/* handle interrupt based on current status */
686 		value = AVP_READ32(
687 			RTE_PTR_ADD(registers,
688 				    RTE_AVP_MIGRATION_STATUS_OFFSET));
689 		switch (value) {
690 		case RTE_AVP_MIGRATION_DETACHED:
691 			ret = avp_dev_detach(eth_dev);
692 			break;
693 		case RTE_AVP_MIGRATION_ATTACHED:
694 			ret = avp_dev_attach(eth_dev);
695 			break;
696 		default:
697 			PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
698 				    value);
699 			ret = -EINVAL;
700 		}
701 
702 		/* acknowledge the request by writing out our current status */
703 		value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
704 		AVP_WRITE32(value,
705 			    RTE_PTR_ADD(registers,
706 					RTE_AVP_MIGRATION_ACK_OFFSET));
707 
708 		PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
709 	}
710 
711 	if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
712 		PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
713 			    status);
714 
715 	/* re-enable UIO interrupt handling */
716 	ret = rte_intr_ack(&pci_dev->intr_handle);
717 	if (ret < 0) {
718 		PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
719 			    ret);
720 		/* continue */
721 	}
722 }
723 
724 static int
725 avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
726 {
727 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
728 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
729 	int ret;
730 
731 	if (registers == NULL)
732 		return -EINVAL;
733 
734 	/* enable UIO interrupt handling */
735 	ret = rte_intr_enable(&pci_dev->intr_handle);
736 	if (ret < 0) {
737 		PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
738 			    ret);
739 		return ret;
740 	}
741 
742 	/* inform the device that all interrupts are enabled */
743 	AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
744 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
745 
746 	return 0;
747 }
748 
749 static int
750 avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
751 {
752 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
753 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
754 	int ret;
755 
756 	if (registers == NULL)
757 		return 0;
758 
759 	/* inform the device that all interrupts are disabled */
760 	AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
761 		    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
762 
763 	/* enable UIO interrupt handling */
764 	ret = rte_intr_disable(&pci_dev->intr_handle);
765 	if (ret < 0) {
766 		PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
767 			    ret);
768 		return ret;
769 	}
770 
771 	return 0;
772 }
773 
774 static int
775 avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
776 {
777 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
778 	int ret;
779 
780 	/* register a callback handler with UIO for interrupt notifications */
781 	ret = rte_intr_callback_register(&pci_dev->intr_handle,
782 					 avp_dev_interrupt_handler,
783 					 (void *)eth_dev);
784 	if (ret < 0) {
785 		PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
786 			    ret);
787 		return ret;
788 	}
789 
790 	/* enable interrupt processing */
791 	return avp_dev_enable_interrupts(eth_dev);
792 }
793 
794 static int
795 avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
796 {
797 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
798 	void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
799 	uint32_t value;
800 
801 	if (registers == NULL)
802 		return 0;
803 
804 	value = AVP_READ32(RTE_PTR_ADD(registers,
805 				       RTE_AVP_MIGRATION_STATUS_OFFSET));
806 	if (value == RTE_AVP_MIGRATION_DETACHED) {
807 		/* migration is in progress; ack it if we have not already */
808 		AVP_WRITE32(value,
809 			    RTE_PTR_ADD(registers,
810 					RTE_AVP_MIGRATION_ACK_OFFSET));
811 		return 1;
812 	}
813 	return 0;
814 }
815 
816 /*
817  * create a AVP device using the supplied device info by first translating it
818  * to guest address space(s).
819  */
820 static int
821 avp_dev_create(struct rte_pci_device *pci_dev,
822 	       struct rte_eth_dev *eth_dev)
823 {
824 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
825 	struct rte_avp_device_info *host_info;
826 	struct rte_mem_resource *resource;
827 	unsigned int i;
828 
829 	resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
830 	if (resource->addr == NULL) {
831 		PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
832 			    RTE_AVP_PCI_DEVICE_BAR);
833 		return -EFAULT;
834 	}
835 	host_info = (struct rte_avp_device_info *)resource->addr;
836 
837 	if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
838 		avp_dev_version_check(host_info->version)) {
839 		PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
840 			    host_info->magic, host_info->version,
841 			    AVP_DPDK_DRIVER_VERSION);
842 		return -EINVAL;
843 	}
844 
845 	PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
846 		    RTE_AVP_GET_RELEASE_VERSION(host_info->version),
847 		    RTE_AVP_GET_MAJOR_VERSION(host_info->version),
848 		    RTE_AVP_GET_MINOR_VERSION(host_info->version));
849 
850 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
851 		    host_info->min_tx_queues, host_info->max_tx_queues);
852 	PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
853 		    host_info->min_rx_queues, host_info->max_rx_queues);
854 	PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
855 		    host_info->features);
856 
857 	if (avp->magic != AVP_ETHDEV_MAGIC) {
858 		/*
859 		 * First time initialization (i.e., not during a VM
860 		 * migration)
861 		 */
862 		memset(avp, 0, sizeof(*avp));
863 		avp->magic = AVP_ETHDEV_MAGIC;
864 		avp->dev_data = eth_dev->data;
865 		avp->port_id = eth_dev->data->port_id;
866 		avp->host_mbuf_size = host_info->mbuf_size;
867 		avp->host_features = host_info->features;
868 		rte_spinlock_init(&avp->lock);
869 		memcpy(&avp->ethaddr.addr_bytes[0],
870 		       host_info->ethaddr, RTE_ETHER_ADDR_LEN);
871 		/* adjust max values to not exceed our max */
872 		avp->max_tx_queues =
873 			RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
874 		avp->max_rx_queues =
875 			RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
876 	} else {
877 		/* Re-attaching during migration */
878 
879 		/* TODO... requires validation of host values */
880 		if ((host_info->features & avp->features) != avp->features) {
881 			PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
882 				    avp->features, host_info->features);
883 			/* this should not be possible; continue for now */
884 		}
885 	}
886 
887 	/* the device id is allowed to change over migrations */
888 	avp->device_id = host_info->device_id;
889 
890 	/* translate incoming host addresses to guest address space */
891 	PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
892 		    host_info->tx_phys);
893 	PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
894 		    host_info->alloc_phys);
895 	for (i = 0; i < avp->max_tx_queues; i++) {
896 		avp->tx_q[i] = avp_dev_translate_address(eth_dev,
897 			host_info->tx_phys + (i * host_info->tx_size));
898 
899 		avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
900 			host_info->alloc_phys + (i * host_info->alloc_size));
901 	}
902 
903 	PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
904 		    host_info->rx_phys);
905 	PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
906 		    host_info->free_phys);
907 	for (i = 0; i < avp->max_rx_queues; i++) {
908 		avp->rx_q[i] = avp_dev_translate_address(eth_dev,
909 			host_info->rx_phys + (i * host_info->rx_size));
910 		avp->free_q[i] = avp_dev_translate_address(eth_dev,
911 			host_info->free_phys + (i * host_info->free_size));
912 	}
913 
914 	PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
915 		    host_info->req_phys);
916 	PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
917 		    host_info->resp_phys);
918 	PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
919 		    host_info->sync_phys);
920 	PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
921 		    host_info->mbuf_phys);
922 	avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
923 	avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
924 	avp->sync_addr =
925 		avp_dev_translate_address(eth_dev, host_info->sync_phys);
926 	avp->mbuf_addr =
927 		avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
928 
929 	/*
930 	 * store the host mbuf virtual address so that we can calculate
931 	 * relative offsets for each mbuf as they are processed
932 	 */
933 	avp->host_mbuf_addr = host_info->mbuf_va;
934 	avp->host_sync_addr = host_info->sync_va;
935 
936 	/*
937 	 * store the maximum packet length that is supported by the host.
938 	 */
939 	avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
940 	PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
941 				host_info->max_rx_pkt_len);
942 
943 	return 0;
944 }
945 
946 /*
947  * This function is based on probe() function in avp_pci.c
948  * It returns 0 on success.
949  */
950 static int
951 eth_avp_dev_init(struct rte_eth_dev *eth_dev)
952 {
953 	struct avp_dev *avp =
954 		AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
955 	struct rte_pci_device *pci_dev;
956 	int ret;
957 
958 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
959 	eth_dev->dev_ops = &avp_eth_dev_ops;
960 	eth_dev->rx_pkt_burst = &avp_recv_pkts;
961 	eth_dev->tx_pkt_burst = &avp_xmit_pkts;
962 	/* Let rte_eth_dev_close() release the port resources */
963 	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
964 
965 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
966 		/*
967 		 * no setup required on secondary processes.  All data is saved
968 		 * in dev_private by the primary process. All resource should
969 		 * be mapped to the same virtual address so all pointers should
970 		 * be valid.
971 		 */
972 		if (eth_dev->data->scattered_rx) {
973 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
974 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
975 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
976 		}
977 		return 0;
978 	}
979 
980 	rte_eth_copy_pci_info(eth_dev, pci_dev);
981 
982 	/* Check current migration status */
983 	if (avp_dev_migration_pending(eth_dev)) {
984 		PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
985 		return -EBUSY;
986 	}
987 
988 	/* Check BAR resources */
989 	ret = avp_dev_check_regions(eth_dev);
990 	if (ret < 0) {
991 		PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
992 			    ret);
993 		return ret;
994 	}
995 
996 	/* Enable interrupts */
997 	ret = avp_dev_setup_interrupts(eth_dev);
998 	if (ret < 0) {
999 		PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
1000 		return ret;
1001 	}
1002 
1003 	/* Handle each subtype */
1004 	ret = avp_dev_create(pci_dev, eth_dev);
1005 	if (ret < 0) {
1006 		PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1007 		return ret;
1008 	}
1009 
1010 	/* Allocate memory for storing MAC addresses */
1011 	eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev",
1012 					RTE_ETHER_ADDR_LEN, 0);
1013 	if (eth_dev->data->mac_addrs == NULL) {
1014 		PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1015 			    RTE_ETHER_ADDR_LEN);
1016 		return -ENOMEM;
1017 	}
1018 
1019 	/* Get a mac from device config */
1020 	rte_ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1021 
1022 	return 0;
1023 }
1024 
1025 static int
1026 eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1027 {
1028 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1029 		return -EPERM;
1030 
1031 	if (eth_dev->data == NULL)
1032 		return 0;
1033 
1034 	avp_dev_close(eth_dev);
1035 
1036 	return 0;
1037 }
1038 
1039 static int
1040 eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1041 		  struct rte_pci_device *pci_dev)
1042 {
1043 	return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct avp_adapter),
1044 			eth_avp_dev_init);
1045 }
1046 
1047 static int
1048 eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1049 {
1050 	return rte_eth_dev_pci_generic_remove(pci_dev,
1051 					      eth_avp_dev_uninit);
1052 }
1053 
1054 static struct rte_pci_driver rte_avp_pmd = {
1055 	.id_table = pci_id_avp_map,
1056 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1057 	.probe = eth_avp_pci_probe,
1058 	.remove = eth_avp_pci_remove,
1059 };
1060 
1061 static int
1062 avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1063 			 struct avp_dev *avp)
1064 {
1065 	unsigned int max_rx_pkt_len;
1066 
1067 	max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1068 
1069 	if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1070 	    (max_rx_pkt_len > avp->host_mbuf_size)) {
1071 		/*
1072 		 * If the guest MTU is greater than either the host or guest
1073 		 * buffers then chained mbufs have to be enabled in the TX
1074 		 * direction.  It is assumed that the application will not need
1075 		 * to send packets larger than their max_rx_pkt_len (MRU).
1076 		 */
1077 		return 1;
1078 	}
1079 
1080 	if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1081 	    (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1082 		/*
1083 		 * If the host MRU is greater than its own mbuf size or the
1084 		 * guest mbuf size then chained mbufs have to be enabled in the
1085 		 * RX direction.
1086 		 */
1087 		return 1;
1088 	}
1089 
1090 	return 0;
1091 }
1092 
1093 static int
1094 avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1095 		       uint16_t rx_queue_id,
1096 		       uint16_t nb_rx_desc,
1097 		       unsigned int socket_id,
1098 		       const struct rte_eth_rxconf *rx_conf,
1099 		       struct rte_mempool *pool)
1100 {
1101 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1102 	struct rte_pktmbuf_pool_private *mbp_priv;
1103 	struct avp_queue *rxq;
1104 
1105 	if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1106 		PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1107 			    rx_queue_id, eth_dev->data->nb_rx_queues);
1108 		return -EINVAL;
1109 	}
1110 
1111 	/* Save mbuf pool pointer */
1112 	avp->pool = pool;
1113 
1114 	/* Save the local mbuf size */
1115 	mbp_priv = rte_mempool_get_priv(pool);
1116 	avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1117 	avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1118 
1119 	if (avp_dev_enable_scattered(eth_dev, avp)) {
1120 		if (!eth_dev->data->scattered_rx) {
1121 			PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1122 			eth_dev->data->scattered_rx = 1;
1123 			eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1124 			eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1125 		}
1126 	}
1127 
1128 	PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1129 		    avp->max_rx_pkt_len,
1130 		    eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1131 		    avp->host_mbuf_size,
1132 		    avp->guest_mbuf_size);
1133 
1134 	/* allocate a queue object */
1135 	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1136 				 RTE_CACHE_LINE_SIZE, socket_id);
1137 	if (rxq == NULL) {
1138 		PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1139 		return -ENOMEM;
1140 	}
1141 
1142 	/* save back pointers to AVP and Ethernet devices */
1143 	rxq->avp = avp;
1144 	rxq->dev_data = eth_dev->data;
1145 	eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1146 
1147 	/* setup the queue receive mapping for the current queue. */
1148 	_avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1149 
1150 	PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1151 
1152 	(void)nb_rx_desc;
1153 	(void)rx_conf;
1154 	return 0;
1155 }
1156 
1157 static int
1158 avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1159 		       uint16_t tx_queue_id,
1160 		       uint16_t nb_tx_desc,
1161 		       unsigned int socket_id,
1162 		       const struct rte_eth_txconf *tx_conf)
1163 {
1164 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1165 	struct avp_queue *txq;
1166 
1167 	if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1168 		PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1169 			    tx_queue_id, eth_dev->data->nb_tx_queues);
1170 		return -EINVAL;
1171 	}
1172 
1173 	/* allocate a queue object */
1174 	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1175 				 RTE_CACHE_LINE_SIZE, socket_id);
1176 	if (txq == NULL) {
1177 		PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1178 		return -ENOMEM;
1179 	}
1180 
1181 	/* only the configured set of transmit queues are used */
1182 	txq->queue_id = tx_queue_id;
1183 	txq->queue_base = tx_queue_id;
1184 	txq->queue_limit = tx_queue_id;
1185 
1186 	/* save back pointers to AVP and Ethernet devices */
1187 	txq->avp = avp;
1188 	txq->dev_data = eth_dev->data;
1189 	eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1190 
1191 	PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1192 
1193 	(void)nb_tx_desc;
1194 	(void)tx_conf;
1195 	return 0;
1196 }
1197 
1198 static inline int
1199 _avp_cmp_ether_addr(struct rte_ether_addr *a, struct rte_ether_addr *b)
1200 {
1201 	uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1202 	uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1203 	return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1204 }
1205 
1206 static inline int
1207 _avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1208 {
1209 	struct rte_ether_hdr *eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
1210 
1211 	if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1212 		/* allow all packets destined to our address */
1213 		return 0;
1214 	}
1215 
1216 	if (likely(rte_is_broadcast_ether_addr(&eth->d_addr))) {
1217 		/* allow all broadcast packets */
1218 		return 0;
1219 	}
1220 
1221 	if (likely(rte_is_multicast_ether_addr(&eth->d_addr))) {
1222 		/* allow all multicast packets */
1223 		return 0;
1224 	}
1225 
1226 	if (avp->flags & AVP_F_PROMISC) {
1227 		/* allow all packets when in promiscuous mode */
1228 		return 0;
1229 	}
1230 
1231 	return -1;
1232 }
1233 
#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
/*
 * Debug-only consistency check of a host buffer chain.
 *
 * Walks at most nb_segs segments (as recorded in the first buffer),
 * panicking if a segment or its data pointer translates to NULL or if a
 * segment has no data.  After the walk it panics if the chain ended before
 * nb_segs segments were seen or if the summed data lengths differ from the
 * pkt_len recorded in the first buffer.
 */
static inline void
__avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
{
	struct rte_avp_desc *head = avp_dev_translate_buffer(avp, buf);
	struct rte_avp_desc *seg;
	unsigned int remaining = head->nb_segs;
	unsigned int total = 0;
	unsigned int idx = 0;
	void *data;

	for (;;) {
		/* translate the host addresses for guest access */
		seg = avp_dev_translate_buffer(avp, buf);
		if (seg == NULL)
			rte_panic("bad buffer: segment %u has an invalid address %p\n",
				  idx, buf);

		data = avp_dev_translate_buffer(avp, seg->data);
		if (data == NULL)
			rte_panic("bad buffer: segment %u has a NULL data pointer\n",
				  idx);

		if (seg->data_len == 0)
			rte_panic("bad buffer: segment %u has 0 data length\n",
				  idx);

		total += seg->data_len;
		remaining--;
		idx++;

		/* stop once all expected segments were seen ... */
		if (remaining == 0)
			break;

		/* ... or when the chain ends early */
		buf = seg->next;
		if (buf == NULL)
			break;
	}

	if (remaining != 0)
		rte_panic("bad buffer: expected %u segments found %u\n",
			  head->nb_segs, (head->nb_segs - remaining));
	if (total != head->pkt_len)
		rte_panic("bad buffer: expected length %u found %u\n",
			  head->pkt_len, total);
}

/* debug builds: run the full chain check */
#define avp_dev_buffer_sanity_check(a, b) \
	__avp_dev_buffer_sanity_check((a), (b))

#else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */

/* non-debug builds: the check compiles away to nothing */
#define avp_dev_buffer_sanity_check(a, b) do {} while (0)

#endif
1285 
1286 /*
1287  * Copy a host buffer chain to a set of mbufs.	This function assumes that
1288  * there exactly the required number of mbufs to copy all source bytes.
1289  */
1290 static inline struct rte_mbuf *
1291 avp_dev_copy_from_buffers(struct avp_dev *avp,
1292 			  struct rte_avp_desc *buf,
1293 			  struct rte_mbuf **mbufs,
1294 			  unsigned int count)
1295 {
1296 	struct rte_mbuf *m_previous = NULL;
1297 	struct rte_avp_desc *pkt_buf;
1298 	unsigned int total_length = 0;
1299 	unsigned int copy_length;
1300 	unsigned int src_offset;
1301 	struct rte_mbuf *m;
1302 	uint16_t ol_flags;
1303 	uint16_t vlan_tci;
1304 	void *pkt_data;
1305 	unsigned int i;
1306 
1307 	avp_dev_buffer_sanity_check(avp, buf);
1308 
1309 	/* setup the first source buffer */
1310 	pkt_buf = avp_dev_translate_buffer(avp, buf);
1311 	pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1312 	total_length = pkt_buf->pkt_len;
1313 	src_offset = 0;
1314 
1315 	if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1316 		ol_flags = PKT_RX_VLAN;
1317 		vlan_tci = pkt_buf->vlan_tci;
1318 	} else {
1319 		ol_flags = 0;
1320 		vlan_tci = 0;
1321 	}
1322 
1323 	for (i = 0; (i < count) && (buf != NULL); i++) {
1324 		/* fill each destination buffer */
1325 		m = mbufs[i];
1326 
1327 		if (m_previous != NULL)
1328 			m_previous->next = m;
1329 
1330 		m_previous = m;
1331 
1332 		do {
1333 			/*
1334 			 * Copy as many source buffers as will fit in the
1335 			 * destination buffer.
1336 			 */
1337 			copy_length = RTE_MIN((avp->guest_mbuf_size -
1338 					       rte_pktmbuf_data_len(m)),
1339 					      (pkt_buf->data_len -
1340 					       src_offset));
1341 			rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1342 					       rte_pktmbuf_data_len(m)),
1343 				   RTE_PTR_ADD(pkt_data, src_offset),
1344 				   copy_length);
1345 			rte_pktmbuf_data_len(m) += copy_length;
1346 			src_offset += copy_length;
1347 
1348 			if (likely(src_offset == pkt_buf->data_len)) {
1349 				/* need a new source buffer */
1350 				buf = pkt_buf->next;
1351 				if (buf != NULL) {
1352 					pkt_buf = avp_dev_translate_buffer(
1353 						avp, buf);
1354 					pkt_data = avp_dev_translate_buffer(
1355 						avp, pkt_buf->data);
1356 					src_offset = 0;
1357 				}
1358 			}
1359 
1360 			if (unlikely(rte_pktmbuf_data_len(m) ==
1361 				     avp->guest_mbuf_size)) {
1362 				/* need a new destination mbuf */
1363 				break;
1364 			}
1365 
1366 		} while (buf != NULL);
1367 	}
1368 
1369 	m = mbufs[0];
1370 	m->ol_flags = ol_flags;
1371 	m->nb_segs = count;
1372 	rte_pktmbuf_pkt_len(m) = total_length;
1373 	m->vlan_tci = vlan_tci;
1374 
1375 	__rte_mbuf_sanity_check(m, 1);
1376 
1377 	return m;
1378 }
1379 
1380 static uint16_t
1381 avp_recv_scattered_pkts(void *rx_queue,
1382 			struct rte_mbuf **rx_pkts,
1383 			uint16_t nb_pkts)
1384 {
1385 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1386 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1387 	struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1388 	struct avp_dev *avp = rxq->avp;
1389 	struct rte_avp_desc *pkt_buf;
1390 	struct rte_avp_fifo *free_q;
1391 	struct rte_avp_fifo *rx_q;
1392 	struct rte_avp_desc *buf;
1393 	unsigned int count, avail, n;
1394 	unsigned int guest_mbuf_size;
1395 	struct rte_mbuf *m;
1396 	unsigned int required;
1397 	unsigned int buf_len;
1398 	unsigned int port_id;
1399 	unsigned int i;
1400 
1401 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1402 		/* VM live migration in progress */
1403 		return 0;
1404 	}
1405 
1406 	guest_mbuf_size = avp->guest_mbuf_size;
1407 	port_id = avp->port_id;
1408 	rx_q = avp->rx_q[rxq->queue_id];
1409 	free_q = avp->free_q[rxq->queue_id];
1410 
1411 	/* setup next queue to service */
1412 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1413 		(rxq->queue_id + 1) : rxq->queue_base;
1414 
1415 	/* determine how many slots are available in the free queue */
1416 	count = avp_fifo_free_count(free_q);
1417 
1418 	/* determine how many packets are available in the rx queue */
1419 	avail = avp_fifo_count(rx_q);
1420 
1421 	/* determine how many packets can be received */
1422 	count = RTE_MIN(count, avail);
1423 	count = RTE_MIN(count, nb_pkts);
1424 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1425 
1426 	if (unlikely(count == 0)) {
1427 		/* no free buffers, or no buffers on the rx queue */
1428 		return 0;
1429 	}
1430 
1431 	/* retrieve pending packets */
1432 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1433 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1434 		   count, rx_q);
1435 
1436 	count = 0;
1437 	for (i = 0; i < n; i++) {
1438 		/* prefetch next entry while processing current one */
1439 		if (i + 1 < n) {
1440 			pkt_buf = avp_dev_translate_buffer(avp,
1441 							   avp_bufs[i + 1]);
1442 			rte_prefetch0(pkt_buf);
1443 		}
1444 		buf = avp_bufs[i];
1445 
1446 		/* Peek into the first buffer to determine the total length */
1447 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1448 		buf_len = pkt_buf->pkt_len;
1449 
1450 		/* Allocate enough mbufs to receive the entire packet */
1451 		required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1452 		if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1453 			rxq->dev_data->rx_mbuf_alloc_failed++;
1454 			continue;
1455 		}
1456 
1457 		/* Copy the data from the buffers to our mbufs */
1458 		m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1459 
1460 		/* finalize mbuf */
1461 		m->port = port_id;
1462 
1463 		if (_avp_mac_filter(avp, m) != 0) {
1464 			/* silently discard packets not destined to our MAC */
1465 			rte_pktmbuf_free(m);
1466 			continue;
1467 		}
1468 
1469 		/* return new mbuf to caller */
1470 		rx_pkts[count++] = m;
1471 		rxq->bytes += buf_len;
1472 	}
1473 
1474 	rxq->packets += count;
1475 
1476 	/* return the buffers to the free queue */
1477 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1478 
1479 	return count;
1480 }
1481 
1482 
1483 static uint16_t
1484 avp_recv_pkts(void *rx_queue,
1485 	      struct rte_mbuf **rx_pkts,
1486 	      uint16_t nb_pkts)
1487 {
1488 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1489 	struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1490 	struct avp_dev *avp = rxq->avp;
1491 	struct rte_avp_desc *pkt_buf;
1492 	struct rte_avp_fifo *free_q;
1493 	struct rte_avp_fifo *rx_q;
1494 	unsigned int count, avail, n;
1495 	unsigned int pkt_len;
1496 	struct rte_mbuf *m;
1497 	char *pkt_data;
1498 	unsigned int i;
1499 
1500 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1501 		/* VM live migration in progress */
1502 		return 0;
1503 	}
1504 
1505 	rx_q = avp->rx_q[rxq->queue_id];
1506 	free_q = avp->free_q[rxq->queue_id];
1507 
1508 	/* setup next queue to service */
1509 	rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1510 		(rxq->queue_id + 1) : rxq->queue_base;
1511 
1512 	/* determine how many slots are available in the free queue */
1513 	count = avp_fifo_free_count(free_q);
1514 
1515 	/* determine how many packets are available in the rx queue */
1516 	avail = avp_fifo_count(rx_q);
1517 
1518 	/* determine how many packets can be received */
1519 	count = RTE_MIN(count, avail);
1520 	count = RTE_MIN(count, nb_pkts);
1521 	count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1522 
1523 	if (unlikely(count == 0)) {
1524 		/* no free buffers, or no buffers on the rx queue */
1525 		return 0;
1526 	}
1527 
1528 	/* retrieve pending packets */
1529 	n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1530 	PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1531 		   count, rx_q);
1532 
1533 	count = 0;
1534 	for (i = 0; i < n; i++) {
1535 		/* prefetch next entry while processing current one */
1536 		if (i < n - 1) {
1537 			pkt_buf = avp_dev_translate_buffer(avp,
1538 							   avp_bufs[i + 1]);
1539 			rte_prefetch0(pkt_buf);
1540 		}
1541 
1542 		/* Adjust host pointers for guest addressing */
1543 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1544 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1545 		pkt_len = pkt_buf->pkt_len;
1546 
1547 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1548 			     (pkt_buf->nb_segs > 1))) {
1549 			/*
1550 			 * application should be using the scattered receive
1551 			 * function
1552 			 */
1553 			rxq->errors++;
1554 			continue;
1555 		}
1556 
1557 		/* process each packet to be transmitted */
1558 		m = rte_pktmbuf_alloc(avp->pool);
1559 		if (unlikely(m == NULL)) {
1560 			rxq->dev_data->rx_mbuf_alloc_failed++;
1561 			continue;
1562 		}
1563 
1564 		/* copy data out of the host buffer to our buffer */
1565 		m->data_off = RTE_PKTMBUF_HEADROOM;
1566 		rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1567 
1568 		/* initialize the local mbuf */
1569 		rte_pktmbuf_data_len(m) = pkt_len;
1570 		rte_pktmbuf_pkt_len(m) = pkt_len;
1571 		m->port = avp->port_id;
1572 
1573 		if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1574 			m->ol_flags = PKT_RX_VLAN;
1575 			m->vlan_tci = pkt_buf->vlan_tci;
1576 		}
1577 
1578 		if (_avp_mac_filter(avp, m) != 0) {
1579 			/* silently discard packets not destined to our MAC */
1580 			rte_pktmbuf_free(m);
1581 			continue;
1582 		}
1583 
1584 		/* return new mbuf to caller */
1585 		rx_pkts[count++] = m;
1586 		rxq->bytes += pkt_len;
1587 	}
1588 
1589 	rxq->packets += count;
1590 
1591 	/* return the buffers to the free queue */
1592 	avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1593 
1594 	return count;
1595 }
1596 
1597 /*
1598  * Copy a chained mbuf to a set of host buffers.  This function assumes that
1599  * there are sufficient destination buffers to contain the entire source
1600  * packet.
1601  */
1602 static inline uint16_t
1603 avp_dev_copy_to_buffers(struct avp_dev *avp,
1604 			struct rte_mbuf *mbuf,
1605 			struct rte_avp_desc **buffers,
1606 			unsigned int count)
1607 {
1608 	struct rte_avp_desc *previous_buf = NULL;
1609 	struct rte_avp_desc *first_buf = NULL;
1610 	struct rte_avp_desc *pkt_buf;
1611 	struct rte_avp_desc *buf;
1612 	size_t total_length;
1613 	struct rte_mbuf *m;
1614 	size_t copy_length;
1615 	size_t src_offset;
1616 	char *pkt_data;
1617 	unsigned int i;
1618 
1619 	__rte_mbuf_sanity_check(mbuf, 1);
1620 
1621 	m = mbuf;
1622 	src_offset = 0;
1623 	total_length = rte_pktmbuf_pkt_len(m);
1624 	for (i = 0; (i < count) && (m != NULL); i++) {
1625 		/* fill each destination buffer */
1626 		buf = buffers[i];
1627 
1628 		if (i < count - 1) {
1629 			/* prefetch next entry while processing this one */
1630 			pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1631 			rte_prefetch0(pkt_buf);
1632 		}
1633 
1634 		/* Adjust pointers for guest addressing */
1635 		pkt_buf = avp_dev_translate_buffer(avp, buf);
1636 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1637 
1638 		/* setup the buffer chain */
1639 		if (previous_buf != NULL)
1640 			previous_buf->next = buf;
1641 		else
1642 			first_buf = pkt_buf;
1643 
1644 		previous_buf = pkt_buf;
1645 
1646 		do {
1647 			/*
1648 			 * copy as many source mbuf segments as will fit in the
1649 			 * destination buffer.
1650 			 */
1651 			copy_length = RTE_MIN((avp->host_mbuf_size -
1652 					       pkt_buf->data_len),
1653 					      (rte_pktmbuf_data_len(m) -
1654 					       src_offset));
1655 			rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1656 				   RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1657 					       src_offset),
1658 				   copy_length);
1659 			pkt_buf->data_len += copy_length;
1660 			src_offset += copy_length;
1661 
1662 			if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1663 				/* need a new source buffer */
1664 				m = m->next;
1665 				src_offset = 0;
1666 			}
1667 
1668 			if (unlikely(pkt_buf->data_len ==
1669 				     avp->host_mbuf_size)) {
1670 				/* need a new destination buffer */
1671 				break;
1672 			}
1673 
1674 		} while (m != NULL);
1675 	}
1676 
1677 	first_buf->nb_segs = count;
1678 	first_buf->pkt_len = total_length;
1679 
1680 	if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1681 		first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1682 		first_buf->vlan_tci = mbuf->vlan_tci;
1683 	}
1684 
1685 	avp_dev_buffer_sanity_check(avp, buffers[0]);
1686 
1687 	return total_length;
1688 }
1689 
1690 
1691 static uint16_t
1692 avp_xmit_scattered_pkts(void *tx_queue,
1693 			struct rte_mbuf **tx_pkts,
1694 			uint16_t nb_pkts)
1695 {
1696 	struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1697 				       RTE_AVP_MAX_MBUF_SEGMENTS)];
1698 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1699 	struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1700 	struct avp_dev *avp = txq->avp;
1701 	struct rte_avp_fifo *alloc_q;
1702 	struct rte_avp_fifo *tx_q;
1703 	unsigned int count, avail, n;
1704 	unsigned int orig_nb_pkts;
1705 	struct rte_mbuf *m;
1706 	unsigned int required;
1707 	unsigned int segments;
1708 	unsigned int tx_bytes;
1709 	unsigned int i;
1710 
1711 	orig_nb_pkts = nb_pkts;
1712 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1713 		/* VM live migration in progress */
1714 		/* TODO ... buffer for X packets then drop? */
1715 		txq->errors += nb_pkts;
1716 		return 0;
1717 	}
1718 
1719 	tx_q = avp->tx_q[txq->queue_id];
1720 	alloc_q = avp->alloc_q[txq->queue_id];
1721 
1722 	/* limit the number of transmitted packets to the max burst size */
1723 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1724 		nb_pkts = AVP_MAX_TX_BURST;
1725 
1726 	/* determine how many buffers are available to copy into */
1727 	avail = avp_fifo_count(alloc_q);
1728 	if (unlikely(avail > (AVP_MAX_TX_BURST *
1729 			      RTE_AVP_MAX_MBUF_SEGMENTS)))
1730 		avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1731 
1732 	/* determine how many slots are available in the transmit queue */
1733 	count = avp_fifo_free_count(tx_q);
1734 
1735 	/* determine how many packets can be sent */
1736 	nb_pkts = RTE_MIN(count, nb_pkts);
1737 
1738 	/* determine how many packets will fit in the available buffers */
1739 	count = 0;
1740 	segments = 0;
1741 	for (i = 0; i < nb_pkts; i++) {
1742 		m = tx_pkts[i];
1743 		if (likely(i < (unsigned int)nb_pkts - 1)) {
1744 			/* prefetch next entry while processing this one */
1745 			rte_prefetch0(tx_pkts[i + 1]);
1746 		}
1747 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1748 			avp->host_mbuf_size;
1749 
1750 		if (unlikely((required == 0) ||
1751 			     (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1752 			break;
1753 		else if (unlikely(required + segments > avail))
1754 			break;
1755 		segments += required;
1756 		count++;
1757 	}
1758 	nb_pkts = count;
1759 
1760 	if (unlikely(nb_pkts == 0)) {
1761 		/* no available buffers, or no space on the tx queue */
1762 		txq->errors += orig_nb_pkts;
1763 		return 0;
1764 	}
1765 
1766 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1767 		   nb_pkts, tx_q);
1768 
1769 	/* retrieve sufficient send buffers */
1770 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1771 	if (unlikely(n != segments)) {
1772 		PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1773 			   "n=%u, segments=%u, orig=%u\n",
1774 			   n, segments, orig_nb_pkts);
1775 		txq->errors += orig_nb_pkts;
1776 		return 0;
1777 	}
1778 
1779 	tx_bytes = 0;
1780 	count = 0;
1781 	for (i = 0; i < nb_pkts; i++) {
1782 		/* process each packet to be transmitted */
1783 		m = tx_pkts[i];
1784 
1785 		/* determine how many buffers are required for this packet */
1786 		required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1787 			avp->host_mbuf_size;
1788 
1789 		tx_bytes += avp_dev_copy_to_buffers(avp, m,
1790 						    &avp_bufs[count], required);
1791 		tx_bufs[i] = avp_bufs[count];
1792 		count += required;
1793 
1794 		/* free the original mbuf */
1795 		rte_pktmbuf_free(m);
1796 	}
1797 
1798 	txq->packets += nb_pkts;
1799 	txq->bytes += tx_bytes;
1800 
1801 #ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1802 	for (i = 0; i < nb_pkts; i++)
1803 		avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1804 #endif
1805 
1806 	/* send the packets */
1807 	n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1808 	if (unlikely(n != orig_nb_pkts))
1809 		txq->errors += (orig_nb_pkts - n);
1810 
1811 	return n;
1812 }
1813 
1814 
1815 static uint16_t
1816 avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1817 {
1818 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1819 	struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1820 	struct avp_dev *avp = txq->avp;
1821 	struct rte_avp_desc *pkt_buf;
1822 	struct rte_avp_fifo *alloc_q;
1823 	struct rte_avp_fifo *tx_q;
1824 	unsigned int count, avail, n;
1825 	struct rte_mbuf *m;
1826 	unsigned int pkt_len;
1827 	unsigned int tx_bytes;
1828 	char *pkt_data;
1829 	unsigned int i;
1830 
1831 	if (unlikely(avp->flags & AVP_F_DETACHED)) {
1832 		/* VM live migration in progress */
1833 		/* TODO ... buffer for X packets then drop?! */
1834 		txq->errors++;
1835 		return 0;
1836 	}
1837 
1838 	tx_q = avp->tx_q[txq->queue_id];
1839 	alloc_q = avp->alloc_q[txq->queue_id];
1840 
1841 	/* limit the number of transmitted packets to the max burst size */
1842 	if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1843 		nb_pkts = AVP_MAX_TX_BURST;
1844 
1845 	/* determine how many buffers are available to copy into */
1846 	avail = avp_fifo_count(alloc_q);
1847 
1848 	/* determine how many slots are available in the transmit queue */
1849 	count = avp_fifo_free_count(tx_q);
1850 
1851 	/* determine how many packets can be sent */
1852 	count = RTE_MIN(count, avail);
1853 	count = RTE_MIN(count, nb_pkts);
1854 
1855 	if (unlikely(count == 0)) {
1856 		/* no available buffers, or no space on the tx queue */
1857 		txq->errors += nb_pkts;
1858 		return 0;
1859 	}
1860 
1861 	PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1862 		   count, tx_q);
1863 
1864 	/* retrieve sufficient send buffers */
1865 	n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1866 	if (unlikely(n != count)) {
1867 		txq->errors++;
1868 		return 0;
1869 	}
1870 
1871 	tx_bytes = 0;
1872 	for (i = 0; i < count; i++) {
1873 		/* prefetch next entry while processing the current one */
1874 		if (i < count - 1) {
1875 			pkt_buf = avp_dev_translate_buffer(avp,
1876 							   avp_bufs[i + 1]);
1877 			rte_prefetch0(pkt_buf);
1878 		}
1879 
1880 		/* process each packet to be transmitted */
1881 		m = tx_pkts[i];
1882 
1883 		/* Adjust pointers for guest addressing */
1884 		pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1885 		pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1886 		pkt_len = rte_pktmbuf_pkt_len(m);
1887 
1888 		if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1889 					 (pkt_len > avp->host_mbuf_size))) {
1890 			/*
1891 			 * application should be using the scattered transmit
1892 			 * function; send it truncated to avoid the performance
1893 			 * hit of having to manage returning the already
1894 			 * allocated buffer to the free list.  This should not
1895 			 * happen since the application should have set the
1896 			 * max_rx_pkt_len based on its MTU and it should be
1897 			 * policing its own packet sizes.
1898 			 */
1899 			txq->errors++;
1900 			pkt_len = RTE_MIN(avp->guest_mbuf_size,
1901 					  avp->host_mbuf_size);
1902 		}
1903 
1904 		/* copy data out of our mbuf and into the AVP buffer */
1905 		rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1906 		pkt_buf->pkt_len = pkt_len;
1907 		pkt_buf->data_len = pkt_len;
1908 		pkt_buf->nb_segs = 1;
1909 		pkt_buf->next = NULL;
1910 
1911 		if (m->ol_flags & PKT_TX_VLAN_PKT) {
1912 			pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1913 			pkt_buf->vlan_tci = m->vlan_tci;
1914 		}
1915 
1916 		tx_bytes += pkt_len;
1917 
1918 		/* free the original mbuf */
1919 		rte_pktmbuf_free(m);
1920 	}
1921 
1922 	txq->packets += count;
1923 	txq->bytes += tx_bytes;
1924 
1925 	/* send the packets */
1926 	n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1927 
1928 	return n;
1929 }
1930 
1931 static void
1932 avp_dev_rx_queue_release(void *rx_queue)
1933 {
1934 	struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1935 	struct avp_dev *avp = rxq->avp;
1936 	struct rte_eth_dev_data *data = avp->dev_data;
1937 	unsigned int i;
1938 
1939 	for (i = 0; i < avp->num_rx_queues; i++) {
1940 		if (data->rx_queues[i] == rxq) {
1941 			rte_free(data->rx_queues[i]);
1942 			data->rx_queues[i] = NULL;
1943 		}
1944 	}
1945 }
1946 
1947 static void
1948 avp_dev_rx_queue_release_all(struct rte_eth_dev *eth_dev)
1949 {
1950 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1951 	struct rte_eth_dev_data *data = avp->dev_data;
1952 	unsigned int i;
1953 
1954 	for (i = 0; i < avp->num_rx_queues; i++) {
1955 		if (data->rx_queues[i]) {
1956 			rte_free(data->rx_queues[i]);
1957 			data->rx_queues[i] = NULL;
1958 		}
1959 	}
1960 }
1961 
1962 static void
1963 avp_dev_tx_queue_release(void *tx_queue)
1964 {
1965 	struct avp_queue *txq = (struct avp_queue *)tx_queue;
1966 	struct avp_dev *avp = txq->avp;
1967 	struct rte_eth_dev_data *data = avp->dev_data;
1968 	unsigned int i;
1969 
1970 	for (i = 0; i < avp->num_tx_queues; i++) {
1971 		if (data->tx_queues[i] == txq) {
1972 			rte_free(data->tx_queues[i]);
1973 			data->tx_queues[i] = NULL;
1974 		}
1975 	}
1976 }
1977 
1978 static void
1979 avp_dev_tx_queue_release_all(struct rte_eth_dev *eth_dev)
1980 {
1981 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1982 	struct rte_eth_dev_data *data = avp->dev_data;
1983 	unsigned int i;
1984 
1985 	for (i = 0; i < avp->num_tx_queues; i++) {
1986 		if (data->tx_queues[i]) {
1987 			rte_free(data->tx_queues[i]);
1988 			data->tx_queues[i] = NULL;
1989 		}
1990 	}
1991 }
1992 
1993 static int
1994 avp_dev_configure(struct rte_eth_dev *eth_dev)
1995 {
1996 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1997 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1998 	struct rte_avp_device_info *host_info;
1999 	struct rte_avp_device_config config;
2000 	int mask = 0;
2001 	void *addr;
2002 	int ret;
2003 
2004 	rte_spinlock_lock(&avp->lock);
2005 	if (avp->flags & AVP_F_DETACHED) {
2006 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2007 		ret = -ENOTSUP;
2008 		goto unlock;
2009 	}
2010 
2011 	addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2012 	host_info = (struct rte_avp_device_info *)addr;
2013 
2014 	/* Setup required number of queues */
2015 	_avp_set_queue_counts(eth_dev);
2016 
2017 	mask = (ETH_VLAN_STRIP_MASK |
2018 		ETH_VLAN_FILTER_MASK |
2019 		ETH_VLAN_EXTEND_MASK);
2020 	ret = avp_vlan_offload_set(eth_dev, mask);
2021 	if (ret < 0) {
2022 		PMD_DRV_LOG(ERR, "VLAN offload set failed by host, ret=%d\n",
2023 			    ret);
2024 		goto unlock;
2025 	}
2026 
2027 	/* update device config */
2028 	memset(&config, 0, sizeof(config));
2029 	config.device_id = host_info->device_id;
2030 	config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2031 	config.driver_version = AVP_DPDK_DRIVER_VERSION;
2032 	config.features = avp->features;
2033 	config.num_tx_queues = avp->num_tx_queues;
2034 	config.num_rx_queues = avp->num_rx_queues;
2035 
2036 	ret = avp_dev_ctrl_set_config(eth_dev, &config);
2037 	if (ret < 0) {
2038 		PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2039 			    ret);
2040 		goto unlock;
2041 	}
2042 
2043 	avp->flags |= AVP_F_CONFIGURED;
2044 	ret = 0;
2045 
2046 unlock:
2047 	rte_spinlock_unlock(&avp->lock);
2048 	return ret;
2049 }
2050 
2051 static int
2052 avp_dev_start(struct rte_eth_dev *eth_dev)
2053 {
2054 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2055 	int ret;
2056 
2057 	rte_spinlock_lock(&avp->lock);
2058 	if (avp->flags & AVP_F_DETACHED) {
2059 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2060 		ret = -ENOTSUP;
2061 		goto unlock;
2062 	}
2063 
2064 	/* update link state */
2065 	ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2066 	if (ret < 0) {
2067 		PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2068 			    ret);
2069 		goto unlock;
2070 	}
2071 
2072 	/* remember current link state */
2073 	avp->flags |= AVP_F_LINKUP;
2074 
2075 	ret = 0;
2076 
2077 unlock:
2078 	rte_spinlock_unlock(&avp->lock);
2079 	return ret;
2080 }
2081 
2082 static void
2083 avp_dev_stop(struct rte_eth_dev *eth_dev)
2084 {
2085 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2086 	int ret;
2087 
2088 	rte_spinlock_lock(&avp->lock);
2089 	if (avp->flags & AVP_F_DETACHED) {
2090 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2091 		goto unlock;
2092 	}
2093 
2094 	/* remember current link state */
2095 	avp->flags &= ~AVP_F_LINKUP;
2096 
2097 	/* update link state */
2098 	ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2099 	if (ret < 0) {
2100 		PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2101 			    ret);
2102 	}
2103 
2104 unlock:
2105 	rte_spinlock_unlock(&avp->lock);
2106 }
2107 
2108 static void
2109 avp_dev_close(struct rte_eth_dev *eth_dev)
2110 {
2111 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2112 	int ret;
2113 
2114 	rte_spinlock_lock(&avp->lock);
2115 	if (avp->flags & AVP_F_DETACHED) {
2116 		PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2117 		goto unlock;
2118 	}
2119 
2120 	/* remember current link state */
2121 	avp->flags &= ~AVP_F_LINKUP;
2122 	avp->flags &= ~AVP_F_CONFIGURED;
2123 
2124 	ret = avp_dev_disable_interrupts(eth_dev);
2125 	if (ret < 0) {
2126 		PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2127 		/* continue */
2128 	}
2129 
2130 	/* update device state */
2131 	ret = avp_dev_ctrl_shutdown(eth_dev);
2132 	if (ret < 0) {
2133 		PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2134 			    ret);
2135 		/* continue */
2136 	}
2137 
2138 	/* release dynamic storage for rx/tx queues */
2139 	avp_dev_rx_queue_release_all(eth_dev);
2140 	avp_dev_tx_queue_release_all(eth_dev);
2141 
2142 unlock:
2143 	rte_spinlock_unlock(&avp->lock);
2144 }
2145 
2146 static int
2147 avp_dev_link_update(struct rte_eth_dev *eth_dev,
2148 					__rte_unused int wait_to_complete)
2149 {
2150 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2151 	struct rte_eth_link *link = &eth_dev->data->dev_link;
2152 
2153 	link->link_speed = ETH_SPEED_NUM_10G;
2154 	link->link_duplex = ETH_LINK_FULL_DUPLEX;
2155 	link->link_status = !!(avp->flags & AVP_F_LINKUP);
2156 
2157 	return -1;
2158 }
2159 
2160 static int
2161 avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2162 {
2163 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2164 
2165 	rte_spinlock_lock(&avp->lock);
2166 	if ((avp->flags & AVP_F_PROMISC) == 0) {
2167 		avp->flags |= AVP_F_PROMISC;
2168 		PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2169 			    eth_dev->data->port_id);
2170 	}
2171 	rte_spinlock_unlock(&avp->lock);
2172 
2173 	return 0;
2174 }
2175 
2176 static int
2177 avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2178 {
2179 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2180 
2181 	rte_spinlock_lock(&avp->lock);
2182 	if ((avp->flags & AVP_F_PROMISC) != 0) {
2183 		avp->flags &= ~AVP_F_PROMISC;
2184 		PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2185 			    eth_dev->data->port_id);
2186 	}
2187 	rte_spinlock_unlock(&avp->lock);
2188 
2189 	return 0;
2190 }
2191 
2192 static int
2193 avp_dev_info_get(struct rte_eth_dev *eth_dev,
2194 		 struct rte_eth_dev_info *dev_info)
2195 {
2196 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2197 
2198 	dev_info->max_rx_queues = avp->max_rx_queues;
2199 	dev_info->max_tx_queues = avp->max_tx_queues;
2200 	dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2201 	dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2202 	dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2203 	if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2204 		dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2205 		dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2206 	}
2207 
2208 	return 0;
2209 }
2210 
2211 static int
2212 avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2213 {
2214 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2215 	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
2216 	uint64_t offloads = dev_conf->rxmode.offloads;
2217 
2218 	if (mask & ETH_VLAN_STRIP_MASK) {
2219 		if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2220 			if (offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2221 				avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2222 			else
2223 				avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2224 		} else {
2225 			PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2226 		}
2227 	}
2228 
2229 	if (mask & ETH_VLAN_FILTER_MASK) {
2230 		if (offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
2231 			PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2232 	}
2233 
2234 	if (mask & ETH_VLAN_EXTEND_MASK) {
2235 		if (offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
2236 			PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2237 	}
2238 
2239 	return 0;
2240 }
2241 
2242 static int
2243 avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2244 {
2245 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2246 	unsigned int i;
2247 
2248 	for (i = 0; i < avp->num_rx_queues; i++) {
2249 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2250 
2251 		if (rxq) {
2252 			stats->ipackets += rxq->packets;
2253 			stats->ibytes += rxq->bytes;
2254 			stats->ierrors += rxq->errors;
2255 
2256 			stats->q_ipackets[i] += rxq->packets;
2257 			stats->q_ibytes[i] += rxq->bytes;
2258 			stats->q_errors[i] += rxq->errors;
2259 		}
2260 	}
2261 
2262 	for (i = 0; i < avp->num_tx_queues; i++) {
2263 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2264 
2265 		if (txq) {
2266 			stats->opackets += txq->packets;
2267 			stats->obytes += txq->bytes;
2268 			stats->oerrors += txq->errors;
2269 
2270 			stats->q_opackets[i] += txq->packets;
2271 			stats->q_obytes[i] += txq->bytes;
2272 		}
2273 	}
2274 
2275 	return 0;
2276 }
2277 
2278 static int
2279 avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2280 {
2281 	struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2282 	unsigned int i;
2283 
2284 	for (i = 0; i < avp->num_rx_queues; i++) {
2285 		struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2286 
2287 		if (rxq) {
2288 			rxq->bytes = 0;
2289 			rxq->packets = 0;
2290 			rxq->errors = 0;
2291 		}
2292 	}
2293 
2294 	for (i = 0; i < avp->num_tx_queues; i++) {
2295 		struct avp_queue *txq = avp->dev_data->tx_queues[i];
2296 
2297 		if (txq) {
2298 			txq->bytes = 0;
2299 			txq->packets = 0;
2300 			txq->errors = 0;
2301 		}
2302 	}
2303 
2304 	return 0;
2305 }
2306 
/* PCI id table and PCI driver registration for the net_avp PMD */
RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);
RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
2309 
2310 RTE_INIT(avp_init_log)
2311 {
2312 	avp_logtype_driver = rte_log_register("pmd.net.avp.driver");
2313 	if (avp_logtype_driver >= 0)
2314 		rte_log_set_level(avp_logtype_driver, RTE_LOG_NOTICE);
2315 }
2316