/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation.
 * Copyright(c) 2014 6WIND S.A.
 * All rights reserved.
 */

#include <stdlib.h>
#include <time.h>

#include <pcap.h>

#include <rte_cycles.h>
#include <ethdev_driver.h>
#include <ethdev_vdev.h>
#include <rte_kvargs.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>
#include <bus_vdev_driver.h>
#include <rte_os_shim.h>

#include "pcap_osdep.h"

#define RTE_ETH_PCAP_SNAPSHOT_LEN 65535
#define RTE_ETH_PCAP_SNAPLEN RTE_ETHER_MAX_JUMBO_FRAME_LEN
#define RTE_ETH_PCAP_PROMISC 1
#define RTE_ETH_PCAP_TIMEOUT -1

#define ETH_PCAP_RX_PCAP_ARG  "rx_pcap"
#define ETH_PCAP_TX_PCAP_ARG  "tx_pcap"
#define ETH_PCAP_RX_IFACE_ARG "rx_iface"
#define ETH_PCAP_RX_IFACE_IN_ARG "rx_iface_in"
#define ETH_PCAP_TX_IFACE_ARG "tx_iface"
#define ETH_PCAP_IFACE_ARG    "iface"
#define ETH_PCAP_PHY_MAC_ARG  "phy_mac"
#define ETH_PCAP_INFINITE_RX_ARG  "infinite_rx"

#define ETH_PCAP_ARG_MAXLEN	64

#define RTE_PMD_PCAP_MAX_QUEUES 16

static char errbuf[PCAP_ERRBUF_SIZE];
static struct timespec start_time;
static uint64_t start_cycles;
static uint64_t hz;
static uint8_t iface_idx;

static uint64_t timestamp_rx_dynflag;
static int timestamp_dynfield_offset = -1;

struct queue_stat {
	volatile unsigned long pkts;
	volatile unsigned long bytes;
	volatile unsigned long err_pkts;
	volatile unsigned long rx_nombuf;
};

struct queue_missed_stat {
	/* last value retrieved from pcap */
	unsigned int pcap;
	/* stores values lost by pcap stop or rollover */
	unsigned long mnemonic;
	/* value on last reset */
	unsigned long reset;
};

struct pcap_rx_queue {
	uint16_t port_id;
	uint16_t queue_id;
	struct rte_mempool *mb_pool;
	struct queue_stat rx_stat;
	struct queue_missed_stat missed_stat;
	char name[PATH_MAX];
	char type[ETH_PCAP_ARG_MAXLEN];

	/* Contains pre-generated packets to be looped through */
	struct rte_ring *pkts;
};

struct pcap_tx_queue {
	uint16_t port_id;
	uint16_t queue_id;
	struct queue_stat tx_stat;
	char name[PATH_MAX];
	char type[ETH_PCAP_ARG_MAXLEN];
};

struct pmd_internals {
	struct pcap_rx_queue rx_queue[RTE_PMD_PCAP_MAX_QUEUES];
	struct pcap_tx_queue tx_queue[RTE_PMD_PCAP_MAX_QUEUES];
	char devargs[ETH_PCAP_ARG_MAXLEN];
	struct rte_ether_addr eth_addr;
	int if_index;
	int single_iface;
	int phy_mac;
	unsigned int infinite_rx;
};

struct pmd_process_private {
	pcap_t *rx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
	pcap_t *tx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
	pcap_dumper_t *tx_dumper[RTE_PMD_PCAP_MAX_QUEUES];
};

struct pmd_devargs {
	unsigned int num_of_queue;
	struct devargs_queue {
		pcap_dumper_t *dumper;
		pcap_t *pcap;
		const char *name;
		const char *type;
	} queue[RTE_PMD_PCAP_MAX_QUEUES];
	int phy_mac;
};

struct pmd_devargs_all {
	struct pmd_devargs rx_queues;
	struct pmd_devargs tx_queues;
	int single_iface;
	unsigned int is_tx_pcap;
	unsigned int is_tx_iface;
	unsigned int is_rx_pcap;
	unsigned int is_rx_iface;
	unsigned int infinite_rx;
};

static const char *valid_arguments[] = {
	ETH_PCAP_RX_PCAP_ARG,
	ETH_PCAP_TX_PCAP_ARG,
	ETH_PCAP_RX_IFACE_ARG,
	ETH_PCAP_RX_IFACE_IN_ARG,
	ETH_PCAP_TX_IFACE_ARG,
	ETH_PCAP_IFACE_ARG,
	ETH_PCAP_PHY_MAC_ARG,
	ETH_PCAP_INFINITE_RX_ARG,
	NULL
};

static struct rte_eth_link pmd_link = {
		.link_speed = RTE_ETH_SPEED_NUM_10G,
		.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
		.link_status = RTE_ETH_LINK_DOWN,
		.link_autoneg = RTE_ETH_LINK_FIXED,
};

RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);

static struct queue_missed_stat*
queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct queue_missed_stat *missed_stat =
			&internals->rx_queue[qid].missed_stat;
	const struct pmd_process_private *pp = dev->process_private;
	pcap_t *pcap = pp->rx_pcap[qid];
	struct pcap_stat stat;

	if (!pcap || (pcap_stats(pcap, &stat) != 0))
		return missed_stat;

	/* rollover check - best effort fixup assuming single rollover */
	if (stat.ps_drop < missed_stat->pcap)
		missed_stat->mnemonic += UINT_MAX;
	missed_stat->pcap = stat.ps_drop;

	return missed_stat;
}

static void
queue_missed_stat_on_stop_update(struct rte_eth_dev *dev, unsigned int qid)
{
	struct queue_missed_stat *missed_stat =
			queue_missed_stat_update(dev, qid);

	missed_stat->mnemonic += missed_stat->pcap;
	missed_stat->pcap = 0;
}

static void
queue_missed_stat_reset(struct rte_eth_dev *dev, unsigned int qid)
{
	struct queue_missed_stat *missed_stat =
			queue_missed_stat_update(dev, qid);

	missed_stat->reset = missed_stat->pcap;
	missed_stat->mnemonic = 0;
}

static unsigned long
queue_missed_stat_get(struct rte_eth_dev *dev, unsigned int qid)
{
	const struct queue_missed_stat *missed_stat =
			queue_missed_stat_update(dev, qid);

	return missed_stat->pcap + missed_stat->mnemonic - missed_stat->reset;
}
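
/*
 * Worked example of the missed-stat bookkeeping above (editorial,
 * illustrative only): suppose pcap reports ps_drop = 100 and the port is
 * then stopped (mnemonic += 100, pcap = 0); after a restart pcap reports
 * ps_drop = 40.  The value exposed by queue_missed_stat_get() is
 * pcap + mnemonic - reset = 40 + 100 - 0 = 140, i.e. drops survive the
 * stop/start cycle even though libpcap restarts its own counter when the
 * handle is reopened.
 */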

static int
eth_pcap_rx_jumbo(struct rte_mempool *mb_pool, struct rte_mbuf *mbuf,
		const u_char *data, uint16_t data_len)
{
	/* Copy the first segment. */
	uint16_t len = rte_pktmbuf_tailroom(mbuf);
	struct rte_mbuf *m = mbuf;

	rte_memcpy(rte_pktmbuf_append(mbuf, len), data, len);
	data_len -= len;
	data += len;

	while (data_len > 0) {
		/* Allocate next mbuf and point to that. */
		m->next = rte_pktmbuf_alloc(mb_pool);

		if (unlikely(!m->next))
			return -1;

		m = m->next;

		/* Headroom is not needed in chained mbufs. */
		rte_pktmbuf_prepend(m, rte_pktmbuf_headroom(m));
		m->pkt_len = 0;
		m->data_len = 0;

		/* Copy next segment. */
		len = RTE_MIN(rte_pktmbuf_tailroom(m), data_len);
		rte_memcpy(rte_pktmbuf_append(m, len), data, len);

		mbuf->nb_segs++;
		data_len -= len;
		data += len;
	}

	return mbuf->nb_segs;
}
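
/*
 * Illustrative arithmetic for the chaining loop above (editorial): with a
 * pool created at RTE_MBUF_DEFAULT_BUF_SIZE, the head mbuf offers 2048
 * bytes of tailroom and each chained mbuf 2176 bytes, because the
 * rte_pktmbuf_prepend() call reclaims the 128-byte headroom.  A 9000-byte
 * capture then splits as 2048 + 3 * 2176 + remainder, so the function
 * returns nb_segs == 5.  Exact counts depend on the pool's data room size.
 */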

static uint16_t
eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	int i;
	struct pcap_rx_queue *pcap_q = queue;
	uint32_t rx_bytes = 0;

	if (unlikely(nb_pkts == 0))
		return 0;

	if (rte_pktmbuf_alloc_bulk(pcap_q->mb_pool, bufs, nb_pkts) != 0)
		return 0;

	for (i = 0; i < nb_pkts; i++) {
		struct rte_mbuf *pcap_buf;
		int err = rte_ring_dequeue(pcap_q->pkts, (void **)&pcap_buf);
		if (err) {
			/* Free the mbufs allocated above that will not be
			 * filled; returning without this would leak them.
			 */
			rte_pktmbuf_free_bulk(&bufs[i], nb_pkts - i);
			return i;
		}

		rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *),
				rte_pktmbuf_mtod(pcap_buf, void *),
				pcap_buf->data_len);
		bufs[i]->data_len = pcap_buf->data_len;
		bufs[i]->pkt_len = pcap_buf->pkt_len;
		bufs[i]->port = pcap_q->port_id;
		rx_bytes += pcap_buf->data_len;

		/* Enqueue packet back on ring to allow infinite rx. */
		rte_ring_enqueue(pcap_q->pkts, pcap_buf);
	}

	pcap_q->rx_stat.pkts += i;
	pcap_q->rx_stat.bytes += rx_bytes;

	return i;
}
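
/*
 * Design note (editorial): the ring keeps a master copy of every packet
 * from the file, and each burst copies those payloads into freshly
 * allocated mbufs.  The application can therefore hold or free its mbufs
 * at any pace while the originals are immediately re-enqueued, which is
 * what makes the "infinite" replay possible without re-reading the file.
 */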

static uint16_t
eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	unsigned int i;
	struct pcap_pkthdr *header;
	struct pmd_process_private *pp;
	const u_char *packet;
	struct rte_mbuf *mbuf;
	struct pcap_rx_queue *pcap_q = queue;
	uint16_t num_rx = 0;
	uint32_t rx_bytes = 0;
	pcap_t *pcap;

	pp = rte_eth_devices[pcap_q->port_id].process_private;
	pcap = pp->rx_pcap[pcap_q->queue_id];

	if (unlikely(pcap == NULL || nb_pkts == 0))
		return 0;

	/* Reads the given number of packets from the pcap file one by one
	 * and copies the packet data into a newly allocated mbuf to return.
	 */
	for (i = 0; i < nb_pkts; i++) {
		/* Get the next PCAP packet */
		int ret = pcap_next_ex(pcap, &header, &packet);
		if (ret != 1) {
			if (ret == PCAP_ERROR)
				pcap_q->rx_stat.err_pkts++;

			break;
		}

		mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
		if (unlikely(mbuf == NULL)) {
			pcap_q->rx_stat.rx_nombuf++;
			break;
		}

		uint32_t len = header->caplen;
		if (len <= rte_pktmbuf_tailroom(mbuf)) {
			/* pcap packet will fit in the mbuf, can copy it */
			rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), packet, len);
			mbuf->data_len = len;
		} else {
			/* Try read jumbo frame into multi mbufs. */
			if (unlikely(eth_pcap_rx_jumbo(pcap_q->mb_pool,
						       mbuf, packet, len) == -1)) {
				pcap_q->rx_stat.err_pkts++;
				rte_pktmbuf_free(mbuf);
				break;
			}
		}

		mbuf->pkt_len = len;
		uint64_t us = (uint64_t)header->ts.tv_sec * US_PER_S + header->ts.tv_usec;

		*RTE_MBUF_DYNFIELD(mbuf, timestamp_dynfield_offset, rte_mbuf_timestamp_t *) = us;
		mbuf->ol_flags |= timestamp_rx_dynflag;
		mbuf->port = pcap_q->port_id;
		bufs[num_rx] = mbuf;
		num_rx++;
		rx_bytes += len;
	}
	pcap_q->rx_stat.pkts += num_rx;
	pcap_q->rx_stat.bytes += rx_bytes;

	return num_rx;
}

static uint16_t
eth_null_rx(void *queue __rte_unused,
		struct rte_mbuf **bufs __rte_unused,
		uint16_t nb_pkts __rte_unused)
{
	return 0;
}

#define NSEC_PER_SEC	1000000000L

/*
 * This function stores nanoseconds in the `tv_usec` field of
 * `struct timeval`, because `ts` goes directly to the
 * nanosecond-precision dump.
 */
static inline void
calculate_timestamp(struct timeval *ts) {
	uint64_t cycles;
	struct timespec cur_time;

	cycles = rte_get_timer_cycles() - start_cycles;
	cur_time.tv_sec = cycles / hz;
	cur_time.tv_nsec = (cycles % hz) * NSEC_PER_SEC / hz;

	ts->tv_sec = start_time.tv_sec + cur_time.tv_sec;
	ts->tv_usec = start_time.tv_nsec + cur_time.tv_nsec;
	if (ts->tv_usec >= NSEC_PER_SEC) {
		ts->tv_usec -= NSEC_PER_SEC;
		ts->tv_sec += 1;
	}
}
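
/*
 * Numeric example for calculate_timestamp() (editorial, illustrative):
 * with a timer frequency hz = 2 GHz and cycles = 3 * 10^9 elapsed since
 * probe, cur_time.tv_sec = 1 and cur_time.tv_nsec = 5 * 10^8, i.e. 1.5 s
 * added to the wall-clock start_time captured at probe time.  The carry
 * branch then normalizes tv_usec (holding nanoseconds here) back below
 * one second.
 */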

/*
 * Callback to handle writing packets to a pcap file.
 */
static uint16_t
eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	unsigned int i;
	struct rte_mbuf *mbuf;
	struct pmd_process_private *pp;
	struct pcap_tx_queue *dumper_q = queue;
	uint16_t num_tx = 0;
	uint32_t tx_bytes = 0;
	struct pcap_pkthdr header;
	pcap_dumper_t *dumper;
	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
	size_t len, caplen;

	pp = rte_eth_devices[dumper_q->port_id].process_private;
	dumper = pp->tx_dumper[dumper_q->queue_id];

	if (dumper == NULL || nb_pkts == 0)
		return 0;

	/* Writes the nb_pkts packets to the previously opened pcap file
	 * dumper.
	 */
	for (i = 0; i < nb_pkts; i++) {
		mbuf = bufs[i];
		len = caplen = rte_pktmbuf_pkt_len(mbuf);
		if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
				len > sizeof(temp_data))) {
			caplen = sizeof(temp_data);
		}

		calculate_timestamp(&header.ts);
		header.len = len;
		header.caplen = caplen;
		/* rte_pktmbuf_read() returns a pointer to the data directly
		 * in the mbuf (when the mbuf is contiguous) or, otherwise,
		 * a pointer to temp_data after copying into it.
		 */
		pcap_dump((u_char *)dumper, &header,
			rte_pktmbuf_read(mbuf, 0, caplen, temp_data));

		num_tx++;
		tx_bytes += caplen;
		rte_pktmbuf_free(mbuf);
	}

	/*
	 * Since there's no place to hook a callback when the forwarding
	 * process stops and to make sure the pcap file is actually written,
	 * we flush the pcap dumper within each burst.
	 */
	pcap_dump_flush(dumper);
	dumper_q->tx_stat.pkts += num_tx;
	dumper_q->tx_stat.bytes += tx_bytes;
	dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;

	return nb_pkts;
}
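
/*
 * Note on the header fields above (editorial): `len` records the packet's
 * true wire length while `caplen` is how many bytes were actually stored.
 * They only differ for a multi-segment mbuf larger than temp_data
 * (RTE_ETH_PCAP_SNAPLEN bytes); such a packet is written truncated, and
 * readers like tcpdump will flag it as a partial capture.
 */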

/*
 * Callback to handle dropping packets in the infinite rx case.
 */
static uint16_t
eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	unsigned int i;
	uint32_t tx_bytes = 0;
	struct pcap_tx_queue *tx_queue = queue;

	if (unlikely(nb_pkts == 0))
		return 0;

	for (i = 0; i < nb_pkts; i++) {
		tx_bytes += bufs[i]->pkt_len;
		rte_pktmbuf_free(bufs[i]);
	}

	tx_queue->tx_stat.pkts += nb_pkts;
	tx_queue->tx_stat.bytes += tx_bytes;

	return i;
}

/*
 * Callback to handle sending packets through a real NIC.
 */
static uint16_t
eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	unsigned int i;
	int ret;
	struct rte_mbuf *mbuf;
	struct pmd_process_private *pp;
	struct pcap_tx_queue *tx_queue = queue;
	uint16_t num_tx = 0;
	uint32_t tx_bytes = 0;
	pcap_t *pcap;
	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
	size_t len;

	pp = rte_eth_devices[tx_queue->port_id].process_private;
	pcap = pp->tx_pcap[tx_queue->queue_id];

	if (unlikely(nb_pkts == 0 || pcap == NULL))
		return 0;

	for (i = 0; i < nb_pkts; i++) {
		mbuf = bufs[i];
		len = rte_pktmbuf_pkt_len(mbuf);
		if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
				len > sizeof(temp_data))) {
			PMD_LOG(ERR,
				"Dropping multi segment PCAP packet. Size (%zd) > max size (%zd).",
				len, sizeof(temp_data));
			rte_pktmbuf_free(mbuf);
			continue;
		}

		/* rte_pktmbuf_read() returns a pointer to the data directly
		 * in the mbuf (when the mbuf is contiguous) or, otherwise,
		 * a pointer to temp_data after copying into it.
		 */
		ret = pcap_sendpacket(pcap,
			rte_pktmbuf_read(mbuf, 0, len, temp_data), len);
		if (unlikely(ret != 0))
			break;
		num_tx++;
		tx_bytes += len;
		rte_pktmbuf_free(mbuf);
	}

	tx_queue->tx_stat.pkts += num_tx;
	tx_queue->tx_stat.bytes += tx_bytes;
	tx_queue->tx_stat.err_pkts += i - num_tx;

	return i;
}

/*
 * pcap_open_live wrapper function
 */
static inline int
open_iface_live(const char *iface, pcap_t **pcap) {
	*pcap = pcap_open_live(iface, RTE_ETH_PCAP_SNAPLEN,
			RTE_ETH_PCAP_PROMISC, RTE_ETH_PCAP_TIMEOUT, errbuf);

	if (*pcap == NULL) {
		PMD_LOG(ERR, "Couldn't open %s: %s", iface, errbuf);
		return -1;
	}

	if (pcap_setnonblock(*pcap, 1, errbuf)) {
		PMD_LOG(ERR, "Couldn't set non-blocking on %s: %s", iface, errbuf);
		pcap_close(*pcap);
		return -1;
	}

	return 0;
}
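
/*
 * Editorial note: the pcap_setnonblock() call above is what keeps this
 * usable as a poll-mode driver; eth_pcap_rx() polls pcap_next_ex() every
 * burst, and in blocking mode an idle interface could park the lcore
 * inside libpcap instead of returning an empty burst.
 */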

static int
open_single_iface(const char *iface, pcap_t **pcap)
{
	if (open_iface_live(iface, pcap) < 0) {
		PMD_LOG(ERR, "Couldn't open interface %s", iface);
		return -1;
	}

	return 0;
}

static int
open_single_tx_pcap(const char *pcap_filename, pcap_dumper_t **dumper)
{
	pcap_t *tx_pcap;

	/*
	 * We need to create a dummy empty pcap_t to use it
	 * with pcap_dump_open(). We create an Ethernet pcap
	 * holder that is big enough.
	 */
	tx_pcap = pcap_open_dead_with_tstamp_precision(DLT_EN10MB,
			RTE_ETH_PCAP_SNAPSHOT_LEN, PCAP_TSTAMP_PRECISION_NANO);
	if (tx_pcap == NULL) {
		PMD_LOG(ERR, "Couldn't create dead pcap");
		return -1;
	}

	/* The dumper is created using the previous pcap_t reference */
	*dumper = pcap_dump_open(tx_pcap, pcap_filename);
	if (*dumper == NULL) {
		pcap_close(tx_pcap);
		PMD_LOG(ERR, "Couldn't open %s for writing.",
			pcap_filename);
		return -1;
	}

	pcap_close(tx_pcap);
	return 0;
}

static int
open_single_rx_pcap(const char *pcap_filename, pcap_t **pcap)
{
	*pcap = pcap_open_offline(pcap_filename, errbuf);
	if (*pcap == NULL) {
		PMD_LOG(ERR, "Couldn't open %s: %s", pcap_filename,
			errbuf);
		return -1;
	}

	return 0;
}

static uint64_t
count_packets_in_pcap(pcap_t **pcap, struct pcap_rx_queue *pcap_q)
{
	const u_char *packet;
	struct pcap_pkthdr header;
	uint64_t pcap_pkt_count = 0;

	while ((packet = pcap_next(*pcap, &header)))
		pcap_pkt_count++;

	/* The pcap is reopened so it can be used as normal later. */
	pcap_close(*pcap);
	*pcap = NULL;
	open_single_rx_pcap(pcap_q->name, pcap);

	return pcap_pkt_count;
}
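
/*
 * Editorial note: the counting loop above consumes the offline handle
 * (pcap_next() cannot rewind a savefile), which is why the file is closed
 * and reopened before returning.  If the reopen fails, *pcap stays NULL;
 * the ring-fill loop in eth_rx_queue_setup() then reads nothing and queue
 * setup fails with -EINVAL, albeit with a misleading "not enough mbufs"
 * message.
 */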

static int
eth_dev_start(struct rte_eth_dev *dev)
{
	unsigned int i;
	struct pmd_internals *internals = dev->data->dev_private;
	struct pmd_process_private *pp = dev->process_private;
	struct pcap_tx_queue *tx;
	struct pcap_rx_queue *rx;

	/* Special iface case. Single pcap is open and shared between tx/rx. */
	if (internals->single_iface) {
		tx = &internals->tx_queue[0];
		rx = &internals->rx_queue[0];

		if (!pp->tx_pcap[0] &&
			strcmp(tx->type, ETH_PCAP_IFACE_ARG) == 0) {
			if (open_single_iface(tx->name, &pp->tx_pcap[0]) < 0)
				return -1;
			pp->rx_pcap[0] = pp->tx_pcap[0];
		}

		goto status_up;
	}

	/* If not open already, open tx pcaps/dumpers */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		tx = &internals->tx_queue[i];

		if (!pp->tx_dumper[i] &&
				strcmp(tx->type, ETH_PCAP_TX_PCAP_ARG) == 0) {
			if (open_single_tx_pcap(tx->name,
				&pp->tx_dumper[i]) < 0)
				return -1;
		} else if (!pp->tx_pcap[i] &&
				strcmp(tx->type, ETH_PCAP_TX_IFACE_ARG) == 0) {
			if (open_single_iface(tx->name, &pp->tx_pcap[i]) < 0)
				return -1;
		}
	}

	/* If not open already, open rx pcaps */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rx = &internals->rx_queue[i];

		if (pp->rx_pcap[i] != NULL)
			continue;

		if (strcmp(rx->type, ETH_PCAP_RX_PCAP_ARG) == 0) {
			if (open_single_rx_pcap(rx->name, &pp->rx_pcap[i]) < 0)
				return -1;
		} else if (strcmp(rx->type, ETH_PCAP_RX_IFACE_ARG) == 0) {
			if (open_single_iface(rx->name, &pp->rx_pcap[i]) < 0)
				return -1;
		}
	}

status_up:
	for (i = 0; i < dev->data->nb_rx_queues; i++)
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;

	for (i = 0; i < dev->data->nb_tx_queues; i++)
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;

	dev->data->dev_link.link_status = RTE_ETH_LINK_UP;

	return 0;
}

/*
 * This function gets called when the current port gets stopped. It is the
 * only place where we can close all the tx stream dumpers. If it is not
 * called, the dumpers are still flushed within each tx burst.
 */
static int
eth_dev_stop(struct rte_eth_dev *dev)
{
	unsigned int i;
	struct pmd_internals *internals = dev->data->dev_private;
	struct pmd_process_private *pp = dev->process_private;

	/* Special iface case. Single pcap is open and shared between tx/rx. */
	if (internals->single_iface) {
		queue_missed_stat_on_stop_update(dev, 0);
		if (pp->tx_pcap[0] != NULL) {
			pcap_close(pp->tx_pcap[0]);
			pp->tx_pcap[0] = NULL;
			pp->rx_pcap[0] = NULL;
		}
		goto status_down;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		if (pp->tx_dumper[i] != NULL) {
			pcap_dump_close(pp->tx_dumper[i]);
			pp->tx_dumper[i] = NULL;
		}

		if (pp->tx_pcap[i] != NULL) {
			pcap_close(pp->tx_pcap[i]);
			pp->tx_pcap[i] = NULL;
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		if (pp->rx_pcap[i] != NULL) {
			queue_missed_stat_on_stop_update(dev, i);
			pcap_close(pp->rx_pcap[i]);
			pp->rx_pcap[i] = NULL;
		}
	}

status_down:
	for (i = 0; i < dev->data->nb_rx_queues; i++)
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

	for (i = 0; i < dev->data->nb_tx_queues; i++)
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

	dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;

	return 0;
}

static int
eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
{
	return 0;
}

static int
eth_dev_info(struct rte_eth_dev *dev,
		struct rte_eth_dev_info *dev_info)
{
	struct pmd_internals *internals = dev->data->dev_private;

	dev_info->if_index = internals->if_index;
	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t) -1;
	dev_info->max_rx_queues = dev->data->nb_rx_queues;
	dev_info->max_tx_queues = dev->data->nb_tx_queues;
	dev_info->min_rx_bufsize = 0;

	return 0;
}

static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	unsigned int i;
	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
	unsigned long rx_missed_total = 0;
	unsigned long rx_nombuf_total = 0, rx_err_total = 0;
	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
	unsigned long tx_packets_err_total = 0;
	const struct pmd_internals *internal = dev->data->dev_private;

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_rx_queues; i++) {
		stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
		stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
		rx_nombuf_total += internal->rx_queue[i].rx_stat.rx_nombuf;
		rx_err_total += internal->rx_queue[i].rx_stat.err_pkts;
		rx_packets_total += stats->q_ipackets[i];
		rx_bytes_total += stats->q_ibytes[i];
		rx_missed_total += queue_missed_stat_get(dev, i);
	}

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_tx_queues; i++) {
		stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
		stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
		tx_packets_total += stats->q_opackets[i];
		tx_bytes_total += stats->q_obytes[i];
		tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
	}

	stats->ipackets = rx_packets_total;
	stats->ibytes = rx_bytes_total;
	stats->imissed = rx_missed_total;
	stats->ierrors = rx_err_total;
	stats->rx_nombuf = rx_nombuf_total;
	stats->opackets = tx_packets_total;
	stats->obytes = tx_bytes_total;
	stats->oerrors = tx_packets_err_total;

	return 0;
}

static int
eth_stats_reset(struct rte_eth_dev *dev)
{
	unsigned int i;
	struct pmd_internals *internal = dev->data->dev_private;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		internal->rx_queue[i].rx_stat.pkts = 0;
		internal->rx_queue[i].rx_stat.bytes = 0;
		internal->rx_queue[i].rx_stat.err_pkts = 0;
		internal->rx_queue[i].rx_stat.rx_nombuf = 0;
		queue_missed_stat_reset(dev, i);
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		internal->tx_queue[i].tx_stat.pkts = 0;
		internal->tx_queue[i].tx_stat.bytes = 0;
		internal->tx_queue[i].tx_stat.err_pkts = 0;
	}

	return 0;
}

static inline void
infinite_rx_ring_free(struct rte_ring *pkts)
{
	struct rte_mbuf *bufs;

	while (!rte_ring_dequeue(pkts, (void **)&bufs))
		rte_pktmbuf_free(bufs);

	rte_ring_free(pkts);
}

static int
eth_dev_close(struct rte_eth_dev *dev)
{
	unsigned int i;
	struct pmd_internals *internals = dev->data->dev_private;

	PMD_LOG(INFO, "Closing pcap ethdev on NUMA socket %d",
			rte_socket_id());

	eth_dev_stop(dev);

	rte_free(dev->process_private);

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	/* Device wide flag, but cleanup must be performed per queue. */
	if (internals->infinite_rx) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			struct pcap_rx_queue *pcap_q = &internals->rx_queue[i];

			/*
			 * 'pcap_q->pkts' can be NULL if 'eth_dev_close()' is
			 * called before 'eth_rx_queue_setup()' has been called.
			 */
			if (pcap_q->pkts == NULL)
				continue;

			infinite_rx_ring_free(pcap_q->pkts);
		}
	}

	if (internals->phy_mac == 0)
		/* not dynamically allocated, must not be freed */
		dev->data->mac_addrs = NULL;

	return 0;
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
		int wait_to_complete __rte_unused)
{
	return 0;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev,
		uint16_t rx_queue_id,
		uint16_t nb_rx_desc __rte_unused,
		unsigned int socket_id __rte_unused,
		const struct rte_eth_rxconf *rx_conf __rte_unused,
		struct rte_mempool *mb_pool)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct pcap_rx_queue *pcap_q = &internals->rx_queue[rx_queue_id];

	pcap_q->mb_pool = mb_pool;
	pcap_q->port_id = dev->data->port_id;
	pcap_q->queue_id = rx_queue_id;
	dev->data->rx_queues[rx_queue_id] = pcap_q;

	if (internals->infinite_rx) {
		struct pmd_process_private *pp;
		char ring_name[RTE_RING_NAMESIZE];
		static uint32_t ring_number;
		uint64_t pcap_pkt_count = 0;
		struct rte_mbuf *bufs[1];
		pcap_t **pcap;

		pp = rte_eth_devices[pcap_q->port_id].process_private;
		pcap = &pp->rx_pcap[pcap_q->queue_id];

		if (unlikely(*pcap == NULL))
			return -ENOENT;

		pcap_pkt_count = count_packets_in_pcap(pcap, pcap_q);

		snprintf(ring_name, sizeof(ring_name), "PCAP_RING%" PRIu32,
				ring_number);

		pcap_q->pkts = rte_ring_create(ring_name,
				rte_align64pow2(pcap_pkt_count + 1), 0,
				RING_F_SP_ENQ | RING_F_SC_DEQ);
		ring_number++;
		if (!pcap_q->pkts)
			return -ENOENT;

		/* Fill ring with packets from PCAP file one by one. */
		while (eth_pcap_rx(pcap_q, bufs, 1)) {
			/* Check for multiseg mbufs. */
			if (bufs[0]->nb_segs != 1) {
				/* Free the mbuf that was not enqueued. */
				rte_pktmbuf_free(bufs[0]);
				infinite_rx_ring_free(pcap_q->pkts);
				PMD_LOG(ERR,
					"Multiseg mbufs are not supported in infinite_rx mode.");
				return -EINVAL;
			}

			rte_ring_enqueue_bulk(pcap_q->pkts,
					(void * const *)bufs, 1, NULL);
		}

		if (rte_ring_count(pcap_q->pkts) < pcap_pkt_count) {
			infinite_rx_ring_free(pcap_q->pkts);
			PMD_LOG(ERR,
				"Not enough mbufs to accommodate packets in pcap file. "
				"At least %" PRIu64 " mbufs per queue are required.",
				pcap_pkt_count);
			return -EINVAL;
		}

		/*
		 * Reset the stats for this queue since eth_pcap_rx calls above
		 * didn't result in the application receiving packets.
		 */
		pcap_q->rx_stat.pkts = 0;
		pcap_q->rx_stat.bytes = 0;
	}

	return 0;
}
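
/*
 * Ring sizing example (editorial, illustrative): a file with 1000 packets
 * yields rte_align64pow2(1000 + 1) = 1024 ring slots.  A default
 * (non-exact-size) rte_ring of size N holds at most N - 1 entries, so the
 * "+ 1" guarantees room for every packet even when the packet count is
 * already a power of two.
 */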

static int
eth_tx_queue_setup(struct rte_eth_dev *dev,
		uint16_t tx_queue_id,
		uint16_t nb_tx_desc __rte_unused,
		unsigned int socket_id __rte_unused,
		const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct pcap_tx_queue *pcap_q = &internals->tx_queue[tx_queue_id];

	pcap_q->port_id = dev->data->port_id;
	pcap_q->queue_id = tx_queue_id;
	dev->data->tx_queues[tx_queue_id] = pcap_q;

	return 0;
}

static int
eth_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;
}

static int
eth_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;
}

static int
eth_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

	return 0;
}

static int
eth_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

	return 0;
}

static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_start = eth_rx_queue_start,
	.tx_queue_start = eth_tx_queue_start,
	.rx_queue_stop = eth_rx_queue_stop,
	.tx_queue_stop = eth_tx_queue_stop,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
};

static int
add_queue(struct pmd_devargs *pmd, const char *name, const char *type,
		pcap_t *pcap, pcap_dumper_t *dumper)
{
	if (pmd->num_of_queue >= RTE_PMD_PCAP_MAX_QUEUES)
		return -1;
	if (pcap)
		pmd->queue[pmd->num_of_queue].pcap = pcap;
	if (dumper)
		pmd->queue[pmd->num_of_queue].dumper = dumper;
	pmd->queue[pmd->num_of_queue].name = name;
	pmd->queue[pmd->num_of_queue].type = type;
	pmd->num_of_queue++;
	return 0;
}

/*
 * Function handler that opens the pcap file for reading and stores a
 * reference to it for later use.
 */
static int
open_rx_pcap(const char *key, const char *value, void *extra_args)
{
	const char *pcap_filename = value;
	struct pmd_devargs *rx = extra_args;
	pcap_t *pcap = NULL;

	if (open_single_rx_pcap(pcap_filename, &pcap) < 0)
		return -1;

	if (add_queue(rx, pcap_filename, key, pcap, NULL) < 0) {
		pcap_close(pcap);
		return -1;
	}

	return 0;
}

/*
 * Opens a pcap file for writing and stores a reference to it
 * for later use.
 */
static int
open_tx_pcap(const char *key, const char *value, void *extra_args)
{
	const char *pcap_filename = value;
	struct pmd_devargs *dumpers = extra_args;
	pcap_dumper_t *dumper;

	if (open_single_tx_pcap(pcap_filename, &dumper) < 0)
		return -1;

	if (add_queue(dumpers, pcap_filename, key, NULL, dumper) < 0) {
		pcap_dump_close(dumper);
		return -1;
	}

	return 0;
}

/*
 * Opens an interface for reading and writing
 */
static inline int
open_rx_tx_iface(const char *key, const char *value, void *extra_args)
{
	const char *iface = value;
	struct pmd_devargs *tx = extra_args;
	pcap_t *pcap = NULL;

	if (open_single_iface(iface, &pcap) < 0)
		return -1;

	tx->queue[0].pcap = pcap;
	tx->queue[0].name = iface;
	tx->queue[0].type = key;

	return 0;
}

static inline int
set_iface_direction(const char *iface, pcap_t *pcap,
		pcap_direction_t direction)
{
	const char *direction_str = (direction == PCAP_D_IN) ? "IN" : "OUT";
	if (pcap_setdirection(pcap, direction) < 0) {
		PMD_LOG(ERR, "Setting %s pcap direction %s failed - %s",
				iface, direction_str, pcap_geterr(pcap));
		return -1;
	}
	PMD_LOG(INFO, "Setting %s pcap direction %s",
			iface, direction_str);
	return 0;
}

static inline int
open_iface(const char *key, const char *value, void *extra_args)
{
	const char *iface = value;
	struct pmd_devargs *pmd = extra_args;
	pcap_t *pcap = NULL;

	if (open_single_iface(iface, &pcap) < 0)
		return -1;
	if (add_queue(pmd, iface, key, pcap, NULL) < 0) {
		pcap_close(pcap);
		return -1;
	}

	return 0;
}

/*
 * Opens a NIC for reading packets from it
 */
static inline int
open_rx_iface(const char *key, const char *value, void *extra_args)
{
	int ret = open_iface(key, value, extra_args);
	if (ret < 0)
		return ret;
	if (strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0) {
		struct pmd_devargs *pmd = extra_args;
		unsigned int qid = pmd->num_of_queue - 1;

		set_iface_direction(pmd->queue[qid].name,
				pmd->queue[qid].pcap,
				PCAP_D_IN);
	}

	return 0;
}

static inline int
rx_iface_args_process(const char *key, const char *value, void *extra_args)
{
	if (strcmp(key, ETH_PCAP_RX_IFACE_ARG) == 0 ||
			strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0)
		return open_rx_iface(key, value, extra_args);

	return 0;
}

/*
 * Opens a NIC for writing packets to it
 */
static int
open_tx_iface(const char *key, const char *value, void *extra_args)
{
	return open_iface(key, value, extra_args);
}

static int
select_phy_mac(const char *key __rte_unused, const char *value,
		void *extra_args)
{
	if (extra_args) {
		const int phy_mac = atoi(value);
		int *enable_phy_mac = extra_args;

		if (phy_mac)
			*enable_phy_mac = 1;
	}
	return 0;
}

static int
get_infinite_rx_arg(const char *key __rte_unused,
		const char *value, void *extra_args)
{
	if (extra_args) {
		const int infinite_rx = atoi(value);
		int *enable_infinite_rx = extra_args;

		if (infinite_rx > 0)
			*enable_infinite_rx = 1;
	}
	return 0;
}

static int
pmd_init_internals(struct rte_vdev_device *vdev,
		const unsigned int nb_rx_queues,
		const unsigned int nb_tx_queues,
		struct pmd_internals **internals,
		struct rte_eth_dev **eth_dev)
{
	struct rte_eth_dev_data *data;
	struct pmd_process_private *pp;
	unsigned int numa_node = vdev->device.numa_node;

	PMD_LOG(INFO, "Creating pcap-backed ethdev on numa socket %d",
		numa_node);

	pp = (struct pmd_process_private *)
		rte_zmalloc(NULL, sizeof(struct pmd_process_private),
				RTE_CACHE_LINE_SIZE);

	if (pp == NULL) {
		PMD_LOG(ERR,
			"Failed to allocate memory for process private");
		return -1;
	}

	/* reserve an ethdev entry */
	*eth_dev = rte_eth_vdev_allocate(vdev, sizeof(**internals));
	if (!(*eth_dev)) {
		rte_free(pp);
		return -1;
	}
	(*eth_dev)->process_private = pp;
	/* now put it all together
	 * - store queue data in internals,
	 * - store numa_node info in eth_dev
	 * - point eth_dev_data to internals
	 * - and point eth_dev structure to new eth_dev_data structure
	 */
	*internals = (*eth_dev)->data->dev_private;
	/*
	 * Interface MAC = 02:70:63:61:70:<iface_idx>
	 * derived from: 'locally administered':'p':'c':'a':'p':'iface_idx'
	 * where the middle 4 characters are converted to hex.
	 */
	(*internals)->eth_addr = (struct rte_ether_addr) {
		.addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
	};
	(*internals)->phy_mac = 0;
	data = (*eth_dev)->data;
	data->nb_rx_queues = (uint16_t)nb_rx_queues;
	data->nb_tx_queues = (uint16_t)nb_tx_queues;
	data->dev_link = pmd_link;
	data->mac_addrs = &(*internals)->eth_addr;
	data->promiscuous = 1;
	data->all_multicast = 1;
	data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;

	/*
	 * NOTE: we'll replace the data element of the originally allocated
	 * eth_dev so the rings are local per-process
	 */
	(*eth_dev)->dev_ops = &ops;

	strlcpy((*internals)->devargs, rte_vdev_device_args(vdev),
			ETH_PCAP_ARG_MAXLEN);

	return 0;
}
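
/*
 * Example of the generated address (editorial, illustrative): the first
 * pcap vdev in a process (iface_idx == 0) gets MAC 02:70:63:61:70:00; the
 * middle four bytes are simply ASCII 'p' 'c' 'a' 'p' (0x70 0x63 0x61
 * 0x70) and the leading 0x02 marks it as locally administered.
 */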

static int
eth_pcap_update_mac(const char *if_name, struct rte_eth_dev *eth_dev,
		const unsigned int numa_node)
{
	void *mac_addrs;
	struct rte_ether_addr mac;

	if (osdep_iface_mac_get(if_name, &mac) < 0)
		return -1;

	mac_addrs = rte_zmalloc_socket(NULL, RTE_ETHER_ADDR_LEN, 0, numa_node);
	if (mac_addrs == NULL)
		return -1;

	PMD_LOG(INFO, "Setting phy MAC for %s", if_name);
	rte_memcpy(mac_addrs, mac.addr_bytes, RTE_ETHER_ADDR_LEN);
	eth_dev->data->mac_addrs = mac_addrs;
	return 0;
}

static int
eth_from_pcaps_common(struct rte_vdev_device *vdev,
		struct pmd_devargs_all *devargs_all,
		struct pmd_internals **internals, struct rte_eth_dev **eth_dev)
{
	struct pmd_process_private *pp;
	struct pmd_devargs *rx_queues = &devargs_all->rx_queues;
	struct pmd_devargs *tx_queues = &devargs_all->tx_queues;
	const unsigned int nb_rx_queues = rx_queues->num_of_queue;
	const unsigned int nb_tx_queues = tx_queues->num_of_queue;
	unsigned int i;

	if (pmd_init_internals(vdev, nb_rx_queues, nb_tx_queues, internals,
			eth_dev) < 0)
		return -1;

	pp = (*eth_dev)->process_private;
	for (i = 0; i < nb_rx_queues; i++) {
		struct pcap_rx_queue *rx = &(*internals)->rx_queue[i];
		struct devargs_queue *queue = &rx_queues->queue[i];

		pp->rx_pcap[i] = queue->pcap;
		strlcpy(rx->name, queue->name, sizeof(rx->name));
		strlcpy(rx->type, queue->type, sizeof(rx->type));
	}

	for (i = 0; i < nb_tx_queues; i++) {
		struct pcap_tx_queue *tx = &(*internals)->tx_queue[i];
		struct devargs_queue *queue = &tx_queues->queue[i];

		pp->tx_dumper[i] = queue->dumper;
		pp->tx_pcap[i] = queue->pcap;
		strlcpy(tx->name, queue->name, sizeof(tx->name));
		strlcpy(tx->type, queue->type, sizeof(tx->type));
	}

	return 0;
}

static int
eth_from_pcaps(struct rte_vdev_device *vdev,
		struct pmd_devargs_all *devargs_all)
{
	struct pmd_internals *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct pmd_devargs *rx_queues = &devargs_all->rx_queues;
	int single_iface = devargs_all->single_iface;
	unsigned int infinite_rx = devargs_all->infinite_rx;
	int ret;

	ret = eth_from_pcaps_common(vdev, devargs_all, &internals, &eth_dev);

	if (ret < 0)
		return ret;

	/* store whether we are using a single interface for rx/tx or not */
	internals->single_iface = single_iface;

	if (single_iface) {
		internals->if_index =
			osdep_iface_index_get(rx_queues->queue[0].name);

		/* phy_mac arg is applied only if "iface" devarg is provided */
		if (rx_queues->phy_mac) {
			if (eth_pcap_update_mac(rx_queues->queue[0].name,
					eth_dev, vdev->device.numa_node) == 0)
				internals->phy_mac = 1;
		}
	}

	internals->infinite_rx = infinite_rx;
	/* Assign rx ops. */
	if (infinite_rx)
		eth_dev->rx_pkt_burst = eth_pcap_rx_infinite;
	else if (devargs_all->is_rx_pcap || devargs_all->is_rx_iface ||
			single_iface)
		eth_dev->rx_pkt_burst = eth_pcap_rx;
	else
		eth_dev->rx_pkt_burst = eth_null_rx;

	/* Assign tx ops. */
	if (devargs_all->is_tx_pcap)
		eth_dev->tx_pkt_burst = eth_pcap_tx_dumper;
	else if (devargs_all->is_tx_iface || single_iface)
		eth_dev->tx_pkt_burst = eth_pcap_tx;
	else
		eth_dev->tx_pkt_burst = eth_tx_drop;

	rte_eth_dev_probing_finish(eth_dev);
	return 0;
}

static void
eth_release_pcaps(struct pmd_devargs *pcaps,
		struct pmd_devargs *dumpers,
		int single_iface)
{
	unsigned int i;

	if (single_iface) {
		if (pcaps->queue[0].pcap)
			pcap_close(pcaps->queue[0].pcap);
		return;
	}

	for (i = 0; i < dumpers->num_of_queue; i++) {
		if (dumpers->queue[i].dumper)
			pcap_dump_close(dumpers->queue[i].dumper);

		if (dumpers->queue[i].pcap)
			pcap_close(dumpers->queue[i].pcap);
	}

	for (i = 0; i < pcaps->num_of_queue; i++) {
		if (pcaps->queue[i].pcap)
			pcap_close(pcaps->queue[i].pcap);
	}
}

static int
pmd_pcap_probe(struct rte_vdev_device *dev)
{
	const char *name;
	struct rte_kvargs *kvlist;
	struct pmd_devargs pcaps = {0};
	struct pmd_devargs dumpers = {0};
	struct rte_eth_dev *eth_dev = NULL;
	struct pmd_internals *internal;
	int ret = 0;

	struct pmd_devargs_all devargs_all = {
		.single_iface = 0,
		.is_tx_pcap = 0,
		.is_tx_iface = 0,
		.infinite_rx = 0,
	};

	name = rte_vdev_device_name(dev);
	PMD_LOG(INFO, "Initializing pmd_pcap for %s", name);

	timespec_get(&start_time, TIME_UTC);
	start_cycles = rte_get_timer_cycles();
	hz = rte_get_timer_hz();

	ret = rte_mbuf_dyn_rx_timestamp_register(&timestamp_dynfield_offset,
			&timestamp_rx_dynflag);
	if (ret != 0) {
		PMD_LOG(ERR, "Failed to register Rx timestamp field/flag");
		return -1;
	}

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			PMD_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}

		internal = eth_dev->data->dev_private;

		kvlist = rte_kvargs_parse(internal->devargs, valid_arguments);
		if (kvlist == NULL)
			return -1;
	} else {
		kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
				valid_arguments);
		if (kvlist == NULL)
			return -1;
	}

	/*
	 * If the iface argument is passed, we open the NICs and use them for
	 * reading and writing
	 */
	if (rte_kvargs_count(kvlist, ETH_PCAP_IFACE_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_PCAP_IFACE_ARG,
				&open_rx_tx_iface, &pcaps);
		if (ret < 0)
			goto free_kvlist;

		dumpers.queue[0] = pcaps.queue[0];

		ret = rte_kvargs_process(kvlist, ETH_PCAP_PHY_MAC_ARG,
				&select_phy_mac, &pcaps.phy_mac);
		if (ret < 0)
			goto free_kvlist;

		dumpers.phy_mac = pcaps.phy_mac;

		devargs_all.single_iface = 1;
		pcaps.num_of_queue = 1;
		dumpers.num_of_queue = 1;

		goto create_eth;
	}

	/*
	 * We check whether we want to open a RX stream from a real NIC, a
	 * pcap file or open a dummy RX stream
	 */
	devargs_all.is_rx_pcap =
		rte_kvargs_count(kvlist, ETH_PCAP_RX_PCAP_ARG) ? 1 : 0;
	devargs_all.is_rx_iface =
		(rte_kvargs_count(kvlist, ETH_PCAP_RX_IFACE_ARG) +
		 rte_kvargs_count(kvlist, ETH_PCAP_RX_IFACE_IN_ARG)) ? 1 : 0;
	pcaps.num_of_queue = 0;

	devargs_all.is_tx_pcap =
		rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) ? 1 : 0;
	devargs_all.is_tx_iface =
		rte_kvargs_count(kvlist, ETH_PCAP_TX_IFACE_ARG) ? 1 : 0;
	dumpers.num_of_queue = 0;

	if (devargs_all.is_rx_pcap) {
		/*
		 * We check whether we want to infinitely rx the pcap file.
		 */
		unsigned int infinite_rx_arg_cnt = rte_kvargs_count(kvlist,
				ETH_PCAP_INFINITE_RX_ARG);

		if (infinite_rx_arg_cnt == 1) {
			ret = rte_kvargs_process(kvlist,
					ETH_PCAP_INFINITE_RX_ARG,
					&get_infinite_rx_arg,
					&devargs_all.infinite_rx);
			if (ret < 0)
				goto free_kvlist;
			PMD_LOG(INFO, "infinite_rx has been %s for %s",
					devargs_all.infinite_rx ? "enabled" : "disabled",
					name);

		} else if (infinite_rx_arg_cnt > 1) {
			PMD_LOG(WARNING, "infinite_rx has not been enabled since the "
					"argument has been provided more than once "
					"for %s", name);
		}

		ret = rte_kvargs_process(kvlist, ETH_PCAP_RX_PCAP_ARG,
				&open_rx_pcap, &pcaps);
	} else if (devargs_all.is_rx_iface) {
		ret = rte_kvargs_process(kvlist, NULL,
				&rx_iface_args_process, &pcaps);
	} else if (devargs_all.is_tx_iface || devargs_all.is_tx_pcap) {
		unsigned int i;

		/* Count number of tx queue args passed before dummy rx queue
		 * creation so a dummy rx queue can be created for each tx queue
		 */
		unsigned int num_tx_queues =
			(rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) +
			rte_kvargs_count(kvlist, ETH_PCAP_TX_IFACE_ARG));

		PMD_LOG(INFO, "Creating null rx queue since no rx queues were provided.");

		/* Creating a dummy rx queue for each tx queue passed */
		for (i = 0; i < num_tx_queues; i++)
			ret = add_queue(&pcaps, "dummy_rx", "rx_null", NULL,
					NULL);
	} else {
		PMD_LOG(ERR, "Error - No rx or tx queues provided");
		ret = -ENOENT;
	}
	if (ret < 0)
		goto free_kvlist;

	/*
	 * We check whether we want to open a TX stream to a real NIC,
	 * a pcap file, or drop packets on tx
	 */
	if (devargs_all.is_tx_pcap) {
		ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_PCAP_ARG,
				&open_tx_pcap, &dumpers);
	} else if (devargs_all.is_tx_iface) {
		ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_IFACE_ARG,
				&open_tx_iface, &dumpers);
	} else {
		unsigned int i;

		PMD_LOG(INFO, "Dropping packets on tx since no tx queues were provided.");

		/* Add 1 dummy queue per rxq which counts and drops packets. */
		for (i = 0; i < pcaps.num_of_queue; i++)
			ret = add_queue(&dumpers, "dummy_tx", "tx_drop", NULL,
					NULL);
	}

	if (ret < 0)
		goto free_kvlist;

create_eth:
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		struct pmd_process_private *pp;
		unsigned int i;

		internal = eth_dev->data->dev_private;
		pp = (struct pmd_process_private *)
			rte_zmalloc(NULL,
				sizeof(struct pmd_process_private),
				RTE_CACHE_LINE_SIZE);

		if (pp == NULL) {
			PMD_LOG(ERR,
				"Failed to allocate memory for process private");
			ret = -1;
			goto free_kvlist;
		}

		eth_dev->dev_ops = &ops;
		eth_dev->device = &dev->device;

		/* setup process private */
		for (i = 0; i < pcaps.num_of_queue; i++)
			pp->rx_pcap[i] = pcaps.queue[i].pcap;

		for (i = 0; i < dumpers.num_of_queue; i++) {
			pp->tx_dumper[i] = dumpers.queue[i].dumper;
			pp->tx_pcap[i] = dumpers.queue[i].pcap;
		}

		eth_dev->process_private = pp;
		eth_dev->rx_pkt_burst = eth_pcap_rx;
		if (devargs_all.is_tx_pcap)
			eth_dev->tx_pkt_burst = eth_pcap_tx_dumper;
		else
			eth_dev->tx_pkt_burst = eth_pcap_tx;

		rte_eth_dev_probing_finish(eth_dev);
		goto free_kvlist;
	}

	devargs_all.rx_queues = pcaps;
	devargs_all.tx_queues = dumpers;

	ret = eth_from_pcaps(dev, &devargs_all);

free_kvlist:
	rte_kvargs_free(kvlist);

	if (ret < 0)
		eth_release_pcaps(&pcaps, &dumpers, devargs_all.single_iface);

	return ret;
}

static int
pmd_pcap_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev = NULL;

	if (!dev)
		return -1;

	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
	if (eth_dev == NULL)
		return 0; /* port already released */

	eth_dev_close(eth_dev);
	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_pcap_drv = {
	.probe = pmd_pcap_probe,
	.remove = pmd_pcap_remove,
};

RTE_PMD_REGISTER_VDEV(net_pcap, pmd_pcap_drv);
RTE_PMD_REGISTER_ALIAS(net_pcap, eth_pcap);
RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
	ETH_PCAP_RX_PCAP_ARG "=<string> "
	ETH_PCAP_TX_PCAP_ARG "=<string> "
	ETH_PCAP_RX_IFACE_ARG "=<ifc> "
	ETH_PCAP_RX_IFACE_IN_ARG "=<ifc> "
	ETH_PCAP_TX_IFACE_ARG "=<ifc> "
	ETH_PCAP_IFACE_ARG "=<ifc> "
	ETH_PCAP_PHY_MAC_ARG "=<int> "
	ETH_PCAP_INFINITE_RX_ARG "=<0|1>");
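
/*
 * Usage sketch (editorial, not part of the driver): a typical testpmd
 * invocation replays one file and records the reverse direction:
 *
 *   dpdk-testpmd --vdev 'net_pcap0,rx_pcap=input.pcap,tx_pcap=output.pcap'
 *
 * while 'net_pcap0,iface=eth0,phy_mac=1' binds both directions of one
 * kernel interface and reuses its real MAC address.  The file names here
 * are placeholders; see the pcap PMD guide for the full devargs reference.
 */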