xref: /dpdk/lib/pcapng/rte_pcapng.c (revision bc84182d6ae809d131467516056c7ea98bb4d961)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019 Microsoft Corporation
3  */
4 
5 #include <errno.h>
6 #include <net/if.h>
7 #include <stdbool.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <sys/uio.h>
12 #include <time.h>
13 #include <unistd.h>
14 
15 #include <bus_driver.h>
16 #include <rte_common.h>
17 #include <rte_cycles.h>
18 #include <dev_driver.h>
19 #include <rte_errno.h>
20 #include <rte_ethdev.h>
21 #include <rte_ether.h>
22 #include <rte_mbuf.h>
23 #include <rte_pcapng.h>
24 #include <rte_reciprocal.h>
25 #include <rte_time.h>
26 
27 #include "pcapng_proto.h"
28 
/*
 * Multiplier applied to a DPDK link speed (reported in Mbps) before it
 * is stored in the PCAPNG interface speed option.
 */
#define PCAPNG_MBPS_SPEED (1000ull * 1000ull)
31 
32 /* Format of the capture file handle */
33 struct rte_pcapng {
34 	int  outfd;		/* output file */
35 
36 	unsigned int ports;	/* number of interfaces added */
37 
38 	/* DPDK port id to interface index in file */
39 	uint32_t port_index[RTE_MAX_ETHPORTS];
40 };
41 
42 /* For converting TSC cycles to PCAPNG ns format */
43 static struct pcapng_time {
44 	uint64_t ns;
45 	uint64_t cycles;
46 	uint64_t tsc_hz;
47 	struct rte_reciprocal_u64 tsc_hz_inverse;
48 } pcapng_time;
49 
50 static inline void
51 pcapng_init(void)
52 {
53 	struct timespec ts;
54 
55 	pcapng_time.cycles = rte_get_tsc_cycles();
56 	clock_gettime(CLOCK_REALTIME, &ts);
57 	pcapng_time.cycles = (pcapng_time.cycles + rte_get_tsc_cycles()) / 2;
58 	pcapng_time.ns = rte_timespec_to_ns(&ts);
59 
60 	pcapng_time.tsc_hz = rte_get_tsc_hz();
61 	pcapng_time.tsc_hz_inverse = rte_reciprocal_value_u64(pcapng_time.tsc_hz);
62 }
63 
64 /* PCAPNG timestamps are in nanoseconds */
65 static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
66 {
67 	uint64_t delta, secs;
68 
69 	if (!pcapng_time.tsc_hz)
70 		pcapng_init();
71 
72 	/* In essence the calculation is:
73 	 *   delta = (cycles - pcapng_time.cycles) * NSEC_PRE_SEC / rte_get_tsc_hz()
74 	 * but this overflows within 4 to 8 seconds depending on TSC frequency.
75 	 * Instead, if delta >= pcapng_time.tsc_hz:
76 	 *   Increase pcapng_time.ns and pcapng_time.cycles by the number of
77 	 *   whole seconds in delta and reduce delta accordingly.
78 	 * delta will therefore always lie in the interval [0, pcapng_time.tsc_hz),
79 	 * which will not overflow when multiplied by NSEC_PER_SEC provided the
80 	 * TSC frequency < approx 18.4GHz.
81 	 *
82 	 * Currently all TSCs operate below 5GHz.
83 	 */
84 	delta = cycles - pcapng_time.cycles;
85 	if (unlikely(delta >= pcapng_time.tsc_hz)) {
86 		if (likely(delta < pcapng_time.tsc_hz * 2)) {
87 			delta -= pcapng_time.tsc_hz;
88 			pcapng_time.cycles += pcapng_time.tsc_hz;
89 			pcapng_time.ns += NSEC_PER_SEC;
90 		} else {
91 			secs = rte_reciprocal_divide_u64(delta, &pcapng_time.tsc_hz_inverse);
92 			delta -= secs * pcapng_time.tsc_hz;
93 			pcapng_time.cycles += secs * pcapng_time.tsc_hz;
94 			pcapng_time.ns += secs * NSEC_PER_SEC;
95 		}
96 	}
97 
98 	return pcapng_time.ns + rte_reciprocal_divide_u64(delta * NSEC_PER_SEC,
99 							  &pcapng_time.tsc_hz_inverse);
100 }
101 
102 /* length of option including padding */
103 static uint16_t pcapng_optlen(uint16_t len)
104 {
105 	return RTE_ALIGN(sizeof(struct pcapng_option) + len,
106 			 sizeof(uint32_t));
107 }
108 
109 /* build TLV option and return location of next */
110 static struct pcapng_option *
111 pcapng_add_option(struct pcapng_option *popt, uint16_t code,
112 		  const void *data, uint16_t len)
113 {
114 	popt->code = code;
115 	popt->length = len;
116 	memcpy(popt->data, data, len);
117 
118 	return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len));
119 }
120 
121 /*
122  * Write required initial section header describing the capture
123  */
124 static int
125 pcapng_section_block(rte_pcapng_t *self,
126 		    const char *os, const char *hw,
127 		    const char *app, const char *comment)
128 {
129 	struct pcapng_section_header *hdr;
130 	struct pcapng_option *opt;
131 	void *buf;
132 	uint32_t len;
133 	ssize_t cc;
134 
135 	len = sizeof(*hdr);
136 	if (hw)
137 		len += pcapng_optlen(strlen(hw));
138 	if (os)
139 		len += pcapng_optlen(strlen(os));
140 	if (app)
141 		len += pcapng_optlen(strlen(app));
142 	if (comment)
143 		len += pcapng_optlen(strlen(comment));
144 
145 	/* reserve space for OPT_END */
146 	len += pcapng_optlen(0);
147 	len += sizeof(uint32_t);
148 
149 	buf = calloc(1, len);
150 	if (!buf)
151 		return -1;
152 
153 	hdr = (struct pcapng_section_header *)buf;
154 	*hdr = (struct pcapng_section_header) {
155 		.block_type = PCAPNG_SECTION_BLOCK,
156 		.block_length = len,
157 		.byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC,
158 		.major_version = PCAPNG_MAJOR_VERS,
159 		.minor_version = PCAPNG_MINOR_VERS,
160 		.section_length = UINT64_MAX,
161 	};
162 
163 	/* After the section header insert variable length options. */
164 	opt = (struct pcapng_option *)(hdr + 1);
165 	if (comment)
166 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
167 					comment, strlen(comment));
168 	if (hw)
169 		opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE,
170 					hw, strlen(hw));
171 	if (os)
172 		opt = pcapng_add_option(opt, PCAPNG_SHB_OS,
173 					os, strlen(os));
174 	if (app)
175 		opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL,
176 					app, strlen(app));
177 
178 	/* The standard requires last option to be OPT_END */
179 	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
180 
181 	/* clone block_length after option */
182 	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
183 
184 	cc = write(self->outfd, buf, len);
185 	free(buf);
186 
187 	return cc;
188 }
189 
190 /* Write an interface block for a DPDK port */
191 int
192 rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port,
193 			 const char *ifname, const char *ifdescr,
194 			 const char *filter)
195 {
196 	struct pcapng_interface_block *hdr;
197 	struct rte_eth_dev_info dev_info;
198 	struct rte_ether_addr *ea, macaddr;
199 	const struct rte_device *dev;
200 	struct rte_eth_link link;
201 	struct pcapng_option *opt;
202 	const uint8_t tsresol = 9;	/* nanosecond resolution */
203 	uint32_t len;
204 	void *buf;
205 	char ifname_buf[IF_NAMESIZE];
206 	char ifhw[256];
207 	uint64_t speed = 0;
208 
209 	if (rte_eth_dev_info_get(port, &dev_info) < 0)
210 		return -1;
211 
212 	/* make something like an interface name */
213 	if (ifname == NULL) {
214 		/* Use kernel name if available */
215 		ifname = if_indextoname(dev_info.if_index, ifname_buf);
216 		if (ifname == NULL) {
217 			snprintf(ifname_buf, IF_NAMESIZE, "dpdk:%u", port);
218 			ifname = ifname_buf;
219 		}
220 	}
221 
222 	/* make a useful device hardware string */
223 	dev = dev_info.device;
224 	if (dev)
225 		snprintf(ifhw, sizeof(ifhw),
226 			 "%s-%s", dev->bus->name, dev->name);
227 
228 	/* DPDK reports in units of Mbps */
229 	if (rte_eth_link_get(port, &link) == 0 &&
230 	    link.link_status == RTE_ETH_LINK_UP)
231 		speed = link.link_speed * PCAPNG_MBPS_SPEED;
232 
233 	if (rte_eth_macaddr_get(port, &macaddr) < 0)
234 		ea = NULL;
235 	else
236 		ea = &macaddr;
237 
238 	/* Compute length of interface block options */
239 	len = sizeof(*hdr);
240 
241 	len += pcapng_optlen(sizeof(tsresol));	/* timestamp */
242 	len += pcapng_optlen(strlen(ifname));	/* ifname */
243 
244 	if (ifdescr)
245 		len += pcapng_optlen(strlen(ifdescr));
246 	if (ea)
247 		len += pcapng_optlen(RTE_ETHER_ADDR_LEN); /* macaddr */
248 	if (speed != 0)
249 		len += pcapng_optlen(sizeof(uint64_t));
250 	if (filter)
251 		len += pcapng_optlen(strlen(filter) + 1);
252 	if (dev)
253 		len += pcapng_optlen(strlen(ifhw));
254 
255 	len += pcapng_optlen(0);
256 	len += sizeof(uint32_t);
257 
258 	buf = alloca(len);
259 	if (!buf)
260 		return -1;
261 
262 	hdr = (struct pcapng_interface_block *)buf;
263 	*hdr = (struct pcapng_interface_block) {
264 		.block_type = PCAPNG_INTERFACE_BLOCK,
265 		.link_type = 1,		/* DLT_EN10MB - Ethernet */
266 		.block_length = len,
267 	};
268 
269 	opt = (struct pcapng_option *)(hdr + 1);
270 	opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL,
271 				&tsresol, sizeof(tsresol));
272 	opt = pcapng_add_option(opt, PCAPNG_IFB_NAME,
273 				ifname, strlen(ifname));
274 	if (ifdescr)
275 		opt = pcapng_add_option(opt, PCAPNG_IFB_DESCRIPTION,
276 					ifdescr, strlen(ifdescr));
277 	if (ea)
278 		opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR,
279 					ea, RTE_ETHER_ADDR_LEN);
280 	if (speed != 0)
281 		opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED,
282 					 &speed, sizeof(uint64_t));
283 	if (dev)
284 		opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE,
285 					 ifhw, strlen(ifhw));
286 	if (filter) {
287 		/* Encoding is that the first octet indicates string vs BPF */
288 		size_t len;
289 		char *buf;
290 
291 		len = strlen(filter) + 1;
292 		buf = alloca(len);
293 		*buf = '\0';
294 		memcpy(buf + 1, filter, len);
295 
296 		opt = pcapng_add_option(opt, PCAPNG_IFB_FILTER,
297 					buf, len);
298 	}
299 
300 	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
301 
302 	/* clone block_length after optionsa */
303 	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
304 
305 	/* remember the file index */
306 	self->port_index[port] = self->ports++;
307 
308 	return write(self->outfd, buf, len);
309 }
310 
311 /*
312  * Write an Interface statistics block at the end of capture.
313  */
314 ssize_t
315 rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
316 		       const char *comment,
317 		       uint64_t start_time, uint64_t end_time,
318 		       uint64_t ifrecv, uint64_t ifdrop)
319 {
320 	struct pcapng_statistics *hdr;
321 	struct pcapng_option *opt;
322 	uint32_t optlen, len;
323 	uint8_t *buf;
324 	uint64_t ns;
325 
326 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
327 
328 	optlen = 0;
329 
330 	if (ifrecv != UINT64_MAX)
331 		optlen += pcapng_optlen(sizeof(ifrecv));
332 	if (ifdrop != UINT64_MAX)
333 		optlen += pcapng_optlen(sizeof(ifdrop));
334 	if (start_time != 0)
335 		optlen += pcapng_optlen(sizeof(start_time));
336 	if (end_time != 0)
337 		optlen += pcapng_optlen(sizeof(end_time));
338 	if (comment)
339 		optlen += pcapng_optlen(strlen(comment));
340 	if (optlen != 0)
341 		optlen += pcapng_optlen(0);
342 
343 	len = sizeof(*hdr) + optlen + sizeof(uint32_t);
344 	buf = alloca(len);
345 	if (buf == NULL)
346 		return -1;
347 
348 	hdr = (struct pcapng_statistics *)buf;
349 	opt = (struct pcapng_option *)(hdr + 1);
350 
351 	if (comment)
352 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
353 					comment, strlen(comment));
354 	if (start_time != 0)
355 		opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME,
356 					 &start_time, sizeof(start_time));
357 	if (end_time != 0)
358 		opt = pcapng_add_option(opt, PCAPNG_ISB_ENDTIME,
359 					 &end_time, sizeof(end_time));
360 	if (ifrecv != UINT64_MAX)
361 		opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV,
362 				&ifrecv, sizeof(ifrecv));
363 	if (ifdrop != UINT64_MAX)
364 		opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP,
365 				&ifdrop, sizeof(ifdrop));
366 	if (optlen != 0)
367 		opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
368 
369 	hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK;
370 	hdr->block_length = len;
371 	hdr->interface_id = self->port_index[port_id];
372 
373 	ns = pcapng_tsc_to_ns(rte_get_tsc_cycles());
374 	hdr->timestamp_hi = ns >> 32;
375 	hdr->timestamp_lo = (uint32_t)ns;
376 
377 	/* clone block_length after option */
378 	memcpy(opt, &len, sizeof(uint32_t));
379 
380 	return write(self->outfd, buf, len);
381 }
382 
383 uint32_t
384 rte_pcapng_mbuf_size(uint32_t length)
385 {
386 	/* The VLAN and EPB header must fit in the mbuf headroom. */
387 	RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) +
388 		   sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM);
389 
390 	/* The flags and queue information are added at the end. */
391 	return sizeof(struct rte_mbuf)
392 		+ RTE_ALIGN(length, sizeof(uint32_t))
393 		+ pcapng_optlen(sizeof(uint32_t)) /* flag option */
394 		+ pcapng_optlen(sizeof(uint32_t)) /* queue option */
395 		+ sizeof(uint32_t);		  /*  length */
396 }
397 
398 /* More generalized version rte_vlan_insert() */
399 static int
400 pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci)
401 {
402 	struct rte_ether_hdr *nh, *oh;
403 	struct rte_vlan_hdr *vh;
404 
405 	if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1)
406 		return -EINVAL;
407 
408 	if (rte_pktmbuf_data_len(m) < sizeof(*oh))
409 		return -EINVAL;
410 
411 	oh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
412 	nh = (struct rte_ether_hdr *)
413 		rte_pktmbuf_prepend(m, sizeof(struct rte_vlan_hdr));
414 	if (nh == NULL)
415 		return -ENOSPC;
416 
417 	memmove(nh, oh, 2 * RTE_ETHER_ADDR_LEN);
418 	nh->ether_type = rte_cpu_to_be_16(ether_type);
419 
420 	vh = (struct rte_vlan_hdr *) (nh + 1);
421 	vh->vlan_tci = rte_cpu_to_be_16(tci);
422 
423 	return 0;
424 }
425 
426 /*
 *   The mbufs created use the Pcapng standard enhanced packet block.
428  *
429  *                         1                   2                   3
430  *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
431  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
432  *  0 |                    Block Type = 0x00000006                    |
433  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
434  *  4 |                      Block Total Length                       |
435  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
436  *  8 |                         Interface ID                          |
437  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
438  * 12 |                        Timestamp (High)                       |
439  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
440  * 16 |                        Timestamp (Low)                        |
441  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
442  * 20 |                    Captured Packet Length                     |
443  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
444  * 24 |                    Original Packet Length                     |
445  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
446  * 28 /                                                               /
447  *    /                          Packet Data                          /
448  *    /              variable length, padded to 32 bits               /
449  *    /                                                               /
450  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
451  *    |      Option Code = 0x0002     |     Option Length = 0x004     |
452  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
453  *    |              Flags (direction)                                |
454  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *    |      Option Code = 0x0006     |     Option Length = 0x004     |
456  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
457  *    |              Queue id                                         |
458  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
459  *    |                      Block Total Length                       |
460  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
461  */
462 
463 /* Make a copy of original mbuf with pcapng header and options */
464 struct rte_mbuf *
465 rte_pcapng_copy(uint16_t port_id, uint32_t queue,
466 		const struct rte_mbuf *md,
467 		struct rte_mempool *mp,
468 		uint32_t length, uint64_t cycles,
469 		enum rte_pcapng_direction direction,
470 		const char *comment)
471 {
472 	struct pcapng_enhance_packet_block *epb;
473 	uint32_t orig_len, data_len, padding, flags;
474 	struct pcapng_option *opt;
475 	uint16_t optlen;
476 	struct rte_mbuf *mc;
477 	uint64_t ns;
478 	bool rss_hash;
479 
480 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
481 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
482 #endif
483 	ns = pcapng_tsc_to_ns(cycles);
484 
485 	orig_len = rte_pktmbuf_pkt_len(md);
486 
487 	/* Take snapshot of the data */
488 	mc = rte_pktmbuf_copy(md, mp, 0, length);
489 	if (unlikely(mc == NULL))
490 		return NULL;
491 
492 	/* Expand any offloaded VLAN information */
493 	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
494 	     (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) ||
495 	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
496 	     (md->ol_flags & RTE_MBUF_F_TX_VLAN))) {
497 		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
498 				       md->vlan_tci) != 0)
499 			goto fail;
500 	}
501 
502 	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
503 	     (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) ||
504 	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
505 	     (md->ol_flags & RTE_MBUF_F_TX_QINQ))) {
506 		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
507 				       md->vlan_tci_outer) != 0)
508 			goto fail;
509 	}
510 
511 	/* record HASH on incoming packets */
512 	rss_hash = (direction == RTE_PCAPNG_DIRECTION_IN &&
513 		    (md->ol_flags & RTE_MBUF_F_RX_RSS_HASH));
514 
515 	/* pad the packet to 32 bit boundary */
516 	data_len = rte_pktmbuf_data_len(mc);
517 	padding = RTE_ALIGN(data_len, sizeof(uint32_t)) - data_len;
518 	if (padding > 0) {
519 		void *tail = rte_pktmbuf_append(mc, padding);
520 
521 		if (tail == NULL)
522 			goto fail;
523 		memset(tail, 0, padding);
524 	}
525 
526 	optlen = pcapng_optlen(sizeof(flags));
527 	optlen += pcapng_optlen(sizeof(queue));
528 	if (rss_hash)
529 		optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t));
530 
531 	if (comment)
532 		optlen += pcapng_optlen(strlen(comment));
533 
534 	/* reserve trailing options and block length */
535 	opt = (struct pcapng_option *)
536 		rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
537 	if (unlikely(opt == NULL))
538 		goto fail;
539 
540 	switch (direction) {
541 	case RTE_PCAPNG_DIRECTION_IN:
542 		flags = PCAPNG_IFB_INBOUND;
543 		break;
544 	case RTE_PCAPNG_DIRECTION_OUT:
545 		flags = PCAPNG_IFB_OUTBOUND;
546 		break;
547 	default:
548 		flags = 0;
549 	}
550 
551 	opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS,
552 				&flags, sizeof(flags));
553 
554 	opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE,
555 				&queue, sizeof(queue));
556 
557 	if (rss_hash) {
558 		uint8_t hash_opt[5];
559 
560 		/* The algorithm could be something else if
561 		 * using rte_flow_action_rss; but the current API does not
562 		 * have a way for ethdev to report  this on a per-packet basis.
563 		 */
564 		hash_opt[0] = PCAPNG_HASH_TOEPLITZ;
565 
566 		memcpy(&hash_opt[1], &md->hash.rss, sizeof(uint32_t));
567 		opt = pcapng_add_option(opt, PCAPNG_EPB_HASH,
568 					&hash_opt, sizeof(hash_opt));
569 	}
570 
571 	if (comment)
572 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, comment,
573 					strlen(comment));
574 
575 	/* Note: END_OPT necessary here. Wireshark doesn't do it. */
576 
577 	/* Add PCAPNG packet header */
578 	epb = (struct pcapng_enhance_packet_block *)
579 		rte_pktmbuf_prepend(mc, sizeof(*epb));
580 	if (unlikely(epb == NULL))
581 		goto fail;
582 
583 	epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
584 	epb->block_length = rte_pktmbuf_data_len(mc);
585 
586 	/* Interface index is filled in later during write */
587 	mc->port = port_id;
588 
589 	epb->timestamp_hi = ns >> 32;
590 	epb->timestamp_lo = (uint32_t)ns;
591 	epb->capture_length = data_len;
592 	epb->original_length = orig_len;
593 
594 	/* set trailer of block length */
595 	*(uint32_t *)opt = epb->block_length;
596 
597 	return mc;
598 
599 fail:
600 	rte_pktmbuf_free(mc);
601 	return NULL;
602 }
603 
604 /* Write pre-formatted packets to file. */
605 ssize_t
606 rte_pcapng_write_packets(rte_pcapng_t *self,
607 			 struct rte_mbuf *pkts[], uint16_t nb_pkts)
608 {
609 	struct iovec iov[IOV_MAX];
610 	unsigned int i, cnt = 0;
611 	ssize_t ret, total = 0;
612 
613 	for (i = 0; i < nb_pkts; i++) {
614 		struct rte_mbuf *m = pkts[i];
615 		struct pcapng_enhance_packet_block *epb;
616 
617 		/* sanity check that is really a pcapng mbuf */
618 		epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *);
619 		if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK ||
620 			     epb->block_length != rte_pktmbuf_data_len(m))) {
621 			rte_errno = EINVAL;
622 			return -1;
623 		}
624 
625 		/* check that this interface was added. */
626 		epb->interface_id = self->port_index[m->port];
627 		if (unlikely(epb->interface_id > RTE_MAX_ETHPORTS)) {
628 			rte_errno = EINVAL;
629 			return -1;
630 		}
631 
632 		/*
633 		 * Handle case of highly fragmented and large burst size
634 		 * Note: this assumes that max segments per mbuf < IOV_MAX
635 		 */
636 		if (unlikely(cnt + m->nb_segs >= IOV_MAX)) {
637 			ret = writev(self->outfd, iov, cnt);
638 			if (unlikely(ret < 0)) {
639 				rte_errno = errno;
640 				return -1;
641 			}
642 			total += ret;
643 			cnt = 0;
644 		}
645 
646 		/*
647 		 * The DPDK port is recorded during pcapng_copy.
648 		 * Map that to PCAPNG interface in file.
649 		 */
650 		do {
651 			iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *);
652 			iov[cnt].iov_len = rte_pktmbuf_data_len(m);
653 			++cnt;
654 		} while ((m = m->next));
655 	}
656 
657 	ret = writev(self->outfd, iov, cnt);
658 	if (unlikely(ret < 0)) {
659 		rte_errno = errno;
660 		return -1;
661 	}
662 	return total + ret;
663 }
664 
665 /* Create new pcapng writer handle */
666 rte_pcapng_t *
667 rte_pcapng_fdopen(int fd,
668 		  const char *osname, const char *hardware,
669 		  const char *appname, const char *comment)
670 {
671 	unsigned int i;
672 	rte_pcapng_t *self;
673 
674 	self = malloc(sizeof(*self));
675 	if (!self) {
676 		rte_errno = ENOMEM;
677 		return NULL;
678 	}
679 
680 	self->outfd = fd;
681 	self->ports = 0;
682 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
683 		self->port_index[i] = UINT32_MAX;
684 
685 	if (pcapng_section_block(self, osname, hardware, appname, comment) < 0)
686 		goto fail;
687 
688 	return self;
689 fail:
690 	free(self);
691 	return NULL;
692 }
693 
694 void
695 rte_pcapng_close(rte_pcapng_t *self)
696 {
697 	close(self->outfd);
698 	free(self);
699 }
700