xref: /dpdk/lib/pcapng/rte_pcapng.c (revision 3da59f30a23f2e795d2315f3d949e1b3e0ce0c3d)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019 Microsoft Corporation
3  */
4 
5 #include <errno.h>
6 #include <stdbool.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <time.h>
11 #include <unistd.h>
12 
13 #ifndef RTE_EXEC_ENV_WINDOWS
14 #include <net/if.h>
15 #include <sys/uio.h>
16 #endif
17 
18 #include <bus_driver.h>
19 #include <rte_common.h>
20 #include <rte_cycles.h>
21 #include <dev_driver.h>
22 #include <rte_errno.h>
23 #include <rte_ethdev.h>
24 #include <rte_ether.h>
25 #include <rte_mbuf.h>
26 #include <rte_os_shim.h>
27 #include <rte_pcapng.h>
28 #include <rte_reciprocal.h>
29 #include <rte_time.h>
30 
31 #include "pcapng_proto.h"
32 
33 /* conversion from DPDK speed to PCAPNG */
34 #define PCAPNG_MBPS_SPEED 1000000ull
35 
36 /* upper bound for section, stats and interface blocks */
37 #define PCAPNG_BLKSIZ	2048
38 
39 /* Format of the capture file handle */
40 struct rte_pcapng {
41 	int  outfd;		/* output file */
42 	unsigned int ports;	/* number of interfaces added */
43 	uint64_t offset_ns;	/* ns since 1/1/1970 when initialized */
44 	uint64_t tsc_base;	/* TSC when started */
45 
46 	/* DPDK port id to interface index in file */
47 	uint32_t port_index[RTE_MAX_ETHPORTS];
48 };
49 
50 #ifdef RTE_EXEC_ENV_WINDOWS
51 /*
52  * Windows does not have writev() call.
53  * Emulate this by copying to a new buffer.
54  * The copy is necessary since pcapng needs to be thread-safe
55  * and do atomic write operations.
56  */
57 
58 #define IOV_MAX 128
/* One scatter/gather element as consumed by the writev() emulation. */
struct iovec {
	void   *iov_base;	/* first byte of the segment */
	size_t  iov_len;	/* segment length in bytes */
};
63 
64 static ssize_t writev(int fd, const struct iovec *iov, int iovcnt)
65 {
66 	size_t bytes = 0;
67 	uint8_t *ptr;
68 	void *tmp_buf;
69 	ssize_t ret;
70 	int i;
71 
72 	for (i = 0; i < iovcnt; i++)
73 		bytes += iov[i].iov_len;
74 
75 	if (unlikely(bytes == 0))
76 		return 0;
77 
78 	tmp_buf = malloc(bytes);
79 	if (unlikely(tmp_buf == NULL)) {
80 		errno = ENOMEM;
81 		return -1;
82 	}
83 
84 	ptr = tmp_buf;
85 	for (i = 0; i < iovcnt; i++) {
86 		rte_memcpy(ptr, iov[i].iov_base, iov[i].iov_len);
87 		ptr += iov[i].iov_len;
88 	}
89 
90 	ret = write(fd, tmp_buf, bytes);
91 	free(tmp_buf);
92 	return ret;
93 }
94 
95 #define IF_NAMESIZE	16
96 /* compatibility wrapper because name is optional */
97 #define if_indextoname(ifindex, ifname) NULL
98 #endif
99 
100 /* Convert from TSC (CPU cycles) to nanoseconds */
101 static uint64_t
102 pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles)
103 {
104 	uint64_t delta, rem, secs, ns;
105 	const uint64_t hz = rte_get_tsc_hz();
106 
107 	delta = cycles - self->tsc_base;
108 
109 	/* Avoid numeric wraparound by computing seconds first */
110 	secs = delta / hz;
111 	rem = delta % hz;
112 	ns = (rem * NS_PER_S) / hz;
113 
114 	return secs * NS_PER_S + ns + self->offset_ns;
115 }
116 
117 /* length of option including padding */
118 static uint16_t pcapng_optlen(uint16_t len)
119 {
120 	return RTE_ALIGN(sizeof(struct pcapng_option) + len,
121 			 sizeof(uint32_t));
122 }
123 
124 /* build TLV option and return location of next */
125 static struct pcapng_option *
126 pcapng_add_option(struct pcapng_option *popt, uint16_t code,
127 		  const void *data, uint16_t len)
128 {
129 	popt->code = code;
130 	popt->length = len;
131 	memcpy(popt->data, data, len);
132 
133 	return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len));
134 }
135 
136 /*
137  * Write required initial section header describing the capture
138  */
139 static int
140 pcapng_section_block(rte_pcapng_t *self,
141 		    const char *os, const char *hw,
142 		    const char *app, const char *comment)
143 {
144 	struct pcapng_section_header *hdr;
145 	struct pcapng_option *opt;
146 	uint8_t buf[PCAPNG_BLKSIZ];
147 	uint32_t len;
148 
149 	len = sizeof(*hdr);
150 	if (hw)
151 		len += pcapng_optlen(strlen(hw));
152 	if (os)
153 		len += pcapng_optlen(strlen(os));
154 	if (app)
155 		len += pcapng_optlen(strlen(app));
156 	if (comment)
157 		len += pcapng_optlen(strlen(comment));
158 
159 	/* reserve space for OPT_END */
160 	len += pcapng_optlen(0);
161 	len += sizeof(uint32_t);
162 
163 	if (len > sizeof(buf))
164 		return -1;
165 
166 	hdr = (struct pcapng_section_header *)buf;
167 	*hdr = (struct pcapng_section_header) {
168 		.block_type = PCAPNG_SECTION_BLOCK,
169 		.block_length = len,
170 		.byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC,
171 		.major_version = PCAPNG_MAJOR_VERS,
172 		.minor_version = PCAPNG_MINOR_VERS,
173 		.section_length = UINT64_MAX,
174 	};
175 
176 	/* After the section header insert variable length options. */
177 	opt = (struct pcapng_option *)(hdr + 1);
178 	if (comment)
179 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
180 					comment, strlen(comment));
181 	if (hw)
182 		opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE,
183 					hw, strlen(hw));
184 	if (os)
185 		opt = pcapng_add_option(opt, PCAPNG_SHB_OS,
186 					os, strlen(os));
187 	if (app)
188 		opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL,
189 					app, strlen(app));
190 
191 	/* The standard requires last option to be OPT_END */
192 	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
193 
194 	/* clone block_length after option */
195 	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
196 
197 	return write(self->outfd, buf, len);
198 }
199 
200 /* Write an interface block for a DPDK port */
201 int
202 rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port,
203 			 const char *ifname, const char *ifdescr,
204 			 const char *filter)
205 {
206 	struct pcapng_interface_block *hdr;
207 	struct rte_eth_dev_info dev_info;
208 	struct rte_ether_addr *ea, macaddr;
209 	const struct rte_device *dev;
210 	struct rte_eth_link link;
211 	struct pcapng_option *opt;
212 	const uint8_t tsresol = 9;	/* nanosecond resolution */
213 	uint32_t len;
214 	uint8_t buf[PCAPNG_BLKSIZ];
215 	char ifname_buf[IF_NAMESIZE];
216 	char ifhw[256];
217 	uint64_t speed = 0;
218 
219 	if (rte_eth_dev_info_get(port, &dev_info) < 0)
220 		return -1;
221 
222 	/* make something like an interface name */
223 	if (ifname == NULL) {
224 		/* Use kernel name if available */
225 		ifname = if_indextoname(dev_info.if_index, ifname_buf);
226 		if (ifname == NULL) {
227 			snprintf(ifname_buf, IF_NAMESIZE, "dpdk:%u", port);
228 			ifname = ifname_buf;
229 		}
230 	}
231 
232 	/* make a useful device hardware string */
233 	dev = dev_info.device;
234 	if (dev)
235 		snprintf(ifhw, sizeof(ifhw),
236 			 "%s-%s", dev->bus->name, dev->name);
237 
238 	/* DPDK reports in units of Mbps */
239 	if (rte_eth_link_get(port, &link) == 0 &&
240 	    link.link_status == RTE_ETH_LINK_UP)
241 		speed = link.link_speed * PCAPNG_MBPS_SPEED;
242 
243 	if (rte_eth_macaddr_get(port, &macaddr) < 0)
244 		ea = NULL;
245 	else
246 		ea = &macaddr;
247 
248 	/* Compute length of interface block options */
249 	len = sizeof(*hdr);
250 
251 	len += pcapng_optlen(sizeof(tsresol));	/* timestamp */
252 	len += pcapng_optlen(strlen(ifname));	/* ifname */
253 
254 	if (ifdescr)
255 		len += pcapng_optlen(strlen(ifdescr));
256 	if (ea)
257 		len += pcapng_optlen(RTE_ETHER_ADDR_LEN); /* macaddr */
258 	if (speed != 0)
259 		len += pcapng_optlen(sizeof(uint64_t));
260 	if (filter)
261 		len += pcapng_optlen(strlen(filter) + 1);
262 	if (dev)
263 		len += pcapng_optlen(strlen(ifhw));
264 
265 	len += pcapng_optlen(0);
266 	len += sizeof(uint32_t);
267 
268 	if (len > sizeof(buf))
269 		return -1;
270 
271 	hdr = (struct pcapng_interface_block *)buf;
272 	*hdr = (struct pcapng_interface_block) {
273 		.block_type = PCAPNG_INTERFACE_BLOCK,
274 		.link_type = 1,		/* DLT_EN10MB - Ethernet */
275 		.block_length = len,
276 	};
277 
278 	opt = (struct pcapng_option *)(hdr + 1);
279 	opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL,
280 				&tsresol, sizeof(tsresol));
281 	opt = pcapng_add_option(opt, PCAPNG_IFB_NAME,
282 				ifname, strlen(ifname));
283 	if (ifdescr)
284 		opt = pcapng_add_option(opt, PCAPNG_IFB_DESCRIPTION,
285 					ifdescr, strlen(ifdescr));
286 	if (ea)
287 		opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR,
288 					ea, RTE_ETHER_ADDR_LEN);
289 	if (speed != 0)
290 		opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED,
291 					 &speed, sizeof(uint64_t));
292 	if (dev)
293 		opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE,
294 					 ifhw, strlen(ifhw));
295 	if (filter) {
296 		size_t len;
297 
298 		len = strlen(filter) + 1;
299 		opt->code = PCAPNG_IFB_FILTER;
300 		opt->length = len;
301 		/* Encoding is that the first octet indicates string vs BPF */
302 		opt->data[0] = 0;
303 		memcpy(opt->data + 1, filter, strlen(filter));
304 
305 		opt = (struct pcapng_option *)((uint8_t *)opt + pcapng_optlen(len));
306 	}
307 
308 	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
309 
310 	/* clone block_length after optionsa */
311 	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
312 
313 	/* remember the file index */
314 	self->port_index[port] = self->ports++;
315 
316 	return write(self->outfd, buf, len);
317 }
318 
319 /*
320  * Write an Interface statistics block at the end of capture.
321  */
322 ssize_t
323 rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
324 		       uint64_t ifrecv, uint64_t ifdrop,
325 		       const char *comment)
326 {
327 	struct pcapng_statistics *hdr;
328 	struct pcapng_option *opt;
329 	uint64_t start_time = self->offset_ns;
330 	uint64_t sample_time;
331 	uint32_t optlen, len;
332 	uint8_t buf[PCAPNG_BLKSIZ];
333 
334 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
335 
336 	optlen = 0;
337 
338 	if (ifrecv != UINT64_MAX)
339 		optlen += pcapng_optlen(sizeof(ifrecv));
340 	if (ifdrop != UINT64_MAX)
341 		optlen += pcapng_optlen(sizeof(ifdrop));
342 
343 	if (start_time != 0)
344 		optlen += pcapng_optlen(sizeof(start_time));
345 
346 	if (comment)
347 		optlen += pcapng_optlen(strlen(comment));
348 	if (optlen != 0)
349 		optlen += pcapng_optlen(0);
350 
351 	len = sizeof(*hdr) + optlen + sizeof(uint32_t);
352 	if (len > sizeof(buf))
353 		return -1;
354 
355 	hdr = (struct pcapng_statistics *)buf;
356 	opt = (struct pcapng_option *)(hdr + 1);
357 
358 	if (comment)
359 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
360 					comment, strlen(comment));
361 	if (start_time != 0)
362 		opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME,
363 					 &start_time, sizeof(start_time));
364 	if (ifrecv != UINT64_MAX)
365 		opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV,
366 				&ifrecv, sizeof(ifrecv));
367 	if (ifdrop != UINT64_MAX)
368 		opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP,
369 				&ifdrop, sizeof(ifdrop));
370 	if (optlen != 0)
371 		opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
372 
373 	hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK;
374 	hdr->block_length = len;
375 	hdr->interface_id = self->port_index[port_id];
376 
377 	sample_time = pcapng_timestamp(self, rte_get_tsc_cycles());
378 	hdr->timestamp_hi = sample_time >> 32;
379 	hdr->timestamp_lo = (uint32_t)sample_time;
380 
381 	/* clone block_length after option */
382 	memcpy(opt, &len, sizeof(uint32_t));
383 
384 	return write(self->outfd, buf, len);
385 }
386 
387 uint32_t
388 rte_pcapng_mbuf_size(uint32_t length)
389 {
390 	/* The VLAN and EPB header must fit in the mbuf headroom. */
391 	RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) +
392 		   sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM);
393 
394 	/* The flags and queue information are added at the end. */
395 	return sizeof(struct rte_mbuf)
396 		+ RTE_ALIGN(length, sizeof(uint32_t))
397 		+ pcapng_optlen(sizeof(uint32_t)) /* flag option */
398 		+ pcapng_optlen(sizeof(uint32_t)) /* queue option */
399 		+ sizeof(uint32_t);		  /*  length */
400 }
401 
402 /* More generalized version rte_vlan_insert() */
403 static int
404 pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci)
405 {
406 	struct rte_ether_hdr *nh, *oh;
407 	struct rte_vlan_hdr *vh;
408 
409 	if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1)
410 		return -EINVAL;
411 
412 	if (rte_pktmbuf_data_len(m) < sizeof(*oh))
413 		return -EINVAL;
414 
415 	oh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
416 	nh = (struct rte_ether_hdr *)
417 		rte_pktmbuf_prepend(m, sizeof(struct rte_vlan_hdr));
418 	if (nh == NULL)
419 		return -ENOSPC;
420 
421 	memmove(nh, oh, 2 * RTE_ETHER_ADDR_LEN);
422 	nh->ether_type = rte_cpu_to_be_16(ether_type);
423 
424 	vh = (struct rte_vlan_hdr *) (nh + 1);
425 	vh->vlan_tci = rte_cpu_to_be_16(tci);
426 
427 	return 0;
428 }
429 
430 /*
431  *   The mbufs created use the Pcapng standard enhanced packet  block.
432  *
433  *                         1                   2                   3
434  *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
435  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
436  *  0 |                    Block Type = 0x00000006                    |
437  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
438  *  4 |                      Block Total Length                       |
439  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
440  *  8 |                         Interface ID                          |
441  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
442  * 12 |                        Timestamp (High)                       |
443  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
444  * 16 |                        Timestamp (Low)                        |
445  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
446  * 20 |                    Captured Packet Length                     |
447  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
448  * 24 |                    Original Packet Length                     |
449  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
450  * 28 /                                                               /
451  *    /                          Packet Data                          /
452  *    /              variable length, padded to 32 bits               /
453  *    /                                                               /
454  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
455  *    |      Option Code = 0x0002     |     Option Length = 0x004     |
456  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
457  *    |              Flags (direction)                                |
458  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *    |      Option Code = 0x0006     |     Option Length = 0x004     |
460  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
461  *    |              Queue id                                         |
462  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
463  *    |                      Block Total Length                       |
464  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
465  */
466 
467 /* Make a copy of original mbuf with pcapng header and options */
468 struct rte_mbuf *
469 rte_pcapng_copy(uint16_t port_id, uint32_t queue,
470 		const struct rte_mbuf *md,
471 		struct rte_mempool *mp,
472 		uint32_t length,
473 		enum rte_pcapng_direction direction,
474 		const char *comment)
475 {
476 	struct pcapng_enhance_packet_block *epb;
477 	uint32_t orig_len, data_len, padding, flags;
478 	struct pcapng_option *opt;
479 	uint64_t timestamp;
480 	uint16_t optlen;
481 	struct rte_mbuf *mc;
482 	bool rss_hash;
483 
484 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
485 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
486 #endif
487 	orig_len = rte_pktmbuf_pkt_len(md);
488 
489 	/* Take snapshot of the data */
490 	mc = rte_pktmbuf_copy(md, mp, 0, length);
491 	if (unlikely(mc == NULL))
492 		return NULL;
493 
494 	/* Expand any offloaded VLAN information */
495 	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
496 	     (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) ||
497 	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
498 	     (md->ol_flags & RTE_MBUF_F_TX_VLAN))) {
499 		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
500 				       md->vlan_tci) != 0)
501 			goto fail;
502 	}
503 
504 	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
505 	     (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) ||
506 	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
507 	     (md->ol_flags & RTE_MBUF_F_TX_QINQ))) {
508 		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
509 				       md->vlan_tci_outer) != 0)
510 			goto fail;
511 	}
512 
513 	/* record HASH on incoming packets */
514 	rss_hash = (direction == RTE_PCAPNG_DIRECTION_IN &&
515 		    (md->ol_flags & RTE_MBUF_F_RX_RSS_HASH));
516 
517 	/* pad the packet to 32 bit boundary */
518 	data_len = rte_pktmbuf_data_len(mc);
519 	padding = RTE_ALIGN(data_len, sizeof(uint32_t)) - data_len;
520 	if (padding > 0) {
521 		void *tail = rte_pktmbuf_append(mc, padding);
522 
523 		if (tail == NULL)
524 			goto fail;
525 		memset(tail, 0, padding);
526 	}
527 
528 	optlen = pcapng_optlen(sizeof(flags));
529 	optlen += pcapng_optlen(sizeof(queue));
530 	if (rss_hash)
531 		optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t));
532 
533 	if (comment)
534 		optlen += pcapng_optlen(strlen(comment));
535 
536 	/* reserve trailing options and block length */
537 	opt = (struct pcapng_option *)
538 		rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
539 	if (unlikely(opt == NULL))
540 		goto fail;
541 
542 	switch (direction) {
543 	case RTE_PCAPNG_DIRECTION_IN:
544 		flags = PCAPNG_IFB_INBOUND;
545 		break;
546 	case RTE_PCAPNG_DIRECTION_OUT:
547 		flags = PCAPNG_IFB_OUTBOUND;
548 		break;
549 	default:
550 		flags = 0;
551 	}
552 
553 	opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS,
554 				&flags, sizeof(flags));
555 
556 	opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE,
557 				&queue, sizeof(queue));
558 
559 	if (rss_hash) {
560 		uint8_t hash_opt[5];
561 
562 		/* The algorithm could be something else if
563 		 * using rte_flow_action_rss; but the current API does not
564 		 * have a way for ethdev to report  this on a per-packet basis.
565 		 */
566 		hash_opt[0] = PCAPNG_HASH_TOEPLITZ;
567 
568 		memcpy(&hash_opt[1], &md->hash.rss, sizeof(uint32_t));
569 		opt = pcapng_add_option(opt, PCAPNG_EPB_HASH,
570 					&hash_opt, sizeof(hash_opt));
571 	}
572 
573 	if (comment)
574 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, comment,
575 					strlen(comment));
576 
577 	/* Note: END_OPT necessary here. Wireshark doesn't do it. */
578 
579 	/* Add PCAPNG packet header */
580 	epb = (struct pcapng_enhance_packet_block *)
581 		rte_pktmbuf_prepend(mc, sizeof(*epb));
582 	if (unlikely(epb == NULL))
583 		goto fail;
584 
585 	epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
586 	epb->block_length = rte_pktmbuf_data_len(mc);
587 
588 	/* Interface index is filled in later during write */
589 	mc->port = port_id;
590 
591 	/* Put timestamp in cycles here - adjust in packet write */
592 	timestamp = rte_get_tsc_cycles();
593 	epb->timestamp_hi = timestamp >> 32;
594 	epb->timestamp_lo = (uint32_t)timestamp;
595 	epb->capture_length = data_len;
596 	epb->original_length = orig_len;
597 
598 	/* set trailer of block length */
599 	*(uint32_t *)opt = epb->block_length;
600 
601 	return mc;
602 
603 fail:
604 	rte_pktmbuf_free(mc);
605 	return NULL;
606 }
607 
608 /* Write pre-formatted packets to file. */
609 ssize_t
610 rte_pcapng_write_packets(rte_pcapng_t *self,
611 			 struct rte_mbuf *pkts[], uint16_t nb_pkts)
612 {
613 	struct iovec iov[IOV_MAX];
614 	unsigned int i, cnt = 0;
615 	ssize_t ret, total = 0;
616 
617 	for (i = 0; i < nb_pkts; i++) {
618 		struct rte_mbuf *m = pkts[i];
619 		struct pcapng_enhance_packet_block *epb;
620 		uint64_t cycles, timestamp;
621 
622 		/* sanity check that is really a pcapng mbuf */
623 		epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *);
624 		if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK ||
625 			     epb->block_length != rte_pktmbuf_data_len(m))) {
626 			rte_errno = EINVAL;
627 			return -1;
628 		}
629 
630 		/* check that this interface was added. */
631 		epb->interface_id = self->port_index[m->port];
632 		if (unlikely(epb->interface_id > RTE_MAX_ETHPORTS)) {
633 			rte_errno = EINVAL;
634 			return -1;
635 		}
636 
637 		/* adjust timestamp recorded in packet */
638 		cycles = (uint64_t)epb->timestamp_hi << 32;
639 		cycles += epb->timestamp_lo;
640 		timestamp = pcapng_timestamp(self, cycles);
641 		epb->timestamp_hi = timestamp >> 32;
642 		epb->timestamp_lo = (uint32_t)timestamp;
643 
644 		/*
645 		 * Handle case of highly fragmented and large burst size
646 		 * Note: this assumes that max segments per mbuf < IOV_MAX
647 		 */
648 		if (unlikely(cnt + m->nb_segs >= IOV_MAX)) {
649 			ret = writev(self->outfd, iov, cnt);
650 			if (unlikely(ret < 0)) {
651 				rte_errno = errno;
652 				return -1;
653 			}
654 			total += ret;
655 			cnt = 0;
656 		}
657 
658 		/*
659 		 * The DPDK port is recorded during pcapng_copy.
660 		 * Map that to PCAPNG interface in file.
661 		 */
662 		do {
663 			iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *);
664 			iov[cnt].iov_len = rte_pktmbuf_data_len(m);
665 			++cnt;
666 		} while ((m = m->next));
667 	}
668 
669 	ret = writev(self->outfd, iov, cnt);
670 	if (unlikely(ret < 0)) {
671 		rte_errno = errno;
672 		return -1;
673 	}
674 	return total + ret;
675 }
676 
677 /* Create new pcapng writer handle */
678 rte_pcapng_t *
679 rte_pcapng_fdopen(int fd,
680 		  const char *osname, const char *hardware,
681 		  const char *appname, const char *comment)
682 {
683 	unsigned int i;
684 	rte_pcapng_t *self;
685 	struct timespec ts;
686 	uint64_t cycles;
687 
688 	self = malloc(sizeof(*self));
689 	if (!self) {
690 		rte_errno = ENOMEM;
691 		return NULL;
692 	}
693 
694 	self->outfd = fd;
695 	self->ports = 0;
696 
697 	/* record start time in ns since 1/1/1970 */
698 	cycles = rte_get_tsc_cycles();
699 	clock_gettime(CLOCK_REALTIME, &ts);
700 	self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
701 	self->offset_ns = rte_timespec_to_ns(&ts);
702 
703 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
704 		self->port_index[i] = UINT32_MAX;
705 
706 	if (pcapng_section_block(self, osname, hardware, appname, comment) < 0)
707 		goto fail;
708 
709 	return self;
710 fail:
711 	free(self);
712 	return NULL;
713 }
714 
715 void
716 rte_pcapng_close(rte_pcapng_t *self)
717 {
718 	close(self->outfd);
719 	free(self);
720 }
721