xref: /dpdk/lib/pcapng/rte_pcapng.c (revision 0cbf27521b0d6e7cb79f41a5e699d82562b09c03)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019 Microsoft Corporation
3  */
4 
5 #include <errno.h>
6 #include <stdbool.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <time.h>
11 #include <unistd.h>
12 
13 #ifndef RTE_EXEC_ENV_WINDOWS
14 #include <net/if.h>
15 #include <sys/uio.h>
16 #endif
17 
18 #include <bus_driver.h>
19 #include <rte_common.h>
20 #include <rte_cycles.h>
21 #include <dev_driver.h>
22 #include <rte_errno.h>
23 #include <rte_ethdev.h>
24 #include <rte_ether.h>
25 #include <rte_mbuf.h>
26 #include <rte_os_shim.h>
27 #include <rte_pcapng.h>
28 #include <rte_reciprocal.h>
29 #include <rte_time.h>
30 
31 #include "pcapng_proto.h"
32 
33 /* conversion from DPDK speed to PCAPNG */
34 #define PCAPNG_MBPS_SPEED 1000000ull
35 
36 /* upper bound for section, stats and interface blocks (in uint32_t) */
37 #define PCAPNG_BLKSIZ	(2048 / sizeof(uint32_t))
38 
39 /* Format of the capture file handle */
40 struct rte_pcapng {
41 	int  outfd;		/* output file */
42 	unsigned int ports;	/* number of interfaces added */
43 	uint64_t offset_ns;	/* ns since 1/1/1970 when initialized */
44 	uint64_t tsc_base;	/* TSC when started */
45 
46 	/* DPDK port id to interface index in file */
47 	uint32_t port_index[RTE_MAX_ETHPORTS];
48 };
49 
#ifdef RTE_EXEC_ENV_WINDOWS
/*
 * There is no writev() call on Windows.
 * Stand in for it by gathering every segment into one temporary
 * buffer and issuing a single write(). The copy is kept because
 * pcapng needs to be thread-safe and do atomic write operations.
 */

#define IOV_MAX 128
struct iovec {
	void   *iov_base;
	size_t  iov_len;
};

static ssize_t writev(int fd, const struct iovec *iov, int iovcnt)
{
	size_t total = 0;
	size_t filled = 0;
	uint8_t *gather;
	ssize_t written;
	int n;

	/* first pass: size of all segments together */
	for (n = 0; n < iovcnt; n++)
		total += iov[n].iov_len;

	/* nothing to write, do not bother allocating */
	if (unlikely(total == 0))
		return 0;

	gather = malloc(total);
	if (unlikely(gather == NULL)) {
		errno = ENOMEM;
		return -1;
	}

	/* second pass: lay the segments out back to back */
	for (n = 0; n < iovcnt; n++) {
		rte_memcpy(gather + filled, iov[n].iov_base, iov[n].iov_len);
		filled += iov[n].iov_len;
	}

	written = write(fd, gather, total);
	free(gather);
	return written;
}

#define IF_NAMESIZE	16
/* compatibility wrapper because name is optional */
#define if_indextoname(ifindex, ifname) NULL
#endif
99 
100 /* Convert from TSC (CPU cycles) to nanoseconds */
101 static uint64_t
102 pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles)
103 {
104 	uint64_t delta, rem, secs, ns;
105 	const uint64_t hz = rte_get_tsc_hz();
106 
107 	delta = cycles - self->tsc_base;
108 
109 	/* Avoid numeric wraparound by computing seconds first */
110 	secs = delta / hz;
111 	rem = delta % hz;
112 	ns = (rem * NS_PER_S) / hz;
113 
114 	return secs * NS_PER_S + ns + self->offset_ns;
115 }
116 
117 /* length of option including padding */
118 static uint16_t pcapng_optlen(uint16_t len)
119 {
120 	return RTE_ALIGN(sizeof(struct pcapng_option) + len,
121 			 sizeof(uint32_t));
122 }
123 
124 /* build TLV option and return location of next */
125 static struct pcapng_option *
126 pcapng_add_option(struct pcapng_option *popt, uint16_t code,
127 		  const void *data, uint16_t len)
128 {
129 	popt->code = code;
130 	popt->length = len;
131 	if (len > 0)
132 		memcpy(popt->data, data, len);
133 
134 	return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len));
135 }
136 
137 /*
138  * Write required initial section header describing the capture
139  */
140 static int
141 pcapng_section_block(rte_pcapng_t *self,
142 		    const char *os, const char *hw,
143 		    const char *app, const char *comment)
144 {
145 	struct pcapng_section_header *hdr;
146 	struct pcapng_option *opt;
147 	uint32_t buf[PCAPNG_BLKSIZ];
148 	uint32_t len;
149 
150 	len = sizeof(*hdr);
151 	if (hw)
152 		len += pcapng_optlen(strlen(hw));
153 	if (os)
154 		len += pcapng_optlen(strlen(os));
155 	if (app)
156 		len += pcapng_optlen(strlen(app));
157 	if (comment)
158 		len += pcapng_optlen(strlen(comment));
159 
160 	/* reserve space for OPT_END */
161 	len += pcapng_optlen(0);
162 	len += sizeof(uint32_t);
163 
164 	if (len > sizeof(buf))
165 		return -1;
166 
167 	hdr = (struct pcapng_section_header *)buf;
168 	*hdr = (struct pcapng_section_header) {
169 		.block_type = PCAPNG_SECTION_BLOCK,
170 		.block_length = len,
171 		.byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC,
172 		.major_version = PCAPNG_MAJOR_VERS,
173 		.minor_version = PCAPNG_MINOR_VERS,
174 		.section_length = UINT64_MAX,
175 	};
176 
177 	/* After the section header insert variable length options. */
178 	opt = (struct pcapng_option *)(hdr + 1);
179 	if (comment)
180 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
181 					comment, strlen(comment));
182 	if (hw)
183 		opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE,
184 					hw, strlen(hw));
185 	if (os)
186 		opt = pcapng_add_option(opt, PCAPNG_SHB_OS,
187 					os, strlen(os));
188 	if (app)
189 		opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL,
190 					app, strlen(app));
191 
192 	/* The standard requires last option to be OPT_END */
193 	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
194 
195 	/* clone block_length after option */
196 	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
197 
198 	return write(self->outfd, buf, len);
199 }
200 
201 /* Write an interface block for a DPDK port */
202 int
203 rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port,
204 			 const char *ifname, const char *ifdescr,
205 			 const char *filter)
206 {
207 	struct pcapng_interface_block *hdr;
208 	struct rte_eth_dev_info dev_info;
209 	struct rte_ether_addr *ea, macaddr;
210 	const struct rte_device *dev;
211 	struct rte_eth_link link;
212 	struct pcapng_option *opt;
213 	const uint8_t tsresol = 9;	/* nanosecond resolution */
214 	uint32_t len;
215 	uint32_t buf[PCAPNG_BLKSIZ];
216 	char ifname_buf[IF_NAMESIZE];
217 	char ifhw[256];
218 	uint64_t speed = 0;
219 
220 	if (rte_eth_dev_info_get(port, &dev_info) < 0)
221 		return -1;
222 
223 	/* make something like an interface name */
224 	if (ifname == NULL) {
225 		/* Use kernel name if available */
226 		ifname = if_indextoname(dev_info.if_index, ifname_buf);
227 		if (ifname == NULL) {
228 			snprintf(ifname_buf, IF_NAMESIZE, "dpdk:%u", port);
229 			ifname = ifname_buf;
230 		}
231 	}
232 
233 	/* make a useful device hardware string */
234 	dev = dev_info.device;
235 	if (dev)
236 		snprintf(ifhw, sizeof(ifhw),
237 			 "%s-%s", dev->bus->name, dev->name);
238 
239 	/* DPDK reports in units of Mbps */
240 	if (rte_eth_link_get(port, &link) == 0 &&
241 	    link.link_status == RTE_ETH_LINK_UP)
242 		speed = link.link_speed * PCAPNG_MBPS_SPEED;
243 
244 	if (rte_eth_macaddr_get(port, &macaddr) < 0)
245 		ea = NULL;
246 	else
247 		ea = &macaddr;
248 
249 	/* Compute length of interface block options */
250 	len = sizeof(*hdr);
251 
252 	len += pcapng_optlen(sizeof(tsresol));	/* timestamp */
253 	len += pcapng_optlen(strlen(ifname));	/* ifname */
254 
255 	if (ifdescr)
256 		len += pcapng_optlen(strlen(ifdescr));
257 	if (ea)
258 		len += pcapng_optlen(RTE_ETHER_ADDR_LEN); /* macaddr */
259 	if (speed != 0)
260 		len += pcapng_optlen(sizeof(uint64_t));
261 	if (filter)
262 		len += pcapng_optlen(strlen(filter) + 1);
263 	if (dev)
264 		len += pcapng_optlen(strlen(ifhw));
265 
266 	len += pcapng_optlen(0);
267 	len += sizeof(uint32_t);
268 
269 	if (len > sizeof(buf))
270 		return -1;
271 
272 	hdr = (struct pcapng_interface_block *)buf;
273 	*hdr = (struct pcapng_interface_block) {
274 		.block_type = PCAPNG_INTERFACE_BLOCK,
275 		.link_type = 1,		/* DLT_EN10MB - Ethernet */
276 		.block_length = len,
277 	};
278 
279 	opt = (struct pcapng_option *)(hdr + 1);
280 	opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL,
281 				&tsresol, sizeof(tsresol));
282 	opt = pcapng_add_option(opt, PCAPNG_IFB_NAME,
283 				ifname, strlen(ifname));
284 	if (ifdescr)
285 		opt = pcapng_add_option(opt, PCAPNG_IFB_DESCRIPTION,
286 					ifdescr, strlen(ifdescr));
287 	if (ea)
288 		opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR,
289 					ea, RTE_ETHER_ADDR_LEN);
290 	if (speed != 0)
291 		opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED,
292 					 &speed, sizeof(uint64_t));
293 	if (dev)
294 		opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE,
295 					 ifhw, strlen(ifhw));
296 	if (filter) {
297 		size_t len;
298 
299 		len = strlen(filter) + 1;
300 		opt->code = PCAPNG_IFB_FILTER;
301 		opt->length = len;
302 		/* Encoding is that the first octet indicates string vs BPF */
303 		opt->data[0] = 0;
304 		memcpy(opt->data + 1, filter, strlen(filter));
305 
306 		opt = (struct pcapng_option *)((uint8_t *)opt + pcapng_optlen(len));
307 	}
308 
309 	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
310 
311 	/* clone block_length after optionsa */
312 	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
313 
314 	/* remember the file index */
315 	self->port_index[port] = self->ports++;
316 
317 	return write(self->outfd, buf, len);
318 }
319 
320 /*
321  * Write an Interface statistics block at the end of capture.
322  */
323 ssize_t
324 rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
325 		       uint64_t ifrecv, uint64_t ifdrop,
326 		       const char *comment)
327 {
328 	struct pcapng_statistics *hdr;
329 	struct pcapng_option *opt;
330 	uint64_t start_time = self->offset_ns;
331 	uint64_t sample_time;
332 	uint32_t optlen, len;
333 	uint32_t buf[PCAPNG_BLKSIZ];
334 
335 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
336 
337 	optlen = 0;
338 
339 	if (ifrecv != UINT64_MAX)
340 		optlen += pcapng_optlen(sizeof(ifrecv));
341 	if (ifdrop != UINT64_MAX)
342 		optlen += pcapng_optlen(sizeof(ifdrop));
343 
344 	if (start_time != 0)
345 		optlen += pcapng_optlen(sizeof(start_time));
346 
347 	if (comment)
348 		optlen += pcapng_optlen(strlen(comment));
349 	if (optlen != 0)
350 		optlen += pcapng_optlen(0);
351 
352 	len = sizeof(*hdr) + optlen + sizeof(uint32_t);
353 	if (len > sizeof(buf))
354 		return -1;
355 
356 	hdr = (struct pcapng_statistics *)buf;
357 	opt = (struct pcapng_option *)(hdr + 1);
358 
359 	if (comment)
360 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
361 					comment, strlen(comment));
362 	if (start_time != 0)
363 		opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME,
364 					 &start_time, sizeof(start_time));
365 	if (ifrecv != UINT64_MAX)
366 		opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV,
367 				&ifrecv, sizeof(ifrecv));
368 	if (ifdrop != UINT64_MAX)
369 		opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP,
370 				&ifdrop, sizeof(ifdrop));
371 	if (optlen != 0)
372 		opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
373 
374 	hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK;
375 	hdr->block_length = len;
376 	hdr->interface_id = self->port_index[port_id];
377 
378 	sample_time = pcapng_timestamp(self, rte_get_tsc_cycles());
379 	hdr->timestamp_hi = sample_time >> 32;
380 	hdr->timestamp_lo = (uint32_t)sample_time;
381 
382 	/* clone block_length after option */
383 	memcpy(opt, &len, sizeof(uint32_t));
384 
385 	return write(self->outfd, buf, len);
386 }
387 
388 uint32_t
389 rte_pcapng_mbuf_size(uint32_t length)
390 {
391 	/* The VLAN and EPB header must fit in the mbuf headroom. */
392 	RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) +
393 		   sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM);
394 
395 	/* The flags and queue information are added at the end. */
396 	return sizeof(struct rte_mbuf)
397 		+ RTE_ALIGN(length, sizeof(uint32_t))
398 		+ pcapng_optlen(sizeof(uint32_t)) /* flag option */
399 		+ pcapng_optlen(sizeof(uint32_t)) /* queue option */
400 		+ sizeof(uint32_t);		  /*  length */
401 }
402 
403 /* More generalized version rte_vlan_insert() */
404 static int
405 pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci)
406 {
407 	struct rte_ether_hdr *nh, *oh;
408 	struct rte_vlan_hdr *vh;
409 
410 	if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1)
411 		return -EINVAL;
412 
413 	if (rte_pktmbuf_data_len(m) < sizeof(*oh))
414 		return -EINVAL;
415 
416 	oh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
417 	nh = (struct rte_ether_hdr *)
418 		rte_pktmbuf_prepend(m, sizeof(struct rte_vlan_hdr));
419 	if (nh == NULL)
420 		return -ENOSPC;
421 
422 	memmove(nh, oh, 2 * RTE_ETHER_ADDR_LEN);
423 	nh->ether_type = rte_cpu_to_be_16(ether_type);
424 
425 	vh = (struct rte_vlan_hdr *) (nh + 1);
426 	vh->vlan_tci = rte_cpu_to_be_16(tci);
427 
428 	return 0;
429 }
430 
431 /*
432  *   The mbufs created use the Pcapng standard enhanced packet  block.
433  *
434  *                         1                   2                   3
435  *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
436  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
437  *  0 |                    Block Type = 0x00000006                    |
438  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
439  *  4 |                      Block Total Length                       |
440  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
441  *  8 |                         Interface ID                          |
442  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
443  * 12 |                        Timestamp (High)                       |
444  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
445  * 16 |                        Timestamp (Low)                        |
446  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
447  * 20 |                    Captured Packet Length                     |
448  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
449  * 24 |                    Original Packet Length                     |
450  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
451  * 28 /                                                               /
452  *    /                          Packet Data                          /
453  *    /              variable length, padded to 32 bits               /
454  *    /                                                               /
455  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
456  *    |      Option Code = 0x0002     |     Option Length = 0x004     |
457  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
458  *    |              Flags (direction)                                |
459  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
460  *    |      Option Code = 0x0006     |     Option Length = 0x002     |
461  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
462  *    |              Queue id                                         |
463  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
464  *    |                      Block Total Length                       |
465  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
466  */
467 
468 /* Make a copy of original mbuf with pcapng header and options */
469 struct rte_mbuf *
470 rte_pcapng_copy(uint16_t port_id, uint32_t queue,
471 		const struct rte_mbuf *md,
472 		struct rte_mempool *mp,
473 		uint32_t length,
474 		enum rte_pcapng_direction direction,
475 		const char *comment)
476 {
477 	struct pcapng_enhance_packet_block *epb;
478 	uint32_t orig_len, pkt_len, padding, flags;
479 	struct pcapng_option *opt;
480 	uint64_t timestamp;
481 	uint16_t optlen;
482 	struct rte_mbuf *mc;
483 	bool rss_hash;
484 
485 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
486 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
487 #endif
488 	orig_len = rte_pktmbuf_pkt_len(md);
489 
490 	/* Take snapshot of the data */
491 	mc = rte_pktmbuf_copy(md, mp, 0, length);
492 	if (unlikely(mc == NULL))
493 		return NULL;
494 
495 	/* Expand any offloaded VLAN information */
496 	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
497 	     (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) ||
498 	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
499 	     (md->ol_flags & RTE_MBUF_F_TX_VLAN))) {
500 		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
501 				       md->vlan_tci) != 0)
502 			goto fail;
503 	}
504 
505 	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
506 	     (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) ||
507 	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
508 	     (md->ol_flags & RTE_MBUF_F_TX_QINQ))) {
509 		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
510 				       md->vlan_tci_outer) != 0)
511 			goto fail;
512 	}
513 
514 	/* record HASH on incoming packets */
515 	rss_hash = (direction == RTE_PCAPNG_DIRECTION_IN &&
516 		    (md->ol_flags & RTE_MBUF_F_RX_RSS_HASH));
517 
518 	/* pad the packet to 32 bit boundary */
519 	pkt_len = rte_pktmbuf_pkt_len(mc);
520 	padding = RTE_ALIGN(pkt_len, sizeof(uint32_t)) - pkt_len;
521 	if (padding > 0) {
522 		void *tail = rte_pktmbuf_append(mc, padding);
523 
524 		if (tail == NULL)
525 			goto fail;
526 		memset(tail, 0, padding);
527 	}
528 
529 	optlen = pcapng_optlen(sizeof(flags));
530 	optlen += pcapng_optlen(sizeof(queue));
531 	if (rss_hash)
532 		optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t));
533 
534 	if (comment)
535 		optlen += pcapng_optlen(strlen(comment));
536 
537 	/* reserve trailing options and block length */
538 	opt = (struct pcapng_option *)
539 		rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
540 	if (unlikely(opt == NULL))
541 		goto fail;
542 
543 	switch (direction) {
544 	case RTE_PCAPNG_DIRECTION_IN:
545 		flags = PCAPNG_IFB_INBOUND;
546 		break;
547 	case RTE_PCAPNG_DIRECTION_OUT:
548 		flags = PCAPNG_IFB_OUTBOUND;
549 		break;
550 	default:
551 		flags = 0;
552 	}
553 
554 	opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS,
555 				&flags, sizeof(flags));
556 
557 	opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE,
558 				&queue, sizeof(queue));
559 
560 	if (rss_hash) {
561 		uint8_t hash_opt[5];
562 
563 		/* The algorithm could be something else if
564 		 * using rte_flow_action_rss; but the current API does not
565 		 * have a way for ethdev to report  this on a per-packet basis.
566 		 */
567 		hash_opt[0] = PCAPNG_HASH_TOEPLITZ;
568 
569 		memcpy(&hash_opt[1], &md->hash.rss, sizeof(uint32_t));
570 		opt = pcapng_add_option(opt, PCAPNG_EPB_HASH,
571 					&hash_opt, sizeof(hash_opt));
572 	}
573 
574 	if (comment)
575 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, comment,
576 					strlen(comment));
577 
578 	/* Note: END_OPT necessary here. Wireshark doesn't do it. */
579 
580 	/* Add PCAPNG packet header */
581 	epb = (struct pcapng_enhance_packet_block *)
582 		rte_pktmbuf_prepend(mc, sizeof(*epb));
583 	if (unlikely(epb == NULL))
584 		goto fail;
585 
586 	epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
587 	epb->block_length = rte_pktmbuf_pkt_len(mc);
588 
589 	/* Interface index is filled in later during write */
590 	mc->port = port_id;
591 
592 	/* Put timestamp in cycles here - adjust in packet write */
593 	timestamp = rte_get_tsc_cycles();
594 	epb->timestamp_hi = timestamp >> 32;
595 	epb->timestamp_lo = (uint32_t)timestamp;
596 	epb->capture_length = pkt_len;
597 	epb->original_length = orig_len;
598 
599 	/* set trailer of block length */
600 	*(uint32_t *)opt = epb->block_length;
601 
602 	return mc;
603 
604 fail:
605 	rte_pktmbuf_free(mc);
606 	return NULL;
607 }
608 
609 /* Write pre-formatted packets to file. */
610 ssize_t
611 rte_pcapng_write_packets(rte_pcapng_t *self,
612 			 struct rte_mbuf *pkts[], uint16_t nb_pkts)
613 {
614 	struct iovec iov[IOV_MAX];
615 	unsigned int i, cnt = 0;
616 	ssize_t ret, total = 0;
617 
618 	for (i = 0; i < nb_pkts; i++) {
619 		struct rte_mbuf *m = pkts[i];
620 		struct pcapng_enhance_packet_block *epb;
621 		uint64_t cycles, timestamp;
622 
623 		/* sanity check that is really a pcapng mbuf */
624 		epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *);
625 		if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK ||
626 			     epb->block_length != rte_pktmbuf_pkt_len(m))) {
627 			rte_errno = EINVAL;
628 			return -1;
629 		}
630 
631 		/* check that this interface was added. */
632 		epb->interface_id = self->port_index[m->port];
633 		if (unlikely(epb->interface_id > RTE_MAX_ETHPORTS)) {
634 			rte_errno = EINVAL;
635 			return -1;
636 		}
637 
638 		/* adjust timestamp recorded in packet */
639 		cycles = (uint64_t)epb->timestamp_hi << 32;
640 		cycles += epb->timestamp_lo;
641 		timestamp = pcapng_timestamp(self, cycles);
642 		epb->timestamp_hi = timestamp >> 32;
643 		epb->timestamp_lo = (uint32_t)timestamp;
644 
645 		/*
646 		 * Handle case of highly fragmented and large burst size
647 		 * Note: this assumes that max segments per mbuf < IOV_MAX
648 		 */
649 		if (unlikely(cnt + m->nb_segs >= IOV_MAX)) {
650 			ret = writev(self->outfd, iov, cnt);
651 			if (unlikely(ret < 0)) {
652 				rte_errno = errno;
653 				return -1;
654 			}
655 			total += ret;
656 			cnt = 0;
657 		}
658 
659 		/*
660 		 * The DPDK port is recorded during pcapng_copy.
661 		 * Map that to PCAPNG interface in file.
662 		 */
663 		do {
664 			iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *);
665 			iov[cnt].iov_len = rte_pktmbuf_data_len(m);
666 			++cnt;
667 		} while ((m = m->next));
668 	}
669 
670 	ret = writev(self->outfd, iov, cnt);
671 	if (unlikely(ret < 0)) {
672 		rte_errno = errno;
673 		return -1;
674 	}
675 	return total + ret;
676 }
677 
678 /* Create new pcapng writer handle */
679 rte_pcapng_t *
680 rte_pcapng_fdopen(int fd,
681 		  const char *osname, const char *hardware,
682 		  const char *appname, const char *comment)
683 {
684 	unsigned int i;
685 	rte_pcapng_t *self;
686 	struct timespec ts;
687 	uint64_t cycles;
688 
689 	self = malloc(sizeof(*self));
690 	if (!self) {
691 		rte_errno = ENOMEM;
692 		return NULL;
693 	}
694 
695 	self->outfd = fd;
696 	self->ports = 0;
697 
698 	/* record start time in ns since 1/1/1970 */
699 	cycles = rte_get_tsc_cycles();
700 	clock_gettime(CLOCK_REALTIME, &ts);
701 	self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
702 	self->offset_ns = rte_timespec_to_ns(&ts);
703 
704 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
705 		self->port_index[i] = UINT32_MAX;
706 
707 	if (pcapng_section_block(self, osname, hardware, appname, comment) < 0)
708 		goto fail;
709 
710 	return self;
711 fail:
712 	free(self);
713 	return NULL;
714 }
715 
716 void
717 rte_pcapng_close(rte_pcapng_t *self)
718 {
719 	close(self->outfd);
720 	free(self);
721 }
722