xref: /dpdk/lib/pcapng/rte_pcapng.c (revision af0785a2447b307965377b62f46a5f39457a85a3)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019 Microsoft Corporation
3  */
4 
5 #include <errno.h>
6 #include <net/if.h>
7 #include <stdbool.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <sys/uio.h>
12 #include <time.h>
13 #include <unistd.h>
14 
15 #include <bus_driver.h>
16 #include <rte_common.h>
17 #include <rte_cycles.h>
18 #include <dev_driver.h>
19 #include <rte_errno.h>
20 #include <rte_ethdev.h>
21 #include <rte_ether.h>
22 #include <rte_mbuf.h>
23 #include <rte_pcapng.h>
24 #include <rte_reciprocal.h>
25 #include <rte_time.h>
26 
27 #include "pcapng_proto.h"
28 
29 /* conversion from DPDK speed to PCAPNG */
30 #define PCAPNG_MBPS_SPEED 1000000ull
31 
32 /* Format of the capture file handle */
33 struct rte_pcapng {
34 	int  outfd;		/* output file */
35 	/* DPDK port id to interface index in file */
36 	uint32_t port_index[RTE_MAX_ETHPORTS];
37 };
38 
39 /* For converting TSC cycles to PCAPNG ns format */
40 static struct pcapng_time {
41 	uint64_t ns;
42 	uint64_t cycles;
43 	uint64_t tsc_hz;
44 	struct rte_reciprocal_u64 tsc_hz_inverse;
45 } pcapng_time;
46 
47 static inline void
48 pcapng_init(void)
49 {
50 	struct timespec ts;
51 
52 	pcapng_time.cycles = rte_get_tsc_cycles();
53 	clock_gettime(CLOCK_REALTIME, &ts);
54 	pcapng_time.cycles = (pcapng_time.cycles + rte_get_tsc_cycles()) / 2;
55 	pcapng_time.ns = rte_timespec_to_ns(&ts);
56 
57 	pcapng_time.tsc_hz = rte_get_tsc_hz();
58 	pcapng_time.tsc_hz_inverse = rte_reciprocal_value_u64(pcapng_time.tsc_hz);
59 }
60 
61 /* PCAPNG timestamps are in nanoseconds */
62 static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
63 {
64 	uint64_t delta, secs;
65 
66 	if (!pcapng_time.tsc_hz)
67 		pcapng_init();
68 
69 	/* In essence the calculation is:
70 	 *   delta = (cycles - pcapng_time.cycles) * NSEC_PRE_SEC / rte_get_tsc_hz()
71 	 * but this overflows within 4 to 8 seconds depending on TSC frequency.
72 	 * Instead, if delta >= pcapng_time.tsc_hz:
73 	 *   Increase pcapng_time.ns and pcapng_time.cycles by the number of
74 	 *   whole seconds in delta and reduce delta accordingly.
75 	 * delta will therefore always lie in the interval [0, pcapng_time.tsc_hz),
76 	 * which will not overflow when multiplied by NSEC_PER_SEC provided the
77 	 * TSC frequency < approx 18.4GHz.
78 	 *
79 	 * Currently all TSCs operate below 5GHz.
80 	 */
81 	delta = cycles - pcapng_time.cycles;
82 	if (unlikely(delta >= pcapng_time.tsc_hz)) {
83 		if (likely(delta < pcapng_time.tsc_hz * 2)) {
84 			delta -= pcapng_time.tsc_hz;
85 			pcapng_time.cycles += pcapng_time.tsc_hz;
86 			pcapng_time.ns += NSEC_PER_SEC;
87 		} else {
88 			secs = rte_reciprocal_divide_u64(delta, &pcapng_time.tsc_hz_inverse);
89 			delta -= secs * pcapng_time.tsc_hz;
90 			pcapng_time.cycles += secs * pcapng_time.tsc_hz;
91 			pcapng_time.ns += secs * NSEC_PER_SEC;
92 		}
93 	}
94 
95 	return pcapng_time.ns + rte_reciprocal_divide_u64(delta * NSEC_PER_SEC,
96 							  &pcapng_time.tsc_hz_inverse);
97 }
98 
99 /* length of option including padding */
100 static uint16_t pcapng_optlen(uint16_t len)
101 {
102 	return RTE_ALIGN(sizeof(struct pcapng_option) + len,
103 			 sizeof(uint32_t));
104 }
105 
106 /* build TLV option and return location of next */
107 static struct pcapng_option *
108 pcapng_add_option(struct pcapng_option *popt, uint16_t code,
109 		  const void *data, uint16_t len)
110 {
111 	popt->code = code;
112 	popt->length = len;
113 	memcpy(popt->data, data, len);
114 
115 	return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len));
116 }
117 
118 /*
119  * Write required initial section header describing the capture
120  */
121 static int
122 pcapng_section_block(rte_pcapng_t *self,
123 		    const char *os, const char *hw,
124 		    const char *app, const char *comment)
125 {
126 	struct pcapng_section_header *hdr;
127 	struct pcapng_option *opt;
128 	void *buf;
129 	uint32_t len;
130 	ssize_t cc;
131 
132 	len = sizeof(*hdr);
133 	if (hw)
134 		len += pcapng_optlen(strlen(hw));
135 	if (os)
136 		len += pcapng_optlen(strlen(os));
137 	if (app)
138 		len += pcapng_optlen(strlen(app));
139 	if (comment)
140 		len += pcapng_optlen(strlen(comment));
141 
142 	/* reserve space for OPT_END */
143 	len += pcapng_optlen(0);
144 	len += sizeof(uint32_t);
145 
146 	buf = calloc(1, len);
147 	if (!buf)
148 		return -1;
149 
150 	hdr = (struct pcapng_section_header *)buf;
151 	*hdr = (struct pcapng_section_header) {
152 		.block_type = PCAPNG_SECTION_BLOCK,
153 		.block_length = len,
154 		.byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC,
155 		.major_version = PCAPNG_MAJOR_VERS,
156 		.minor_version = PCAPNG_MINOR_VERS,
157 		.section_length = UINT64_MAX,
158 	};
159 
160 	/* After the section header insert variable length options. */
161 	opt = (struct pcapng_option *)(hdr + 1);
162 	if (comment)
163 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
164 					comment, strlen(comment));
165 	if (hw)
166 		opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE,
167 					hw, strlen(hw));
168 	if (os)
169 		opt = pcapng_add_option(opt, PCAPNG_SHB_OS,
170 					os, strlen(os));
171 	if (app)
172 		opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL,
173 					app, strlen(app));
174 
175 	/* The standard requires last option to be OPT_END */
176 	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
177 
178 	/* clone block_length after option */
179 	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
180 
181 	cc = write(self->outfd, buf, len);
182 	free(buf);
183 
184 	return cc;
185 }
186 
187 /* Write an interface block for a DPDK port */
188 static int
189 pcapng_add_interface(rte_pcapng_t *self, uint16_t port)
190 {
191 	struct pcapng_interface_block *hdr;
192 	struct rte_eth_dev_info dev_info;
193 	struct rte_ether_addr *ea, macaddr;
194 	const struct rte_device *dev;
195 	struct rte_eth_link link;
196 	struct pcapng_option *opt;
197 	const uint8_t tsresol = 9;	/* nanosecond resolution */
198 	uint32_t len;
199 	void *buf;
200 	char ifname[IF_NAMESIZE];
201 	char ifhw[256];
202 	uint64_t speed = 0;
203 
204 	if (rte_eth_dev_info_get(port, &dev_info) < 0)
205 		return -1;
206 
207 	/* make something like an interface name */
208 	if (if_indextoname(dev_info.if_index, ifname) == NULL)
209 		snprintf(ifname, IF_NAMESIZE, "dpdk:%u", port);
210 
211 	/* make a useful device hardware string */
212 	dev = dev_info.device;
213 	if (dev)
214 		snprintf(ifhw, sizeof(ifhw),
215 			 "%s-%s", dev->bus->name, dev->name);
216 
217 	/* DPDK reports in units of Mbps */
218 	if (rte_eth_link_get(port, &link) == 0 &&
219 	    link.link_status == RTE_ETH_LINK_UP)
220 		speed = link.link_speed * PCAPNG_MBPS_SPEED;
221 
222 	if (rte_eth_macaddr_get(port, &macaddr) < 0)
223 		ea = NULL;
224 	else
225 		ea = &macaddr;
226 
227 	/* Compute length of interface block options */
228 	len = sizeof(*hdr);
229 
230 	len += pcapng_optlen(sizeof(tsresol));	/* timestamp */
231 	len += pcapng_optlen(strlen(ifname));	/* ifname */
232 
233 	if (ea)
234 		len += pcapng_optlen(RTE_ETHER_ADDR_LEN); /* macaddr */
235 	if (speed != 0)
236 		len += pcapng_optlen(sizeof(uint64_t));
237 	if (dev)
238 		len += pcapng_optlen(strlen(ifhw));
239 
240 	len += pcapng_optlen(0);
241 	len += sizeof(uint32_t);
242 
243 	buf = alloca(len);
244 	if (!buf)
245 		return -1;
246 
247 	hdr = (struct pcapng_interface_block *)buf;
248 	*hdr = (struct pcapng_interface_block) {
249 		.block_type = PCAPNG_INTERFACE_BLOCK,
250 		.link_type = 1,		/* DLT_EN10MB - Ethernet */
251 		.block_length = len,
252 	};
253 
254 	opt = (struct pcapng_option *)(hdr + 1);
255 	opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL,
256 				&tsresol, sizeof(tsresol));
257 	opt = pcapng_add_option(opt, PCAPNG_IFB_NAME,
258 				ifname, strlen(ifname));
259 	if (ea)
260 		opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR,
261 					ea, RTE_ETHER_ADDR_LEN);
262 	if (speed != 0)
263 		opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED,
264 					 &speed, sizeof(uint64_t));
265 	if (dev)
266 		opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE,
267 					 ifhw, strlen(ifhw));
268 	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
269 
270 	/* clone block_length after optionsa */
271 	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
272 
273 	return write(self->outfd, buf, len);
274 }
275 
276 /*
277  * Write the list of possible interfaces at the start
278  * of the file.
279  */
280 static int
281 pcapng_interfaces(rte_pcapng_t *self)
282 {
283 	uint16_t port_id;
284 	uint16_t index = 0;
285 
286 	RTE_ETH_FOREACH_DEV(port_id) {
287 		/* The list if ports in pcapng needs to be contiguous */
288 		self->port_index[port_id] = index++;
289 		if (pcapng_add_interface(self, port_id) < 0)
290 			return -1;
291 	}
292 	return 0;
293 }
294 
295 /*
296  * Write an Interface statistics block at the end of capture.
297  */
298 ssize_t
299 rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
300 		       const char *comment,
301 		       uint64_t start_time, uint64_t end_time,
302 		       uint64_t ifrecv, uint64_t ifdrop)
303 {
304 	struct pcapng_statistics *hdr;
305 	struct pcapng_option *opt;
306 	uint32_t optlen, len;
307 	uint8_t *buf;
308 	uint64_t ns;
309 
310 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
311 
312 	optlen = 0;
313 
314 	if (ifrecv != UINT64_MAX)
315 		optlen += pcapng_optlen(sizeof(ifrecv));
316 	if (ifdrop != UINT64_MAX)
317 		optlen += pcapng_optlen(sizeof(ifdrop));
318 	if (start_time != 0)
319 		optlen += pcapng_optlen(sizeof(start_time));
320 	if (end_time != 0)
321 		optlen += pcapng_optlen(sizeof(end_time));
322 	if (comment)
323 		optlen += pcapng_optlen(strlen(comment));
324 	if (optlen != 0)
325 		optlen += pcapng_optlen(0);
326 
327 	len = sizeof(*hdr) + optlen + sizeof(uint32_t);
328 	buf = alloca(len);
329 	if (buf == NULL)
330 		return -1;
331 
332 	hdr = (struct pcapng_statistics *)buf;
333 	opt = (struct pcapng_option *)(hdr + 1);
334 
335 	if (comment)
336 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
337 					comment, strlen(comment));
338 	if (start_time != 0)
339 		opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME,
340 					 &start_time, sizeof(start_time));
341 	if (end_time != 0)
342 		opt = pcapng_add_option(opt, PCAPNG_ISB_ENDTIME,
343 					 &end_time, sizeof(end_time));
344 	if (ifrecv != UINT64_MAX)
345 		opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV,
346 				&ifrecv, sizeof(ifrecv));
347 	if (ifdrop != UINT64_MAX)
348 		opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP,
349 				&ifdrop, sizeof(ifdrop));
350 	if (optlen != 0)
351 		opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
352 
353 	hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK;
354 	hdr->block_length = len;
355 	hdr->interface_id = self->port_index[port_id];
356 
357 	ns = pcapng_tsc_to_ns(rte_get_tsc_cycles());
358 	hdr->timestamp_hi = ns >> 32;
359 	hdr->timestamp_lo = (uint32_t)ns;
360 
361 	/* clone block_length after option */
362 	memcpy(opt, &len, sizeof(uint32_t));
363 
364 	return write(self->outfd, buf, len);
365 }
366 
367 uint32_t
368 rte_pcapng_mbuf_size(uint32_t length)
369 {
370 	/* The VLAN and EPB header must fit in the mbuf headroom. */
371 	RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) +
372 		   sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM);
373 
374 	/* The flags and queue information are added at the end. */
375 	return sizeof(struct rte_mbuf)
376 		+ RTE_ALIGN(length, sizeof(uint32_t))
377 		+ pcapng_optlen(sizeof(uint32_t)) /* flag option */
378 		+ pcapng_optlen(sizeof(uint32_t)) /* queue option */
379 		+ sizeof(uint32_t);		  /*  length */
380 }
381 
382 /* More generalized version rte_vlan_insert() */
383 static int
384 pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci)
385 {
386 	struct rte_ether_hdr *nh, *oh;
387 	struct rte_vlan_hdr *vh;
388 
389 	if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1)
390 		return -EINVAL;
391 
392 	if (rte_pktmbuf_data_len(m) < sizeof(*oh))
393 		return -EINVAL;
394 
395 	oh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
396 	nh = (struct rte_ether_hdr *)
397 		rte_pktmbuf_prepend(m, sizeof(struct rte_vlan_hdr));
398 	if (nh == NULL)
399 		return -ENOSPC;
400 
401 	memmove(nh, oh, 2 * RTE_ETHER_ADDR_LEN);
402 	nh->ether_type = rte_cpu_to_be_16(ether_type);
403 
404 	vh = (struct rte_vlan_hdr *) (nh + 1);
405 	vh->vlan_tci = rte_cpu_to_be_16(tci);
406 
407 	return 0;
408 }
409 
410 /*
411  *   The mbufs created use the Pcapng standard enhanced packet  block.
412  *
413  *                         1                   2                   3
414  *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
415  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
416  *  0 |                    Block Type = 0x00000006                    |
417  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
418  *  4 |                      Block Total Length                       |
419  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
420  *  8 |                         Interface ID                          |
421  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
422  * 12 |                        Timestamp (High)                       |
423  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
424  * 16 |                        Timestamp (Low)                        |
425  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
426  * 20 |                    Captured Packet Length                     |
427  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
428  * 24 |                    Original Packet Length                     |
429  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
430  * 28 /                                                               /
431  *    /                          Packet Data                          /
432  *    /              variable length, padded to 32 bits               /
433  *    /                                                               /
434  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
435  *    |      Option Code = 0x0002     |     Option Length = 0x004     |
436  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
437  *    |              Flags (direction)                                |
438  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *    |      Option Code = 0x0006     |     Option Length = 0x004     |
440  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
441  *    |              Queue id                                         |
442  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
443  *    |                      Block Total Length                       |
444  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
445  */
446 
447 /* Make a copy of original mbuf with pcapng header and options */
448 struct rte_mbuf *
449 rte_pcapng_copy(uint16_t port_id, uint32_t queue,
450 		const struct rte_mbuf *md,
451 		struct rte_mempool *mp,
452 		uint32_t length, uint64_t cycles,
453 		enum rte_pcapng_direction direction)
454 {
455 	struct pcapng_enhance_packet_block *epb;
456 	uint32_t orig_len, data_len, padding, flags;
457 	struct pcapng_option *opt;
458 	uint16_t optlen;
459 	struct rte_mbuf *mc;
460 	uint64_t ns;
461 	bool rss_hash;
462 
463 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
464 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
465 #endif
466 	ns = pcapng_tsc_to_ns(cycles);
467 
468 	orig_len = rte_pktmbuf_pkt_len(md);
469 
470 	/* Take snapshot of the data */
471 	mc = rte_pktmbuf_copy(md, mp, 0, length);
472 	if (unlikely(mc == NULL))
473 		return NULL;
474 
475 	/* Expand any offloaded VLAN information */
476 	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
477 	     (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) ||
478 	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
479 	     (md->ol_flags & RTE_MBUF_F_TX_VLAN))) {
480 		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
481 				       md->vlan_tci) != 0)
482 			goto fail;
483 	}
484 
485 	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
486 	     (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) ||
487 	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
488 	     (md->ol_flags & RTE_MBUF_F_TX_QINQ))) {
489 		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
490 				       md->vlan_tci_outer) != 0)
491 			goto fail;
492 	}
493 
494 	/* record HASH on incoming packets */
495 	rss_hash = (direction == RTE_PCAPNG_DIRECTION_IN &&
496 		    (md->ol_flags & RTE_MBUF_F_RX_RSS_HASH));
497 
498 	/* pad the packet to 32 bit boundary */
499 	data_len = rte_pktmbuf_data_len(mc);
500 	padding = RTE_ALIGN(data_len, sizeof(uint32_t)) - data_len;
501 	if (padding > 0) {
502 		void *tail = rte_pktmbuf_append(mc, padding);
503 
504 		if (tail == NULL)
505 			goto fail;
506 		memset(tail, 0, padding);
507 	}
508 
509 	optlen = pcapng_optlen(sizeof(flags));
510 	optlen += pcapng_optlen(sizeof(queue));
511 	if (rss_hash)
512 		optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t));
513 
514 	/* reserve trailing options and block length */
515 	opt = (struct pcapng_option *)
516 		rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
517 	if (unlikely(opt == NULL))
518 		goto fail;
519 
520 	switch (direction) {
521 	case RTE_PCAPNG_DIRECTION_IN:
522 		flags = PCAPNG_IFB_INBOUND;
523 		break;
524 	case RTE_PCAPNG_DIRECTION_OUT:
525 		flags = PCAPNG_IFB_OUTBOUND;
526 		break;
527 	default:
528 		flags = 0;
529 	}
530 
531 	opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS,
532 				&flags, sizeof(flags));
533 
534 	opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE,
535 				&queue, sizeof(queue));
536 
537 	if (rss_hash) {
538 		uint8_t hash_opt[5];
539 
540 		/* The algorithm could be something else if
541 		 * using rte_flow_action_rss; but the current API does not
542 		 * have a way for ethdev to report  this on a per-packet basis.
543 		 */
544 		hash_opt[0] = PCAPNG_HASH_TOEPLITZ;
545 
546 		memcpy(&hash_opt[1], &md->hash.rss, sizeof(uint32_t));
547 		opt = pcapng_add_option(opt, PCAPNG_EPB_HASH,
548 					&hash_opt, sizeof(hash_opt));
549 	}
550 
551 	/* Note: END_OPT necessary here. Wireshark doesn't do it. */
552 
553 	/* Add PCAPNG packet header */
554 	epb = (struct pcapng_enhance_packet_block *)
555 		rte_pktmbuf_prepend(mc, sizeof(*epb));
556 	if (unlikely(epb == NULL))
557 		goto fail;
558 
559 	epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
560 	epb->block_length = rte_pktmbuf_data_len(mc);
561 
562 	/* Interface index is filled in later during write */
563 	mc->port = port_id;
564 
565 	epb->timestamp_hi = ns >> 32;
566 	epb->timestamp_lo = (uint32_t)ns;
567 	epb->capture_length = data_len;
568 	epb->original_length = orig_len;
569 
570 	/* set trailer of block length */
571 	*(uint32_t *)opt = epb->block_length;
572 
573 	return mc;
574 
575 fail:
576 	rte_pktmbuf_free(mc);
577 	return NULL;
578 }
579 
580 /* Write pre-formatted packets to file. */
581 ssize_t
582 rte_pcapng_write_packets(rte_pcapng_t *self,
583 			 struct rte_mbuf *pkts[], uint16_t nb_pkts)
584 {
585 	struct iovec iov[IOV_MAX];
586 	unsigned int i, cnt = 0;
587 	ssize_t ret, total = 0;
588 
589 	for (i = 0; i < nb_pkts; i++) {
590 		struct rte_mbuf *m = pkts[i];
591 		struct pcapng_enhance_packet_block *epb;
592 
593 		/* sanity check that is really a pcapng mbuf */
594 		epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *);
595 		if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK ||
596 			     epb->block_length != rte_pktmbuf_data_len(m))) {
597 			rte_errno = EINVAL;
598 			return -1;
599 		}
600 
601 		/*
602 		 * Handle case of highly fragmented and large burst size
603 		 * Note: this assumes that max segments per mbuf < IOV_MAX
604 		 */
605 		if (unlikely(cnt + m->nb_segs >= IOV_MAX)) {
606 			ret = writev(self->outfd, iov, cnt);
607 			if (unlikely(ret < 0)) {
608 				rte_errno = errno;
609 				return -1;
610 			}
611 			total += ret;
612 			cnt = 0;
613 		}
614 
615 		/*
616 		 * The DPDK port is recorded during pcapng_copy.
617 		 * Map that to PCAPNG interface in file.
618 		 */
619 		epb->interface_id = self->port_index[m->port];
620 		do {
621 			iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *);
622 			iov[cnt].iov_len = rte_pktmbuf_data_len(m);
623 			++cnt;
624 		} while ((m = m->next));
625 	}
626 
627 	ret = writev(self->outfd, iov, cnt);
628 	if (unlikely(ret < 0)) {
629 		rte_errno = errno;
630 		return -1;
631 	}
632 	return total + ret;
633 }
634 
635 /* Create new pcapng writer handle */
636 rte_pcapng_t *
637 rte_pcapng_fdopen(int fd,
638 		  const char *osname, const char *hardware,
639 		  const char *appname, const char *comment)
640 {
641 	rte_pcapng_t *self;
642 
643 	self = malloc(sizeof(*self));
644 	if (!self) {
645 		rte_errno = ENOMEM;
646 		return NULL;
647 	}
648 
649 	self->outfd = fd;
650 
651 	if (pcapng_section_block(self, osname, hardware, appname, comment) < 0)
652 		goto fail;
653 
654 	if (pcapng_interfaces(self) < 0)
655 		goto fail;
656 
657 	return self;
658 fail:
659 	free(self);
660 	return NULL;
661 }
662 
663 void
664 rte_pcapng_close(rte_pcapng_t *self)
665 {
666 	close(self->outfd);
667 	free(self);
668 }
669