xref: /dpdk/lib/pcapng/rte_pcapng.c (revision 33b84a2efca7ac188def108ba8b981daa7572b9a)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019 Microsoft Corporation
3  */
4 
5 #include <errno.h>
6 #include <net/if.h>
7 #include <stdbool.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <sys/uio.h>
12 #include <time.h>
13 #include <unistd.h>
14 
15 #include <bus_driver.h>
16 #include <rte_common.h>
17 #include <rte_cycles.h>
18 #include <dev_driver.h>
19 #include <rte_errno.h>
20 #include <rte_ethdev.h>
21 #include <rte_ether.h>
22 #include <rte_mbuf.h>
23 #include <rte_pcapng.h>
24 #include <rte_reciprocal.h>
25 #include <rte_time.h>
26 
27 #include "pcapng_proto.h"
28 
/* Multiplier applied to a DPDK link speed, which is reported in Mbps */
#define PCAPNG_MBPS_SPEED 1000000ULL
31 
32 /* Format of the capture file handle */
33 struct rte_pcapng {
34 	int  outfd;		/* output file */
35 
36 	unsigned int ports;	/* number of interfaces added */
37 
38 	/* DPDK port id to interface index in file */
39 	uint32_t port_index[RTE_MAX_ETHPORTS];
40 };
41 
42 /* For converting TSC cycles to PCAPNG ns format */
43 static struct pcapng_time {
44 	uint64_t ns;
45 	uint64_t cycles;
46 	uint64_t tsc_hz;
47 	struct rte_reciprocal_u64 tsc_hz_inverse;
48 } pcapng_time;
49 
50 static inline void
51 pcapng_init(void)
52 {
53 	struct timespec ts;
54 
55 	pcapng_time.cycles = rte_get_tsc_cycles();
56 	clock_gettime(CLOCK_REALTIME, &ts);
57 	pcapng_time.cycles = (pcapng_time.cycles + rte_get_tsc_cycles()) / 2;
58 	pcapng_time.ns = rte_timespec_to_ns(&ts);
59 
60 	pcapng_time.tsc_hz = rte_get_tsc_hz();
61 	pcapng_time.tsc_hz_inverse = rte_reciprocal_value_u64(pcapng_time.tsc_hz);
62 }
63 
64 /* PCAPNG timestamps are in nanoseconds */
65 static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
66 {
67 	uint64_t delta, secs;
68 
69 	if (!pcapng_time.tsc_hz)
70 		pcapng_init();
71 
72 	/* In essence the calculation is:
73 	 *   delta = (cycles - pcapng_time.cycles) * NSEC_PRE_SEC / rte_get_tsc_hz()
74 	 * but this overflows within 4 to 8 seconds depending on TSC frequency.
75 	 * Instead, if delta >= pcapng_time.tsc_hz:
76 	 *   Increase pcapng_time.ns and pcapng_time.cycles by the number of
77 	 *   whole seconds in delta and reduce delta accordingly.
78 	 * delta will therefore always lie in the interval [0, pcapng_time.tsc_hz),
79 	 * which will not overflow when multiplied by NSEC_PER_SEC provided the
80 	 * TSC frequency < approx 18.4GHz.
81 	 *
82 	 * Currently all TSCs operate below 5GHz.
83 	 */
84 	delta = cycles - pcapng_time.cycles;
85 	if (unlikely(delta >= pcapng_time.tsc_hz)) {
86 		if (likely(delta < pcapng_time.tsc_hz * 2)) {
87 			delta -= pcapng_time.tsc_hz;
88 			pcapng_time.cycles += pcapng_time.tsc_hz;
89 			pcapng_time.ns += NSEC_PER_SEC;
90 		} else {
91 			secs = rte_reciprocal_divide_u64(delta, &pcapng_time.tsc_hz_inverse);
92 			delta -= secs * pcapng_time.tsc_hz;
93 			pcapng_time.cycles += secs * pcapng_time.tsc_hz;
94 			pcapng_time.ns += secs * NSEC_PER_SEC;
95 		}
96 	}
97 
98 	return pcapng_time.ns + rte_reciprocal_divide_u64(delta * NSEC_PER_SEC,
99 							  &pcapng_time.tsc_hz_inverse);
100 }
101 
102 /* length of option including padding */
103 static uint16_t pcapng_optlen(uint16_t len)
104 {
105 	return RTE_ALIGN(sizeof(struct pcapng_option) + len,
106 			 sizeof(uint32_t));
107 }
108 
109 /* build TLV option and return location of next */
110 static struct pcapng_option *
111 pcapng_add_option(struct pcapng_option *popt, uint16_t code,
112 		  const void *data, uint16_t len)
113 {
114 	popt->code = code;
115 	popt->length = len;
116 	memcpy(popt->data, data, len);
117 
118 	return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len));
119 }
120 
121 /*
122  * Write required initial section header describing the capture
123  */
124 static int
125 pcapng_section_block(rte_pcapng_t *self,
126 		    const char *os, const char *hw,
127 		    const char *app, const char *comment)
128 {
129 	struct pcapng_section_header *hdr;
130 	struct pcapng_option *opt;
131 	void *buf;
132 	uint32_t len;
133 	ssize_t cc;
134 
135 	len = sizeof(*hdr);
136 	if (hw)
137 		len += pcapng_optlen(strlen(hw));
138 	if (os)
139 		len += pcapng_optlen(strlen(os));
140 	if (app)
141 		len += pcapng_optlen(strlen(app));
142 	if (comment)
143 		len += pcapng_optlen(strlen(comment));
144 
145 	/* reserve space for OPT_END */
146 	len += pcapng_optlen(0);
147 	len += sizeof(uint32_t);
148 
149 	buf = calloc(1, len);
150 	if (!buf)
151 		return -1;
152 
153 	hdr = (struct pcapng_section_header *)buf;
154 	*hdr = (struct pcapng_section_header) {
155 		.block_type = PCAPNG_SECTION_BLOCK,
156 		.block_length = len,
157 		.byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC,
158 		.major_version = PCAPNG_MAJOR_VERS,
159 		.minor_version = PCAPNG_MINOR_VERS,
160 		.section_length = UINT64_MAX,
161 	};
162 
163 	/* After the section header insert variable length options. */
164 	opt = (struct pcapng_option *)(hdr + 1);
165 	if (comment)
166 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
167 					comment, strlen(comment));
168 	if (hw)
169 		opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE,
170 					hw, strlen(hw));
171 	if (os)
172 		opt = pcapng_add_option(opt, PCAPNG_SHB_OS,
173 					os, strlen(os));
174 	if (app)
175 		opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL,
176 					app, strlen(app));
177 
178 	/* The standard requires last option to be OPT_END */
179 	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
180 
181 	/* clone block_length after option */
182 	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
183 
184 	cc = write(self->outfd, buf, len);
185 	free(buf);
186 
187 	return cc;
188 }
189 
190 /* Write an interface block for a DPDK port */
191 int
192 rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port,
193 			 const char *ifname, const char *ifdescr,
194 			 const char *filter)
195 {
196 	struct pcapng_interface_block *hdr;
197 	struct rte_eth_dev_info dev_info;
198 	struct rte_ether_addr *ea, macaddr;
199 	const struct rte_device *dev;
200 	struct rte_eth_link link;
201 	struct pcapng_option *opt;
202 	const uint8_t tsresol = 9;	/* nanosecond resolution */
203 	uint32_t len;
204 	void *buf;
205 	char ifname_buf[IF_NAMESIZE];
206 	char ifhw[256];
207 	uint64_t speed = 0;
208 
209 	if (rte_eth_dev_info_get(port, &dev_info) < 0)
210 		return -1;
211 
212 	/* make something like an interface name */
213 	if (ifname == NULL) {
214 		/* Use kernel name if available */
215 		ifname = if_indextoname(dev_info.if_index, ifname_buf);
216 		if (ifname == NULL) {
217 			snprintf(ifname_buf, IF_NAMESIZE, "dpdk:%u", port);
218 			ifname = ifname_buf;
219 		}
220 	}
221 
222 	/* make a useful device hardware string */
223 	dev = dev_info.device;
224 	if (dev)
225 		snprintf(ifhw, sizeof(ifhw),
226 			 "%s-%s", dev->bus->name, dev->name);
227 
228 	/* DPDK reports in units of Mbps */
229 	if (rte_eth_link_get(port, &link) == 0 &&
230 	    link.link_status == RTE_ETH_LINK_UP)
231 		speed = link.link_speed * PCAPNG_MBPS_SPEED;
232 
233 	if (rte_eth_macaddr_get(port, &macaddr) < 0)
234 		ea = NULL;
235 	else
236 		ea = &macaddr;
237 
238 	/* Compute length of interface block options */
239 	len = sizeof(*hdr);
240 
241 	len += pcapng_optlen(sizeof(tsresol));	/* timestamp */
242 	len += pcapng_optlen(strlen(ifname));	/* ifname */
243 
244 	if (ifdescr)
245 		len += pcapng_optlen(strlen(ifdescr));
246 	if (ea)
247 		len += pcapng_optlen(RTE_ETHER_ADDR_LEN); /* macaddr */
248 	if (speed != 0)
249 		len += pcapng_optlen(sizeof(uint64_t));
250 	if (filter)
251 		len += pcapng_optlen(strlen(filter) + 1);
252 	if (dev)
253 		len += pcapng_optlen(strlen(ifhw));
254 
255 	len += pcapng_optlen(0);
256 	len += sizeof(uint32_t);
257 
258 	buf = alloca(len);
259 	if (!buf)
260 		return -1;
261 
262 	hdr = (struct pcapng_interface_block *)buf;
263 	*hdr = (struct pcapng_interface_block) {
264 		.block_type = PCAPNG_INTERFACE_BLOCK,
265 		.link_type = 1,		/* DLT_EN10MB - Ethernet */
266 		.block_length = len,
267 	};
268 
269 	opt = (struct pcapng_option *)(hdr + 1);
270 	opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL,
271 				&tsresol, sizeof(tsresol));
272 	opt = pcapng_add_option(opt, PCAPNG_IFB_NAME,
273 				ifname, strlen(ifname));
274 	if (ifdescr)
275 		opt = pcapng_add_option(opt, PCAPNG_IFB_DESCRIPTION,
276 					ifdescr, strlen(ifdescr));
277 	if (ea)
278 		opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR,
279 					ea, RTE_ETHER_ADDR_LEN);
280 	if (speed != 0)
281 		opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED,
282 					 &speed, sizeof(uint64_t));
283 	if (dev)
284 		opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE,
285 					 ifhw, strlen(ifhw));
286 	if (filter) {
287 		/* Encoding is that the first octet indicates string vs BPF */
288 		size_t len;
289 		char *buf;
290 
291 		len = strlen(filter) + 1;
292 		buf = alloca(len);
293 		*buf = '\0';
294 		memcpy(buf + 1, filter, len);
295 
296 		opt = pcapng_add_option(opt, PCAPNG_IFB_FILTER,
297 					buf, len);
298 	}
299 
300 	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
301 
302 	/* clone block_length after optionsa */
303 	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
304 
305 	/* remember the file index */
306 	self->port_index[port] = self->ports++;
307 
308 	return write(self->outfd, buf, len);
309 }
310 
311 /*
312  * Write an Interface statistics block at the end of capture.
313  */
314 ssize_t
315 rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
316 		       const char *comment,
317 		       uint64_t start_time, uint64_t end_time,
318 		       uint64_t ifrecv, uint64_t ifdrop)
319 {
320 	struct pcapng_statistics *hdr;
321 	struct pcapng_option *opt;
322 	uint32_t optlen, len;
323 	uint8_t *buf;
324 	uint64_t ns;
325 
326 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
327 
328 	optlen = 0;
329 
330 	if (ifrecv != UINT64_MAX)
331 		optlen += pcapng_optlen(sizeof(ifrecv));
332 	if (ifdrop != UINT64_MAX)
333 		optlen += pcapng_optlen(sizeof(ifdrop));
334 	if (start_time != 0)
335 		optlen += pcapng_optlen(sizeof(start_time));
336 	if (end_time != 0)
337 		optlen += pcapng_optlen(sizeof(end_time));
338 	if (comment)
339 		optlen += pcapng_optlen(strlen(comment));
340 	if (optlen != 0)
341 		optlen += pcapng_optlen(0);
342 
343 	len = sizeof(*hdr) + optlen + sizeof(uint32_t);
344 	buf = alloca(len);
345 	if (buf == NULL)
346 		return -1;
347 
348 	hdr = (struct pcapng_statistics *)buf;
349 	opt = (struct pcapng_option *)(hdr + 1);
350 
351 	if (comment)
352 		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
353 					comment, strlen(comment));
354 	if (start_time != 0)
355 		opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME,
356 					 &start_time, sizeof(start_time));
357 	if (end_time != 0)
358 		opt = pcapng_add_option(opt, PCAPNG_ISB_ENDTIME,
359 					 &end_time, sizeof(end_time));
360 	if (ifrecv != UINT64_MAX)
361 		opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV,
362 				&ifrecv, sizeof(ifrecv));
363 	if (ifdrop != UINT64_MAX)
364 		opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP,
365 				&ifdrop, sizeof(ifdrop));
366 	if (optlen != 0)
367 		opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
368 
369 	hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK;
370 	hdr->block_length = len;
371 	hdr->interface_id = self->port_index[port_id];
372 
373 	ns = pcapng_tsc_to_ns(rte_get_tsc_cycles());
374 	hdr->timestamp_hi = ns >> 32;
375 	hdr->timestamp_lo = (uint32_t)ns;
376 
377 	/* clone block_length after option */
378 	memcpy(opt, &len, sizeof(uint32_t));
379 
380 	return write(self->outfd, buf, len);
381 }
382 
383 uint32_t
384 rte_pcapng_mbuf_size(uint32_t length)
385 {
386 	/* The VLAN and EPB header must fit in the mbuf headroom. */
387 	RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) +
388 		   sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM);
389 
390 	/* The flags and queue information are added at the end. */
391 	return sizeof(struct rte_mbuf)
392 		+ RTE_ALIGN(length, sizeof(uint32_t))
393 		+ pcapng_optlen(sizeof(uint32_t)) /* flag option */
394 		+ pcapng_optlen(sizeof(uint32_t)) /* queue option */
395 		+ sizeof(uint32_t);		  /*  length */
396 }
397 
398 /* More generalized version rte_vlan_insert() */
399 static int
400 pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci)
401 {
402 	struct rte_ether_hdr *nh, *oh;
403 	struct rte_vlan_hdr *vh;
404 
405 	if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1)
406 		return -EINVAL;
407 
408 	if (rte_pktmbuf_data_len(m) < sizeof(*oh))
409 		return -EINVAL;
410 
411 	oh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
412 	nh = (struct rte_ether_hdr *)
413 		rte_pktmbuf_prepend(m, sizeof(struct rte_vlan_hdr));
414 	if (nh == NULL)
415 		return -ENOSPC;
416 
417 	memmove(nh, oh, 2 * RTE_ETHER_ADDR_LEN);
418 	nh->ether_type = rte_cpu_to_be_16(ether_type);
419 
420 	vh = (struct rte_vlan_hdr *) (nh + 1);
421 	vh->vlan_tci = rte_cpu_to_be_16(tci);
422 
423 	return 0;
424 }
425 
/*
 *   The mbufs created use the Pcapng standard enhanced packet block.
 *
 *                         1                   2                   3
 *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  0 |                    Block Type = 0x00000006                    |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  4 |                      Block Total Length                       |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  8 |                         Interface ID                          |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * 12 |                        Timestamp (High)                       |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * 16 |                        Timestamp (Low)                        |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * 20 |                    Captured Packet Length                     |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * 24 |                    Original Packet Length                     |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * 28 /                                                               /
 *    /                          Packet Data                          /
 *    /              variable length, padded to 32 bits               /
 *    /                                                               /
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *    |      Option Code = 0x0002     |     Option Length = 0x004     |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *    |              Flags (direction)                                |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *    |      Option Code = 0x0006     |     Option Length = 0x004     |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *    |              Queue id                                         |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *    |                      Block Total Length                       |
 *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 */
461  */
462 
463 /* Make a copy of original mbuf with pcapng header and options */
464 struct rte_mbuf *
465 rte_pcapng_copy(uint16_t port_id, uint32_t queue,
466 		const struct rte_mbuf *md,
467 		struct rte_mempool *mp,
468 		uint32_t length, uint64_t cycles,
469 		enum rte_pcapng_direction direction)
470 {
471 	struct pcapng_enhance_packet_block *epb;
472 	uint32_t orig_len, data_len, padding, flags;
473 	struct pcapng_option *opt;
474 	uint16_t optlen;
475 	struct rte_mbuf *mc;
476 	uint64_t ns;
477 	bool rss_hash;
478 
479 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
480 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
481 #endif
482 	ns = pcapng_tsc_to_ns(cycles);
483 
484 	orig_len = rte_pktmbuf_pkt_len(md);
485 
486 	/* Take snapshot of the data */
487 	mc = rte_pktmbuf_copy(md, mp, 0, length);
488 	if (unlikely(mc == NULL))
489 		return NULL;
490 
491 	/* Expand any offloaded VLAN information */
492 	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
493 	     (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) ||
494 	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
495 	     (md->ol_flags & RTE_MBUF_F_TX_VLAN))) {
496 		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
497 				       md->vlan_tci) != 0)
498 			goto fail;
499 	}
500 
501 	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
502 	     (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) ||
503 	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
504 	     (md->ol_flags & RTE_MBUF_F_TX_QINQ))) {
505 		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
506 				       md->vlan_tci_outer) != 0)
507 			goto fail;
508 	}
509 
510 	/* record HASH on incoming packets */
511 	rss_hash = (direction == RTE_PCAPNG_DIRECTION_IN &&
512 		    (md->ol_flags & RTE_MBUF_F_RX_RSS_HASH));
513 
514 	/* pad the packet to 32 bit boundary */
515 	data_len = rte_pktmbuf_data_len(mc);
516 	padding = RTE_ALIGN(data_len, sizeof(uint32_t)) - data_len;
517 	if (padding > 0) {
518 		void *tail = rte_pktmbuf_append(mc, padding);
519 
520 		if (tail == NULL)
521 			goto fail;
522 		memset(tail, 0, padding);
523 	}
524 
525 	optlen = pcapng_optlen(sizeof(flags));
526 	optlen += pcapng_optlen(sizeof(queue));
527 	if (rss_hash)
528 		optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t));
529 
530 	/* reserve trailing options and block length */
531 	opt = (struct pcapng_option *)
532 		rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
533 	if (unlikely(opt == NULL))
534 		goto fail;
535 
536 	switch (direction) {
537 	case RTE_PCAPNG_DIRECTION_IN:
538 		flags = PCAPNG_IFB_INBOUND;
539 		break;
540 	case RTE_PCAPNG_DIRECTION_OUT:
541 		flags = PCAPNG_IFB_OUTBOUND;
542 		break;
543 	default:
544 		flags = 0;
545 	}
546 
547 	opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS,
548 				&flags, sizeof(flags));
549 
550 	opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE,
551 				&queue, sizeof(queue));
552 
553 	if (rss_hash) {
554 		uint8_t hash_opt[5];
555 
556 		/* The algorithm could be something else if
557 		 * using rte_flow_action_rss; but the current API does not
558 		 * have a way for ethdev to report  this on a per-packet basis.
559 		 */
560 		hash_opt[0] = PCAPNG_HASH_TOEPLITZ;
561 
562 		memcpy(&hash_opt[1], &md->hash.rss, sizeof(uint32_t));
563 		opt = pcapng_add_option(opt, PCAPNG_EPB_HASH,
564 					&hash_opt, sizeof(hash_opt));
565 	}
566 
567 	/* Note: END_OPT necessary here. Wireshark doesn't do it. */
568 
569 	/* Add PCAPNG packet header */
570 	epb = (struct pcapng_enhance_packet_block *)
571 		rte_pktmbuf_prepend(mc, sizeof(*epb));
572 	if (unlikely(epb == NULL))
573 		goto fail;
574 
575 	epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
576 	epb->block_length = rte_pktmbuf_data_len(mc);
577 
578 	/* Interface index is filled in later during write */
579 	mc->port = port_id;
580 
581 	epb->timestamp_hi = ns >> 32;
582 	epb->timestamp_lo = (uint32_t)ns;
583 	epb->capture_length = data_len;
584 	epb->original_length = orig_len;
585 
586 	/* set trailer of block length */
587 	*(uint32_t *)opt = epb->block_length;
588 
589 	return mc;
590 
591 fail:
592 	rte_pktmbuf_free(mc);
593 	return NULL;
594 }
595 
596 /* Write pre-formatted packets to file. */
597 ssize_t
598 rte_pcapng_write_packets(rte_pcapng_t *self,
599 			 struct rte_mbuf *pkts[], uint16_t nb_pkts)
600 {
601 	struct iovec iov[IOV_MAX];
602 	unsigned int i, cnt = 0;
603 	ssize_t ret, total = 0;
604 
605 	for (i = 0; i < nb_pkts; i++) {
606 		struct rte_mbuf *m = pkts[i];
607 		struct pcapng_enhance_packet_block *epb;
608 
609 		/* sanity check that is really a pcapng mbuf */
610 		epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *);
611 		if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK ||
612 			     epb->block_length != rte_pktmbuf_data_len(m))) {
613 			rte_errno = EINVAL;
614 			return -1;
615 		}
616 
617 		/* check that this interface was added. */
618 		epb->interface_id = self->port_index[m->port];
619 		if (unlikely(epb->interface_id > RTE_MAX_ETHPORTS)) {
620 			rte_errno = EINVAL;
621 			return -1;
622 		}
623 
624 		/*
625 		 * Handle case of highly fragmented and large burst size
626 		 * Note: this assumes that max segments per mbuf < IOV_MAX
627 		 */
628 		if (unlikely(cnt + m->nb_segs >= IOV_MAX)) {
629 			ret = writev(self->outfd, iov, cnt);
630 			if (unlikely(ret < 0)) {
631 				rte_errno = errno;
632 				return -1;
633 			}
634 			total += ret;
635 			cnt = 0;
636 		}
637 
638 		/*
639 		 * The DPDK port is recorded during pcapng_copy.
640 		 * Map that to PCAPNG interface in file.
641 		 */
642 		do {
643 			iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *);
644 			iov[cnt].iov_len = rte_pktmbuf_data_len(m);
645 			++cnt;
646 		} while ((m = m->next));
647 	}
648 
649 	ret = writev(self->outfd, iov, cnt);
650 	if (unlikely(ret < 0)) {
651 		rte_errno = errno;
652 		return -1;
653 	}
654 	return total + ret;
655 }
656 
657 /* Create new pcapng writer handle */
658 rte_pcapng_t *
659 rte_pcapng_fdopen(int fd,
660 		  const char *osname, const char *hardware,
661 		  const char *appname, const char *comment)
662 {
663 	unsigned int i;
664 	rte_pcapng_t *self;
665 
666 	self = malloc(sizeof(*self));
667 	if (!self) {
668 		rte_errno = ENOMEM;
669 		return NULL;
670 	}
671 
672 	self->outfd = fd;
673 	self->ports = 0;
674 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
675 		self->port_index[i] = UINT32_MAX;
676 
677 	if (pcapng_section_block(self, osname, hardware, appname, comment) < 0)
678 		goto fail;
679 
680 	return self;
681 fail:
682 	free(self);
683 	return NULL;
684 }
685 
686 void
687 rte_pcapng_close(rte_pcapng_t *self)
688 {
689 	close(self->outfd);
690 	free(self);
691 }
692