xref: /dpdk/app/test-pmd/icmpecho.c (revision 200bc52e5aa0d72e70464c9cd22b55cf536ed13c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2013 6WIND S.A.
3  */
4 
5 #include <stdarg.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <stdint.h>
10 #include <unistd.h>
11 #include <inttypes.h>
12 
13 #include <sys/queue.h>
14 #include <sys/stat.h>
15 
16 #include <rte_common.h>
17 #include <rte_byteorder.h>
18 #include <rte_log.h>
19 #include <rte_debug.h>
20 #include <rte_cycles.h>
21 #include <rte_per_lcore.h>
22 #include <rte_lcore.h>
23 #include <rte_atomic.h>
24 #include <rte_branch_prediction.h>
25 #include <rte_memory.h>
26 #include <rte_mempool.h>
27 #include <rte_mbuf.h>
28 #include <rte_ether.h>
29 #include <rte_ethdev.h>
30 #include <rte_arp.h>
31 #include <rte_ip.h>
32 #include <rte_icmp.h>
33 #include <rte_string_fns.h>
34 #include <rte_flow.h>
35 
36 #include "testpmd.h"
37 
38 static const char *
39 arp_op_name(uint16_t arp_op)
40 {
41 	switch (arp_op) {
42 	case RTE_ARP_OP_REQUEST:
43 		return "ARP Request";
44 	case RTE_ARP_OP_REPLY:
45 		return "ARP Reply";
46 	case RTE_ARP_OP_REVREQUEST:
47 		return "Reverse ARP Request";
48 	case RTE_ARP_OP_REVREPLY:
49 		return "Reverse ARP Reply";
50 	case RTE_ARP_OP_INVREQUEST:
51 		return "Peer Identify Request";
52 	case RTE_ARP_OP_INVREPLY:
53 		return "Peer Identify Reply";
54 	default:
55 		break;
56 	}
57 	return "Unkwown ARP op";
58 }
59 
/*
 * Map an IP protocol number to a printable name.
 *
 * Protocol numbers covered by the table below are looked up directly
 * (the array index is the protocol number). A few larger numbers are
 * handled by the switch that follows. Anything else is reported as
 * "UNASSIGNED".
 *
 * Returns a pointer to a string literal.
 */
static const char *
ip_proto_name(uint16_t ip_proto)
{
	/* Indexed by IP protocol number: five entries per group. */
	static const char * ip_proto_names[] = {
		"IP6HOPOPTS", /**< IP6 hop-by-hop options */
		"ICMP",       /**< control message protocol */
		"IGMP",       /**< group mgmt protocol */
		"GGP",        /**< gateway^2 (deprecated) */
		"IPv4",       /**< IPv4 encapsulation */

		"UNASSIGNED",
		"TCP",        /**< transport control protocol */
		"ST",         /**< Stream protocol II */
		"EGP",        /**< exterior gateway protocol */
		"PIGP",       /**< private interior gateway */

		"RCC_MON",    /**< BBN RCC Monitoring */
		"NVPII",      /**< network voice protocol*/
		"PUP",        /**< pup */
		"ARGUS",      /**< Argus */
		"EMCON",      /**< EMCON */

		"XNET",       /**< Cross Net Debugger */
		"CHAOS",      /**< Chaos*/
		"UDP",        /**< user datagram protocol */
		"MUX",        /**< Multiplexing */
		"DCN_MEAS",   /**< DCN Measurement Subsystems */

		"HMP",        /**< Host Monitoring */
		"PRM",        /**< Packet Radio Measurement */
		"XNS_IDP",    /**< xns idp */
		"TRUNK1",     /**< Trunk-1 */
		"TRUNK2",     /**< Trunk-2 */

		"LEAF1",      /**< Leaf-1 */
		"LEAF2",      /**< Leaf-2 */
		"RDP",        /**< Reliable Data */
		"IRTP",       /**< Reliable Transaction */
		"TP4",        /**< tp-4 w/ class negotiation */

		"BLT",        /**< Bulk Data Transfer */
		"NSP",        /**< Network Services */
		"INP",        /**< Merit Internodal */
		"SEP",        /**< Sequential Exchange */
		"3PC",        /**< Third Party Connect */

		"IDPR",       /**< InterDomain Policy Routing */
		"XTP",        /**< XTP */
		"DDP",        /**< Datagram Delivery */
		"CMTP",       /**< Control Message Transport */
		"TPXX",       /**< TP++ Transport */

		"ILTP",       /**< IL transport protocol */
		"IPv6_HDR",   /**< IP6 header */
		"SDRP",       /**< Source Demand Routing */
		"IPv6_RTG",   /**< IP6 routing header */
		"IPv6_FRAG",  /**< IP6 fragmentation header */

		"IDRP",       /**< InterDomain Routing*/
		"RSVP",       /**< resource reservation */
		"GRE",        /**< General Routing Encap. */
		"MHRP",       /**< Mobile Host Routing */
		"BHA",        /**< BHA */

		"ESP",        /**< IP6 Encap Sec. Payload */
		"AH",         /**< IP6 Auth Header */
		"INLSP",      /**< Integ. Net Layer Security */
		"SWIPE",      /**< IP with encryption */
		"NHRP",       /**< Next Hop Resolution */

		"UNASSIGNED",
		"UNASSIGNED",
		"UNASSIGNED",
		"ICMPv6",     /**< ICMP6 */
		"IPv6NONEXT", /**< IP6 no next header */

		"Ipv6DSTOPTS",/**< IP6 destination option */
		"AHIP",       /**< any host internal protocol */
		"CFTP",       /**< CFTP */
		"HELLO",      /**< "hello" routing protocol */
		"SATEXPAK",   /**< SATNET/Backroom EXPAK */

		"KRYPTOLAN",  /**< Kryptolan */
		"RVD",        /**< Remote Virtual Disk */
		"IPPC",       /**< Pluribus Packet Core */
		"ADFS",       /**< Any distributed FS */
		"SATMON",     /**< Satnet Monitoring */

		"VISA",       /**< VISA Protocol */
		"IPCV",       /**< Packet Core Utility */
		"CPNX",       /**< Comp. Prot. Net. Executive */
		"CPHB",       /**< Comp. Prot. HeartBeat */
		"WSN",        /**< Wang Span Network */

		"PVP",        /**< Packet Video Protocol */
		"BRSATMON",   /**< BackRoom SATNET Monitoring */
		"ND",         /**< Sun net disk proto (temp.) */
		"WBMON",      /**< WIDEBAND Monitoring */
		"WBEXPAK",    /**< WIDEBAND EXPAK */

		"EON",        /**< ISO cnlp */
		"VMTP",       /**< VMTP */
		"SVMTP",      /**< Secure VMTP */
		"VINES",      /**< Banyon VINES */
		"TTP",        /**< TTP */

		"IGP",        /**< NSFNET-IGP */
		"DGP",        /**< dissimilar gateway prot. */
		"TCF",        /**< TCF */
		"IGRP",       /**< Cisco/GXS IGRP */
		"OSPFIGP",    /**< OSPFIGP */

		"SRPC",       /**< Strite RPC protocol */
		"LARP",       /**< Locus Address Resolution */
		"MTP",        /**< Multicast Transport */
		"AX25",       /**< AX.25 Frames */
		"4IN4",       /**< IP encapsulated in IP */

		"MICP",       /**< Mobile Int.ing control */
		"SCCSP",      /**< Semaphore Comm. security */
		"ETHERIP",    /**< Ethernet IP encapsulation */
		"ENCAP",      /**< encapsulation header */
		"AES",        /**< any private encr. scheme */

		/*
		 * Entries 100..108: PIM is protocol 103 and IPComp is
		 * protocol 108, so pad with "UNASSIGNED" to keep the index
		 * equal to the protocol number.
		 */
		"GMTP",       /**< GMTP */
		"UNASSIGNED",
		"UNASSIGNED",
		"PIM",        /**< Protocol Independent Mcast */
		"UNASSIGNED",

		"UNASSIGNED",
		"UNASSIGNED",
		"UNASSIGNED",
		"IPCOMP",     /**< payload compression (IPComp) */
	};

	if (ip_proto < sizeof(ip_proto_names) / sizeof(ip_proto_names[0]))
		return ip_proto_names[ip_proto];

	/* Protocol numbers beyond the end of the table. */
	switch (ip_proto) {
#ifdef IPPROTO_PGM
	case IPPROTO_PGM:  /**< PGM */
		return "PGM";
#endif
	case IPPROTO_SCTP:  /**< Stream Control Transport Protocol */
		return "SCTP";
#ifdef IPPROTO_DIVERT
	case IPPROTO_DIVERT: /**< divert pseudo-protocol */
		return "DIVERT";
#endif
	case IPPROTO_RAW: /**< raw IP packet */
		return "RAW";
	default:
		break;
	}
	return "UNASSIGNED";
}
211 
/*
 * Write the dotted-decimal text form of an IPv4 address into buf.
 *
 * be_ipv4_addr is the address in network (big-endian) byte order.
 * buf receives the NUL-terminated string; since each component is masked
 * to 8 bits, at most 16 bytes are written ("255.255.255.255" plus NUL).
 * The function cannot check the size of buf, so the caller must provide
 * at least that much room.
 */
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	/* The operands are uint32_t: use PRIu32, not the signed "%d". */
	sprintf(buf, "%" PRIu32 ".%" PRIu32 ".%" PRIu32 ".%" PRIu32,
		(ipv4_addr >> 24) & 0xFF, (ipv4_addr >> 16) & 0xFF,
		(ipv4_addr >> 8) & 0xFF, ipv4_addr & 0xFF);
}
222 
223 static void
224 ether_addr_dump(const char *what, const struct rte_ether_addr *ea)
225 {
226 	char buf[RTE_ETHER_ADDR_FMT_SIZE];
227 
228 	rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, ea);
229 	if (what)
230 		printf("%s", what);
231 	printf("%s", buf);
232 }
233 
/*
 * Print an IPv4 address (given in network byte order) on stdout in
 * dotted-decimal form.
 *
 * what is an optional label printed immediately before the address;
 * pass NULL to print the address alone. No newline is emitted.
 */
static void
ipv4_addr_dump(const char *what, uint32_t be_ipv4_addr)
{
	/* Large enough for "255.255.255.255" and the terminating NUL. */
	char dotted[16];

	ipv4_addr_to_dot(be_ipv4_addr, dotted);
	printf("%s%s", (what != NULL) ? what : "", dotted);
}
244 
245 static uint16_t
246 ipv4_hdr_cksum(struct rte_ipv4_hdr *ip_h)
247 {
248 	uint16_t *v16_h;
249 	uint32_t ip_cksum;
250 
251 	/*
252 	 * Compute the sum of successive 16-bit words of the IPv4 header,
253 	 * skipping the checksum field of the header.
254 	 */
255 	v16_h = (unaligned_uint16_t *) ip_h;
256 	ip_cksum = v16_h[0] + v16_h[1] + v16_h[2] + v16_h[3] +
257 		v16_h[4] + v16_h[6] + v16_h[7] + v16_h[8] + v16_h[9];
258 
259 	/* reduce 32 bit checksum to 16 bits and complement it */
260 	ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
261 	ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
262 	ip_cksum = (~ip_cksum) & 0x0000FFFF;
263 	return (ip_cksum == 0) ? 0xFFFF : (uint16_t) ip_cksum;
264 }
265 
/*
 * True if ipv4_addr (network byte order) is an IPv4 multicast address,
 * i.e. lies in 224.0.0.0/4: the four most significant bits are 1110.
 */
#define is_multicast_ipv4_addr(ipv4_addr) \
	((rte_be_to_cpu_32((ipv4_addr)) >> 28) == 0xE)
268 
269 /*
270  * Receive a burst of packets, lookup for ICMP echo requests, and, if any,
271  * send back ICMP echo replies.
272  */
273 static void
274 reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
275 {
276 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
277 	struct rte_mbuf *pkt;
278 	struct rte_ether_hdr *eth_h;
279 	struct rte_vlan_hdr *vlan_h;
280 	struct rte_arp_hdr  *arp_h;
281 	struct rte_ipv4_hdr *ip_h;
282 	struct rte_icmp_hdr *icmp_h;
283 	struct rte_ether_addr eth_addr;
284 	uint32_t retry;
285 	uint32_t ip_addr;
286 	uint16_t nb_rx;
287 	uint16_t nb_tx;
288 	uint16_t nb_replies;
289 	uint16_t eth_type;
290 	uint16_t vlan_id;
291 	uint16_t arp_op;
292 	uint16_t arp_pro;
293 	uint32_t cksum;
294 	uint8_t  i;
295 	int l2_len;
296 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
297 	uint64_t start_tsc;
298 	uint64_t end_tsc;
299 	uint64_t core_cycles;
300 #endif
301 
302 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
303 	start_tsc = rte_rdtsc();
304 #endif
305 
306 	/*
307 	 * First, receive a burst of packets.
308 	 */
309 	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
310 				 nb_pkt_per_burst);
311 	if (unlikely(nb_rx == 0))
312 		return;
313 
314 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
315 	fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
316 #endif
317 	fs->rx_packets += nb_rx;
318 	nb_replies = 0;
319 	for (i = 0; i < nb_rx; i++) {
320 		if (likely(i < nb_rx - 1))
321 			rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1],
322 						       void *));
323 		pkt = pkts_burst[i];
324 		eth_h = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
325 		eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type);
326 		l2_len = sizeof(struct rte_ether_hdr);
327 		if (verbose_level > 0) {
328 			printf("\nPort %d pkt-len=%u nb-segs=%u\n",
329 			       fs->rx_port, pkt->pkt_len, pkt->nb_segs);
330 			ether_addr_dump("  ETH:  src=", &eth_h->s_addr);
331 			ether_addr_dump(" dst=", &eth_h->d_addr);
332 		}
333 		if (eth_type == RTE_ETHER_TYPE_VLAN) {
334 			vlan_h = (struct rte_vlan_hdr *)
335 				((char *)eth_h + sizeof(struct rte_ether_hdr));
336 			l2_len  += sizeof(struct rte_vlan_hdr);
337 			eth_type = rte_be_to_cpu_16(vlan_h->eth_proto);
338 			if (verbose_level > 0) {
339 				vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci)
340 					& 0xFFF;
341 				printf(" [vlan id=%u]", vlan_id);
342 			}
343 		}
344 		if (verbose_level > 0) {
345 			printf(" type=0x%04x\n", eth_type);
346 		}
347 
348 		/* Reply to ARP requests */
349 		if (eth_type == RTE_ETHER_TYPE_ARP) {
350 			arp_h = (struct rte_arp_hdr *) ((char *)eth_h + l2_len);
351 			arp_op = RTE_BE_TO_CPU_16(arp_h->arp_opcode);
352 			arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_protocol);
353 			if (verbose_level > 0) {
354 				printf("  ARP:  hrd=%d proto=0x%04x hln=%d "
355 				       "pln=%d op=%u (%s)\n",
356 				       RTE_BE_TO_CPU_16(arp_h->arp_hardware),
357 				       arp_pro, arp_h->arp_hlen,
358 				       arp_h->arp_plen, arp_op,
359 				       arp_op_name(arp_op));
360 			}
361 			if ((RTE_BE_TO_CPU_16(arp_h->arp_hardware) !=
362 			     RTE_ARP_HRD_ETHER) ||
363 			    (arp_pro != RTE_ETHER_TYPE_IPv4) ||
364 			    (arp_h->arp_hlen != 6) ||
365 			    (arp_h->arp_plen != 4)
366 			    ) {
367 				rte_pktmbuf_free(pkt);
368 				if (verbose_level > 0)
369 					printf("\n");
370 				continue;
371 			}
372 			if (verbose_level > 0) {
373 				rte_ether_addr_copy(&arp_h->arp_data.arp_sha,
374 						&eth_addr);
375 				ether_addr_dump("        sha=", &eth_addr);
376 				ip_addr = arp_h->arp_data.arp_sip;
377 				ipv4_addr_dump(" sip=", ip_addr);
378 				printf("\n");
379 				rte_ether_addr_copy(&arp_h->arp_data.arp_tha,
380 						&eth_addr);
381 				ether_addr_dump("        tha=", &eth_addr);
382 				ip_addr = arp_h->arp_data.arp_tip;
383 				ipv4_addr_dump(" tip=", ip_addr);
384 				printf("\n");
385 			}
386 			if (arp_op != RTE_ARP_OP_REQUEST) {
387 				rte_pktmbuf_free(pkt);
388 				continue;
389 			}
390 
391 			/*
392 			 * Build ARP reply.
393 			 */
394 
395 			/* Use source MAC address as destination MAC address. */
396 			rte_ether_addr_copy(&eth_h->s_addr, &eth_h->d_addr);
397 			/* Set source MAC address with MAC address of TX port */
398 			rte_ether_addr_copy(&ports[fs->tx_port].eth_addr,
399 					&eth_h->s_addr);
400 
401 			arp_h->arp_opcode = rte_cpu_to_be_16(RTE_ARP_OP_REPLY);
402 			rte_ether_addr_copy(&arp_h->arp_data.arp_tha,
403 					&eth_addr);
404 			rte_ether_addr_copy(&arp_h->arp_data.arp_sha,
405 					&arp_h->arp_data.arp_tha);
406 			rte_ether_addr_copy(&eth_h->s_addr,
407 					&arp_h->arp_data.arp_sha);
408 
409 			/* Swap IP addresses in ARP payload */
410 			ip_addr = arp_h->arp_data.arp_sip;
411 			arp_h->arp_data.arp_sip = arp_h->arp_data.arp_tip;
412 			arp_h->arp_data.arp_tip = ip_addr;
413 			pkts_burst[nb_replies++] = pkt;
414 			continue;
415 		}
416 
417 		if (eth_type != RTE_ETHER_TYPE_IPv4) {
418 			rte_pktmbuf_free(pkt);
419 			continue;
420 		}
421 		ip_h = (struct rte_ipv4_hdr *) ((char *)eth_h + l2_len);
422 		if (verbose_level > 0) {
423 			ipv4_addr_dump("  IPV4: src=", ip_h->src_addr);
424 			ipv4_addr_dump(" dst=", ip_h->dst_addr);
425 			printf(" proto=%d (%s)\n",
426 			       ip_h->next_proto_id,
427 			       ip_proto_name(ip_h->next_proto_id));
428 		}
429 
430 		/*
431 		 * Check if packet is a ICMP echo request.
432 		 */
433 		icmp_h = (struct rte_icmp_hdr *) ((char *)ip_h +
434 					      sizeof(struct rte_ipv4_hdr));
435 		if (! ((ip_h->next_proto_id == IPPROTO_ICMP) &&
436 		       (icmp_h->icmp_type == RTE_IP_ICMP_ECHO_REQUEST) &&
437 		       (icmp_h->icmp_code == 0))) {
438 			rte_pktmbuf_free(pkt);
439 			continue;
440 		}
441 
442 		if (verbose_level > 0)
443 			printf("  ICMP: echo request seq id=%d\n",
444 			       rte_be_to_cpu_16(icmp_h->icmp_seq_nb));
445 
446 		/*
447 		 * Prepare ICMP echo reply to be sent back.
448 		 * - switch ethernet source and destinations addresses,
449 		 * - use the request IP source address as the reply IP
450 		 *    destination address,
451 		 * - if the request IP destination address is a multicast
452 		 *   address:
453 		 *     - choose a reply IP source address different from the
454 		 *       request IP source address,
455 		 *     - re-compute the IP header checksum.
456 		 *   Otherwise:
457 		 *     - switch the request IP source and destination
458 		 *       addresses in the reply IP header,
459 		 *     - keep the IP header checksum unchanged.
460 		 * - set RTE_IP_ICMP_ECHO_REPLY in ICMP header.
461 		 * ICMP checksum is computed by assuming it is valid in the
462 		 * echo request and not verified.
463 		 */
464 		rte_ether_addr_copy(&eth_h->s_addr, &eth_addr);
465 		rte_ether_addr_copy(&eth_h->d_addr, &eth_h->s_addr);
466 		rte_ether_addr_copy(&eth_addr, &eth_h->d_addr);
467 		ip_addr = ip_h->src_addr;
468 		if (is_multicast_ipv4_addr(ip_h->dst_addr)) {
469 			uint32_t ip_src;
470 
471 			ip_src = rte_be_to_cpu_32(ip_addr);
472 			if ((ip_src & 0x00000003) == 1)
473 				ip_src = (ip_src & 0xFFFFFFFC) | 0x00000002;
474 			else
475 				ip_src = (ip_src & 0xFFFFFFFC) | 0x00000001;
476 			ip_h->src_addr = rte_cpu_to_be_32(ip_src);
477 			ip_h->dst_addr = ip_addr;
478 			ip_h->hdr_checksum = ipv4_hdr_cksum(ip_h);
479 		} else {
480 			ip_h->src_addr = ip_h->dst_addr;
481 			ip_h->dst_addr = ip_addr;
482 		}
483 		icmp_h->icmp_type = RTE_IP_ICMP_ECHO_REPLY;
484 		cksum = ~icmp_h->icmp_cksum & 0xffff;
485 		cksum += ~htons(RTE_IP_ICMP_ECHO_REQUEST << 8) & 0xffff;
486 		cksum += htons(RTE_IP_ICMP_ECHO_REPLY << 8);
487 		cksum = (cksum & 0xffff) + (cksum >> 16);
488 		cksum = (cksum & 0xffff) + (cksum >> 16);
489 		icmp_h->icmp_cksum = ~cksum;
490 		pkts_burst[nb_replies++] = pkt;
491 	}
492 
493 	/* Send back ICMP echo replies, if any. */
494 	if (nb_replies > 0) {
495 		nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
496 					 nb_replies);
497 		/*
498 		 * Retry if necessary
499 		 */
500 		if (unlikely(nb_tx < nb_replies) && fs->retry_enabled) {
501 			retry = 0;
502 			while (nb_tx < nb_replies &&
503 					retry++ < burst_tx_retry_num) {
504 				rte_delay_us(burst_tx_delay_time);
505 				nb_tx += rte_eth_tx_burst(fs->tx_port,
506 						fs->tx_queue,
507 						&pkts_burst[nb_tx],
508 						nb_replies - nb_tx);
509 			}
510 		}
511 		fs->tx_packets += nb_tx;
512 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
513 		fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
514 #endif
515 		if (unlikely(nb_tx < nb_replies)) {
516 			fs->fwd_dropped += (nb_replies - nb_tx);
517 			do {
518 				rte_pktmbuf_free(pkts_burst[nb_tx]);
519 			} while (++nb_tx < nb_replies);
520 		}
521 	}
522 
523 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
524 	end_tsc = rte_rdtsc();
525 	core_cycles = (end_tsc - start_tsc);
526 	fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
527 #endif
528 }
529 
530 struct fwd_engine icmp_echo_engine = {
531 	.fwd_mode_name  = "icmpecho",
532 	.port_fwd_begin = NULL,
533 	.port_fwd_end   = NULL,
534 	.packet_fwd     = reply_to_icmp_echo_rqsts,
535 };
536