1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2013 6WIND S.A. 3 */ 4 5 #include <stdarg.h> 6 #include <string.h> 7 #include <stdio.h> 8 #include <errno.h> 9 #include <stdint.h> 10 #include <unistd.h> 11 #include <inttypes.h> 12 13 #include <sys/queue.h> 14 #include <sys/stat.h> 15 16 #include <rte_common.h> 17 #include <rte_byteorder.h> 18 #include <rte_log.h> 19 #include <rte_debug.h> 20 #include <rte_cycles.h> 21 #include <rte_per_lcore.h> 22 #include <rte_lcore.h> 23 #include <rte_branch_prediction.h> 24 #include <rte_memory.h> 25 #include <rte_mempool.h> 26 #include <rte_mbuf.h> 27 #include <rte_ether.h> 28 #include <rte_ethdev.h> 29 #include <rte_arp.h> 30 #include <rte_ip.h> 31 #include <rte_icmp.h> 32 #include <rte_string_fns.h> 33 #include <rte_flow.h> 34 35 #include "testpmd.h" 36 37 static const char * 38 arp_op_name(uint16_t arp_op) 39 { 40 switch (arp_op) { 41 case RTE_ARP_OP_REQUEST: 42 return "ARP Request"; 43 case RTE_ARP_OP_REPLY: 44 return "ARP Reply"; 45 case RTE_ARP_OP_REVREQUEST: 46 return "Reverse ARP Request"; 47 case RTE_ARP_OP_REVREPLY: 48 return "Reverse ARP Reply"; 49 case RTE_ARP_OP_INVREQUEST: 50 return "Peer Identify Request"; 51 case RTE_ARP_OP_INVREPLY: 52 return "Peer Identify Reply"; 53 default: 54 break; 55 } 56 return "Unknown ARP op"; 57 } 58 59 static const char * 60 ip_proto_name(uint16_t ip_proto) 61 { 62 static const char * ip_proto_names[] = { 63 "IP6HOPOPTS", /**< IP6 hop-by-hop options */ 64 "ICMP", /**< control message protocol */ 65 "IGMP", /**< group mgmt protocol */ 66 "GGP", /**< gateway^2 (deprecated) */ 67 "IPv4", /**< IPv4 encapsulation */ 68 69 "UNASSIGNED", 70 "TCP", /**< transport control protocol */ 71 "ST", /**< Stream protocol II */ 72 "EGP", /**< exterior gateway protocol */ 73 "PIGP", /**< private interior gateway */ 74 75 "RCC_MON", /**< BBN RCC Monitoring */ 76 "NVPII", /**< network voice protocol*/ 77 "PUP", /**< pup */ 78 "ARGUS", /**< Argus */ 79 "EMCON", /**< EMCON */ 80 81 "XNET", /**< Cross Net Debugger */ 82 "CHAOS", /**< Chaos*/ 83 "UDP", /**< user datagram protocol */ 84 "MUX", /**< Multiplexing */ 85 "DCN_MEAS", /**< DCN Measurement Subsystems */ 86 87 "HMP", /**< Host Monitoring */ 88 "PRM", /**< Packet Radio Measurement */ 89 "XNS_IDP", /**< xns idp */ 90 "TRUNK1", /**< Trunk-1 */ 91 "TRUNK2", /**< Trunk-2 */ 92 93 "LEAF1", /**< Leaf-1 */ 94 "LEAF2", /**< Leaf-2 */ 95 "RDP", /**< Reliable Data */ 96 "IRTP", /**< Reliable Transaction */ 97 "TP4", /**< tp-4 w/ class negotiation */ 98 99 "BLT", /**< Bulk Data Transfer */ 100 "NSP", /**< Network Services */ 101 "INP", /**< Merit Internodal */ 102 "SEP", /**< Sequential Exchange */ 103 "3PC", /**< Third Party Connect */ 104 105 "IDPR", /**< InterDomain Policy Routing */ 106 "XTP", /**< XTP */ 107 "DDP", /**< Datagram Delivery */ 108 "CMTP", /**< Control Message Transport */ 109 "TPXX", /**< TP++ Transport */ 110 111 "ILTP", /**< IL transport protocol */ 112 "IPv6_HDR", /**< IP6 header */ 113 "SDRP", /**< Source Demand Routing */ 114 "IPv6_RTG", /**< IP6 routing header */ 115 "IPv6_FRAG", /**< IP6 fragmentation header */ 116 117 "IDRP", /**< InterDomain Routing*/ 118 "RSVP", /**< resource reservation */ 119 "GRE", /**< General Routing Encap. */ 120 "MHRP", /**< Mobile Host Routing */ 121 "BHA", /**< BHA */ 122 123 "ESP", /**< IP6 Encap Sec. Payload */ 124 "AH", /**< IP6 Auth Header */ 125 "INLSP", /**< Integ. Net Layer Security */ 126 "SWIPE", /**< IP with encryption */ 127 "NHRP", /**< Next Hop Resolution */ 128 129 "UNASSIGNED", 130 "UNASSIGNED", 131 "UNASSIGNED", 132 "ICMPv6", /**< ICMP6 */ 133 "IPv6NONEXT", /**< IP6 no next header */ 134 135 "Ipv6DSTOPTS",/**< IP6 destination option */ 136 "AHIP", /**< any host internal protocol */ 137 "CFTP", /**< CFTP */ 138 "HELLO", /**< "hello" routing protocol */ 139 "SATEXPAK", /**< SATNET/Backroom EXPAK */ 140 141 "KRYPTOLAN", /**< Kryptolan */ 142 "RVD", /**< Remote Virtual Disk */ 143 "IPPC", /**< Pluribus Packet Core */ 144 "ADFS", /**< Any distributed FS */ 145 "SATMON", /**< Satnet Monitoring */ 146 147 "VISA", /**< VISA Protocol */ 148 "IPCV", /**< Packet Core Utility */ 149 "CPNX", /**< Comp. Prot. Net. Executive */ 150 "CPHB", /**< Comp. Prot. HeartBeat */ 151 "WSN", /**< Wang Span Network */ 152 153 "PVP", /**< Packet Video Protocol */ 154 "BRSATMON", /**< BackRoom SATNET Monitoring */ 155 "ND", /**< Sun net disk proto (temp.) */ 156 "WBMON", /**< WIDEBAND Monitoring */ 157 "WBEXPAK", /**< WIDEBAND EXPAK */ 158 159 "EON", /**< ISO cnlp */ 160 "VMTP", /**< VMTP */ 161 "SVMTP", /**< Secure VMTP */ 162 "VINES", /**< Banyon VINES */ 163 "TTP", /**< TTP */ 164 165 "IGP", /**< NSFNET-IGP */ 166 "DGP", /**< dissimilar gateway prot. */ 167 "TCF", /**< TCF */ 168 "IGRP", /**< Cisco/GXS IGRP */ 169 "OSPFIGP", /**< OSPFIGP */ 170 171 "SRPC", /**< Strite RPC protocol */ 172 "LARP", /**< Locus Address Resolution */ 173 "MTP", /**< Multicast Transport */ 174 "AX25", /**< AX.25 Frames */ 175 "4IN4", /**< IP encapsulated in IP */ 176 177 "MICP", /**< Mobile Int.ing control */ 178 "SCCSP", /**< Semaphore Comm. security */ 179 "ETHERIP", /**< Ethernet IP encapsulation */ 180 "ENCAP", /**< encapsulation header */ 181 "AES", /**< any private encr. scheme */ 182 183 "GMTP", /**< GMTP */ 184 "IPCOMP", /**< payload compression (IPComp) */ 185 "UNASSIGNED", 186 "UNASSIGNED", 187 "PIM", /**< Protocol Independent Mcast */ 188 }; 189 190 if (ip_proto < RTE_DIM(ip_proto_names)) 191 return ip_proto_names[ip_proto]; 192 switch (ip_proto) { 193 #ifdef IPPROTO_PGM 194 case IPPROTO_PGM: /**< PGM */ 195 return "PGM"; 196 #endif 197 case IPPROTO_SCTP: /**< Stream Control Transport Protocol */ 198 return "SCTP"; 199 #ifdef IPPROTO_DIVERT 200 case IPPROTO_DIVERT: /**< divert pseudo-protocol */ 201 return "DIVERT"; 202 #endif 203 case IPPROTO_RAW: /**< raw IP packet */ 204 return "RAW"; 205 default: 206 break; 207 } 208 return "UNASSIGNED"; 209 } 210 211 static void 212 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf) 213 { 214 uint32_t ipv4_addr; 215 216 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr); 217 sprintf(buf, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF, 218 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF, 219 ipv4_addr & 0xFF); 220 } 221 222 static void 223 ether_addr_dump(const char *what, const struct rte_ether_addr *ea) 224 { 225 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 226 227 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, ea); 228 if (what) 229 printf("%s", what); 230 printf("%s", buf); 231 } 232 233 static void 234 ipv4_addr_dump(const char *what, uint32_t be_ipv4_addr) 235 { 236 char buf[16]; 237 238 ipv4_addr_to_dot(be_ipv4_addr, buf); 239 if (what) 240 printf("%s", what); 241 printf("%s", buf); 242 } 243 244 static uint16_t 245 ipv4_hdr_cksum(struct rte_ipv4_hdr *ip_h) 246 { 247 uint16_t *v16_h; 248 uint32_t ip_cksum; 249 250 /* 251 * Compute the sum of successive 16-bit words of the IPv4 header, 252 * skipping the checksum field of the header. 253 */ 254 v16_h = (unaligned_uint16_t *) ip_h; 255 ip_cksum = v16_h[0] + v16_h[1] + v16_h[2] + v16_h[3] + 256 v16_h[4] + v16_h[6] + v16_h[7] + v16_h[8] + v16_h[9]; 257 258 /* reduce 32 bit checksum to 16 bits and complement it */ 259 ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16); 260 ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16); 261 ip_cksum = (~ip_cksum) & 0x0000FFFF; 262 return (ip_cksum == 0) ? 0xFFFF : (uint16_t) ip_cksum; 263 } 264 265 #define is_multicast_ipv4_addr(ipv4_addr) \ 266 (((rte_be_to_cpu_32((ipv4_addr)) >> 24) & 0x000000FF) == 0xE0) 267 268 /* 269 * Receive a burst of packets, lookup for ICMP echo requests, and, if any, 270 * send back ICMP echo replies. 271 */ 272 static void 273 reply_to_icmp_echo_rqsts(struct fwd_stream *fs) 274 { 275 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 276 struct rte_mbuf *pkt; 277 struct rte_ether_hdr *eth_h; 278 struct rte_vlan_hdr *vlan_h; 279 struct rte_arp_hdr *arp_h; 280 struct rte_ipv4_hdr *ip_h; 281 struct rte_icmp_hdr *icmp_h; 282 struct rte_ether_addr eth_addr; 283 uint32_t retry; 284 uint32_t ip_addr; 285 uint16_t nb_rx; 286 uint16_t nb_tx; 287 uint16_t nb_replies; 288 uint16_t eth_type; 289 uint16_t vlan_id; 290 uint16_t arp_op; 291 uint16_t arp_pro; 292 uint32_t cksum; 293 uint8_t i; 294 int l2_len; 295 uint64_t start_tsc = 0; 296 297 get_start_cycles(&start_tsc); 298 299 /* 300 * First, receive a burst of packets. 301 */ 302 nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, 303 nb_pkt_per_burst); 304 inc_rx_burst_stats(fs, nb_rx); 305 if (unlikely(nb_rx == 0)) 306 return; 307 308 fs->rx_packets += nb_rx; 309 nb_replies = 0; 310 for (i = 0; i < nb_rx; i++) { 311 if (likely(i < nb_rx - 1)) 312 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1], 313 void *)); 314 pkt = pkts_burst[i]; 315 eth_h = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); 316 eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type); 317 l2_len = sizeof(struct rte_ether_hdr); 318 if (verbose_level > 0) { 319 printf("\nPort %d pkt-len=%u nb-segs=%u\n", 320 fs->rx_port, pkt->pkt_len, pkt->nb_segs); 321 ether_addr_dump(" ETH: src=", ð_h->src_addr); 322 ether_addr_dump(" dst=", ð_h->dst_addr); 323 } 324 if (eth_type == RTE_ETHER_TYPE_VLAN) { 325 vlan_h = (struct rte_vlan_hdr *) 326 ((char *)eth_h + sizeof(struct rte_ether_hdr)); 327 l2_len += sizeof(struct rte_vlan_hdr); 328 eth_type = rte_be_to_cpu_16(vlan_h->eth_proto); 329 if (verbose_level > 0) { 330 vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci) 331 & 0xFFF; 332 printf(" [vlan id=%u]", vlan_id); 333 } 334 } 335 if (verbose_level > 0) { 336 printf(" type=0x%04x\n", eth_type); 337 } 338 339 /* Reply to ARP requests */ 340 if (eth_type == RTE_ETHER_TYPE_ARP) { 341 arp_h = (struct rte_arp_hdr *) ((char *)eth_h + l2_len); 342 arp_op = RTE_BE_TO_CPU_16(arp_h->arp_opcode); 343 arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_protocol); 344 if (verbose_level > 0) { 345 printf(" ARP: hrd=%d proto=0x%04x hln=%d " 346 "pln=%d op=%u (%s)\n", 347 RTE_BE_TO_CPU_16(arp_h->arp_hardware), 348 arp_pro, arp_h->arp_hlen, 349 arp_h->arp_plen, arp_op, 350 arp_op_name(arp_op)); 351 } 352 if ((RTE_BE_TO_CPU_16(arp_h->arp_hardware) != 353 RTE_ARP_HRD_ETHER) || 354 (arp_pro != RTE_ETHER_TYPE_IPV4) || 355 (arp_h->arp_hlen != 6) || 356 (arp_h->arp_plen != 4) 357 ) { 358 rte_pktmbuf_free(pkt); 359 if (verbose_level > 0) 360 printf("\n"); 361 continue; 362 } 363 if (verbose_level > 0) { 364 rte_ether_addr_copy(&arp_h->arp_data.arp_sha, 365 ð_addr); 366 ether_addr_dump(" sha=", ð_addr); 367 ip_addr = arp_h->arp_data.arp_sip; 368 ipv4_addr_dump(" sip=", ip_addr); 369 printf("\n"); 370 rte_ether_addr_copy(&arp_h->arp_data.arp_tha, 371 ð_addr); 372 ether_addr_dump(" tha=", ð_addr); 373 ip_addr = arp_h->arp_data.arp_tip; 374 ipv4_addr_dump(" tip=", ip_addr); 375 printf("\n"); 376 } 377 if (arp_op != RTE_ARP_OP_REQUEST) { 378 rte_pktmbuf_free(pkt); 379 continue; 380 } 381 382 /* 383 * Build ARP reply. 384 */ 385 386 /* Use source MAC address as destination MAC address. */ 387 rte_ether_addr_copy(ð_h->src_addr, ð_h->dst_addr); 388 /* Set source MAC address with MAC address of TX port */ 389 rte_ether_addr_copy(&ports[fs->tx_port].eth_addr, 390 ð_h->src_addr); 391 392 arp_h->arp_opcode = rte_cpu_to_be_16(RTE_ARP_OP_REPLY); 393 rte_ether_addr_copy(&arp_h->arp_data.arp_tha, 394 ð_addr); 395 rte_ether_addr_copy(&arp_h->arp_data.arp_sha, 396 &arp_h->arp_data.arp_tha); 397 rte_ether_addr_copy(ð_h->src_addr, 398 &arp_h->arp_data.arp_sha); 399 400 /* Swap IP addresses in ARP payload */ 401 ip_addr = arp_h->arp_data.arp_sip; 402 arp_h->arp_data.arp_sip = arp_h->arp_data.arp_tip; 403 arp_h->arp_data.arp_tip = ip_addr; 404 pkts_burst[nb_replies++] = pkt; 405 continue; 406 } 407 408 if (eth_type != RTE_ETHER_TYPE_IPV4) { 409 rte_pktmbuf_free(pkt); 410 continue; 411 } 412 ip_h = (struct rte_ipv4_hdr *) ((char *)eth_h + l2_len); 413 if (verbose_level > 0) { 414 ipv4_addr_dump(" IPV4: src=", ip_h->src_addr); 415 ipv4_addr_dump(" dst=", ip_h->dst_addr); 416 printf(" proto=%d (%s)\n", 417 ip_h->next_proto_id, 418 ip_proto_name(ip_h->next_proto_id)); 419 } 420 421 /* 422 * Check if packet is a ICMP echo request. 423 */ 424 icmp_h = (struct rte_icmp_hdr *) ((char *)ip_h + 425 sizeof(struct rte_ipv4_hdr)); 426 if (! ((ip_h->next_proto_id == IPPROTO_ICMP) && 427 (icmp_h->icmp_type == RTE_IP_ICMP_ECHO_REQUEST) && 428 (icmp_h->icmp_code == 0))) { 429 rte_pktmbuf_free(pkt); 430 continue; 431 } 432 433 if (verbose_level > 0) 434 printf(" ICMP: echo request seq id=%d\n", 435 rte_be_to_cpu_16(icmp_h->icmp_seq_nb)); 436 437 /* 438 * Prepare ICMP echo reply to be sent back. 439 * - switch ethernet source and destinations addresses, 440 * - use the request IP source address as the reply IP 441 * destination address, 442 * - if the request IP destination address is a multicast 443 * address: 444 * - choose a reply IP source address different from the 445 * request IP source address, 446 * - re-compute the IP header checksum. 447 * Otherwise: 448 * - switch the request IP source and destination 449 * addresses in the reply IP header, 450 * - keep the IP header checksum unchanged. 451 * - set RTE_IP_ICMP_ECHO_REPLY in ICMP header. 452 * ICMP checksum is computed by assuming it is valid in the 453 * echo request and not verified. 454 */ 455 rte_ether_addr_copy(ð_h->src_addr, ð_addr); 456 rte_ether_addr_copy(ð_h->dst_addr, ð_h->src_addr); 457 rte_ether_addr_copy(ð_addr, ð_h->dst_addr); 458 ip_addr = ip_h->src_addr; 459 if (is_multicast_ipv4_addr(ip_h->dst_addr)) { 460 uint32_t ip_src; 461 462 ip_src = rte_be_to_cpu_32(ip_addr); 463 if ((ip_src & 0x00000003) == 1) 464 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000002; 465 else 466 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000001; 467 ip_h->src_addr = rte_cpu_to_be_32(ip_src); 468 ip_h->dst_addr = ip_addr; 469 ip_h->hdr_checksum = ipv4_hdr_cksum(ip_h); 470 } else { 471 ip_h->src_addr = ip_h->dst_addr; 472 ip_h->dst_addr = ip_addr; 473 } 474 icmp_h->icmp_type = RTE_IP_ICMP_ECHO_REPLY; 475 cksum = ~icmp_h->icmp_cksum & 0xffff; 476 cksum += ~RTE_BE16(RTE_IP_ICMP_ECHO_REQUEST << 8) & 0xffff; 477 cksum += RTE_BE16(RTE_IP_ICMP_ECHO_REPLY << 8); 478 cksum = (cksum & 0xffff) + (cksum >> 16); 479 cksum = (cksum & 0xffff) + (cksum >> 16); 480 icmp_h->icmp_cksum = ~cksum; 481 pkts_burst[nb_replies++] = pkt; 482 } 483 484 /* Send back ICMP echo replies, if any. */ 485 if (nb_replies > 0) { 486 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, 487 nb_replies); 488 /* 489 * Retry if necessary 490 */ 491 if (unlikely(nb_tx < nb_replies) && fs->retry_enabled) { 492 retry = 0; 493 while (nb_tx < nb_replies && 494 retry++ < burst_tx_retry_num) { 495 rte_delay_us(burst_tx_delay_time); 496 nb_tx += rte_eth_tx_burst(fs->tx_port, 497 fs->tx_queue, 498 &pkts_burst[nb_tx], 499 nb_replies - nb_tx); 500 } 501 } 502 fs->tx_packets += nb_tx; 503 inc_tx_burst_stats(fs, nb_tx); 504 if (unlikely(nb_tx < nb_replies)) { 505 fs->fwd_dropped += (nb_replies - nb_tx); 506 do { 507 rte_pktmbuf_free(pkts_burst[nb_tx]); 508 } while (++nb_tx < nb_replies); 509 } 510 } 511 512 get_end_cycles(fs, start_tsc); 513 } 514 515 struct fwd_engine icmp_echo_engine = { 516 .fwd_mode_name = "icmpecho", 517 .port_fwd_begin = NULL, 518 .port_fwd_end = NULL, 519 .packet_fwd = reply_to_icmp_echo_rqsts, 520 }; 521