1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2013 6WIND 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 */ 34 35 #include <stdarg.h> 36 #include <string.h> 37 #include <stdio.h> 38 #include <errno.h> 39 #include <stdint.h> 40 #include <unistd.h> 41 #include <inttypes.h> 42 43 #include <sys/queue.h> 44 #include <sys/stat.h> 45 46 #include <rte_common.h> 47 #include <rte_byteorder.h> 48 #include <rte_log.h> 49 #include <rte_debug.h> 50 #include <rte_cycles.h> 51 #include <rte_per_lcore.h> 52 #include <rte_lcore.h> 53 #include <rte_atomic.h> 54 #include <rte_branch_prediction.h> 55 #include <rte_memory.h> 56 #include <rte_mempool.h> 57 #include <rte_mbuf.h> 58 #include <rte_ether.h> 59 #include <rte_ethdev.h> 60 #include <rte_arp.h> 61 #include <rte_ip.h> 62 #include <rte_icmp.h> 63 #include <rte_string_fns.h> 64 65 #include "testpmd.h" 66 67 static const char * 68 arp_op_name(uint16_t arp_op) 69 { 70 switch (arp_op ) { 71 case ARP_OP_REQUEST: 72 return "ARP Request"; 73 case ARP_OP_REPLY: 74 return "ARP Reply"; 75 case ARP_OP_REVREQUEST: 76 return "Reverse ARP Request"; 77 case ARP_OP_REVREPLY: 78 return "Reverse ARP Reply"; 79 case ARP_OP_INVREQUEST: 80 return "Peer Identify Request"; 81 case ARP_OP_INVREPLY: 82 return "Peer Identify Reply"; 83 default: 84 break; 85 } 86 return "Unkwown ARP op"; 87 } 88 89 static const char * 90 ip_proto_name(uint16_t ip_proto) 91 { 92 static const char * ip_proto_names[] = { 93 "IP6HOPOPTS", /**< IP6 hop-by-hop options */ 94 "ICMP", /**< control message protocol */ 95 "IGMP", /**< group mgmt protocol */ 96 "GGP", /**< gateway^2 (deprecated) */ 97 "IPv4", /**< IPv4 encapsulation */ 98 99 "UNASSIGNED", 100 "TCP", /**< transport control protocol */ 101 "ST", /**< Stream protocol II */ 102 "EGP", /**< exterior gateway protocol */ 103 "PIGP", /**< private interior gateway */ 104 105 "RCC_MON", /**< BBN RCC Monitoring */ 106 "NVPII", /**< network voice protocol*/ 107 "PUP", /**< pup */ 108 "ARGUS", /**< Argus */ 109 "EMCON", /**< EMCON */ 110 111 "XNET", /**< Cross Net Debugger */ 112 "CHAOS", /**< Chaos*/ 113 "UDP", /**< user datagram protocol */ 114 "MUX", /**< Multiplexing */ 115 "DCN_MEAS", /**< DCN Measurement Subsystems */ 116 117 "HMP", /**< Host Monitoring */ 118 "PRM", /**< Packet Radio Measurement */ 119 "XNS_IDP", /**< xns idp */ 120 "TRUNK1", /**< Trunk-1 */ 121 "TRUNK2", /**< Trunk-2 */ 122 123 "LEAF1", /**< Leaf-1 */ 124 "LEAF2", /**< Leaf-2 */ 125 "RDP", /**< Reliable Data */ 126 "IRTP", /**< Reliable Transaction */ 127 "TP4", /**< tp-4 w/ class negotiation */ 128 129 "BLT", /**< Bulk Data Transfer */ 130 "NSP", /**< Network Services */ 131 "INP", /**< Merit Internodal */ 132 "SEP", /**< Sequential Exchange */ 133 "3PC", /**< Third Party Connect */ 134 135 "IDPR", /**< InterDomain Policy Routing */ 136 "XTP", /**< XTP */ 137 "DDP", /**< Datagram Delivery */ 138 "CMTP", /**< Control Message Transport */ 139 "TPXX", /**< TP++ Transport */ 140 141 "ILTP", /**< IL transport protocol */ 142 "IPv6_HDR", /**< IP6 header */ 143 "SDRP", /**< Source Demand Routing */ 144 "IPv6_RTG", /**< IP6 routing header */ 145 "IPv6_FRAG", /**< IP6 fragmentation header */ 146 147 "IDRP", /**< InterDomain Routing*/ 148 "RSVP", /**< resource reservation */ 149 "GRE", /**< General Routing Encap. */ 150 "MHRP", /**< Mobile Host Routing */ 151 "BHA", /**< BHA */ 152 153 "ESP", /**< IP6 Encap Sec. Payload */ 154 "AH", /**< IP6 Auth Header */ 155 "INLSP", /**< Integ. Net Layer Security */ 156 "SWIPE", /**< IP with encryption */ 157 "NHRP", /**< Next Hop Resolution */ 158 159 "UNASSIGNED", 160 "UNASSIGNED", 161 "UNASSIGNED", 162 "ICMPv6", /**< ICMP6 */ 163 "IPv6NONEXT", /**< IP6 no next header */ 164 165 "Ipv6DSTOPTS",/**< IP6 destination option */ 166 "AHIP", /**< any host internal protocol */ 167 "CFTP", /**< CFTP */ 168 "HELLO", /**< "hello" routing protocol */ 169 "SATEXPAK", /**< SATNET/Backroom EXPAK */ 170 171 "KRYPTOLAN", /**< Kryptolan */ 172 "RVD", /**< Remote Virtual Disk */ 173 "IPPC", /**< Pluribus Packet Core */ 174 "ADFS", /**< Any distributed FS */ 175 "SATMON", /**< Satnet Monitoring */ 176 177 "VISA", /**< VISA Protocol */ 178 "IPCV", /**< Packet Core Utility */ 179 "CPNX", /**< Comp. Prot. Net. Executive */ 180 "CPHB", /**< Comp. Prot. HeartBeat */ 181 "WSN", /**< Wang Span Network */ 182 183 "PVP", /**< Packet Video Protocol */ 184 "BRSATMON", /**< BackRoom SATNET Monitoring */ 185 "ND", /**< Sun net disk proto (temp.) */ 186 "WBMON", /**< WIDEBAND Monitoring */ 187 "WBEXPAK", /**< WIDEBAND EXPAK */ 188 189 "EON", /**< ISO cnlp */ 190 "VMTP", /**< VMTP */ 191 "SVMTP", /**< Secure VMTP */ 192 "VINES", /**< Banyon VINES */ 193 "TTP", /**< TTP */ 194 195 "IGP", /**< NSFNET-IGP */ 196 "DGP", /**< dissimilar gateway prot. */ 197 "TCF", /**< TCF */ 198 "IGRP", /**< Cisco/GXS IGRP */ 199 "OSPFIGP", /**< OSPFIGP */ 200 201 "SRPC", /**< Strite RPC protocol */ 202 "LARP", /**< Locus Address Resoloution */ 203 "MTP", /**< Multicast Transport */ 204 "AX25", /**< AX.25 Frames */ 205 "4IN4", /**< IP encapsulated in IP */ 206 207 "MICP", /**< Mobile Int.ing control */ 208 "SCCSP", /**< Semaphore Comm. security */ 209 "ETHERIP", /**< Ethernet IP encapsulation */ 210 "ENCAP", /**< encapsulation header */ 211 "AES", /**< any private encr. scheme */ 212 213 "GMTP", /**< GMTP */ 214 "IPCOMP", /**< payload compression (IPComp) */ 215 "UNASSIGNED", 216 "UNASSIGNED", 217 "PIM", /**< Protocol Independent Mcast */ 218 }; 219 220 if (ip_proto < sizeof(ip_proto_names) / sizeof(ip_proto_names[0])) 221 return ip_proto_names[ip_proto]; 222 switch (ip_proto) { 223 #ifdef IPPROTO_PGM 224 case IPPROTO_PGM: /**< PGM */ 225 return "PGM"; 226 #endif 227 case IPPROTO_SCTP: /**< Stream Control Transport Protocol */ 228 return "SCTP"; 229 #ifdef IPPROTO_DIVERT 230 case IPPROTO_DIVERT: /**< divert pseudo-protocol */ 231 return "DIVERT"; 232 #endif 233 case IPPROTO_RAW: /**< raw IP packet */ 234 return "RAW"; 235 default: 236 break; 237 } 238 return "UNASSIGNED"; 239 } 240 241 static void 242 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf) 243 { 244 uint32_t ipv4_addr; 245 246 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr); 247 sprintf(buf, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF, 248 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF, 249 ipv4_addr & 0xFF); 250 } 251 252 static void 253 ether_addr_dump(const char *what, const struct ether_addr *ea) 254 { 255 char buf[ETHER_ADDR_FMT_SIZE]; 256 257 ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, ea); 258 if (what) 259 printf("%s", what); 260 printf("%s", buf); 261 } 262 263 static void 264 ipv4_addr_dump(const char *what, uint32_t be_ipv4_addr) 265 { 266 char buf[16]; 267 268 ipv4_addr_to_dot(be_ipv4_addr, buf); 269 if (what) 270 printf("%s", what); 271 printf("%s", buf); 272 } 273 274 static uint16_t 275 ipv4_hdr_cksum(struct ipv4_hdr *ip_h) 276 { 277 uint16_t *v16_h; 278 uint32_t ip_cksum; 279 280 /* 281 * Compute the sum of successive 16-bit words of the IPv4 header, 282 * skipping the checksum field of the header. 283 */ 284 v16_h = (unaligned_uint16_t *) ip_h; 285 ip_cksum = v16_h[0] + v16_h[1] + v16_h[2] + v16_h[3] + 286 v16_h[4] + v16_h[6] + v16_h[7] + v16_h[8] + v16_h[9]; 287 288 /* reduce 32 bit checksum to 16 bits and complement it */ 289 ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16); 290 ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16); 291 ip_cksum = (~ip_cksum) & 0x0000FFFF; 292 return (ip_cksum == 0) ? 0xFFFF : (uint16_t) ip_cksum; 293 } 294 295 #define is_multicast_ipv4_addr(ipv4_addr) \ 296 (((rte_be_to_cpu_32((ipv4_addr)) >> 24) & 0x000000FF) == 0xE0) 297 298 /* 299 * Receive a burst of packets, lookup for ICMP echo requets, and, if any, 300 * send back ICMP echo replies. 301 */ 302 static void 303 reply_to_icmp_echo_rqsts(struct fwd_stream *fs) 304 { 305 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 306 struct rte_mbuf *pkt; 307 struct ether_hdr *eth_h; 308 struct vlan_hdr *vlan_h; 309 struct arp_hdr *arp_h; 310 struct ipv4_hdr *ip_h; 311 struct icmp_hdr *icmp_h; 312 struct ether_addr eth_addr; 313 uint32_t retry; 314 uint32_t ip_addr; 315 uint16_t nb_rx; 316 uint16_t nb_tx; 317 uint16_t nb_replies; 318 uint16_t eth_type; 319 uint16_t vlan_id; 320 uint16_t arp_op; 321 uint16_t arp_pro; 322 uint32_t cksum; 323 uint8_t i; 324 int l2_len; 325 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES 326 uint64_t start_tsc; 327 uint64_t end_tsc; 328 uint64_t core_cycles; 329 #endif 330 331 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES 332 start_tsc = rte_rdtsc(); 333 #endif 334 335 /* 336 * First, receive a burst of packets. 337 */ 338 nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, 339 nb_pkt_per_burst); 340 if (unlikely(nb_rx == 0)) 341 return; 342 343 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS 344 fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; 345 #endif 346 fs->rx_packets += nb_rx; 347 nb_replies = 0; 348 for (i = 0; i < nb_rx; i++) { 349 if (likely(i < nb_rx - 1)) 350 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1], 351 void *)); 352 pkt = pkts_burst[i]; 353 eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *); 354 eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type); 355 l2_len = sizeof(struct ether_hdr); 356 if (verbose_level > 0) { 357 printf("\nPort %d pkt-len=%u nb-segs=%u\n", 358 fs->rx_port, pkt->pkt_len, pkt->nb_segs); 359 ether_addr_dump(" ETH: src=", ð_h->s_addr); 360 ether_addr_dump(" dst=", ð_h->d_addr); 361 } 362 if (eth_type == ETHER_TYPE_VLAN) { 363 vlan_h = (struct vlan_hdr *) 364 ((char *)eth_h + sizeof(struct ether_hdr)); 365 l2_len += sizeof(struct vlan_hdr); 366 eth_type = rte_be_to_cpu_16(vlan_h->eth_proto); 367 if (verbose_level > 0) { 368 vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci) 369 & 0xFFF; 370 printf(" [vlan id=%u]", vlan_id); 371 } 372 } 373 if (verbose_level > 0) { 374 printf(" type=0x%04x\n", eth_type); 375 } 376 377 /* Reply to ARP requests */ 378 if (eth_type == ETHER_TYPE_ARP) { 379 arp_h = (struct arp_hdr *) ((char *)eth_h + l2_len); 380 arp_op = RTE_BE_TO_CPU_16(arp_h->arp_op); 381 arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_pro); 382 if (verbose_level > 0) { 383 printf(" ARP: hrd=%d proto=0x%04x hln=%d " 384 "pln=%d op=%u (%s)\n", 385 RTE_BE_TO_CPU_16(arp_h->arp_hrd), 386 arp_pro, arp_h->arp_hln, 387 arp_h->arp_pln, arp_op, 388 arp_op_name(arp_op)); 389 } 390 if ((RTE_BE_TO_CPU_16(arp_h->arp_hrd) != 391 ARP_HRD_ETHER) || 392 (arp_pro != ETHER_TYPE_IPv4) || 393 (arp_h->arp_hln != 6) || 394 (arp_h->arp_pln != 4) 395 ) { 396 rte_pktmbuf_free(pkt); 397 if (verbose_level > 0) 398 printf("\n"); 399 continue; 400 } 401 if (verbose_level > 0) { 402 ether_addr_copy(&arp_h->arp_data.arp_sha, ð_addr); 403 ether_addr_dump(" sha=", ð_addr); 404 ip_addr = arp_h->arp_data.arp_sip; 405 ipv4_addr_dump(" sip=", ip_addr); 406 printf("\n"); 407 ether_addr_copy(&arp_h->arp_data.arp_tha, ð_addr); 408 ether_addr_dump(" tha=", ð_addr); 409 ip_addr = arp_h->arp_data.arp_tip; 410 ipv4_addr_dump(" tip=", ip_addr); 411 printf("\n"); 412 } 413 if (arp_op != ARP_OP_REQUEST) { 414 rte_pktmbuf_free(pkt); 415 continue; 416 } 417 418 /* 419 * Build ARP reply. 420 */ 421 422 /* Use source MAC address as destination MAC address. */ 423 ether_addr_copy(ð_h->s_addr, ð_h->d_addr); 424 /* Set source MAC address with MAC address of TX port */ 425 ether_addr_copy(&ports[fs->tx_port].eth_addr, 426 ð_h->s_addr); 427 428 arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); 429 ether_addr_copy(&arp_h->arp_data.arp_tha, ð_addr); 430 ether_addr_copy(&arp_h->arp_data.arp_sha, &arp_h->arp_data.arp_tha); 431 ether_addr_copy(ð_h->s_addr, &arp_h->arp_data.arp_sha); 432 433 /* Swap IP addresses in ARP payload */ 434 ip_addr = arp_h->arp_data.arp_sip; 435 arp_h->arp_data.arp_sip = arp_h->arp_data.arp_tip; 436 arp_h->arp_data.arp_tip = ip_addr; 437 pkts_burst[nb_replies++] = pkt; 438 continue; 439 } 440 441 if (eth_type != ETHER_TYPE_IPv4) { 442 rte_pktmbuf_free(pkt); 443 continue; 444 } 445 ip_h = (struct ipv4_hdr *) ((char *)eth_h + l2_len); 446 if (verbose_level > 0) { 447 ipv4_addr_dump(" IPV4: src=", ip_h->src_addr); 448 ipv4_addr_dump(" dst=", ip_h->dst_addr); 449 printf(" proto=%d (%s)\n", 450 ip_h->next_proto_id, 451 ip_proto_name(ip_h->next_proto_id)); 452 } 453 454 /* 455 * Check if packet is a ICMP echo request. 456 */ 457 icmp_h = (struct icmp_hdr *) ((char *)ip_h + 458 sizeof(struct ipv4_hdr)); 459 if (! ((ip_h->next_proto_id == IPPROTO_ICMP) && 460 (icmp_h->icmp_type == IP_ICMP_ECHO_REQUEST) && 461 (icmp_h->icmp_code == 0))) { 462 rte_pktmbuf_free(pkt); 463 continue; 464 } 465 466 if (verbose_level > 0) 467 printf(" ICMP: echo request seq id=%d\n", 468 rte_be_to_cpu_16(icmp_h->icmp_seq_nb)); 469 470 /* 471 * Prepare ICMP echo reply to be sent back. 472 * - switch ethernet source and destinations addresses, 473 * - use the request IP source address as the reply IP 474 * destination address, 475 * - if the request IP destination address is a multicast 476 * address: 477 * - choose a reply IP source address different from the 478 * request IP source address, 479 * - re-compute the IP header checksum. 480 * Otherwise: 481 * - switch the request IP source and destination 482 * addresses in the reply IP header, 483 * - keep the IP header checksum unchanged. 484 * - set IP_ICMP_ECHO_REPLY in ICMP header. 485 * ICMP checksum is computed by assuming it is valid in the 486 * echo request and not verified. 487 */ 488 ether_addr_copy(ð_h->s_addr, ð_addr); 489 ether_addr_copy(ð_h->d_addr, ð_h->s_addr); 490 ether_addr_copy(ð_addr, ð_h->d_addr); 491 ip_addr = ip_h->src_addr; 492 if (is_multicast_ipv4_addr(ip_h->dst_addr)) { 493 uint32_t ip_src; 494 495 ip_src = rte_be_to_cpu_32(ip_addr); 496 if ((ip_src & 0x00000003) == 1) 497 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000002; 498 else 499 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000001; 500 ip_h->src_addr = rte_cpu_to_be_32(ip_src); 501 ip_h->dst_addr = ip_addr; 502 ip_h->hdr_checksum = ipv4_hdr_cksum(ip_h); 503 } else { 504 ip_h->src_addr = ip_h->dst_addr; 505 ip_h->dst_addr = ip_addr; 506 } 507 icmp_h->icmp_type = IP_ICMP_ECHO_REPLY; 508 cksum = ~icmp_h->icmp_cksum & 0xffff; 509 cksum += ~htons(IP_ICMP_ECHO_REQUEST << 8) & 0xffff; 510 cksum += htons(IP_ICMP_ECHO_REPLY << 8); 511 cksum = (cksum & 0xffff) + (cksum >> 16); 512 cksum = (cksum & 0xffff) + (cksum >> 16); 513 icmp_h->icmp_cksum = ~cksum; 514 pkts_burst[nb_replies++] = pkt; 515 } 516 517 /* Send back ICMP echo replies, if any. */ 518 if (nb_replies > 0) { 519 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, 520 nb_replies); 521 /* 522 * Retry if necessary 523 */ 524 if (unlikely(nb_tx < nb_replies) && fs->retry_enabled) { 525 retry = 0; 526 while (nb_tx < nb_replies && 527 retry++ < burst_tx_retry_num) { 528 rte_delay_us(burst_tx_delay_time); 529 nb_tx += rte_eth_tx_burst(fs->tx_port, 530 fs->tx_queue, 531 &pkts_burst[nb_tx], 532 nb_replies - nb_tx); 533 } 534 } 535 fs->tx_packets += nb_tx; 536 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS 537 fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; 538 #endif 539 if (unlikely(nb_tx < nb_replies)) { 540 fs->fwd_dropped += (nb_replies - nb_tx); 541 do { 542 rte_pktmbuf_free(pkts_burst[nb_tx]); 543 } while (++nb_tx < nb_replies); 544 } 545 } 546 547 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES 548 end_tsc = rte_rdtsc(); 549 core_cycles = (end_tsc - start_tsc); 550 fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); 551 #endif 552 } 553 554 struct fwd_engine icmp_echo_engine = { 555 .fwd_mode_name = "icmpecho", 556 .port_fwd_begin = NULL, 557 .port_fwd_end = NULL, 558 .packet_fwd = reply_to_icmp_echo_rqsts, 559 }; 560