/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation.
 * Copyright 2014 6WIND S.A.
 */

#include <stdarg.h>
#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <sys/queue.h>
#include <sys/stat.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_vxlan.h>
#include <rte_sctp.h>
#include <rte_gtp.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_flow.h>
#include <rte_gro.h>
#include <rte_gso.h>

#include "testpmd.h"

#define IP_DEFTTL  64   /* from RFC 1340. */
#define IP_VERSION 0x40
#define IP_HDRLEN  0x05 /* default IP header length == five 32-bits words. */
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN)

#define GRE_CHECKSUM_PRESENT	0x8000
#define GRE_KEY_PRESENT		0x2000
#define GRE_SEQUENCE_PRESENT	0x1000
#define GRE_EXT_LEN		4
#define GRE_SUPPORTED_FIELDS	(GRE_CHECKSUM_PRESENT | GRE_KEY_PRESENT |\
				 GRE_SEQUENCE_PRESENT)

/* We cannot use rte_cpu_to_be_16() on a constant in a switch/case */
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
#define _htons(x) ((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> 8)))
#else
#define _htons(x) (x)
#endif

uint16_t vxlan_gpe_udp_port = 4790;

/* structure that caches offload info for the current packet */
struct testpmd_offload_info {
	uint16_t ethertype;
	uint8_t gso_enable;
	uint16_t l2_len;
	uint16_t l3_len;
	uint16_t l4_len;
	uint8_t l4_proto;
	uint8_t is_tunnel;
	uint16_t outer_ethertype;
	uint16_t outer_l2_len;
	uint16_t outer_l3_len;
	uint8_t outer_l4_proto;
	uint16_t tso_segsz;
	uint16_t tunnel_tso_segsz;
	uint32_t pkt_len;
};

/* simplified GRE header */
struct simple_gre_hdr {
	uint16_t flags;
	uint16_t proto;
} __attribute__((__packed__));

static uint16_t
get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
{
	if (ethertype == _htons(RTE_ETHER_TYPE_IPV4))
		return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
	else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
		return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
}

/* Parse an IPv4 header to fill l3_len, l4_len, and l4_proto */
static void
parse_ipv4(struct rte_ipv4_hdr *ipv4_hdr, struct testpmd_offload_info *info)
{
	struct rte_tcp_hdr *tcp_hdr;

	info->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
	info->l4_proto = ipv4_hdr->next_proto_id;

	/* only fill l4_len for TCP, it's useful for TSO */
	if (info->l4_proto == IPPROTO_TCP) {
		tcp_hdr = (struct rte_tcp_hdr *)
			((char *)ipv4_hdr + info->l3_len);
		info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
	} else if (info->l4_proto == IPPROTO_UDP)
		info->l4_len = sizeof(struct rte_udp_hdr);
	else
		info->l4_len = 0;
}
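
/*
 * In parse_ipv4() above and parse_ipv6() below, the TCP header length is
 * taken from data_off: its high nibble counts 32-bit words, so
 * (data_off & 0xf0) >> 2 converts it to bytes. Note that the IPv6 parser does
 * not walk extension headers; l3_len is always the fixed 40-byte base header.
 */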
/* Parse an IPv6 header to fill l3_len, l4_len, and l4_proto */
static void
parse_ipv6(struct rte_ipv6_hdr *ipv6_hdr, struct testpmd_offload_info *info)
{
	struct rte_tcp_hdr *tcp_hdr;

	info->l3_len = sizeof(struct rte_ipv6_hdr);
	info->l4_proto = ipv6_hdr->proto;

	/* only fill l4_len for TCP, it's useful for TSO */
	if (info->l4_proto == IPPROTO_TCP) {
		tcp_hdr = (struct rte_tcp_hdr *)
			((char *)ipv6_hdr + info->l3_len);
		info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
	} else if (info->l4_proto == IPPROTO_UDP)
		info->l4_len = sizeof(struct rte_udp_hdr);
	else
		info->l4_len = 0;
}

/*
 * Parse an ethernet header to fill the ethertype, l2_len, l3_len and
 * ipproto. This function is able to recognize IPv4/IPv6 with one optional vlan
 * header. The l4_len argument is only set in case of TCP (useful for TSO).
 */
static void
parse_ethernet(struct rte_ether_hdr *eth_hdr, struct testpmd_offload_info *info)
{
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;

	info->l2_len = sizeof(struct rte_ether_hdr);
	info->ethertype = eth_hdr->ether_type;

	if (info->ethertype == _htons(RTE_ETHER_TYPE_VLAN)) {
		struct rte_vlan_hdr *vlan_hdr = (
			struct rte_vlan_hdr *)(eth_hdr + 1);

		info->l2_len += sizeof(struct rte_vlan_hdr);
		info->ethertype = vlan_hdr->eth_proto;
	}

	switch (info->ethertype) {
	case _htons(RTE_ETHER_TYPE_IPV4):
		ipv4_hdr = (struct rte_ipv4_hdr *)
			((char *)eth_hdr + info->l2_len);
		parse_ipv4(ipv4_hdr, info);
		break;
	case _htons(RTE_ETHER_TYPE_IPV6):
		ipv6_hdr = (struct rte_ipv6_hdr *)
			((char *)eth_hdr + info->l2_len);
		parse_ipv6(ipv6_hdr, info);
		break;
	default:
		info->l4_len = 0;
		info->l3_len = 0;
		info->l4_proto = 0;
		break;
	}
}
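
/*
 * Each tunnel parser below (GTP, VXLAN, VXLAN-GPE, GRE, IP-in-IP) follows the
 * same pattern: once the tunnel is recognized, the l2/l3/l4 fields already
 * parsed for the packet are saved into the outer_* fields, and the inner
 * headers are parsed again with parse_ethernet()/parse_ipv4()/parse_ipv6().
 */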
/*
 * Parse a GTP protocol header.
 * No optional fields and next extension header type.
 */
static void
parse_gtp(struct rte_udp_hdr *udp_hdr,
	  struct testpmd_offload_info *info)
{
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_gtp_hdr *gtp_hdr;
	uint8_t gtp_len = sizeof(*gtp_hdr);
	uint8_t ip_ver;

	/* Check udp destination port. */
	if (udp_hdr->dst_port != _htons(RTE_GTPC_UDP_PORT) &&
	    udp_hdr->src_port != _htons(RTE_GTPC_UDP_PORT) &&
	    udp_hdr->dst_port != _htons(RTE_GTPU_UDP_PORT))
		return;

	info->is_tunnel = 1;
	info->outer_ethertype = info->ethertype;
	info->outer_l2_len = info->l2_len;
	info->outer_l3_len = info->l3_len;
	info->outer_l4_proto = info->l4_proto;
	info->l2_len = 0;

	gtp_hdr = (struct rte_gtp_hdr *)((char *)udp_hdr +
		  sizeof(struct rte_udp_hdr));

	/*
	 * Check message type. If message type is 0xff, it is
	 * a GTP data packet. If not, it is a GTP control packet.
	 */
	if (gtp_hdr->msg_type == 0xff) {
		ip_ver = *(uint8_t *)((char *)udp_hdr +
			 sizeof(struct rte_udp_hdr) +
			 sizeof(struct rte_gtp_hdr));
		ip_ver = (ip_ver) & 0xf0;

		if (ip_ver == RTE_GTP_TYPE_IPV4) {
			ipv4_hdr = (struct rte_ipv4_hdr *)((char *)gtp_hdr +
				   gtp_len);
			info->ethertype = _htons(RTE_ETHER_TYPE_IPV4);
			parse_ipv4(ipv4_hdr, info);
		} else if (ip_ver == RTE_GTP_TYPE_IPV6) {
			ipv6_hdr = (struct rte_ipv6_hdr *)((char *)gtp_hdr +
				   gtp_len);
			info->ethertype = _htons(RTE_ETHER_TYPE_IPV6);
			parse_ipv6(ipv6_hdr, info);
		}
	} else {
		info->ethertype = 0;
		info->l4_len = 0;
		info->l3_len = 0;
		info->l4_proto = 0;
	}

	info->l2_len += RTE_ETHER_GTP_HLEN;
}

/* Parse a vxlan header */
static void
parse_vxlan(struct rte_udp_hdr *udp_hdr,
	    struct testpmd_offload_info *info,
	    uint32_t pkt_type)
{
	struct rte_ether_hdr *eth_hdr;

	/* check UDP destination port: 4789 is the default VXLAN port
	 * (RFC 7348); also accept the packet if the Rx tunnel packet type
	 * flag is set (currently only i40e sets it) */
	if (udp_hdr->dst_port != _htons(4789) &&
	    RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
		return;

	info->is_tunnel = 1;
	info->outer_ethertype = info->ethertype;
	info->outer_l2_len = info->l2_len;
	info->outer_l3_len = info->l3_len;
	info->outer_l4_proto = info->l4_proto;

	eth_hdr = (struct rte_ether_hdr *)((char *)udp_hdr +
		sizeof(struct rte_udp_hdr) +
		sizeof(struct rte_vxlan_hdr));

	parse_ethernet(eth_hdr, info);
	info->l2_len += RTE_ETHER_VXLAN_HLEN; /* add udp + vxlan */
}

/* Parse a vxlan-gpe header */
static void
parse_vxlan_gpe(struct rte_udp_hdr *udp_hdr,
		struct testpmd_offload_info *info)
{
	struct rte_ether_hdr *eth_hdr;
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_vxlan_gpe_hdr *vxlan_gpe_hdr;
	uint8_t vxlan_gpe_len = sizeof(*vxlan_gpe_hdr);

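	/*
	 * Unlike parse_vxlan() above, which matches the fixed port 4789, the
	 * VXLAN-GPE port is read from the global vxlan_gpe_udp_port variable
	 * (4790 by default) rather than a hard-coded value.
	 */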
	/* Check udp destination port. */
	if (udp_hdr->dst_port != _htons(vxlan_gpe_udp_port))
		return;

	vxlan_gpe_hdr = (struct rte_vxlan_gpe_hdr *)((char *)udp_hdr +
				sizeof(struct rte_udp_hdr));

	if (!vxlan_gpe_hdr->proto || vxlan_gpe_hdr->proto ==
	    RTE_VXLAN_GPE_TYPE_IPV4) {
		info->is_tunnel = 1;
		info->outer_ethertype = info->ethertype;
		info->outer_l2_len = info->l2_len;
		info->outer_l3_len = info->l3_len;
		info->outer_l4_proto = info->l4_proto;

		ipv4_hdr = (struct rte_ipv4_hdr *)((char *)vxlan_gpe_hdr +
			   vxlan_gpe_len);

		parse_ipv4(ipv4_hdr, info);
		info->ethertype = _htons(RTE_ETHER_TYPE_IPV4);
		info->l2_len = 0;

	} else if (vxlan_gpe_hdr->proto == RTE_VXLAN_GPE_TYPE_IPV6) {
		info->is_tunnel = 1;
		info->outer_ethertype = info->ethertype;
		info->outer_l2_len = info->l2_len;
		info->outer_l3_len = info->l3_len;
		info->outer_l4_proto = info->l4_proto;

		ipv6_hdr = (struct rte_ipv6_hdr *)((char *)vxlan_gpe_hdr +
			   vxlan_gpe_len);

		info->ethertype = _htons(RTE_ETHER_TYPE_IPV6);
		parse_ipv6(ipv6_hdr, info);
		info->l2_len = 0;

	} else if (vxlan_gpe_hdr->proto == RTE_VXLAN_GPE_TYPE_ETH) {
		info->is_tunnel = 1;
		info->outer_ethertype = info->ethertype;
		info->outer_l2_len = info->l2_len;
		info->outer_l3_len = info->l3_len;
		info->outer_l4_proto = info->l4_proto;

		eth_hdr = (struct rte_ether_hdr *)((char *)vxlan_gpe_hdr +
			  vxlan_gpe_len);

		parse_ethernet(eth_hdr, info);
	} else
		return;

	info->l2_len += RTE_ETHER_VXLAN_GPE_HLEN;
}

/* Parse a gre header */
static void
parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
{
	struct rte_ether_hdr *eth_hdr;
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	uint8_t gre_len = 0;

	gre_len += sizeof(struct simple_gre_hdr);

	if (gre_hdr->flags & _htons(GRE_KEY_PRESENT))
		gre_len += GRE_EXT_LEN;
	if (gre_hdr->flags & _htons(GRE_SEQUENCE_PRESENT))
		gre_len += GRE_EXT_LEN;
	if (gre_hdr->flags & _htons(GRE_CHECKSUM_PRESENT))
		gre_len += GRE_EXT_LEN;

	if (gre_hdr->proto == _htons(RTE_ETHER_TYPE_IPV4)) {
		info->is_tunnel = 1;
		info->outer_ethertype = info->ethertype;
		info->outer_l2_len = info->l2_len;
		info->outer_l3_len = info->l3_len;
		info->outer_l4_proto = info->l4_proto;

		ipv4_hdr = (struct rte_ipv4_hdr *)((char *)gre_hdr + gre_len);

		parse_ipv4(ipv4_hdr, info);
		info->ethertype = _htons(RTE_ETHER_TYPE_IPV4);
		info->l2_len = 0;

	} else if (gre_hdr->proto == _htons(RTE_ETHER_TYPE_IPV6)) {
		info->is_tunnel = 1;
		info->outer_ethertype = info->ethertype;
		info->outer_l2_len = info->l2_len;
		info->outer_l3_len = info->l3_len;
		info->outer_l4_proto = info->l4_proto;

		ipv6_hdr = (struct rte_ipv6_hdr *)((char *)gre_hdr + gre_len);

		info->ethertype = _htons(RTE_ETHER_TYPE_IPV6);
		parse_ipv6(ipv6_hdr, info);
		info->l2_len = 0;

	} else if (gre_hdr->proto == _htons(RTE_ETHER_TYPE_TEB)) {
		info->is_tunnel = 1;
		info->outer_ethertype = info->ethertype;
		info->outer_l2_len = info->l2_len;
		info->outer_l3_len = info->l3_len;
		info->outer_l4_proto = info->l4_proto;

		eth_hdr = (struct rte_ether_hdr *)((char *)gre_hdr + gre_len);

		parse_ethernet(eth_hdr, info);
	} else
		return;

	info->l2_len += gre_len;
}

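/*
 * parse_encap_ip() is only called for IPPROTO_IPIP: there is no tunnel
 * header, so the version nibble of the encapsulated header is used to choose
 * between IPv4 and IPv6.
 */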
/* Parse an encapsulated ip or ipv6 header */
static void
parse_encap_ip(void *encap_ip, struct testpmd_offload_info *info)
{
	struct rte_ipv4_hdr *ipv4_hdr = encap_ip;
	struct rte_ipv6_hdr *ipv6_hdr = encap_ip;
	uint8_t ip_version;

	ip_version = (ipv4_hdr->version_ihl & 0xf0) >> 4;

	if (ip_version != 4 && ip_version != 6)
		return;

	info->is_tunnel = 1;
	info->outer_ethertype = info->ethertype;
	info->outer_l2_len = info->l2_len;
	info->outer_l3_len = info->l3_len;

	if (ip_version == 4) {
		parse_ipv4(ipv4_hdr, info);
		info->ethertype = _htons(RTE_ETHER_TYPE_IPV4);
	} else {
		parse_ipv6(ipv6_hdr, info);
		info->ethertype = _htons(RTE_ETHER_TYPE_IPV6);
	}
	info->l2_len = 0;
}

/* if possible, calculate the checksum of a packet in hw or sw,
 * depending on the testpmd command line configuration */
static uint64_t
process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
	uint64_t tx_offloads)
{
	struct rte_ipv4_hdr *ipv4_hdr = l3_hdr;
	struct rte_udp_hdr *udp_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	struct rte_sctp_hdr *sctp_hdr;
	uint64_t ol_flags = 0;
	uint32_t max_pkt_len, tso_segsz = 0;

	/* ensure packet is large enough to require tso */
	if (!info->is_tunnel) {
		max_pkt_len = info->l2_len + info->l3_len + info->l4_len +
			info->tso_segsz;
		if (info->tso_segsz != 0 && info->pkt_len > max_pkt_len)
			tso_segsz = info->tso_segsz;
	} else {
		max_pkt_len = info->outer_l2_len + info->outer_l3_len +
			info->l2_len + info->l3_len + info->l4_len +
			info->tunnel_tso_segsz;
		if (info->tunnel_tso_segsz != 0 && info->pkt_len > max_pkt_len)
			tso_segsz = info->tunnel_tso_segsz;
	}

	if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV4)) {
		ipv4_hdr = l3_hdr;
		ipv4_hdr->hdr_checksum = 0;

		ol_flags |= PKT_TX_IPV4;
		if (info->l4_proto == IPPROTO_TCP && tso_segsz) {
			ol_flags |= PKT_TX_IP_CKSUM;
		} else {
			if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
				ol_flags |= PKT_TX_IP_CKSUM;
			else
				ipv4_hdr->hdr_checksum =
					rte_ipv4_cksum(ipv4_hdr);
		}
	} else if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV6))
		ol_flags |= PKT_TX_IPV6;
	else
		return 0; /* packet type not supported, nothing to do */

	if (info->l4_proto == IPPROTO_UDP) {
		udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info->l3_len);
		/* do not recalculate udp cksum if it was 0 */
		if (udp_hdr->dgram_cksum != 0) {
			udp_hdr->dgram_cksum = 0;
			if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)
				ol_flags |= PKT_TX_UDP_CKSUM;
			else {
				udp_hdr->dgram_cksum =
					get_udptcp_checksum(l3_hdr, udp_hdr,
						info->ethertype);
			}
		}
		if (info->gso_enable)
			ol_flags |= PKT_TX_UDP_SEG;
	} else if (info->l4_proto == IPPROTO_TCP) {
		tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + info->l3_len);
		tcp_hdr->cksum = 0;
		if (tso_segsz)
			ol_flags |= PKT_TX_TCP_SEG;
		else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)
			ol_flags |= PKT_TX_TCP_CKSUM;
		else {
			tcp_hdr->cksum =
				get_udptcp_checksum(l3_hdr, tcp_hdr,
					info->ethertype);
		}
		if (info->gso_enable)
			ol_flags |= PKT_TX_TCP_SEG;
	} else if (info->l4_proto == IPPROTO_SCTP) {
		sctp_hdr = (struct rte_sctp_hdr *)
			((char *)l3_hdr + info->l3_len);
		sctp_hdr->cksum = 0;
		/* sctp payload must be a multiple of 4 to be
		 * offloaded */
		if ((tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) &&
			((ipv4_hdr->total_length & 0x3) == 0)) {
			ol_flags |= PKT_TX_SCTP_CKSUM;
		} else {
			/* XXX implement CRC32c, example available in
			 * RFC3309 */
		}
	}

	return ol_flags;
}

/* Calculate the checksum of outer header */
static uint64_t
process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
	uint64_t tx_offloads, int tso_enabled)
{
	struct rte_ipv4_hdr *ipv4_hdr = outer_l3_hdr;
	struct rte_ipv6_hdr *ipv6_hdr = outer_l3_hdr;
	struct rte_udp_hdr *udp_hdr;
	uint64_t ol_flags = 0;

	if (info->outer_ethertype == _htons(RTE_ETHER_TYPE_IPV4)) {
		ipv4_hdr->hdr_checksum = 0;
		ol_flags |= PKT_TX_OUTER_IPV4;

		if (tx_offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
			ol_flags |= PKT_TX_OUTER_IP_CKSUM;
		else
			ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
	} else
		ol_flags |= PKT_TX_OUTER_IPV6;

	if (info->outer_l4_proto != IPPROTO_UDP)
		return ol_flags;

	udp_hdr = (struct rte_udp_hdr *)
		((char *)outer_l3_hdr + info->outer_l3_len);

	if (tso_enabled)
		ol_flags |= PKT_TX_TCP_SEG;

	/* Skip SW outer UDP checksum generation if HW supports it */
	if (tx_offloads & DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) {
		if (info->outer_ethertype == _htons(RTE_ETHER_TYPE_IPV4))
			udp_hdr->dgram_cksum
				= rte_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
		else
			udp_hdr->dgram_cksum
				= rte_ipv6_phdr_cksum(ipv6_hdr, ol_flags);

		ol_flags |= PKT_TX_OUTER_UDP_CKSUM;
		return ol_flags;
	}

	/* The outer UDP checksum is computed in software below. Alternatively,
	 * for UDP tunnels such as VXLAN or Geneve, the outer UDP checksum may
	 * simply be set to zero.
	 *
	 * If the packet will be segmented into smaller packets by the NIC
	 * (TSO), we cannot set/calculate a non-zero checksum, because the
	 * value would be wrong once the packet is split.
	 */
	if (tso_enabled)
		udp_hdr->dgram_cksum = 0;

	/* do not recalculate udp cksum if it was 0 */
	if (udp_hdr->dgram_cksum != 0) {
		udp_hdr->dgram_cksum = 0;
		if (info->outer_ethertype == _htons(RTE_ETHER_TYPE_IPV4))
			udp_hdr->dgram_cksum =
				rte_ipv4_udptcp_cksum(ipv4_hdr, udp_hdr);
		else
			udp_hdr->dgram_cksum =
				rte_ipv6_udptcp_cksum(ipv6_hdr, udp_hdr);
	}

	return ol_flags;
}

/*
 * Helper function.
 * Performs actual copying.
 * Returns number of segments in the destination mbuf on success,
 * or negative error code on failure.
 */
static int
mbuf_copy_split(const struct rte_mbuf *ms, struct rte_mbuf *md[],
	uint16_t seglen[], uint8_t nb_seg)
{
	uint32_t dlen, slen, tlen;
	uint32_t i, len;
	const struct rte_mbuf *m;
	const uint8_t *src;
	uint8_t *dst;

	dlen = 0;
	slen = 0;
	tlen = 0;

	dst = NULL;
	src = NULL;

	m = ms;
	i = 0;
	while (ms != NULL && i != nb_seg) {

		if (slen == 0) {
			slen = rte_pktmbuf_data_len(ms);
			src = rte_pktmbuf_mtod(ms, const uint8_t *);
		}

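		/* the current destination segment is full (or not started
		 * yet): size and link in the next destination mbuf */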
		if (dlen == 0) {
			dlen = RTE_MIN(seglen[i], slen);
			md[i]->data_len = dlen;
			md[i]->next = (i + 1 == nb_seg) ? NULL : md[i + 1];
			dst = rte_pktmbuf_mtod(md[i], uint8_t *);
		}

		len = RTE_MIN(slen, dlen);
		memcpy(dst, src, len);
		tlen += len;
		slen -= len;
		dlen -= len;
		src += len;
		dst += len;

		if (slen == 0)
			ms = ms->next;
		if (dlen == 0)
			i++;
	}

	if (ms != NULL)
		return -ENOBUFS;
	else if (tlen != m->pkt_len)
		return -EINVAL;

	md[0]->nb_segs = nb_seg;
	md[0]->pkt_len = tlen;
	md[0]->vlan_tci = m->vlan_tci;
	md[0]->vlan_tci_outer = m->vlan_tci_outer;
	md[0]->ol_flags = m->ol_flags;
	md[0]->tx_offload = m->tx_offload;

	return nb_seg;
}

/*
 * Allocate a new mbuf with up to tx_pkt_nb_segs segments.
 * Copy packet contents and offload information into the new segmented mbuf.
 */
static struct rte_mbuf *
pkt_copy_split(const struct rte_mbuf *pkt)
{
	int32_t n, rc;
	uint32_t i, len, nb_seg;
	struct rte_mempool *mp;
	uint16_t seglen[RTE_MAX_SEGS_PER_PKT];
	struct rte_mbuf *p, *md[RTE_MAX_SEGS_PER_PKT];

	mp = current_fwd_lcore()->mbp;

	if (tx_pkt_split == TX_PKT_SPLIT_RND)
		nb_seg = random() % tx_pkt_nb_segs + 1;
	else
		nb_seg = tx_pkt_nb_segs;

	memcpy(seglen, tx_pkt_seg_lengths, nb_seg * sizeof(seglen[0]));

	/* calculate number of segments to use and their length. */
	len = 0;
	for (i = 0; i != nb_seg && len < pkt->pkt_len; i++) {
		len += seglen[i];
		md[i] = NULL;
	}

	n = pkt->pkt_len - len;

	/* update size of the last segment to fit rest of the packet */
	if (n >= 0) {
		seglen[i - 1] += n;
		len += n;
	}

	nb_seg = i;
	while (i != 0) {
		p = rte_pktmbuf_alloc(mp);
		if (p == NULL) {
			TESTPMD_LOG(ERR,
				"failed to allocate %u-th of %u mbuf "
				"from mempool: %s\n",
				nb_seg - i, nb_seg, mp->name);
			break;
		}

		md[--i] = p;
		if (rte_pktmbuf_tailroom(md[i]) < seglen[i]) {
			TESTPMD_LOG(ERR, "mempool %s, %u-th segment: "
				"expected seglen: %u, "
				"actual mbuf tailroom: %u\n",
				mp->name, i, seglen[i],
				rte_pktmbuf_tailroom(md[i]));
			break;
		}
	}

	/* all mbufs successfully allocated, do copy */
	if (i == 0) {
		rc = mbuf_copy_split(pkt, md, seglen, nb_seg);
		if (rc < 0)
			TESTPMD_LOG(ERR,
				"mbuf_copy_split for %p(len=%u, nb_seg=%u) "
				"into %u segments failed with error code: %d\n",
				pkt, pkt->pkt_len, pkt->nb_segs, nb_seg, rc);

		/* figure out how many mbufs to free. */
		i = RTE_MAX(rc, 0);
	}

	/* free unused mbufs */
	for (; i != nb_seg; i++) {
		rte_pktmbuf_free_seg(md[i]);
		md[i] = NULL;
	}

	return md[0];
}

/*
 * Receive a burst of packets, and for each packet:
 *  - parse packet, and try to recognize a supported packet type (1)
 *  - if it's not a supported packet type, don't touch the packet, else:
 *  - reprocess the checksum of all supported layers. This is done in SW
 *    or HW, depending on testpmd command line configuration
 *  - if TSO is enabled in testpmd command line, also flag the mbuf for TCP
 *    segmentation offload (this implies HW TCP checksum)
 * Then transmit packets on the output port.
 *
 * (1) Supported packets are:
 *   Ether / (vlan) / IP|IP6 / UDP|TCP|SCTP .
 *   Ether / (vlan) / outer IP|IP6 / outer UDP / VxLAN / Ether / IP|IP6 /
 *           UDP|TCP|SCTP
 *   Ether / (vlan) / outer IP|IP6 / outer UDP / VXLAN-GPE / Ether / IP|IP6 /
 *           UDP|TCP|SCTP
 *   Ether / (vlan) / outer IP|IP6 / outer UDP / VXLAN-GPE / IP|IP6 /
 *           UDP|TCP|SCTP
 *   Ether / (vlan) / outer IP / outer UDP / GTP / IP|IP6 / UDP|TCP|SCTP
 *   Ether / (vlan) / outer IP|IP6 / GRE / Ether / IP|IP6 / UDP|TCP|SCTP
 *   Ether / (vlan) / outer IP|IP6 / GRE / IP|IP6 / UDP|TCP|SCTP
 *   Ether / (vlan) / outer IP|IP6 / IP|IP6 / UDP|TCP|SCTP
 *
 * The testpmd command line for this forward engine sets the
 * DEV_TX_OFFLOAD_* flags in ports[tx_port].dev_conf.txmode.offloads. They
 * control whether a checksum must be calculated in software or in hardware.
 * The IP, UDP, TCP and SCTP flags always concern the inner layer. The
 * OUTER_IP is only useful for tunnel packets.
 */
static void
pkt_burst_checksum_forward(struct fwd_stream *fs)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	struct rte_mbuf *gso_segments[GSO_MAX_PKT_BURST];
	struct rte_gso_ctx *gso_ctx;
	struct rte_mbuf **tx_pkts_burst;
	struct rte_port *txp;
	struct rte_mbuf *m, *p;
	struct rte_ether_hdr *eth_hdr;
	void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
	void **gro_ctx;
	uint16_t gro_pkts_num;
	uint8_t gro_enable;
	uint16_t nb_rx;
	uint16_t nb_tx;
	uint16_t nb_prep;
	uint16_t i;
	uint64_t rx_ol_flags, tx_ol_flags;
	uint64_t tx_offloads;
	uint32_t retry;
	uint32_t rx_bad_ip_csum;
	uint32_t rx_bad_l4_csum;
	uint32_t rx_bad_outer_l4_csum;
	struct testpmd_offload_info info;
	uint16_t nb_segments = 0;
	int ret;

#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	uint64_t start_tsc;
	uint64_t end_tsc;
	uint64_t core_cycles;
#endif

#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	start_tsc = rte_rdtsc();
#endif

	/* receive a burst of packet */
	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
				 nb_pkt_per_burst);
	if (unlikely(nb_rx == 0))
		return;
#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
	fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
#endif
	fs->rx_packets += nb_rx;
	rx_bad_ip_csum = 0;
	rx_bad_l4_csum = 0;
	rx_bad_outer_l4_csum = 0;
	gro_enable = gro_ports[fs->rx_port].enable;

	txp = &ports[fs->tx_port];
	tx_offloads = txp->dev_conf.txmode.offloads;
	memset(&info, 0, sizeof(info));
	info.tso_segsz = txp->tso_segsz;
	info.tunnel_tso_segsz = txp->tunnel_tso_segsz;
	if (gso_ports[fs->tx_port].enable)
		info.gso_enable = 1;

	for (i = 0; i < nb_rx; i++) {
		if (likely(i < nb_rx - 1))
			rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1],
						       void *));

		m = pkts_burst[i];
		info.is_tunnel = 0;
		info.pkt_len = rte_pktmbuf_pkt_len(m);
		tx_ol_flags = m->ol_flags &
			      (IND_ATTACHED_MBUF | EXT_ATTACHED_MBUF);
		rx_ol_flags = m->ol_flags;

		/* Update the L3/L4 checksum error packet statistics */
		if ((rx_ol_flags & PKT_RX_IP_CKSUM_MASK) == PKT_RX_IP_CKSUM_BAD)
			rx_bad_ip_csum += 1;
		if ((rx_ol_flags & PKT_RX_L4_CKSUM_MASK) == PKT_RX_L4_CKSUM_BAD)
			rx_bad_l4_csum += 1;
		if (rx_ol_flags & PKT_RX_OUTER_L4_CKSUM_BAD)
			rx_bad_outer_l4_csum += 1;

		/* step 1: dissect packet, parsing optional vlan, ip4/ip6, vxlan
		 * and inner headers */

		eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
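		/* rewrite the Ethernet addresses: the configured peer address
		 * becomes the destination, the Tx port address the source */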
		rte_ether_addr_copy(&peer_eth_addrs[fs->peer_addr],
				&eth_hdr->d_addr);
		rte_ether_addr_copy(&ports[fs->tx_port].eth_addr,
				&eth_hdr->s_addr);
		parse_ethernet(eth_hdr, &info);
		l3_hdr = (char *)eth_hdr + info.l2_len;

		/* check if it's a supported tunnel */
		if (txp->parse_tunnel) {
			if (info.l4_proto == IPPROTO_UDP) {
				struct rte_udp_hdr *udp_hdr;

				udp_hdr = (struct rte_udp_hdr *)
					((char *)l3_hdr + info.l3_len);
				parse_gtp(udp_hdr, &info);
				if (info.is_tunnel) {
					tx_ol_flags |= PKT_TX_TUNNEL_GTP;
					goto tunnel_update;
				}
				parse_vxlan_gpe(udp_hdr, &info);
				if (info.is_tunnel) {
					tx_ol_flags |=
						PKT_TX_TUNNEL_VXLAN_GPE;
					goto tunnel_update;
				}
				parse_vxlan(udp_hdr, &info,
					    m->packet_type);
				if (info.is_tunnel)
					tx_ol_flags |=
						PKT_TX_TUNNEL_VXLAN;
			} else if (info.l4_proto == IPPROTO_GRE) {
				struct simple_gre_hdr *gre_hdr;

				gre_hdr = (struct simple_gre_hdr *)
					((char *)l3_hdr + info.l3_len);
				parse_gre(gre_hdr, &info);
				if (info.is_tunnel)
					tx_ol_flags |= PKT_TX_TUNNEL_GRE;
			} else if (info.l4_proto == IPPROTO_IPIP) {
				void *encap_ip_hdr;

				encap_ip_hdr = (char *)l3_hdr + info.l3_len;
				parse_encap_ip(encap_ip_hdr, &info);
				if (info.is_tunnel)
					tx_ol_flags |= PKT_TX_TUNNEL_IPIP;
			}
		}

tunnel_update:
		/* update l3_hdr and outer_l3_hdr if a tunnel was parsed */
		if (info.is_tunnel) {
			outer_l3_hdr = l3_hdr;
			l3_hdr = (char *)l3_hdr + info.outer_l3_len + info.l2_len;
		}

		/* step 2: depending on user command line configuration,
		 * recompute checksum either in software or flag the
		 * mbuf to offload the calculation to the NIC. If TSO
		 * is configured, prepare the mbuf for TCP segmentation. */

		/* process checksums of inner headers first */
		tx_ol_flags |= process_inner_cksums(l3_hdr, &info,
			tx_offloads);

		/* Then process outer headers if any. Note that the software
		 * checksum will be wrong if one of the inner checksums is
		 * processed in hardware. */
		if (info.is_tunnel == 1) {
			tx_ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
					tx_offloads,
					!!(tx_ol_flags & PKT_TX_TCP_SEG));
		}

		/* step 3: fill the mbuf meta data (flags and header lengths) */

		m->tx_offload = 0;
		if (info.is_tunnel == 1) {
			if (info.tunnel_tso_segsz ||
			    (tx_offloads &
			     DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) ||
			    (tx_offloads &
			     DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) ||
			    (tx_ol_flags & PKT_TX_OUTER_IPV6)) {
				m->outer_l2_len = info.outer_l2_len;
				m->outer_l3_len = info.outer_l3_len;
				m->l2_len = info.l2_len;
				m->l3_len = info.l3_len;
				m->l4_len = info.l4_len;
				m->tso_segsz = info.tunnel_tso_segsz;
			} else {
				/* if there is an outer UDP cksum
				   processed in sw and the inner in hw,
				   the outer checksum will be wrong as
				   the payload will be modified by the
				   hardware */
				m->l2_len = info.outer_l2_len +
					info.outer_l3_len + info.l2_len;
				m->l3_len = info.l3_len;
				m->l4_len = info.l4_len;
			}
		} else {
			/* this is only useful if an offload flag is
			 * set, but it does not hurt to fill it in any
			 * case */
			m->l2_len = info.l2_len;
			m->l3_len = info.l3_len;
			m->l4_len = info.l4_len;
			m->tso_segsz = info.tso_segsz;
		}
		m->ol_flags = tx_ol_flags;

		/* Do split & copy for the packet. */
		if (tx_pkt_split != TX_PKT_SPLIT_OFF) {
			p = pkt_copy_split(m);
			if (p != NULL) {
				rte_pktmbuf_free(m);
				m = p;
				pkts_burst[i] = m;
			}
		}

		/* if verbose mode is enabled, dump debug info */
		if (verbose_level > 0) {
			char buf[256];

			printf("-----------------\n");
			printf("port=%u, mbuf=%p, pkt_len=%u, nb_segs=%u:\n",
				fs->rx_port, m, m->pkt_len, m->nb_segs);
			/* dump rx parsed packet info */
			rte_get_rx_ol_flag_list(rx_ol_flags, buf, sizeof(buf));
			printf("rx: l2_len=%d ethertype=%x l3_len=%d "
				"l4_proto=%d l4_len=%d flags=%s\n",
				info.l2_len, rte_be_to_cpu_16(info.ethertype),
				info.l3_len, info.l4_proto, info.l4_len, buf);
			if (rx_ol_flags & PKT_RX_LRO)
				printf("rx: m->lro_segsz=%u\n", m->tso_segsz);
			if (info.is_tunnel == 1)
				printf("rx: outer_l2_len=%d outer_ethertype=%x "
					"outer_l3_len=%d\n", info.outer_l2_len,
					rte_be_to_cpu_16(info.outer_ethertype),
					info.outer_l3_len);
			/* dump tx packet info */
			if ((tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
					    DEV_TX_OFFLOAD_UDP_CKSUM |
					    DEV_TX_OFFLOAD_TCP_CKSUM |
					    DEV_TX_OFFLOAD_SCTP_CKSUM)) ||
				info.tso_segsz != 0)
				printf("tx: m->l2_len=%d m->l3_len=%d "
					"m->l4_len=%d\n",
					m->l2_len, m->l3_len, m->l4_len);
			if (info.is_tunnel == 1) {
				if ((tx_offloads &
				    DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) ||
				    (tx_offloads &
				    DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) ||
				    (tx_ol_flags & PKT_TX_OUTER_IPV6))
					printf("tx: m->outer_l2_len=%d "
						"m->outer_l3_len=%d\n",
						m->outer_l2_len,
						m->outer_l3_len);
				if (info.tunnel_tso_segsz != 0 &&
						(m->ol_flags & PKT_TX_TCP_SEG))
					printf("tx: m->tso_segsz=%d\n",
						m->tso_segsz);
			} else if (info.tso_segsz != 0 &&
					(m->ol_flags & PKT_TX_TCP_SEG))
				printf("tx: m->tso_segsz=%d\n", m->tso_segsz);
			rte_get_tx_ol_flag_list(m->ol_flags, buf, sizeof(buf));
			printf("tx: flags=%s", buf);
			printf("\n");
		}
	}

	if (unlikely(gro_enable)) {
		if (gro_flush_cycles == GRO_DEFAULT_FLUSH_CYCLES) {
			nb_rx = rte_gro_reassemble_burst(pkts_burst, nb_rx,
					&(gro_ports[fs->rx_port].param));
		} else {
			gro_ctx = current_fwd_lcore()->gro_ctx;
			nb_rx = rte_gro_reassemble(pkts_burst, nb_rx, gro_ctx);

			if (++fs->gro_times >= gro_flush_cycles) {
				gro_pkts_num = rte_gro_get_pkt_count(gro_ctx);
				if (gro_pkts_num > MAX_PKT_BURST - nb_rx)
					gro_pkts_num = MAX_PKT_BURST - nb_rx;

				nb_rx += rte_gro_timeout_flush(gro_ctx, 0,
						RTE_GRO_TCP_IPV4,
						&pkts_burst[nb_rx],
						gro_pkts_num);
				fs->gro_times = 0;
			}
		}
	}

	if (gso_ports[fs->tx_port].enable == 0)
		tx_pkts_burst = pkts_burst;
	else {
		gso_ctx = &(current_fwd_lcore()->gso_ctx);
		gso_ctx->gso_size = gso_max_segment_size;
		for (i = 0; i < nb_rx; i++) {
			ret = rte_gso_segment(pkts_burst[i], gso_ctx,
					&gso_segments[nb_segments],
					GSO_MAX_PKT_BURST - nb_segments);
			if (ret >= 0)
				nb_segments += ret;
			else {
				TESTPMD_LOG(DEBUG, "Unable to segment packet");
				rte_pktmbuf_free(pkts_burst[i]);
			}
		}

		tx_pkts_burst = gso_segments;
		nb_rx = nb_segments;
	}

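	/*
	 * rte_eth_tx_prepare() lets the driver check the offload metadata and
	 * fix it up where needed (e.g. compute the pseudo-header checksums
	 * some NICs expect) before the burst is actually transmitted.
	 */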
	nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue,
			tx_pkts_burst, nb_rx);
	if (nb_prep != nb_rx)
		printf("Preparing packet burst to transmit failed: %s\n",
				rte_strerror(rte_errno));

	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, tx_pkts_burst,
			nb_prep);

	/*
	 * Retry if necessary
	 */
	if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
		retry = 0;
		while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
			rte_delay_us(burst_tx_delay_time);
			nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
					&tx_pkts_burst[nb_tx], nb_rx - nb_tx);
		}
	}
	fs->tx_packets += nb_tx;
	fs->rx_bad_ip_csum += rx_bad_ip_csum;
	fs->rx_bad_l4_csum += rx_bad_l4_csum;
	fs->rx_bad_outer_l4_csum += rx_bad_outer_l4_csum;

#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
	fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
#endif
	if (unlikely(nb_tx < nb_rx)) {
		fs->fwd_dropped += (nb_rx - nb_tx);
		do {
			rte_pktmbuf_free(tx_pkts_burst[nb_tx]);
		} while (++nb_tx < nb_rx);
	}

#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	end_tsc = rte_rdtsc();
	core_cycles = (end_tsc - start_tsc);
	fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
#endif
}

struct fwd_engine csum_fwd_engine = {
	.fwd_mode_name  = "csum",
	.port_fwd_begin = NULL,
	.port_fwd_end   = NULL,
	.packet_fwd     = pkt_burst_checksum_forward,
};