1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation. 3 * Copyright 2014 6WIND S.A. 4 */ 5 6 #include <stdarg.h> 7 #include <stdio.h> 8 #include <errno.h> 9 #include <stdint.h> 10 #include <unistd.h> 11 #include <inttypes.h> 12 13 #include <sys/queue.h> 14 #include <sys/stat.h> 15 16 #include <rte_common.h> 17 #include <rte_byteorder.h> 18 #include <rte_log.h> 19 #include <rte_debug.h> 20 #include <rte_cycles.h> 21 #include <rte_memory.h> 22 #include <rte_memcpy.h> 23 #include <rte_launch.h> 24 #include <rte_eal.h> 25 #include <rte_per_lcore.h> 26 #include <rte_lcore.h> 27 #include <rte_atomic.h> 28 #include <rte_branch_prediction.h> 29 #include <rte_mempool.h> 30 #include <rte_mbuf.h> 31 #include <rte_interrupts.h> 32 #include <rte_pci.h> 33 #include <rte_ether.h> 34 #include <rte_ethdev.h> 35 #include <rte_ip.h> 36 #include <rte_tcp.h> 37 #include <rte_udp.h> 38 #include <rte_vxlan.h> 39 #include <rte_sctp.h> 40 #include <rte_gtp.h> 41 #include <rte_prefetch.h> 42 #include <rte_string_fns.h> 43 #include <rte_flow.h> 44 #include <rte_gro.h> 45 #include <rte_gso.h> 46 47 #include "testpmd.h" 48 49 #define IP_DEFTTL 64 /* from RFC 1340. */ 50 51 #define GRE_CHECKSUM_PRESENT 0x8000 52 #define GRE_KEY_PRESENT 0x2000 53 #define GRE_SEQUENCE_PRESENT 0x1000 54 #define GRE_EXT_LEN 4 55 #define GRE_SUPPORTED_FIELDS (GRE_CHECKSUM_PRESENT | GRE_KEY_PRESENT |\ 56 GRE_SEQUENCE_PRESENT) 57 58 /* We cannot use rte_cpu_to_be_16() on a constant in a switch/case */ 59 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN 60 #define _htons(x) ((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> 8))) 61 #else 62 #define _htons(x) (x) 63 #endif 64 65 uint16_t vxlan_gpe_udp_port = 4790; 66 67 /* structure that caches offload info for the current packet */ 68 struct testpmd_offload_info { 69 uint16_t ethertype; 70 uint8_t gso_enable; 71 uint16_t l2_len; 72 uint16_t l3_len; 73 uint16_t l4_len; 74 uint8_t l4_proto; 75 uint8_t is_tunnel; 76 uint16_t outer_ethertype; 77 uint16_t outer_l2_len; 78 uint16_t outer_l3_len; 79 uint8_t outer_l4_proto; 80 uint16_t tso_segsz; 81 uint16_t tunnel_tso_segsz; 82 uint32_t pkt_len; 83 }; 84 85 /* simplified GRE header */ 86 struct simple_gre_hdr { 87 uint16_t flags; 88 uint16_t proto; 89 } __attribute__((__packed__)); 90 91 static uint16_t 92 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype) 93 { 94 if (ethertype == _htons(RTE_ETHER_TYPE_IPV4)) 95 return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr); 96 else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */ 97 return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr); 98 } 99 100 /* Parse an IPv4 header to fill l3_len, l4_len, and l4_proto */ 101 static void 102 parse_ipv4(struct rte_ipv4_hdr *ipv4_hdr, struct testpmd_offload_info *info) 103 { 104 struct rte_tcp_hdr *tcp_hdr; 105 106 info->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4; 107 info->l4_proto = ipv4_hdr->next_proto_id; 108 109 /* only fill l4_len for TCP, it's useful for TSO */ 110 if (info->l4_proto == IPPROTO_TCP) { 111 tcp_hdr = (struct rte_tcp_hdr *) 112 ((char *)ipv4_hdr + info->l3_len); 113 info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; 114 } else if (info->l4_proto == IPPROTO_UDP) 115 info->l4_len = sizeof(struct rte_udp_hdr); 116 else 117 info->l4_len = 0; 118 } 119 120 /* Parse an IPv6 header to fill l3_len, l4_len, and l4_proto */ 121 static void 122 parse_ipv6(struct rte_ipv6_hdr *ipv6_hdr, struct testpmd_offload_info *info) 123 { 124 struct rte_tcp_hdr *tcp_hdr; 125 126 info->l3_len = sizeof(struct rte_ipv6_hdr); 127 info->l4_proto = ipv6_hdr->proto; 128 129 /* only fill l4_len for TCP, it's useful for TSO */ 130 if (info->l4_proto == IPPROTO_TCP) { 131 tcp_hdr = (struct rte_tcp_hdr *) 132 ((char *)ipv6_hdr + info->l3_len); 133 info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; 134 } else if (info->l4_proto == IPPROTO_UDP) 135 info->l4_len = sizeof(struct rte_udp_hdr); 136 else 137 info->l4_len = 0; 138 } 139 140 /* 141 * Parse an ethernet header to fill the ethertype, l2_len, l3_len and 142 * ipproto. This function is able to recognize IPv4/IPv6 with one optional vlan 143 * header. The l4_len argument is only set in case of TCP (useful for TSO). 144 */ 145 static void 146 parse_ethernet(struct rte_ether_hdr *eth_hdr, struct testpmd_offload_info *info) 147 { 148 struct rte_ipv4_hdr *ipv4_hdr; 149 struct rte_ipv6_hdr *ipv6_hdr; 150 151 info->l2_len = sizeof(struct rte_ether_hdr); 152 info->ethertype = eth_hdr->ether_type; 153 154 if (info->ethertype == _htons(RTE_ETHER_TYPE_VLAN)) { 155 struct rte_vlan_hdr *vlan_hdr = ( 156 struct rte_vlan_hdr *)(eth_hdr + 1); 157 158 info->l2_len += sizeof(struct rte_vlan_hdr); 159 info->ethertype = vlan_hdr->eth_proto; 160 } 161 162 switch (info->ethertype) { 163 case _htons(RTE_ETHER_TYPE_IPV4): 164 ipv4_hdr = (struct rte_ipv4_hdr *) 165 ((char *)eth_hdr + info->l2_len); 166 parse_ipv4(ipv4_hdr, info); 167 break; 168 case _htons(RTE_ETHER_TYPE_IPV6): 169 ipv6_hdr = (struct rte_ipv6_hdr *) 170 ((char *)eth_hdr + info->l2_len); 171 parse_ipv6(ipv6_hdr, info); 172 break; 173 default: 174 info->l4_len = 0; 175 info->l3_len = 0; 176 info->l4_proto = 0; 177 break; 178 } 179 } 180 181 /* 182 * Parse a GTP protocol header. 183 * No optional fields and next extension header type. 184 */ 185 static void 186 parse_gtp(struct rte_udp_hdr *udp_hdr, 187 struct testpmd_offload_info *info) 188 { 189 struct rte_ipv4_hdr *ipv4_hdr; 190 struct rte_ipv6_hdr *ipv6_hdr; 191 struct rte_gtp_hdr *gtp_hdr; 192 uint8_t gtp_len = sizeof(*gtp_hdr); 193 uint8_t ip_ver; 194 195 /* Check udp destination port. */ 196 if (udp_hdr->dst_port != _htons(RTE_GTPC_UDP_PORT) && 197 udp_hdr->src_port != _htons(RTE_GTPC_UDP_PORT) && 198 udp_hdr->dst_port != _htons(RTE_GTPU_UDP_PORT)) 199 return; 200 201 info->is_tunnel = 1; 202 info->outer_ethertype = info->ethertype; 203 info->outer_l2_len = info->l2_len; 204 info->outer_l3_len = info->l3_len; 205 info->outer_l4_proto = info->l4_proto; 206 info->l2_len = 0; 207 208 gtp_hdr = (struct rte_gtp_hdr *)((char *)udp_hdr + 209 sizeof(struct rte_udp_hdr)); 210 211 /* 212 * Check message type. If message type is 0xff, it is 213 * a GTP data packet. If not, it is a GTP control packet 214 */ 215 if (gtp_hdr->msg_type == 0xff) { 216 ip_ver = *(uint8_t *)((char *)udp_hdr + 217 sizeof(struct rte_udp_hdr) + 218 sizeof(struct rte_gtp_hdr)); 219 ip_ver = (ip_ver) & 0xf0; 220 221 if (ip_ver == RTE_GTP_TYPE_IPV4) { 222 ipv4_hdr = (struct rte_ipv4_hdr *)((char *)gtp_hdr + 223 gtp_len); 224 info->ethertype = _htons(RTE_ETHER_TYPE_IPV4); 225 parse_ipv4(ipv4_hdr, info); 226 } else if (ip_ver == RTE_GTP_TYPE_IPV6) { 227 ipv6_hdr = (struct rte_ipv6_hdr *)((char *)gtp_hdr + 228 gtp_len); 229 info->ethertype = _htons(RTE_ETHER_TYPE_IPV6); 230 parse_ipv6(ipv6_hdr, info); 231 } 232 } else { 233 info->ethertype = 0; 234 info->l4_len = 0; 235 info->l3_len = 0; 236 info->l4_proto = 0; 237 } 238 239 info->l2_len += RTE_ETHER_GTP_HLEN; 240 } 241 242 /* Parse a vxlan header */ 243 static void 244 parse_vxlan(struct rte_udp_hdr *udp_hdr, 245 struct testpmd_offload_info *info, 246 uint32_t pkt_type) 247 { 248 struct rte_ether_hdr *eth_hdr; 249 250 /* check udp destination port, 4789 is the default vxlan port 251 * (rfc7348) or that the rx offload flag is set (i40e only 252 * currently) */ 253 if (udp_hdr->dst_port != _htons(4789) && 254 RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0) 255 return; 256 257 info->is_tunnel = 1; 258 info->outer_ethertype = info->ethertype; 259 info->outer_l2_len = info->l2_len; 260 info->outer_l3_len = info->l3_len; 261 info->outer_l4_proto = info->l4_proto; 262 263 eth_hdr = (struct rte_ether_hdr *)((char *)udp_hdr + 264 sizeof(struct rte_udp_hdr) + 265 sizeof(struct rte_vxlan_hdr)); 266 267 parse_ethernet(eth_hdr, info); 268 info->l2_len += RTE_ETHER_VXLAN_HLEN; /* add udp + vxlan */ 269 } 270 271 /* Parse a vxlan-gpe header */ 272 static void 273 parse_vxlan_gpe(struct rte_udp_hdr *udp_hdr, 274 struct testpmd_offload_info *info) 275 { 276 struct rte_ether_hdr *eth_hdr; 277 struct rte_ipv4_hdr *ipv4_hdr; 278 struct rte_ipv6_hdr *ipv6_hdr; 279 struct rte_vxlan_gpe_hdr *vxlan_gpe_hdr; 280 uint8_t vxlan_gpe_len = sizeof(*vxlan_gpe_hdr); 281 282 /* Check udp destination port. */ 283 if (udp_hdr->dst_port != _htons(vxlan_gpe_udp_port)) 284 return; 285 286 vxlan_gpe_hdr = (struct rte_vxlan_gpe_hdr *)((char *)udp_hdr + 287 sizeof(struct rte_udp_hdr)); 288 289 if (!vxlan_gpe_hdr->proto || vxlan_gpe_hdr->proto == 290 RTE_VXLAN_GPE_TYPE_IPV4) { 291 info->is_tunnel = 1; 292 info->outer_ethertype = info->ethertype; 293 info->outer_l2_len = info->l2_len; 294 info->outer_l3_len = info->l3_len; 295 info->outer_l4_proto = info->l4_proto; 296 297 ipv4_hdr = (struct rte_ipv4_hdr *)((char *)vxlan_gpe_hdr + 298 vxlan_gpe_len); 299 300 parse_ipv4(ipv4_hdr, info); 301 info->ethertype = _htons(RTE_ETHER_TYPE_IPV4); 302 info->l2_len = 0; 303 304 } else if (vxlan_gpe_hdr->proto == RTE_VXLAN_GPE_TYPE_IPV6) { 305 info->is_tunnel = 1; 306 info->outer_ethertype = info->ethertype; 307 info->outer_l2_len = info->l2_len; 308 info->outer_l3_len = info->l3_len; 309 info->outer_l4_proto = info->l4_proto; 310 311 ipv6_hdr = (struct rte_ipv6_hdr *)((char *)vxlan_gpe_hdr + 312 vxlan_gpe_len); 313 314 info->ethertype = _htons(RTE_ETHER_TYPE_IPV6); 315 parse_ipv6(ipv6_hdr, info); 316 info->l2_len = 0; 317 318 } else if (vxlan_gpe_hdr->proto == RTE_VXLAN_GPE_TYPE_ETH) { 319 info->is_tunnel = 1; 320 info->outer_ethertype = info->ethertype; 321 info->outer_l2_len = info->l2_len; 322 info->outer_l3_len = info->l3_len; 323 info->outer_l4_proto = info->l4_proto; 324 325 eth_hdr = (struct rte_ether_hdr *)((char *)vxlan_gpe_hdr + 326 vxlan_gpe_len); 327 328 parse_ethernet(eth_hdr, info); 329 } else 330 return; 331 332 info->l2_len += RTE_ETHER_VXLAN_GPE_HLEN; 333 } 334 335 /* Parse a gre header */ 336 static void 337 parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info) 338 { 339 struct rte_ether_hdr *eth_hdr; 340 struct rte_ipv4_hdr *ipv4_hdr; 341 struct rte_ipv6_hdr *ipv6_hdr; 342 uint8_t gre_len = 0; 343 344 gre_len += sizeof(struct simple_gre_hdr); 345 346 if (gre_hdr->flags & _htons(GRE_KEY_PRESENT)) 347 gre_len += GRE_EXT_LEN; 348 if (gre_hdr->flags & _htons(GRE_SEQUENCE_PRESENT)) 349 gre_len += GRE_EXT_LEN; 350 if (gre_hdr->flags & _htons(GRE_CHECKSUM_PRESENT)) 351 gre_len += GRE_EXT_LEN; 352 353 if (gre_hdr->proto == _htons(RTE_ETHER_TYPE_IPV4)) { 354 info->is_tunnel = 1; 355 info->outer_ethertype = info->ethertype; 356 info->outer_l2_len = info->l2_len; 357 info->outer_l3_len = info->l3_len; 358 info->outer_l4_proto = info->l4_proto; 359 360 ipv4_hdr = (struct rte_ipv4_hdr *)((char *)gre_hdr + gre_len); 361 362 parse_ipv4(ipv4_hdr, info); 363 info->ethertype = _htons(RTE_ETHER_TYPE_IPV4); 364 info->l2_len = 0; 365 366 } else if (gre_hdr->proto == _htons(RTE_ETHER_TYPE_IPV6)) { 367 info->is_tunnel = 1; 368 info->outer_ethertype = info->ethertype; 369 info->outer_l2_len = info->l2_len; 370 info->outer_l3_len = info->l3_len; 371 info->outer_l4_proto = info->l4_proto; 372 373 ipv6_hdr = (struct rte_ipv6_hdr *)((char *)gre_hdr + gre_len); 374 375 info->ethertype = _htons(RTE_ETHER_TYPE_IPV6); 376 parse_ipv6(ipv6_hdr, info); 377 info->l2_len = 0; 378 379 } else if (gre_hdr->proto == _htons(RTE_ETHER_TYPE_TEB)) { 380 info->is_tunnel = 1; 381 info->outer_ethertype = info->ethertype; 382 info->outer_l2_len = info->l2_len; 383 info->outer_l3_len = info->l3_len; 384 info->outer_l4_proto = info->l4_proto; 385 386 eth_hdr = (struct rte_ether_hdr *)((char *)gre_hdr + gre_len); 387 388 parse_ethernet(eth_hdr, info); 389 } else 390 return; 391 392 info->l2_len += gre_len; 393 } 394 395 396 /* Parse an encapsulated ip or ipv6 header */ 397 static void 398 parse_encap_ip(void *encap_ip, struct testpmd_offload_info *info) 399 { 400 struct rte_ipv4_hdr *ipv4_hdr = encap_ip; 401 struct rte_ipv6_hdr *ipv6_hdr = encap_ip; 402 uint8_t ip_version; 403 404 ip_version = (ipv4_hdr->version_ihl & 0xf0) >> 4; 405 406 if (ip_version != 4 && ip_version != 6) 407 return; 408 409 info->is_tunnel = 1; 410 info->outer_ethertype = info->ethertype; 411 info->outer_l2_len = info->l2_len; 412 info->outer_l3_len = info->l3_len; 413 414 if (ip_version == 4) { 415 parse_ipv4(ipv4_hdr, info); 416 info->ethertype = _htons(RTE_ETHER_TYPE_IPV4); 417 } else { 418 parse_ipv6(ipv6_hdr, info); 419 info->ethertype = _htons(RTE_ETHER_TYPE_IPV6); 420 } 421 info->l2_len = 0; 422 } 423 424 /* if possible, calculate the checksum of a packet in hw or sw, 425 * depending on the testpmd command line configuration */ 426 static uint64_t 427 process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, 428 uint64_t tx_offloads) 429 { 430 struct rte_ipv4_hdr *ipv4_hdr = l3_hdr; 431 struct rte_udp_hdr *udp_hdr; 432 struct rte_tcp_hdr *tcp_hdr; 433 struct rte_sctp_hdr *sctp_hdr; 434 uint64_t ol_flags = 0; 435 uint32_t max_pkt_len, tso_segsz = 0; 436 437 /* ensure packet is large enough to require tso */ 438 if (!info->is_tunnel) { 439 max_pkt_len = info->l2_len + info->l3_len + info->l4_len + 440 info->tso_segsz; 441 if (info->tso_segsz != 0 && info->pkt_len > max_pkt_len) 442 tso_segsz = info->tso_segsz; 443 } else { 444 max_pkt_len = info->outer_l2_len + info->outer_l3_len + 445 info->l2_len + info->l3_len + info->l4_len + 446 info->tunnel_tso_segsz; 447 if (info->tunnel_tso_segsz != 0 && info->pkt_len > max_pkt_len) 448 tso_segsz = info->tunnel_tso_segsz; 449 } 450 451 if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV4)) { 452 ipv4_hdr = l3_hdr; 453 ipv4_hdr->hdr_checksum = 0; 454 455 ol_flags |= PKT_TX_IPV4; 456 if (info->l4_proto == IPPROTO_TCP && tso_segsz) { 457 ol_flags |= PKT_TX_IP_CKSUM; 458 } else { 459 if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) 460 ol_flags |= PKT_TX_IP_CKSUM; 461 else 462 ipv4_hdr->hdr_checksum = 463 rte_ipv4_cksum(ipv4_hdr); 464 } 465 } else if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV6)) 466 ol_flags |= PKT_TX_IPV6; 467 else 468 return 0; /* packet type not supported, nothing to do */ 469 470 if (info->l4_proto == IPPROTO_UDP) { 471 udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info->l3_len); 472 /* do not recalculate udp cksum if it was 0 */ 473 if (udp_hdr->dgram_cksum != 0) { 474 udp_hdr->dgram_cksum = 0; 475 if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) 476 ol_flags |= PKT_TX_UDP_CKSUM; 477 else { 478 udp_hdr->dgram_cksum = 479 get_udptcp_checksum(l3_hdr, udp_hdr, 480 info->ethertype); 481 } 482 } 483 if (info->gso_enable) 484 ol_flags |= PKT_TX_UDP_SEG; 485 } else if (info->l4_proto == IPPROTO_TCP) { 486 tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + info->l3_len); 487 tcp_hdr->cksum = 0; 488 if (tso_segsz) 489 ol_flags |= PKT_TX_TCP_SEG; 490 else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) 491 ol_flags |= PKT_TX_TCP_CKSUM; 492 else { 493 tcp_hdr->cksum = 494 get_udptcp_checksum(l3_hdr, tcp_hdr, 495 info->ethertype); 496 } 497 if (info->gso_enable) 498 ol_flags |= PKT_TX_TCP_SEG; 499 } else if (info->l4_proto == IPPROTO_SCTP) { 500 sctp_hdr = (struct rte_sctp_hdr *) 501 ((char *)l3_hdr + info->l3_len); 502 sctp_hdr->cksum = 0; 503 /* sctp payload must be a multiple of 4 to be 504 * offloaded */ 505 if ((tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) && 506 ((ipv4_hdr->total_length & 0x3) == 0)) { 507 ol_flags |= PKT_TX_SCTP_CKSUM; 508 } else { 509 /* XXX implement CRC32c, example available in 510 * RFC3309 */ 511 } 512 } 513 514 return ol_flags; 515 } 516 517 /* Calculate the checksum of outer header */ 518 static uint64_t 519 process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info, 520 uint64_t tx_offloads, int tso_enabled) 521 { 522 struct rte_ipv4_hdr *ipv4_hdr = outer_l3_hdr; 523 struct rte_ipv6_hdr *ipv6_hdr = outer_l3_hdr; 524 struct rte_udp_hdr *udp_hdr; 525 uint64_t ol_flags = 0; 526 527 if (info->outer_ethertype == _htons(RTE_ETHER_TYPE_IPV4)) { 528 ipv4_hdr->hdr_checksum = 0; 529 ol_flags |= PKT_TX_OUTER_IPV4; 530 531 if (tx_offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) 532 ol_flags |= PKT_TX_OUTER_IP_CKSUM; 533 else 534 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); 535 } else 536 ol_flags |= PKT_TX_OUTER_IPV6; 537 538 if (info->outer_l4_proto != IPPROTO_UDP) 539 return ol_flags; 540 541 udp_hdr = (struct rte_udp_hdr *) 542 ((char *)outer_l3_hdr + info->outer_l3_len); 543 544 if (tso_enabled) 545 ol_flags |= PKT_TX_TCP_SEG; 546 547 /* Skip SW outer UDP checksum generation if HW supports it */ 548 if (tx_offloads & DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) { 549 if (info->outer_ethertype == _htons(RTE_ETHER_TYPE_IPV4)) 550 udp_hdr->dgram_cksum 551 = rte_ipv4_phdr_cksum(ipv4_hdr, ol_flags); 552 else 553 udp_hdr->dgram_cksum 554 = rte_ipv6_phdr_cksum(ipv6_hdr, ol_flags); 555 556 ol_flags |= PKT_TX_OUTER_UDP_CKSUM; 557 return ol_flags; 558 } 559 560 /* outer UDP checksum is done in software. In the other side, for 561 * UDP tunneling, like VXLAN or Geneve, outer UDP checksum can be 562 * set to zero. 563 * 564 * If a packet will be TSOed into small packets by NIC, we cannot 565 * set/calculate a non-zero checksum, because it will be a wrong 566 * value after the packet be split into several small packets. 567 */ 568 if (tso_enabled) 569 udp_hdr->dgram_cksum = 0; 570 571 /* do not recalculate udp cksum if it was 0 */ 572 if (udp_hdr->dgram_cksum != 0) { 573 udp_hdr->dgram_cksum = 0; 574 if (info->outer_ethertype == _htons(RTE_ETHER_TYPE_IPV4)) 575 udp_hdr->dgram_cksum = 576 rte_ipv4_udptcp_cksum(ipv4_hdr, udp_hdr); 577 else 578 udp_hdr->dgram_cksum = 579 rte_ipv6_udptcp_cksum(ipv6_hdr, udp_hdr); 580 } 581 582 return ol_flags; 583 } 584 585 /* 586 * Helper function. 587 * Performs actual copying. 588 * Returns number of segments in the destination mbuf on success, 589 * or negative error code on failure. 590 */ 591 static int 592 mbuf_copy_split(const struct rte_mbuf *ms, struct rte_mbuf *md[], 593 uint16_t seglen[], uint8_t nb_seg) 594 { 595 uint32_t dlen, slen, tlen; 596 uint32_t i, len; 597 const struct rte_mbuf *m; 598 const uint8_t *src; 599 uint8_t *dst; 600 601 dlen = 0; 602 slen = 0; 603 tlen = 0; 604 605 dst = NULL; 606 src = NULL; 607 608 m = ms; 609 i = 0; 610 while (ms != NULL && i != nb_seg) { 611 612 if (slen == 0) { 613 slen = rte_pktmbuf_data_len(ms); 614 src = rte_pktmbuf_mtod(ms, const uint8_t *); 615 } 616 617 if (dlen == 0) { 618 dlen = RTE_MIN(seglen[i], slen); 619 md[i]->data_len = dlen; 620 md[i]->next = (i + 1 == nb_seg) ? NULL : md[i + 1]; 621 dst = rte_pktmbuf_mtod(md[i], uint8_t *); 622 } 623 624 len = RTE_MIN(slen, dlen); 625 memcpy(dst, src, len); 626 tlen += len; 627 slen -= len; 628 dlen -= len; 629 src += len; 630 dst += len; 631 632 if (slen == 0) 633 ms = ms->next; 634 if (dlen == 0) 635 i++; 636 } 637 638 if (ms != NULL) 639 return -ENOBUFS; 640 else if (tlen != m->pkt_len) 641 return -EINVAL; 642 643 md[0]->nb_segs = nb_seg; 644 md[0]->pkt_len = tlen; 645 md[0]->vlan_tci = m->vlan_tci; 646 md[0]->vlan_tci_outer = m->vlan_tci_outer; 647 md[0]->ol_flags = m->ol_flags; 648 md[0]->tx_offload = m->tx_offload; 649 650 return nb_seg; 651 } 652 653 /* 654 * Allocate a new mbuf with up to tx_pkt_nb_segs segments. 655 * Copy packet contents and offload information into the new segmented mbuf. 656 */ 657 static struct rte_mbuf * 658 pkt_copy_split(const struct rte_mbuf *pkt) 659 { 660 int32_t n, rc; 661 uint32_t i, len, nb_seg; 662 struct rte_mempool *mp; 663 uint16_t seglen[RTE_MAX_SEGS_PER_PKT]; 664 struct rte_mbuf *p, *md[RTE_MAX_SEGS_PER_PKT]; 665 666 mp = current_fwd_lcore()->mbp; 667 668 if (tx_pkt_split == TX_PKT_SPLIT_RND) 669 nb_seg = random() % tx_pkt_nb_segs + 1; 670 else 671 nb_seg = tx_pkt_nb_segs; 672 673 memcpy(seglen, tx_pkt_seg_lengths, nb_seg * sizeof(seglen[0])); 674 675 /* calculate number of segments to use and their length. */ 676 len = 0; 677 for (i = 0; i != nb_seg && len < pkt->pkt_len; i++) { 678 len += seglen[i]; 679 md[i] = NULL; 680 } 681 682 n = pkt->pkt_len - len; 683 684 /* update size of the last segment to fit rest of the packet */ 685 if (n >= 0) { 686 seglen[i - 1] += n; 687 len += n; 688 } 689 690 nb_seg = i; 691 while (i != 0) { 692 p = rte_pktmbuf_alloc(mp); 693 if (p == NULL) { 694 TESTPMD_LOG(ERR, 695 "failed to allocate %u-th of %u mbuf " 696 "from mempool: %s\n", 697 nb_seg - i, nb_seg, mp->name); 698 break; 699 } 700 701 md[--i] = p; 702 if (rte_pktmbuf_tailroom(md[i]) < seglen[i]) { 703 TESTPMD_LOG(ERR, "mempool %s, %u-th segment: " 704 "expected seglen: %u, " 705 "actual mbuf tailroom: %u\n", 706 mp->name, i, seglen[i], 707 rte_pktmbuf_tailroom(md[i])); 708 break; 709 } 710 } 711 712 /* all mbufs successfully allocated, do copy */ 713 if (i == 0) { 714 rc = mbuf_copy_split(pkt, md, seglen, nb_seg); 715 if (rc < 0) 716 TESTPMD_LOG(ERR, 717 "mbuf_copy_split for %p(len=%u, nb_seg=%u) " 718 "into %u segments failed with error code: %d\n", 719 pkt, pkt->pkt_len, pkt->nb_segs, nb_seg, rc); 720 721 /* figure out how many mbufs to free. */ 722 i = RTE_MAX(rc, 0); 723 } 724 725 /* free unused mbufs */ 726 for (; i != nb_seg; i++) { 727 rte_pktmbuf_free_seg(md[i]); 728 md[i] = NULL; 729 } 730 731 return md[0]; 732 } 733 734 /* 735 * Receive a burst of packets, and for each packet: 736 * - parse packet, and try to recognize a supported packet type (1) 737 * - if it's not a supported packet type, don't touch the packet, else: 738 * - reprocess the checksum of all supported layers. This is done in SW 739 * or HW, depending on testpmd command line configuration 740 * - if TSO is enabled in testpmd command line, also flag the mbuf for TCP 741 * segmentation offload (this implies HW TCP checksum) 742 * Then transmit packets on the output port. 743 * 744 * (1) Supported packets are: 745 * Ether / (vlan) / IP|IP6 / UDP|TCP|SCTP . 746 * Ether / (vlan) / outer IP|IP6 / outer UDP / VxLAN / Ether / IP|IP6 / 747 * UDP|TCP|SCTP 748 * Ether / (vlan) / outer IP|IP6 / outer UDP / VXLAN-GPE / Ether / IP|IP6 / 749 * UDP|TCP|SCTP 750 * Ether / (vlan) / outer IP|IP6 / outer UDP / VXLAN-GPE / IP|IP6 / 751 * UDP|TCP|SCTP 752 * Ether / (vlan) / outer IP / outer UDP / GTP / IP|IP6 / UDP|TCP|SCTP 753 * Ether / (vlan) / outer IP|IP6 / GRE / Ether / IP|IP6 / UDP|TCP|SCTP 754 * Ether / (vlan) / outer IP|IP6 / GRE / IP|IP6 / UDP|TCP|SCTP 755 * Ether / (vlan) / outer IP|IP6 / IP|IP6 / UDP|TCP|SCTP 756 * 757 * The testpmd command line for this forward engine sets the flags 758 * TESTPMD_TX_OFFLOAD_* in ports[tx_port].tx_ol_flags. They control 759 * wether a checksum must be calculated in software or in hardware. The 760 * IP, UDP, TCP and SCTP flags always concern the inner layer. The 761 * OUTER_IP is only useful for tunnel packets. 762 */ 763 static void 764 pkt_burst_checksum_forward(struct fwd_stream *fs) 765 { 766 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 767 struct rte_mbuf *gso_segments[GSO_MAX_PKT_BURST]; 768 struct rte_gso_ctx *gso_ctx; 769 struct rte_mbuf **tx_pkts_burst; 770 struct rte_port *txp; 771 struct rte_mbuf *m, *p; 772 struct rte_ether_hdr *eth_hdr; 773 void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ 774 void **gro_ctx; 775 uint16_t gro_pkts_num; 776 uint8_t gro_enable; 777 uint16_t nb_rx; 778 uint16_t nb_tx; 779 uint16_t nb_prep; 780 uint16_t i; 781 uint64_t rx_ol_flags, tx_ol_flags; 782 uint64_t tx_offloads; 783 uint32_t retry; 784 uint32_t rx_bad_ip_csum; 785 uint32_t rx_bad_l4_csum; 786 uint32_t rx_bad_outer_l4_csum; 787 struct testpmd_offload_info info; 788 uint16_t nb_segments = 0; 789 int ret; 790 791 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES 792 uint64_t start_tsc; 793 uint64_t end_tsc; 794 uint64_t core_cycles; 795 #endif 796 797 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES 798 start_tsc = rte_rdtsc(); 799 #endif 800 801 /* receive a burst of packet */ 802 nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, 803 nb_pkt_per_burst); 804 if (unlikely(nb_rx == 0)) 805 return; 806 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS 807 fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; 808 #endif 809 fs->rx_packets += nb_rx; 810 rx_bad_ip_csum = 0; 811 rx_bad_l4_csum = 0; 812 rx_bad_outer_l4_csum = 0; 813 gro_enable = gro_ports[fs->rx_port].enable; 814 815 txp = &ports[fs->tx_port]; 816 tx_offloads = txp->dev_conf.txmode.offloads; 817 memset(&info, 0, sizeof(info)); 818 info.tso_segsz = txp->tso_segsz; 819 info.tunnel_tso_segsz = txp->tunnel_tso_segsz; 820 if (gso_ports[fs->tx_port].enable) 821 info.gso_enable = 1; 822 823 for (i = 0; i < nb_rx; i++) { 824 if (likely(i < nb_rx - 1)) 825 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1], 826 void *)); 827 828 m = pkts_burst[i]; 829 info.is_tunnel = 0; 830 info.pkt_len = rte_pktmbuf_pkt_len(m); 831 tx_ol_flags = m->ol_flags & 832 (IND_ATTACHED_MBUF | EXT_ATTACHED_MBUF); 833 rx_ol_flags = m->ol_flags; 834 835 /* Update the L3/L4 checksum error packet statistics */ 836 if ((rx_ol_flags & PKT_RX_IP_CKSUM_MASK) == PKT_RX_IP_CKSUM_BAD) 837 rx_bad_ip_csum += 1; 838 if ((rx_ol_flags & PKT_RX_L4_CKSUM_MASK) == PKT_RX_L4_CKSUM_BAD) 839 rx_bad_l4_csum += 1; 840 if (rx_ol_flags & PKT_RX_OUTER_L4_CKSUM_BAD) 841 rx_bad_outer_l4_csum += 1; 842 843 /* step 1: dissect packet, parsing optional vlan, ip4/ip6, vxlan 844 * and inner headers */ 845 846 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 847 rte_ether_addr_copy(&peer_eth_addrs[fs->peer_addr], 848 ð_hdr->d_addr); 849 rte_ether_addr_copy(&ports[fs->tx_port].eth_addr, 850 ð_hdr->s_addr); 851 parse_ethernet(eth_hdr, &info); 852 l3_hdr = (char *)eth_hdr + info.l2_len; 853 854 /* check if it's a supported tunnel */ 855 if (txp->parse_tunnel) { 856 if (info.l4_proto == IPPROTO_UDP) { 857 struct rte_udp_hdr *udp_hdr; 858 859 udp_hdr = (struct rte_udp_hdr *) 860 ((char *)l3_hdr + info.l3_len); 861 parse_gtp(udp_hdr, &info); 862 if (info.is_tunnel) { 863 tx_ol_flags |= PKT_TX_TUNNEL_GTP; 864 goto tunnel_update; 865 } 866 parse_vxlan_gpe(udp_hdr, &info); 867 if (info.is_tunnel) { 868 tx_ol_flags |= 869 PKT_TX_TUNNEL_VXLAN_GPE; 870 goto tunnel_update; 871 } 872 parse_vxlan(udp_hdr, &info, 873 m->packet_type); 874 if (info.is_tunnel) 875 tx_ol_flags |= 876 PKT_TX_TUNNEL_VXLAN; 877 } else if (info.l4_proto == IPPROTO_GRE) { 878 struct simple_gre_hdr *gre_hdr; 879 880 gre_hdr = (struct simple_gre_hdr *) 881 ((char *)l3_hdr + info.l3_len); 882 parse_gre(gre_hdr, &info); 883 if (info.is_tunnel) 884 tx_ol_flags |= PKT_TX_TUNNEL_GRE; 885 } else if (info.l4_proto == IPPROTO_IPIP) { 886 void *encap_ip_hdr; 887 888 encap_ip_hdr = (char *)l3_hdr + info.l3_len; 889 parse_encap_ip(encap_ip_hdr, &info); 890 if (info.is_tunnel) 891 tx_ol_flags |= PKT_TX_TUNNEL_IPIP; 892 } 893 } 894 895 tunnel_update: 896 /* update l3_hdr and outer_l3_hdr if a tunnel was parsed */ 897 if (info.is_tunnel) { 898 outer_l3_hdr = l3_hdr; 899 l3_hdr = (char *)l3_hdr + info.outer_l3_len + info.l2_len; 900 } 901 902 /* step 2: depending on user command line configuration, 903 * recompute checksum either in software or flag the 904 * mbuf to offload the calculation to the NIC. If TSO 905 * is configured, prepare the mbuf for TCP segmentation. */ 906 907 /* process checksums of inner headers first */ 908 tx_ol_flags |= process_inner_cksums(l3_hdr, &info, 909 tx_offloads); 910 911 /* Then process outer headers if any. Note that the software 912 * checksum will be wrong if one of the inner checksums is 913 * processed in hardware. */ 914 if (info.is_tunnel == 1) { 915 tx_ol_flags |= process_outer_cksums(outer_l3_hdr, &info, 916 tx_offloads, 917 !!(tx_ol_flags & PKT_TX_TCP_SEG)); 918 } 919 920 /* step 3: fill the mbuf meta data (flags and header lengths) */ 921 922 m->tx_offload = 0; 923 if (info.is_tunnel == 1) { 924 if (info.tunnel_tso_segsz || 925 (tx_offloads & 926 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) || 927 (tx_offloads & 928 DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) || 929 (tx_ol_flags & PKT_TX_OUTER_IPV6)) { 930 m->outer_l2_len = info.outer_l2_len; 931 m->outer_l3_len = info.outer_l3_len; 932 m->l2_len = info.l2_len; 933 m->l3_len = info.l3_len; 934 m->l4_len = info.l4_len; 935 m->tso_segsz = info.tunnel_tso_segsz; 936 } 937 else { 938 /* if there is a outer UDP cksum 939 processed in sw and the inner in hw, 940 the outer checksum will be wrong as 941 the payload will be modified by the 942 hardware */ 943 m->l2_len = info.outer_l2_len + 944 info.outer_l3_len + info.l2_len; 945 m->l3_len = info.l3_len; 946 m->l4_len = info.l4_len; 947 } 948 } else { 949 /* this is only useful if an offload flag is 950 * set, but it does not hurt to fill it in any 951 * case */ 952 m->l2_len = info.l2_len; 953 m->l3_len = info.l3_len; 954 m->l4_len = info.l4_len; 955 m->tso_segsz = info.tso_segsz; 956 } 957 m->ol_flags = tx_ol_flags; 958 959 /* Do split & copy for the packet. */ 960 if (tx_pkt_split != TX_PKT_SPLIT_OFF) { 961 p = pkt_copy_split(m); 962 if (p != NULL) { 963 rte_pktmbuf_free(m); 964 m = p; 965 pkts_burst[i] = m; 966 } 967 } 968 969 /* if verbose mode is enabled, dump debug info */ 970 if (verbose_level > 0) { 971 char buf[256]; 972 973 printf("-----------------\n"); 974 printf("port=%u, mbuf=%p, pkt_len=%u, nb_segs=%u:\n", 975 fs->rx_port, m, m->pkt_len, m->nb_segs); 976 /* dump rx parsed packet info */ 977 rte_get_rx_ol_flag_list(rx_ol_flags, buf, sizeof(buf)); 978 printf("rx: l2_len=%d ethertype=%x l3_len=%d " 979 "l4_proto=%d l4_len=%d flags=%s\n", 980 info.l2_len, rte_be_to_cpu_16(info.ethertype), 981 info.l3_len, info.l4_proto, info.l4_len, buf); 982 if (rx_ol_flags & PKT_RX_LRO) 983 printf("rx: m->lro_segsz=%u\n", m->tso_segsz); 984 if (info.is_tunnel == 1) 985 printf("rx: outer_l2_len=%d outer_ethertype=%x " 986 "outer_l3_len=%d\n", info.outer_l2_len, 987 rte_be_to_cpu_16(info.outer_ethertype), 988 info.outer_l3_len); 989 /* dump tx packet info */ 990 if ((tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM | 991 DEV_TX_OFFLOAD_UDP_CKSUM | 992 DEV_TX_OFFLOAD_TCP_CKSUM | 993 DEV_TX_OFFLOAD_SCTP_CKSUM)) || 994 info.tso_segsz != 0) 995 printf("tx: m->l2_len=%d m->l3_len=%d " 996 "m->l4_len=%d\n", 997 m->l2_len, m->l3_len, m->l4_len); 998 if (info.is_tunnel == 1) { 999 if ((tx_offloads & 1000 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) || 1001 (tx_offloads & 1002 DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) || 1003 (tx_ol_flags & PKT_TX_OUTER_IPV6)) 1004 printf("tx: m->outer_l2_len=%d " 1005 "m->outer_l3_len=%d\n", 1006 m->outer_l2_len, 1007 m->outer_l3_len); 1008 if (info.tunnel_tso_segsz != 0 && 1009 (m->ol_flags & PKT_TX_TCP_SEG)) 1010 printf("tx: m->tso_segsz=%d\n", 1011 m->tso_segsz); 1012 } else if (info.tso_segsz != 0 && 1013 (m->ol_flags & PKT_TX_TCP_SEG)) 1014 printf("tx: m->tso_segsz=%d\n", m->tso_segsz); 1015 rte_get_tx_ol_flag_list(m->ol_flags, buf, sizeof(buf)); 1016 printf("tx: flags=%s", buf); 1017 printf("\n"); 1018 } 1019 } 1020 1021 if (unlikely(gro_enable)) { 1022 if (gro_flush_cycles == GRO_DEFAULT_FLUSH_CYCLES) { 1023 nb_rx = rte_gro_reassemble_burst(pkts_burst, nb_rx, 1024 &(gro_ports[fs->rx_port].param)); 1025 } else { 1026 gro_ctx = current_fwd_lcore()->gro_ctx; 1027 nb_rx = rte_gro_reassemble(pkts_burst, nb_rx, gro_ctx); 1028 1029 if (++fs->gro_times >= gro_flush_cycles) { 1030 gro_pkts_num = rte_gro_get_pkt_count(gro_ctx); 1031 if (gro_pkts_num > MAX_PKT_BURST - nb_rx) 1032 gro_pkts_num = MAX_PKT_BURST - nb_rx; 1033 1034 nb_rx += rte_gro_timeout_flush(gro_ctx, 0, 1035 RTE_GRO_TCP_IPV4, 1036 &pkts_burst[nb_rx], 1037 gro_pkts_num); 1038 fs->gro_times = 0; 1039 } 1040 } 1041 } 1042 1043 if (gso_ports[fs->tx_port].enable == 0) 1044 tx_pkts_burst = pkts_burst; 1045 else { 1046 gso_ctx = &(current_fwd_lcore()->gso_ctx); 1047 gso_ctx->gso_size = gso_max_segment_size; 1048 for (i = 0; i < nb_rx; i++) { 1049 ret = rte_gso_segment(pkts_burst[i], gso_ctx, 1050 &gso_segments[nb_segments], 1051 GSO_MAX_PKT_BURST - nb_segments); 1052 if (ret >= 0) 1053 nb_segments += ret; 1054 else { 1055 TESTPMD_LOG(DEBUG, "Unable to segment packet"); 1056 rte_pktmbuf_free(pkts_burst[i]); 1057 } 1058 } 1059 1060 tx_pkts_burst = gso_segments; 1061 nb_rx = nb_segments; 1062 } 1063 1064 nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue, 1065 tx_pkts_burst, nb_rx); 1066 if (nb_prep != nb_rx) 1067 printf("Preparing packet burst to transmit failed: %s\n", 1068 rte_strerror(rte_errno)); 1069 1070 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, tx_pkts_burst, 1071 nb_prep); 1072 1073 /* 1074 * Retry if necessary 1075 */ 1076 if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) { 1077 retry = 0; 1078 while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { 1079 rte_delay_us(burst_tx_delay_time); 1080 nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, 1081 &tx_pkts_burst[nb_tx], nb_rx - nb_tx); 1082 } 1083 } 1084 fs->tx_packets += nb_tx; 1085 fs->rx_bad_ip_csum += rx_bad_ip_csum; 1086 fs->rx_bad_l4_csum += rx_bad_l4_csum; 1087 fs->rx_bad_outer_l4_csum += rx_bad_outer_l4_csum; 1088 1089 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS 1090 fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; 1091 #endif 1092 if (unlikely(nb_tx < nb_rx)) { 1093 fs->fwd_dropped += (nb_rx - nb_tx); 1094 do { 1095 rte_pktmbuf_free(tx_pkts_burst[nb_tx]); 1096 } while (++nb_tx < nb_rx); 1097 } 1098 1099 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES 1100 end_tsc = rte_rdtsc(); 1101 core_cycles = (end_tsc - start_tsc); 1102 fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); 1103 #endif 1104 } 1105 1106 struct fwd_engine csum_fwd_engine = { 1107 .fwd_mode_name = "csum", 1108 .port_fwd_begin = NULL, 1109 .port_fwd_end = NULL, 1110 .packet_fwd = pkt_burst_checksum_forward, 1111 }; 1112