1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 * Copyright 2014 6WIND S.A. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * * Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * * Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * * Neither the name of Intel Corporation nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 #include <stdarg.h> 36 #include <stdio.h> 37 #include <errno.h> 38 #include <stdint.h> 39 #include <unistd.h> 40 #include <inttypes.h> 41 42 #include <sys/queue.h> 43 #include <sys/stat.h> 44 45 #include <rte_common.h> 46 #include <rte_byteorder.h> 47 #include <rte_log.h> 48 #include <rte_debug.h> 49 #include <rte_cycles.h> 50 #include <rte_memory.h> 51 #include <rte_memcpy.h> 52 #include <rte_memzone.h> 53 #include <rte_launch.h> 54 #include <rte_eal.h> 55 #include <rte_per_lcore.h> 56 #include <rte_lcore.h> 57 #include <rte_atomic.h> 58 #include <rte_branch_prediction.h> 59 #include <rte_memory.h> 60 #include <rte_mempool.h> 61 #include <rte_mbuf.h> 62 #include <rte_memcpy.h> 63 #include <rte_interrupts.h> 64 #include <rte_pci.h> 65 #include <rte_ether.h> 66 #include <rte_ethdev.h> 67 #include <rte_ip.h> 68 #include <rte_tcp.h> 69 #include <rte_udp.h> 70 #include <rte_sctp.h> 71 #include <rte_prefetch.h> 72 #include <rte_string_fns.h> 73 #include "testpmd.h" 74 75 #define IP_DEFTTL 64 /* from RFC 1340. */ 76 #define IP_VERSION 0x40 77 #define IP_HDRLEN 0x05 /* default IP header length == five 32-bits words. */ 78 #define IP_VHL_DEF (IP_VERSION | IP_HDRLEN) 79 80 #define GRE_KEY_PRESENT 0x2000 81 #define GRE_KEY_LEN 4 82 #define GRE_SUPPORTED_FIELDS GRE_KEY_PRESENT 83 84 /* We cannot use rte_cpu_to_be_16() on a constant in a switch/case */ 85 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN 86 #define _htons(x) ((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> 8))) 87 #else 88 #define _htons(x) (x) 89 #endif 90 91 /* structure that caches offload info for the current packet */ 92 struct testpmd_offload_info { 93 uint16_t ethertype; 94 uint16_t l2_len; 95 uint16_t l3_len; 96 uint16_t l4_len; 97 uint8_t l4_proto; 98 uint8_t is_tunnel; 99 uint16_t outer_ethertype; 100 uint16_t outer_l2_len; 101 uint16_t outer_l3_len; 102 uint8_t outer_l4_proto; 103 uint16_t tso_segsz; 104 }; 105 106 /* simplified GRE header */ 107 struct simple_gre_hdr { 108 uint16_t flags; 109 uint16_t proto; 110 } __attribute__((__packed__)); 111 112 static uint16_t 113 get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags) 114 { 115 if (ethertype == _htons(ETHER_TYPE_IPv4)) 116 return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); 117 else /* assume ethertype == ETHER_TYPE_IPv6 */ 118 return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); 119 } 120 121 static uint16_t 122 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype) 123 { 124 if (ethertype == _htons(ETHER_TYPE_IPv4)) 125 return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr); 126 else /* assume ethertype == ETHER_TYPE_IPv6 */ 127 return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr); 128 } 129 130 /* Parse an IPv4 header to fill l3_len, l4_len, and l4_proto */ 131 static void 132 parse_ipv4(struct ipv4_hdr *ipv4_hdr, struct testpmd_offload_info *info) 133 { 134 struct tcp_hdr *tcp_hdr; 135 136 info->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4; 137 info->l4_proto = ipv4_hdr->next_proto_id; 138 139 /* only fill l4_len for TCP, it's useful for TSO */ 140 if (info->l4_proto == IPPROTO_TCP) { 141 tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + info->l3_len); 142 info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; 143 } else 144 info->l4_len = 0; 145 } 146 147 /* Parse an IPv6 header to fill l3_len, l4_len, and l4_proto */ 148 static void 149 parse_ipv6(struct ipv6_hdr *ipv6_hdr, struct testpmd_offload_info *info) 150 { 151 struct tcp_hdr *tcp_hdr; 152 153 info->l3_len = sizeof(struct ipv6_hdr); 154 info->l4_proto = ipv6_hdr->proto; 155 156 /* only fill l4_len for TCP, it's useful for TSO */ 157 if (info->l4_proto == IPPROTO_TCP) { 158 tcp_hdr = (struct tcp_hdr *)((char *)ipv6_hdr + info->l3_len); 159 info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; 160 } else 161 info->l4_len = 0; 162 } 163 164 /* 165 * Parse an ethernet header to fill the ethertype, l2_len, l3_len and 166 * ipproto. This function is able to recognize IPv4/IPv6 with one optional vlan 167 * header. The l4_len argument is only set in case of TCP (useful for TSO). 168 */ 169 static void 170 parse_ethernet(struct ether_hdr *eth_hdr, struct testpmd_offload_info *info) 171 { 172 struct ipv4_hdr *ipv4_hdr; 173 struct ipv6_hdr *ipv6_hdr; 174 175 info->l2_len = sizeof(struct ether_hdr); 176 info->ethertype = eth_hdr->ether_type; 177 178 if (info->ethertype == _htons(ETHER_TYPE_VLAN)) { 179 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); 180 181 info->l2_len += sizeof(struct vlan_hdr); 182 info->ethertype = vlan_hdr->eth_proto; 183 } 184 185 switch (info->ethertype) { 186 case _htons(ETHER_TYPE_IPv4): 187 ipv4_hdr = (struct ipv4_hdr *) ((char *)eth_hdr + info->l2_len); 188 parse_ipv4(ipv4_hdr, info); 189 break; 190 case _htons(ETHER_TYPE_IPv6): 191 ipv6_hdr = (struct ipv6_hdr *) ((char *)eth_hdr + info->l2_len); 192 parse_ipv6(ipv6_hdr, info); 193 break; 194 default: 195 info->l4_len = 0; 196 info->l3_len = 0; 197 info->l4_proto = 0; 198 break; 199 } 200 } 201 202 /* Parse a vxlan header */ 203 static void 204 parse_vxlan(struct udp_hdr *udp_hdr, 205 struct testpmd_offload_info *info, 206 uint32_t pkt_type) 207 { 208 struct ether_hdr *eth_hdr; 209 210 /* check udp destination port, 4789 is the default vxlan port 211 * (rfc7348) or that the rx offload flag is set (i40e only 212 * currently) */ 213 if (udp_hdr->dst_port != _htons(4789) && 214 RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0) 215 return; 216 217 info->is_tunnel = 1; 218 info->outer_ethertype = info->ethertype; 219 info->outer_l2_len = info->l2_len; 220 info->outer_l3_len = info->l3_len; 221 info->outer_l4_proto = info->l4_proto; 222 223 eth_hdr = (struct ether_hdr *)((char *)udp_hdr + 224 sizeof(struct udp_hdr) + 225 sizeof(struct vxlan_hdr)); 226 227 parse_ethernet(eth_hdr, info); 228 info->l2_len += ETHER_VXLAN_HLEN; /* add udp + vxlan */ 229 } 230 231 /* Parse a gre header */ 232 static void 233 parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info) 234 { 235 struct ether_hdr *eth_hdr; 236 struct ipv4_hdr *ipv4_hdr; 237 struct ipv6_hdr *ipv6_hdr; 238 uint8_t gre_len = 0; 239 240 /* check which fields are supported */ 241 if ((gre_hdr->flags & _htons(~GRE_SUPPORTED_FIELDS)) != 0) 242 return; 243 244 gre_len += sizeof(struct simple_gre_hdr); 245 246 if (gre_hdr->flags & _htons(GRE_KEY_PRESENT)) 247 gre_len += GRE_KEY_LEN; 248 249 if (gre_hdr->proto == _htons(ETHER_TYPE_IPv4)) { 250 info->is_tunnel = 1; 251 info->outer_ethertype = info->ethertype; 252 info->outer_l2_len = info->l2_len; 253 info->outer_l3_len = info->l3_len; 254 info->outer_l4_proto = info->l4_proto; 255 256 ipv4_hdr = (struct ipv4_hdr *)((char *)gre_hdr + gre_len); 257 258 parse_ipv4(ipv4_hdr, info); 259 info->ethertype = _htons(ETHER_TYPE_IPv4); 260 info->l2_len = 0; 261 262 } else if (gre_hdr->proto == _htons(ETHER_TYPE_IPv6)) { 263 info->is_tunnel = 1; 264 info->outer_ethertype = info->ethertype; 265 info->outer_l2_len = info->l2_len; 266 info->outer_l3_len = info->l3_len; 267 info->outer_l4_proto = info->l4_proto; 268 269 ipv6_hdr = (struct ipv6_hdr *)((char *)gre_hdr + gre_len); 270 271 info->ethertype = _htons(ETHER_TYPE_IPv6); 272 parse_ipv6(ipv6_hdr, info); 273 info->l2_len = 0; 274 275 } else if (gre_hdr->proto == _htons(ETHER_TYPE_TEB)) { 276 info->is_tunnel = 1; 277 info->outer_ethertype = info->ethertype; 278 info->outer_l2_len = info->l2_len; 279 info->outer_l3_len = info->l3_len; 280 info->outer_l4_proto = info->l4_proto; 281 282 eth_hdr = (struct ether_hdr *)((char *)gre_hdr + gre_len); 283 284 parse_ethernet(eth_hdr, info); 285 } else 286 return; 287 288 info->l2_len += gre_len; 289 } 290 291 292 /* Parse an encapsulated ip or ipv6 header */ 293 static void 294 parse_encap_ip(void *encap_ip, struct testpmd_offload_info *info) 295 { 296 struct ipv4_hdr *ipv4_hdr = encap_ip; 297 struct ipv6_hdr *ipv6_hdr = encap_ip; 298 uint8_t ip_version; 299 300 ip_version = (ipv4_hdr->version_ihl & 0xf0) >> 4; 301 302 if (ip_version != 4 && ip_version != 6) 303 return; 304 305 info->is_tunnel = 1; 306 info->outer_ethertype = info->ethertype; 307 info->outer_l2_len = info->l2_len; 308 info->outer_l3_len = info->l3_len; 309 310 if (ip_version == 4) { 311 parse_ipv4(ipv4_hdr, info); 312 info->ethertype = _htons(ETHER_TYPE_IPv4); 313 } else { 314 parse_ipv6(ipv6_hdr, info); 315 info->ethertype = _htons(ETHER_TYPE_IPv6); 316 } 317 info->l2_len = 0; 318 } 319 320 /* modify the IPv4 or IPv4 source address of a packet */ 321 static void 322 change_ip_addresses(void *l3_hdr, uint16_t ethertype) 323 { 324 struct ipv4_hdr *ipv4_hdr = l3_hdr; 325 struct ipv6_hdr *ipv6_hdr = l3_hdr; 326 327 if (ethertype == _htons(ETHER_TYPE_IPv4)) { 328 ipv4_hdr->src_addr = 329 rte_cpu_to_be_32(rte_be_to_cpu_32(ipv4_hdr->src_addr) + 1); 330 } else if (ethertype == _htons(ETHER_TYPE_IPv6)) { 331 ipv6_hdr->src_addr[15] = ipv6_hdr->src_addr[15] + 1; 332 } 333 } 334 335 /* if possible, calculate the checksum of a packet in hw or sw, 336 * depending on the testpmd command line configuration */ 337 static uint64_t 338 process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, 339 uint16_t testpmd_ol_flags) 340 { 341 struct ipv4_hdr *ipv4_hdr = l3_hdr; 342 struct udp_hdr *udp_hdr; 343 struct tcp_hdr *tcp_hdr; 344 struct sctp_hdr *sctp_hdr; 345 uint64_t ol_flags = 0; 346 347 if (info->ethertype == _htons(ETHER_TYPE_IPv4)) { 348 ipv4_hdr = l3_hdr; 349 ipv4_hdr->hdr_checksum = 0; 350 351 ol_flags |= PKT_TX_IPV4; 352 if (info->tso_segsz != 0 && info->l4_proto == IPPROTO_TCP) { 353 ol_flags |= PKT_TX_IP_CKSUM; 354 } else { 355 if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_IP_CKSUM) 356 ol_flags |= PKT_TX_IP_CKSUM; 357 else 358 ipv4_hdr->hdr_checksum = 359 rte_ipv4_cksum(ipv4_hdr); 360 } 361 } else if (info->ethertype == _htons(ETHER_TYPE_IPv6)) 362 ol_flags |= PKT_TX_IPV6; 363 else 364 return 0; /* packet type not supported, nothing to do */ 365 366 if (info->l4_proto == IPPROTO_UDP) { 367 udp_hdr = (struct udp_hdr *)((char *)l3_hdr + info->l3_len); 368 /* do not recalculate udp cksum if it was 0 */ 369 if (udp_hdr->dgram_cksum != 0) { 370 udp_hdr->dgram_cksum = 0; 371 if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) { 372 ol_flags |= PKT_TX_UDP_CKSUM; 373 udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, 374 info->ethertype, ol_flags); 375 } else { 376 udp_hdr->dgram_cksum = 377 get_udptcp_checksum(l3_hdr, udp_hdr, 378 info->ethertype); 379 } 380 } 381 } else if (info->l4_proto == IPPROTO_TCP) { 382 tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len); 383 tcp_hdr->cksum = 0; 384 if (info->tso_segsz != 0) { 385 ol_flags |= PKT_TX_TCP_SEG; 386 tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, 387 ol_flags); 388 } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) { 389 ol_flags |= PKT_TX_TCP_CKSUM; 390 tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, 391 ol_flags); 392 } else { 393 tcp_hdr->cksum = 394 get_udptcp_checksum(l3_hdr, tcp_hdr, 395 info->ethertype); 396 } 397 } else if (info->l4_proto == IPPROTO_SCTP) { 398 sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len); 399 sctp_hdr->cksum = 0; 400 /* sctp payload must be a multiple of 4 to be 401 * offloaded */ 402 if ((testpmd_ol_flags & TESTPMD_TX_OFFLOAD_SCTP_CKSUM) && 403 ((ipv4_hdr->total_length & 0x3) == 0)) { 404 ol_flags |= PKT_TX_SCTP_CKSUM; 405 } else { 406 /* XXX implement CRC32c, example available in 407 * RFC3309 */ 408 } 409 } 410 411 return ol_flags; 412 } 413 414 /* Calculate the checksum of outer header (only vxlan is supported, 415 * meaning IP + UDP). The caller already checked that it's a vxlan 416 * packet */ 417 static uint64_t 418 process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info, 419 uint16_t testpmd_ol_flags) 420 { 421 struct ipv4_hdr *ipv4_hdr = outer_l3_hdr; 422 struct ipv6_hdr *ipv6_hdr = outer_l3_hdr; 423 struct udp_hdr *udp_hdr; 424 uint64_t ol_flags = 0; 425 426 if (info->outer_ethertype == _htons(ETHER_TYPE_IPv4)) { 427 ipv4_hdr->hdr_checksum = 0; 428 ol_flags |= PKT_TX_OUTER_IPV4; 429 430 if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM) 431 ol_flags |= PKT_TX_OUTER_IP_CKSUM; 432 else 433 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); 434 } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM) 435 ol_flags |= PKT_TX_OUTER_IPV6; 436 437 if (info->outer_l4_proto != IPPROTO_UDP) 438 return ol_flags; 439 440 /* outer UDP checksum is always done in software as we have no 441 * hardware supporting it today, and no API for it. */ 442 443 udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + info->outer_l3_len); 444 /* do not recalculate udp cksum if it was 0 */ 445 if (udp_hdr->dgram_cksum != 0) { 446 udp_hdr->dgram_cksum = 0; 447 if (info->outer_ethertype == _htons(ETHER_TYPE_IPv4)) 448 udp_hdr->dgram_cksum = 449 rte_ipv4_udptcp_cksum(ipv4_hdr, udp_hdr); 450 else 451 udp_hdr->dgram_cksum = 452 rte_ipv6_udptcp_cksum(ipv6_hdr, udp_hdr); 453 } 454 455 return ol_flags; 456 } 457 458 /* 459 * Helper function. 460 * Performs actual copying. 461 * Returns number of segments in the destination mbuf on success, 462 * or negative error code on failure. 463 */ 464 static int 465 mbuf_copy_split(const struct rte_mbuf *ms, struct rte_mbuf *md[], 466 uint16_t seglen[], uint8_t nb_seg) 467 { 468 uint32_t dlen, slen, tlen; 469 uint32_t i, len; 470 const struct rte_mbuf *m; 471 const uint8_t *src; 472 uint8_t *dst; 473 474 dlen = 0; 475 slen = 0; 476 tlen = 0; 477 478 dst = NULL; 479 src = NULL; 480 481 m = ms; 482 i = 0; 483 while (ms != NULL && i != nb_seg) { 484 485 if (slen == 0) { 486 slen = rte_pktmbuf_data_len(ms); 487 src = rte_pktmbuf_mtod(ms, const uint8_t *); 488 } 489 490 if (dlen == 0) { 491 dlen = RTE_MIN(seglen[i], slen); 492 md[i]->data_len = dlen; 493 md[i]->next = (i + 1 == nb_seg) ? NULL : md[i + 1]; 494 dst = rte_pktmbuf_mtod(md[i], uint8_t *); 495 } 496 497 len = RTE_MIN(slen, dlen); 498 memcpy(dst, src, len); 499 tlen += len; 500 slen -= len; 501 dlen -= len; 502 src += len; 503 dst += len; 504 505 if (slen == 0) 506 ms = ms->next; 507 if (dlen == 0) 508 i++; 509 } 510 511 if (ms != NULL) 512 return -ENOBUFS; 513 else if (tlen != m->pkt_len) 514 return -EINVAL; 515 516 md[0]->nb_segs = nb_seg; 517 md[0]->pkt_len = tlen; 518 md[0]->vlan_tci = m->vlan_tci; 519 md[0]->vlan_tci_outer = m->vlan_tci_outer; 520 md[0]->ol_flags = m->ol_flags; 521 md[0]->tx_offload = m->tx_offload; 522 523 return nb_seg; 524 } 525 526 /* 527 * Allocate a new mbuf with up to tx_pkt_nb_segs segments. 528 * Copy packet contents and offload information into then new segmented mbuf. 529 */ 530 static struct rte_mbuf * 531 pkt_copy_split(const struct rte_mbuf *pkt) 532 { 533 int32_t n, rc; 534 uint32_t i, len, nb_seg; 535 struct rte_mempool *mp; 536 uint16_t seglen[RTE_MAX_SEGS_PER_PKT]; 537 struct rte_mbuf *p, *md[RTE_MAX_SEGS_PER_PKT]; 538 539 mp = current_fwd_lcore()->mbp; 540 541 if (tx_pkt_split == TX_PKT_SPLIT_RND) 542 nb_seg = random() % tx_pkt_nb_segs + 1; 543 else 544 nb_seg = tx_pkt_nb_segs; 545 546 memcpy(seglen, tx_pkt_seg_lengths, nb_seg * sizeof(seglen[0])); 547 548 /* calculate number of segments to use and their length. */ 549 len = 0; 550 for (i = 0; i != nb_seg && len < pkt->pkt_len; i++) { 551 len += seglen[i]; 552 md[i] = NULL; 553 } 554 555 n = pkt->pkt_len - len; 556 557 /* update size of the last segment to fit rest of the packet */ 558 if (n >= 0) { 559 seglen[i - 1] += n; 560 len += n; 561 } 562 563 nb_seg = i; 564 while (i != 0) { 565 p = rte_pktmbuf_alloc(mp); 566 if (p == NULL) { 567 RTE_LOG(ERR, USER1, 568 "failed to allocate %u-th of %u mbuf " 569 "from mempool: %s\n", 570 nb_seg - i, nb_seg, mp->name); 571 break; 572 } 573 574 md[--i] = p; 575 if (rte_pktmbuf_tailroom(md[i]) < seglen[i]) { 576 RTE_LOG(ERR, USER1, "mempool %s, %u-th segment: " 577 "expected seglen: %u, " 578 "actual mbuf tailroom: %u\n", 579 mp->name, i, seglen[i], 580 rte_pktmbuf_tailroom(md[i])); 581 break; 582 } 583 } 584 585 /* all mbufs successfully allocated, do copy */ 586 if (i == 0) { 587 rc = mbuf_copy_split(pkt, md, seglen, nb_seg); 588 if (rc < 0) 589 RTE_LOG(ERR, USER1, 590 "mbuf_copy_split for %p(len=%u, nb_seg=%hhu) " 591 "into %u segments failed with error code: %d\n", 592 pkt, pkt->pkt_len, pkt->nb_segs, nb_seg, rc); 593 594 /* figure out how many mbufs to free. */ 595 i = RTE_MAX(rc, 0); 596 } 597 598 /* free unused mbufs */ 599 for (; i != nb_seg; i++) { 600 rte_pktmbuf_free_seg(md[i]); 601 md[i] = NULL; 602 } 603 604 return md[0]; 605 } 606 607 /* 608 * Receive a burst of packets, and for each packet: 609 * - parse packet, and try to recognize a supported packet type (1) 610 * - if it's not a supported packet type, don't touch the packet, else: 611 * - modify the IPs in inner headers and in outer headers if any 612 * - reprocess the checksum of all supported layers. This is done in SW 613 * or HW, depending on testpmd command line configuration 614 * - if TSO is enabled in testpmd command line, also flag the mbuf for TCP 615 * segmentation offload (this implies HW TCP checksum) 616 * Then transmit packets on the output port. 617 * 618 * (1) Supported packets are: 619 * Ether / (vlan) / IP|IP6 / UDP|TCP|SCTP . 620 * Ether / (vlan) / outer IP|IP6 / outer UDP / VxLAN / Ether / IP|IP6 / 621 * UDP|TCP|SCTP 622 * Ether / (vlan) / outer IP|IP6 / GRE / Ether / IP|IP6 / UDP|TCP|SCTP 623 * Ether / (vlan) / outer IP|IP6 / GRE / IP|IP6 / UDP|TCP|SCTP 624 * Ether / (vlan) / outer IP|IP6 / IP|IP6 / UDP|TCP|SCTP 625 * 626 * The testpmd command line for this forward engine sets the flags 627 * TESTPMD_TX_OFFLOAD_* in ports[tx_port].tx_ol_flags. They control 628 * wether a checksum must be calculated in software or in hardware. The 629 * IP, UDP, TCP and SCTP flags always concern the inner layer. The 630 * OUTER_IP is only useful for tunnel packets. 631 */ 632 static void 633 pkt_burst_checksum_forward(struct fwd_stream *fs) 634 { 635 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 636 struct rte_port *txp; 637 struct rte_mbuf *m, *p; 638 struct ether_hdr *eth_hdr; 639 void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ 640 uint16_t nb_rx; 641 uint16_t nb_tx; 642 uint16_t i; 643 uint64_t ol_flags; 644 uint16_t testpmd_ol_flags; 645 uint32_t retry; 646 uint32_t rx_bad_ip_csum; 647 uint32_t rx_bad_l4_csum; 648 struct testpmd_offload_info info; 649 650 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES 651 uint64_t start_tsc; 652 uint64_t end_tsc; 653 uint64_t core_cycles; 654 #endif 655 656 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES 657 start_tsc = rte_rdtsc(); 658 #endif 659 660 /* receive a burst of packet */ 661 nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, 662 nb_pkt_per_burst); 663 if (unlikely(nb_rx == 0)) 664 return; 665 666 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS 667 fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; 668 #endif 669 fs->rx_packets += nb_rx; 670 rx_bad_ip_csum = 0; 671 rx_bad_l4_csum = 0; 672 673 txp = &ports[fs->tx_port]; 674 testpmd_ol_flags = txp->tx_ol_flags; 675 memset(&info, 0, sizeof(info)); 676 info.tso_segsz = txp->tso_segsz; 677 678 for (i = 0; i < nb_rx; i++) { 679 if (likely(i < nb_rx - 1)) 680 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1], 681 void *)); 682 683 ol_flags = 0; 684 info.is_tunnel = 0; 685 m = pkts_burst[i]; 686 687 /* Update the L3/L4 checksum error packet statistics */ 688 rx_bad_ip_csum += ((m->ol_flags & PKT_RX_IP_CKSUM_BAD) != 0); 689 rx_bad_l4_csum += ((m->ol_flags & PKT_RX_L4_CKSUM_BAD) != 0); 690 691 /* step 1: dissect packet, parsing optional vlan, ip4/ip6, vxlan 692 * and inner headers */ 693 694 eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); 695 ether_addr_copy(&peer_eth_addrs[fs->peer_addr], 696 ð_hdr->d_addr); 697 ether_addr_copy(&ports[fs->tx_port].eth_addr, 698 ð_hdr->s_addr); 699 parse_ethernet(eth_hdr, &info); 700 l3_hdr = (char *)eth_hdr + info.l2_len; 701 702 /* check if it's a supported tunnel */ 703 if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_PARSE_TUNNEL) { 704 if (info.l4_proto == IPPROTO_UDP) { 705 struct udp_hdr *udp_hdr; 706 udp_hdr = (struct udp_hdr *)((char *)l3_hdr + 707 info.l3_len); 708 parse_vxlan(udp_hdr, &info, m->packet_type); 709 } else if (info.l4_proto == IPPROTO_GRE) { 710 struct simple_gre_hdr *gre_hdr; 711 gre_hdr = (struct simple_gre_hdr *) 712 ((char *)l3_hdr + info.l3_len); 713 parse_gre(gre_hdr, &info); 714 } else if (info.l4_proto == IPPROTO_IPIP) { 715 void *encap_ip_hdr; 716 encap_ip_hdr = (char *)l3_hdr + info.l3_len; 717 parse_encap_ip(encap_ip_hdr, &info); 718 } 719 } 720 721 /* update l3_hdr and outer_l3_hdr if a tunnel was parsed */ 722 if (info.is_tunnel) { 723 outer_l3_hdr = l3_hdr; 724 l3_hdr = (char *)l3_hdr + info.outer_l3_len + info.l2_len; 725 } 726 727 /* step 2: change all source IPs (v4 or v6) so we need 728 * to recompute the chksums even if they were correct */ 729 730 change_ip_addresses(l3_hdr, info.ethertype); 731 if (info.is_tunnel == 1) 732 change_ip_addresses(outer_l3_hdr, info.outer_ethertype); 733 734 /* step 3: depending on user command line configuration, 735 * recompute checksum either in software or flag the 736 * mbuf to offload the calculation to the NIC. If TSO 737 * is configured, prepare the mbuf for TCP segmentation. */ 738 739 /* process checksums of inner headers first */ 740 ol_flags |= process_inner_cksums(l3_hdr, &info, testpmd_ol_flags); 741 742 /* Then process outer headers if any. Note that the software 743 * checksum will be wrong if one of the inner checksums is 744 * processed in hardware. */ 745 if (info.is_tunnel == 1) { 746 ol_flags |= process_outer_cksums(outer_l3_hdr, &info, 747 testpmd_ol_flags); 748 } 749 750 /* step 4: fill the mbuf meta data (flags and header lengths) */ 751 752 if (info.is_tunnel == 1) { 753 if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM) { 754 m->outer_l2_len = info.outer_l2_len; 755 m->outer_l3_len = info.outer_l3_len; 756 m->l2_len = info.l2_len; 757 m->l3_len = info.l3_len; 758 m->l4_len = info.l4_len; 759 } 760 else { 761 /* if there is a outer UDP cksum 762 processed in sw and the inner in hw, 763 the outer checksum will be wrong as 764 the payload will be modified by the 765 hardware */ 766 m->l2_len = info.outer_l2_len + 767 info.outer_l3_len + info.l2_len; 768 m->l3_len = info.l3_len; 769 m->l4_len = info.l4_len; 770 } 771 } else { 772 /* this is only useful if an offload flag is 773 * set, but it does not hurt to fill it in any 774 * case */ 775 m->l2_len = info.l2_len; 776 m->l3_len = info.l3_len; 777 m->l4_len = info.l4_len; 778 } 779 m->tso_segsz = info.tso_segsz; 780 m->ol_flags = ol_flags; 781 782 /* Do split & copy for the packet. */ 783 if (tx_pkt_split != TX_PKT_SPLIT_OFF) { 784 p = pkt_copy_split(m); 785 if (p != NULL) { 786 rte_pktmbuf_free(m); 787 m = p; 788 pkts_burst[i] = m; 789 } 790 } 791 792 /* if verbose mode is enabled, dump debug info */ 793 if (verbose_level > 0) { 794 struct { 795 uint64_t flag; 796 uint64_t mask; 797 } tx_flags[] = { 798 { PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM }, 799 { PKT_TX_UDP_CKSUM, PKT_TX_L4_MASK }, 800 { PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK }, 801 { PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK }, 802 { PKT_TX_IPV4, PKT_TX_IPV4 }, 803 { PKT_TX_IPV6, PKT_TX_IPV6 }, 804 { PKT_TX_OUTER_IP_CKSUM, PKT_TX_OUTER_IP_CKSUM }, 805 { PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4 }, 806 { PKT_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6 }, 807 { PKT_TX_TCP_SEG, PKT_TX_TCP_SEG }, 808 }; 809 unsigned j; 810 const char *name; 811 812 printf("-----------------\n"); 813 printf("mbuf=%p, pkt_len=%u, nb_segs=%hhu:\n", 814 m, m->pkt_len, m->nb_segs); 815 /* dump rx parsed packet info */ 816 printf("rx: l2_len=%d ethertype=%x l3_len=%d " 817 "l4_proto=%d l4_len=%d\n", 818 info.l2_len, rte_be_to_cpu_16(info.ethertype), 819 info.l3_len, info.l4_proto, info.l4_len); 820 if (info.is_tunnel == 1) 821 printf("rx: outer_l2_len=%d outer_ethertype=%x " 822 "outer_l3_len=%d\n", info.outer_l2_len, 823 rte_be_to_cpu_16(info.outer_ethertype), 824 info.outer_l3_len); 825 /* dump tx packet info */ 826 if ((testpmd_ol_flags & (TESTPMD_TX_OFFLOAD_IP_CKSUM | 827 TESTPMD_TX_OFFLOAD_UDP_CKSUM | 828 TESTPMD_TX_OFFLOAD_TCP_CKSUM | 829 TESTPMD_TX_OFFLOAD_SCTP_CKSUM)) || 830 info.tso_segsz != 0) 831 printf("tx: m->l2_len=%d m->l3_len=%d " 832 "m->l4_len=%d\n", 833 m->l2_len, m->l3_len, m->l4_len); 834 if ((info.is_tunnel == 1) && 835 (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM)) 836 printf("tx: m->outer_l2_len=%d m->outer_l3_len=%d\n", 837 m->outer_l2_len, m->outer_l3_len); 838 if (info.tso_segsz != 0) 839 printf("tx: m->tso_segsz=%d\n", m->tso_segsz); 840 printf("tx: flags="); 841 for (j = 0; j < sizeof(tx_flags)/sizeof(*tx_flags); j++) { 842 name = rte_get_tx_ol_flag_name(tx_flags[j].flag); 843 if ((m->ol_flags & tx_flags[j].mask) == 844 tx_flags[j].flag) 845 printf("%s ", name); 846 } 847 printf("\n"); 848 } 849 } 850 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); 851 /* 852 * Retry if necessary 853 */ 854 if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) { 855 retry = 0; 856 while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { 857 rte_delay_us(burst_tx_delay_time); 858 nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, 859 &pkts_burst[nb_tx], nb_rx - nb_tx); 860 } 861 } 862 fs->tx_packets += nb_tx; 863 fs->rx_bad_ip_csum += rx_bad_ip_csum; 864 fs->rx_bad_l4_csum += rx_bad_l4_csum; 865 866 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS 867 fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; 868 #endif 869 if (unlikely(nb_tx < nb_rx)) { 870 fs->fwd_dropped += (nb_rx - nb_tx); 871 do { 872 rte_pktmbuf_free(pkts_burst[nb_tx]); 873 } while (++nb_tx < nb_rx); 874 } 875 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES 876 end_tsc = rte_rdtsc(); 877 core_cycles = (end_tsc - start_tsc); 878 fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); 879 #endif 880 } 881 882 struct fwd_engine csum_fwd_engine = { 883 .fwd_mode_name = "csum", 884 .port_fwd_begin = NULL, 885 .port_fwd_end = NULL, 886 .packet_fwd = pkt_burst_checksum_forward, 887 }; 888