1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 1982, 1986, 1990, 1993 3 * The Regents of the University of California. 4 * Copyright(c) 2010-2014 Intel Corporation. 5 * Copyright(c) 2014 6WIND S.A. 6 * All rights reserved. 7 */ 8 9 #ifndef _RTE_IP_H_ 10 #define _RTE_IP_H_ 11 12 /** 13 * @file 14 * 15 * IP-related defines 16 */ 17 18 #include <stdint.h> 19 20 #ifdef RTE_EXEC_ENV_WINDOWS 21 #include <ws2tcpip.h> 22 #else 23 #include <sys/socket.h> 24 #include <sys/types.h> 25 #include <netinet/in.h> 26 #include <arpa/inet.h> 27 #include <netinet/ip.h> 28 #endif 29 30 #include <rte_byteorder.h> 31 #include <rte_mbuf.h> 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 /** 38 * IPv4 Header 39 */ 40 struct rte_ipv4_hdr { 41 uint8_t version_ihl; /**< version and header length */ 42 uint8_t type_of_service; /**< type of service */ 43 rte_be16_t total_length; /**< length of packet */ 44 rte_be16_t packet_id; /**< packet ID */ 45 rte_be16_t fragment_offset; /**< fragmentation offset */ 46 uint8_t time_to_live; /**< time to live */ 47 uint8_t next_proto_id; /**< protocol ID */ 48 rte_be16_t hdr_checksum; /**< header checksum */ 49 rte_be32_t src_addr; /**< source address */ 50 rte_be32_t dst_addr; /**< destination address */ 51 } __rte_packed; 52 53 /** Create IPv4 address */ 54 #define RTE_IPV4(a, b, c, d) ((uint32_t)(((a) & 0xff) << 24) | \ 55 (((b) & 0xff) << 16) | \ 56 (((c) & 0xff) << 8) | \ 57 ((d) & 0xff)) 58 59 /** Maximal IPv4 packet length (including a header) */ 60 #define RTE_IPV4_MAX_PKT_LEN 65535 61 62 /** Internet header length mask for version_ihl field */ 63 #define RTE_IPV4_HDR_IHL_MASK (0x0f) 64 /** 65 * Internet header length field multiplier (IHL field specifies overall header 66 * length in number of 4-byte words) 67 */ 68 #define RTE_IPV4_IHL_MULTIPLIER (4) 69 70 /* Type of Service fields */ 71 #define RTE_IPV4_HDR_DSCP_MASK (0xfc) 72 #define RTE_IPV4_HDR_ECN_MASK (0x03) 73 #define RTE_IPV4_HDR_ECN_CE RTE_IPV4_HDR_ECN_MASK 74 75 /* Fragment Offset * Flags. */ 76 #define RTE_IPV4_HDR_DF_SHIFT 14 77 #define RTE_IPV4_HDR_MF_SHIFT 13 78 #define RTE_IPV4_HDR_FO_SHIFT 3 79 80 #define RTE_IPV4_HDR_DF_FLAG (1 << RTE_IPV4_HDR_DF_SHIFT) 81 #define RTE_IPV4_HDR_MF_FLAG (1 << RTE_IPV4_HDR_MF_SHIFT) 82 83 #define RTE_IPV4_HDR_OFFSET_MASK ((1 << RTE_IPV4_HDR_MF_SHIFT) - 1) 84 85 #define RTE_IPV4_HDR_OFFSET_UNITS 8 86 87 /* 88 * IPv4 address types 89 */ 90 #define RTE_IPV4_ANY ((uint32_t)0x00000000) /**< 0.0.0.0 */ 91 #define RTE_IPV4_LOOPBACK ((uint32_t)0x7f000001) /**< 127.0.0.1 */ 92 #define RTE_IPV4_BROADCAST ((uint32_t)0xe0000000) /**< 224.0.0.0 */ 93 #define RTE_IPV4_ALLHOSTS_GROUP ((uint32_t)0xe0000001) /**< 224.0.0.1 */ 94 #define RTE_IPV4_ALLRTRS_GROUP ((uint32_t)0xe0000002) /**< 224.0.0.2 */ 95 #define RTE_IPV4_MAX_LOCAL_GROUP ((uint32_t)0xe00000ff) /**< 224.0.0.255 */ 96 97 /* 98 * IPv4 Multicast-related macros 99 */ 100 #define RTE_IPV4_MIN_MCAST \ 101 RTE_IPV4(224, 0, 0, 0) /**< Minimal IPv4-multicast address */ 102 #define RTE_IPV4_MAX_MCAST \ 103 RTE_IPV4(239, 255, 255, 255) /**< Maximum IPv4 multicast address */ 104 105 #define RTE_IS_IPV4_MCAST(x) \ 106 ((x) >= RTE_IPV4_MIN_MCAST && (x) <= RTE_IPV4_MAX_MCAST) 107 /**< check if IPv4 address is multicast */ 108 109 /* IPv4 default fields values */ 110 #define RTE_IPV4_MIN_IHL (0x5) 111 #define RTE_IPV4_VHL_DEF ((IPVERSION << 4) | RTE_IPV4_MIN_IHL) 112 113 /** 114 * Get the length of an IPv4 header. 115 * 116 * @param ipv4_hdr 117 * Pointer to the IPv4 header. 118 * @return 119 * The length of the IPv4 header (with options if present) in bytes. 120 */ 121 static inline uint8_t 122 rte_ipv4_hdr_len(const struct rte_ipv4_hdr *ipv4_hdr) 123 { 124 return (uint8_t)((ipv4_hdr->version_ihl & RTE_IPV4_HDR_IHL_MASK) * 125 RTE_IPV4_IHL_MULTIPLIER); 126 } 127 128 /** 129 * @internal Calculate a sum of all words in the buffer. 130 * Helper routine for the rte_raw_cksum(). 131 * 132 * @param buf 133 * Pointer to the buffer. 134 * @param len 135 * Length of the buffer. 136 * @param sum 137 * Initial value of the sum. 138 * @return 139 * sum += Sum of all words in the buffer. 140 */ 141 static inline uint32_t 142 __rte_raw_cksum(const void *buf, size_t len, uint32_t sum) 143 { 144 /* workaround gcc strict-aliasing warning */ 145 uintptr_t ptr = (uintptr_t)buf; 146 typedef uint16_t __attribute__((__may_alias__)) u16_p; 147 const u16_p *u16_buf = (const u16_p *)ptr; 148 149 while (len >= (sizeof(*u16_buf) * 4)) { 150 sum += u16_buf[0]; 151 sum += u16_buf[1]; 152 sum += u16_buf[2]; 153 sum += u16_buf[3]; 154 len -= sizeof(*u16_buf) * 4; 155 u16_buf += 4; 156 } 157 while (len >= sizeof(*u16_buf)) { 158 sum += *u16_buf; 159 len -= sizeof(*u16_buf); 160 u16_buf += 1; 161 } 162 163 /* if length is in odd bytes */ 164 if (len == 1) { 165 uint16_t left = 0; 166 *(uint8_t *)&left = *(const uint8_t *)u16_buf; 167 sum += left; 168 } 169 170 return sum; 171 } 172 173 /** 174 * @internal Reduce a sum to the non-complemented checksum. 175 * Helper routine for the rte_raw_cksum(). 176 * 177 * @param sum 178 * Value of the sum. 179 * @return 180 * The non-complemented checksum. 181 */ 182 static inline uint16_t 183 __rte_raw_cksum_reduce(uint32_t sum) 184 { 185 sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff); 186 sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff); 187 return (uint16_t)sum; 188 } 189 190 /** 191 * Process the non-complemented checksum of a buffer. 192 * 193 * @param buf 194 * Pointer to the buffer. 195 * @param len 196 * Length of the buffer. 197 * @return 198 * The non-complemented checksum. 199 */ 200 static inline uint16_t 201 rte_raw_cksum(const void *buf, size_t len) 202 { 203 uint32_t sum; 204 205 sum = __rte_raw_cksum(buf, len, 0); 206 return __rte_raw_cksum_reduce(sum); 207 } 208 209 /** 210 * Compute the raw (non complemented) checksum of a packet. 211 * 212 * @param m 213 * The pointer to the mbuf. 214 * @param off 215 * The offset in bytes to start the checksum. 216 * @param len 217 * The length in bytes of the data to checksum. 218 * @param cksum 219 * A pointer to the checksum, filled on success. 220 * @return 221 * 0 on success, -1 on error (bad length or offset). 222 */ 223 static inline int 224 rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len, 225 uint16_t *cksum) 226 { 227 const struct rte_mbuf *seg; 228 const char *buf; 229 uint32_t sum, tmp; 230 uint32_t seglen, done; 231 232 /* easy case: all data in the first segment */ 233 if (off + len <= rte_pktmbuf_data_len(m)) { 234 *cksum = rte_raw_cksum(rte_pktmbuf_mtod_offset(m, 235 const char *, off), len); 236 return 0; 237 } 238 239 if (unlikely(off + len > rte_pktmbuf_pkt_len(m))) 240 return -1; /* invalid params, return a dummy value */ 241 242 /* else browse the segment to find offset */ 243 seglen = 0; 244 for (seg = m; seg != NULL; seg = seg->next) { 245 seglen = rte_pktmbuf_data_len(seg); 246 if (off < seglen) 247 break; 248 off -= seglen; 249 } 250 RTE_ASSERT(seg != NULL); 251 if (seg == NULL) 252 return -1; 253 seglen -= off; 254 buf = rte_pktmbuf_mtod_offset(seg, const char *, off); 255 if (seglen >= len) { 256 /* all in one segment */ 257 *cksum = rte_raw_cksum(buf, len); 258 return 0; 259 } 260 261 /* hard case: process checksum of several segments */ 262 sum = 0; 263 done = 0; 264 for (;;) { 265 tmp = __rte_raw_cksum(buf, seglen, 0); 266 if (done & 1) 267 tmp = rte_bswap16((uint16_t)tmp); 268 sum += tmp; 269 done += seglen; 270 if (done == len) 271 break; 272 seg = seg->next; 273 buf = rte_pktmbuf_mtod(seg, const char *); 274 seglen = rte_pktmbuf_data_len(seg); 275 if (seglen > len - done) 276 seglen = len - done; 277 } 278 279 *cksum = __rte_raw_cksum_reduce(sum); 280 return 0; 281 } 282 283 /** 284 * Process the IPv4 checksum of an IPv4 header. 285 * 286 * The checksum field must be set to 0 by the caller. 287 * 288 * @param ipv4_hdr 289 * The pointer to the contiguous IPv4 header. 290 * @return 291 * The complemented checksum to set in the IP packet. 292 */ 293 static inline uint16_t 294 rte_ipv4_cksum(const struct rte_ipv4_hdr *ipv4_hdr) 295 { 296 uint16_t cksum; 297 cksum = rte_raw_cksum(ipv4_hdr, rte_ipv4_hdr_len(ipv4_hdr)); 298 return (uint16_t)~cksum; 299 } 300 301 /** 302 * Process the pseudo-header checksum of an IPv4 header. 303 * 304 * The checksum field must be set to 0 by the caller. 305 * 306 * Depending on the ol_flags, the pseudo-header checksum expected by the 307 * drivers is not the same. For instance, when TSO is enabled, the IP 308 * payload length must not be included in the packet. 309 * 310 * When ol_flags is 0, it computes the standard pseudo-header checksum. 311 * 312 * @param ipv4_hdr 313 * The pointer to the contiguous IPv4 header. 314 * @param ol_flags 315 * The ol_flags of the associated mbuf. 316 * @return 317 * The non-complemented checksum to set in the L4 header. 318 */ 319 static inline uint16_t 320 rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags) 321 { 322 struct ipv4_psd_header { 323 uint32_t src_addr; /* IP address of source host. */ 324 uint32_t dst_addr; /* IP address of destination host. */ 325 uint8_t zero; /* zero. */ 326 uint8_t proto; /* L4 protocol type. */ 327 uint16_t len; /* L4 length. */ 328 } psd_hdr; 329 330 uint32_t l3_len; 331 332 psd_hdr.src_addr = ipv4_hdr->src_addr; 333 psd_hdr.dst_addr = ipv4_hdr->dst_addr; 334 psd_hdr.zero = 0; 335 psd_hdr.proto = ipv4_hdr->next_proto_id; 336 if (ol_flags & PKT_TX_TCP_SEG) { 337 psd_hdr.len = 0; 338 } else { 339 l3_len = rte_be_to_cpu_16(ipv4_hdr->total_length); 340 psd_hdr.len = rte_cpu_to_be_16((uint16_t)(l3_len - 341 rte_ipv4_hdr_len(ipv4_hdr))); 342 } 343 return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr)); 344 } 345 346 /** 347 * Process the IPv4 UDP or TCP checksum. 348 * 349 * The IP and layer 4 checksum must be set to 0 in the packet by 350 * the caller. 351 * 352 * @param ipv4_hdr 353 * The pointer to the contiguous IPv4 header. 354 * @param l4_hdr 355 * The pointer to the beginning of the L4 header. 356 * @return 357 * The complemented checksum to set in the IP packet. 358 */ 359 static inline uint16_t 360 rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr) 361 { 362 uint32_t cksum; 363 uint32_t l3_len, l4_len; 364 uint8_t ip_hdr_len; 365 366 ip_hdr_len = rte_ipv4_hdr_len(ipv4_hdr); 367 l3_len = rte_be_to_cpu_16(ipv4_hdr->total_length); 368 if (l3_len < ip_hdr_len) 369 return 0; 370 371 l4_len = l3_len - ip_hdr_len; 372 373 cksum = rte_raw_cksum(l4_hdr, l4_len); 374 cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0); 375 376 cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); 377 cksum = (~cksum) & 0xffff; 378 /* 379 * Per RFC 768:If the computed checksum is zero for UDP, 380 * it is transmitted as all ones 381 * (the equivalent in one's complement arithmetic). 382 */ 383 if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP) 384 cksum = 0xffff; 385 386 return (uint16_t)cksum; 387 } 388 389 /** 390 * IPv6 Header 391 */ 392 struct rte_ipv6_hdr { 393 rte_be32_t vtc_flow; /**< IP version, traffic class & flow label. */ 394 rte_be16_t payload_len; /**< IP payload size, including ext. headers */ 395 uint8_t proto; /**< Protocol, next header. */ 396 uint8_t hop_limits; /**< Hop limits. */ 397 uint8_t src_addr[16]; /**< IP address of source host. */ 398 uint8_t dst_addr[16]; /**< IP address of destination host(s). */ 399 } __rte_packed; 400 401 /* IPv6 vtc_flow: IPv / TC / flow_label */ 402 #define RTE_IPV6_HDR_FL_SHIFT 0 403 #define RTE_IPV6_HDR_TC_SHIFT 20 404 #define RTE_IPV6_HDR_FL_MASK ((1u << RTE_IPV6_HDR_TC_SHIFT) - 1) 405 #define RTE_IPV6_HDR_TC_MASK (0xff << RTE_IPV6_HDR_TC_SHIFT) 406 #define RTE_IPV6_HDR_DSCP_MASK (0xfc << RTE_IPV6_HDR_TC_SHIFT) 407 #define RTE_IPV6_HDR_ECN_MASK (0x03 << RTE_IPV6_HDR_TC_SHIFT) 408 #define RTE_IPV6_HDR_ECN_CE RTE_IPV6_HDR_ECN_MASK 409 410 #define RTE_IPV6_MIN_MTU 1280 /**< Minimum MTU for IPv6, see RFC 8200. */ 411 412 /** 413 * Process the pseudo-header checksum of an IPv6 header. 414 * 415 * Depending on the ol_flags, the pseudo-header checksum expected by the 416 * drivers is not the same. For instance, when TSO is enabled, the IPv6 417 * payload length must not be included in the packet. 418 * 419 * When ol_flags is 0, it computes the standard pseudo-header checksum. 420 * 421 * @param ipv6_hdr 422 * The pointer to the contiguous IPv6 header. 423 * @param ol_flags 424 * The ol_flags of the associated mbuf. 425 * @return 426 * The non-complemented checksum to set in the L4 header. 427 */ 428 static inline uint16_t 429 rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags) 430 { 431 uint32_t sum; 432 struct { 433 rte_be32_t len; /* L4 length. */ 434 rte_be32_t proto; /* L4 protocol - top 3 bytes must be zero */ 435 } psd_hdr; 436 437 psd_hdr.proto = (uint32_t)(ipv6_hdr->proto << 24); 438 if (ol_flags & PKT_TX_TCP_SEG) { 439 psd_hdr.len = 0; 440 } else { 441 psd_hdr.len = ipv6_hdr->payload_len; 442 } 443 444 sum = __rte_raw_cksum(ipv6_hdr->src_addr, 445 sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 446 0); 447 sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum); 448 return __rte_raw_cksum_reduce(sum); 449 } 450 451 /** 452 * Process the IPv6 UDP or TCP checksum. 453 * 454 * The IPv4 header should not contains options. The layer 4 checksum 455 * must be set to 0 in the packet by the caller. 456 * 457 * @param ipv6_hdr 458 * The pointer to the contiguous IPv6 header. 459 * @param l4_hdr 460 * The pointer to the beginning of the L4 header. 461 * @return 462 * The complemented checksum to set in the IP packet. 463 */ 464 static inline uint16_t 465 rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr) 466 { 467 uint32_t cksum; 468 uint32_t l4_len; 469 470 l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len); 471 472 cksum = rte_raw_cksum(l4_hdr, l4_len); 473 cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0); 474 475 cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); 476 cksum = (~cksum) & 0xffff; 477 /* 478 * Per RFC 768: If the computed checksum is zero for UDP, 479 * it is transmitted as all ones 480 * (the equivalent in one's complement arithmetic). 481 */ 482 if (cksum == 0 && ipv6_hdr->proto == IPPROTO_UDP) 483 cksum = 0xffff; 484 485 return (uint16_t)cksum; 486 } 487 488 /** IPv6 fragment extension header. */ 489 #define RTE_IPV6_EHDR_MF_SHIFT 0 490 #define RTE_IPV6_EHDR_MF_MASK 1 491 #define RTE_IPV6_EHDR_FO_SHIFT 3 492 #define RTE_IPV6_EHDR_FO_MASK (~((1 << RTE_IPV6_EHDR_FO_SHIFT) - 1)) 493 #define RTE_IPV6_EHDR_FO_ALIGN (1 << RTE_IPV6_EHDR_FO_SHIFT) 494 495 #define RTE_IPV6_FRAG_USED_MASK (RTE_IPV6_EHDR_MF_MASK | RTE_IPV6_EHDR_FO_MASK) 496 497 #define RTE_IPV6_GET_MF(x) ((x) & RTE_IPV6_EHDR_MF_MASK) 498 #define RTE_IPV6_GET_FO(x) ((x) >> RTE_IPV6_EHDR_FO_SHIFT) 499 500 #define RTE_IPV6_SET_FRAG_DATA(fo, mf) \ 501 (((fo) & RTE_IPV6_EHDR_FO_MASK) | ((mf) & RTE_IPV6_EHDR_MF_MASK)) 502 503 struct rte_ipv6_fragment_ext { 504 uint8_t next_header; /**< Next header type */ 505 uint8_t reserved; /**< Reserved */ 506 rte_be16_t frag_data; /**< All fragmentation data */ 507 rte_be32_t id; /**< Packet ID */ 508 } __rte_packed; 509 510 /* IPv6 fragment extension header size */ 511 #define RTE_IPV6_FRAG_HDR_SIZE sizeof(struct rte_ipv6_fragment_ext) 512 513 /** 514 * Parse next IPv6 header extension 515 * 516 * This function checks if proto number is an IPv6 extensions and parses its 517 * data if so, providing information on next header and extension length. 518 * 519 * @param p 520 * Pointer to an extension raw data. 521 * @param proto 522 * Protocol number extracted from the "next header" field from 523 * the IPv6 header or the previous extension. 524 * @param ext_len 525 * Extension data length. 526 * @return 527 * next protocol number if proto is an IPv6 extension, -EINVAL otherwise 528 */ 529 __rte_experimental 530 static inline int 531 rte_ipv6_get_next_ext(const uint8_t *p, int proto, size_t *ext_len) 532 { 533 int next_proto; 534 535 switch (proto) { 536 case IPPROTO_AH: 537 next_proto = *p++; 538 *ext_len = (*p + 2) * sizeof(uint32_t); 539 break; 540 541 case IPPROTO_HOPOPTS: 542 case IPPROTO_ROUTING: 543 case IPPROTO_DSTOPTS: 544 next_proto = *p++; 545 *ext_len = (*p + 1) * sizeof(uint64_t); 546 break; 547 548 case IPPROTO_FRAGMENT: 549 next_proto = *p; 550 *ext_len = RTE_IPV6_FRAG_HDR_SIZE; 551 break; 552 553 default: 554 return -EINVAL; 555 } 556 557 return next_proto; 558 } 559 560 #ifdef __cplusplus 561 } 562 #endif 563 564 #endif /* _RTE_IP_H_ */ 565