xref: /dpdk/lib/net/rte_ip.h (revision c5c507100ea58e24f812401c77c66cdb9bceee36)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 1982, 1986, 1990, 1993
3  *      The Regents of the University of California.
4  * Copyright(c) 2010-2014 Intel Corporation.
5  * Copyright(c) 2014 6WIND S.A.
6  * All rights reserved.
7  */
8 
9 #ifndef _RTE_IP_H_
10 #define _RTE_IP_H_
11 
12 /**
13  * @file
14  *
15  * IP-related defines
16  */
17 
18 #include <stdint.h>
19 
20 #ifdef RTE_EXEC_ENV_WINDOWS
21 #include <ws2tcpip.h>
22 #else
23 #include <sys/socket.h>
24 #include <sys/types.h>
25 #include <netinet/in.h>
26 #include <arpa/inet.h>
27 #include <netinet/ip.h>
28 #endif
29 
30 #include <rte_byteorder.h>
31 #include <rte_mbuf.h>
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
37 /**
38  * IPv4 Header
39  */
40 struct rte_ipv4_hdr {
41 	uint8_t  version_ihl;		/**< version and header length */
42 	uint8_t  type_of_service;	/**< type of service */
43 	rte_be16_t total_length;	/**< length of packet */
44 	rte_be16_t packet_id;		/**< packet ID */
45 	rte_be16_t fragment_offset;	/**< fragmentation offset */
46 	uint8_t  time_to_live;		/**< time to live */
47 	uint8_t  next_proto_id;		/**< protocol ID */
48 	rte_be16_t hdr_checksum;	/**< header checksum */
49 	rte_be32_t src_addr;		/**< source address */
50 	rte_be32_t dst_addr;		/**< destination address */
51 } __rte_packed;
52 
53 /** Create IPv4 address */
54 #define RTE_IPV4(a, b, c, d) ((uint32_t)(((a) & 0xff) << 24) | \
55 					   (((b) & 0xff) << 16) | \
56 					   (((c) & 0xff) << 8)  | \
57 					   ((d) & 0xff))
58 
59 /** Maximal IPv4 packet length (including a header) */
60 #define RTE_IPV4_MAX_PKT_LEN        65535
61 
62 /** Internet header length mask for version_ihl field */
63 #define RTE_IPV4_HDR_IHL_MASK	(0x0f)
64 /**
65  * Internet header length field multiplier (IHL field specifies overall header
66  * length in number of 4-byte words)
67  */
68 #define RTE_IPV4_IHL_MULTIPLIER	(4)
69 
70 /* Type of Service fields */
71 #define RTE_IPV4_HDR_DSCP_MASK	(0xfc)
72 #define RTE_IPV4_HDR_ECN_MASK	(0x03)
73 #define RTE_IPV4_HDR_ECN_CE	RTE_IPV4_HDR_ECN_MASK
74 
75 /* Fragment Offset * Flags. */
76 #define	RTE_IPV4_HDR_DF_SHIFT	14
77 #define	RTE_IPV4_HDR_MF_SHIFT	13
78 #define	RTE_IPV4_HDR_FO_SHIFT	3
79 
80 #define	RTE_IPV4_HDR_DF_FLAG	(1 << RTE_IPV4_HDR_DF_SHIFT)
81 #define	RTE_IPV4_HDR_MF_FLAG	(1 << RTE_IPV4_HDR_MF_SHIFT)
82 
83 #define	RTE_IPV4_HDR_OFFSET_MASK	((1 << RTE_IPV4_HDR_MF_SHIFT) - 1)
84 
85 #define	RTE_IPV4_HDR_OFFSET_UNITS	8
86 
87 /*
88  * IPv4 address types
89  */
90 #define RTE_IPV4_ANY              ((uint32_t)0x00000000) /**< 0.0.0.0 */
91 #define RTE_IPV4_LOOPBACK         ((uint32_t)0x7f000001) /**< 127.0.0.1 */
92 #define RTE_IPV4_BROADCAST        ((uint32_t)0xe0000000) /**< 224.0.0.0 */
93 #define RTE_IPV4_ALLHOSTS_GROUP   ((uint32_t)0xe0000001) /**< 224.0.0.1 */
94 #define RTE_IPV4_ALLRTRS_GROUP    ((uint32_t)0xe0000002) /**< 224.0.0.2 */
95 #define RTE_IPV4_MAX_LOCAL_GROUP  ((uint32_t)0xe00000ff) /**< 224.0.0.255 */
96 
97 /*
98  * IPv4 Multicast-related macros
99  */
100 #define RTE_IPV4_MIN_MCAST \
101 	RTE_IPV4(224, 0, 0, 0)          /**< Minimal IPv4-multicast address */
102 #define RTE_IPV4_MAX_MCAST \
103 	RTE_IPV4(239, 255, 255, 255)    /**< Maximum IPv4 multicast address */
104 
105 #define RTE_IS_IPV4_MCAST(x) \
106 	((x) >= RTE_IPV4_MIN_MCAST && (x) <= RTE_IPV4_MAX_MCAST)
107 	/**< check if IPv4 address is multicast */
108 
109 /* IPv4 default fields values */
110 #define RTE_IPV4_MIN_IHL    (0x5)
111 #define RTE_IPV4_VHL_DEF    ((IPVERSION << 4) | RTE_IPV4_MIN_IHL)
112 
113 /**
114  * Get the length of an IPv4 header.
115  *
116  * @param ipv4_hdr
117  *   Pointer to the IPv4 header.
118  * @return
119  *   The length of the IPv4 header (with options if present) in bytes.
120  */
121 static inline uint8_t
122 rte_ipv4_hdr_len(const struct rte_ipv4_hdr *ipv4_hdr)
123 {
124 	return (uint8_t)((ipv4_hdr->version_ihl & RTE_IPV4_HDR_IHL_MASK) *
125 		RTE_IPV4_IHL_MULTIPLIER);
126 }
127 
128 /**
129  * @internal Calculate a sum of all words in the buffer.
130  * Helper routine for the rte_raw_cksum().
131  *
132  * @param buf
133  *   Pointer to the buffer.
134  * @param len
135  *   Length of the buffer.
136  * @param sum
137  *   Initial value of the sum.
138  * @return
139  *   sum += Sum of all words in the buffer.
140  */
141 static inline uint32_t
142 __rte_raw_cksum(const void *buf, size_t len, uint32_t sum)
143 {
144 	/* workaround gcc strict-aliasing warning */
145 	uintptr_t ptr = (uintptr_t)buf;
146 	typedef uint16_t __attribute__((__may_alias__)) u16_p;
147 	const u16_p *u16_buf = (const u16_p *)ptr;
148 
149 	while (len >= (sizeof(*u16_buf) * 4)) {
150 		sum += u16_buf[0];
151 		sum += u16_buf[1];
152 		sum += u16_buf[2];
153 		sum += u16_buf[3];
154 		len -= sizeof(*u16_buf) * 4;
155 		u16_buf += 4;
156 	}
157 	while (len >= sizeof(*u16_buf)) {
158 		sum += *u16_buf;
159 		len -= sizeof(*u16_buf);
160 		u16_buf += 1;
161 	}
162 
163 	/* if length is in odd bytes */
164 	if (len == 1) {
165 		uint16_t left = 0;
166 		*(uint8_t *)&left = *(const uint8_t *)u16_buf;
167 		sum += left;
168 	}
169 
170 	return sum;
171 }
172 
173 /**
174  * @internal Reduce a sum to the non-complemented checksum.
175  * Helper routine for the rte_raw_cksum().
176  *
177  * @param sum
178  *   Value of the sum.
179  * @return
180  *   The non-complemented checksum.
181  */
182 static inline uint16_t
183 __rte_raw_cksum_reduce(uint32_t sum)
184 {
185 	sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff);
186 	sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff);
187 	return (uint16_t)sum;
188 }
189 
190 /**
191  * Process the non-complemented checksum of a buffer.
192  *
193  * @param buf
194  *   Pointer to the buffer.
195  * @param len
196  *   Length of the buffer.
197  * @return
198  *   The non-complemented checksum.
199  */
200 static inline uint16_t
201 rte_raw_cksum(const void *buf, size_t len)
202 {
203 	uint32_t sum;
204 
205 	sum = __rte_raw_cksum(buf, len, 0);
206 	return __rte_raw_cksum_reduce(sum);
207 }
208 
209 /**
210  * Compute the raw (non complemented) checksum of a packet.
211  *
212  * @param m
213  *   The pointer to the mbuf.
214  * @param off
215  *   The offset in bytes to start the checksum.
216  * @param len
217  *   The length in bytes of the data to checksum.
218  * @param cksum
219  *   A pointer to the checksum, filled on success.
220  * @return
221  *   0 on success, -1 on error (bad length or offset).
222  */
223 static inline int
224 rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len,
225 	uint16_t *cksum)
226 {
227 	const struct rte_mbuf *seg;
228 	const char *buf;
229 	uint32_t sum, tmp;
230 	uint32_t seglen, done;
231 
232 	/* easy case: all data in the first segment */
233 	if (off + len <= rte_pktmbuf_data_len(m)) {
234 		*cksum = rte_raw_cksum(rte_pktmbuf_mtod_offset(m,
235 				const char *, off), len);
236 		return 0;
237 	}
238 
239 	if (unlikely(off + len > rte_pktmbuf_pkt_len(m)))
240 		return -1; /* invalid params, return a dummy value */
241 
242 	/* else browse the segment to find offset */
243 	seglen = 0;
244 	for (seg = m; seg != NULL; seg = seg->next) {
245 		seglen = rte_pktmbuf_data_len(seg);
246 		if (off < seglen)
247 			break;
248 		off -= seglen;
249 	}
250 	RTE_ASSERT(seg != NULL);
251 	if (seg == NULL)
252 		return -1;
253 	seglen -= off;
254 	buf = rte_pktmbuf_mtod_offset(seg, const char *, off);
255 	if (seglen >= len) {
256 		/* all in one segment */
257 		*cksum = rte_raw_cksum(buf, len);
258 		return 0;
259 	}
260 
261 	/* hard case: process checksum of several segments */
262 	sum = 0;
263 	done = 0;
264 	for (;;) {
265 		tmp = __rte_raw_cksum(buf, seglen, 0);
266 		if (done & 1)
267 			tmp = rte_bswap16((uint16_t)tmp);
268 		sum += tmp;
269 		done += seglen;
270 		if (done == len)
271 			break;
272 		seg = seg->next;
273 		buf = rte_pktmbuf_mtod(seg, const char *);
274 		seglen = rte_pktmbuf_data_len(seg);
275 		if (seglen > len - done)
276 			seglen = len - done;
277 	}
278 
279 	*cksum = __rte_raw_cksum_reduce(sum);
280 	return 0;
281 }
282 
283 /**
284  * Process the IPv4 checksum of an IPv4 header.
285  *
286  * The checksum field must be set to 0 by the caller.
287  *
288  * @param ipv4_hdr
289  *   The pointer to the contiguous IPv4 header.
290  * @return
291  *   The complemented checksum to set in the IP packet.
292  */
293 static inline uint16_t
294 rte_ipv4_cksum(const struct rte_ipv4_hdr *ipv4_hdr)
295 {
296 	uint16_t cksum;
297 	cksum = rte_raw_cksum(ipv4_hdr, rte_ipv4_hdr_len(ipv4_hdr));
298 	return (uint16_t)~cksum;
299 }
300 
301 /**
302  * Process the pseudo-header checksum of an IPv4 header.
303  *
304  * The checksum field must be set to 0 by the caller.
305  *
306  * Depending on the ol_flags, the pseudo-header checksum expected by the
307  * drivers is not the same. For instance, when TSO is enabled, the IP
308  * payload length must not be included in the packet.
309  *
310  * When ol_flags is 0, it computes the standard pseudo-header checksum.
311  *
312  * @param ipv4_hdr
313  *   The pointer to the contiguous IPv4 header.
314  * @param ol_flags
315  *   The ol_flags of the associated mbuf.
316  * @return
317  *   The non-complemented checksum to set in the L4 header.
318  */
319 static inline uint16_t
320 rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
321 {
322 	struct ipv4_psd_header {
323 		uint32_t src_addr; /* IP address of source host. */
324 		uint32_t dst_addr; /* IP address of destination host. */
325 		uint8_t  zero;     /* zero. */
326 		uint8_t  proto;    /* L4 protocol type. */
327 		uint16_t len;      /* L4 length. */
328 	} psd_hdr;
329 
330 	uint32_t l3_len;
331 
332 	psd_hdr.src_addr = ipv4_hdr->src_addr;
333 	psd_hdr.dst_addr = ipv4_hdr->dst_addr;
334 	psd_hdr.zero = 0;
335 	psd_hdr.proto = ipv4_hdr->next_proto_id;
336 	if (ol_flags & PKT_TX_TCP_SEG) {
337 		psd_hdr.len = 0;
338 	} else {
339 		l3_len = rte_be_to_cpu_16(ipv4_hdr->total_length);
340 		psd_hdr.len = rte_cpu_to_be_16((uint16_t)(l3_len -
341 			rte_ipv4_hdr_len(ipv4_hdr)));
342 	}
343 	return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
344 }
345 
346 /**
347  * Process the IPv4 UDP or TCP checksum.
348  *
349  * The IP and layer 4 checksum must be set to 0 in the packet by
350  * the caller.
351  *
352  * @param ipv4_hdr
353  *   The pointer to the contiguous IPv4 header.
354  * @param l4_hdr
355  *   The pointer to the beginning of the L4 header.
356  * @return
357  *   The complemented checksum to set in the IP packet.
358  */
359 static inline uint16_t
360 rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
361 {
362 	uint32_t cksum;
363 	uint32_t l3_len, l4_len;
364 	uint8_t ip_hdr_len;
365 
366 	ip_hdr_len = rte_ipv4_hdr_len(ipv4_hdr);
367 	l3_len = rte_be_to_cpu_16(ipv4_hdr->total_length);
368 	if (l3_len < ip_hdr_len)
369 		return 0;
370 
371 	l4_len = l3_len - ip_hdr_len;
372 
373 	cksum = rte_raw_cksum(l4_hdr, l4_len);
374 	cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
375 
376 	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
377 	cksum = (~cksum) & 0xffff;
378 	/*
379 	 * Per RFC 768:If the computed checksum is zero for UDP,
380 	 * it is transmitted as all ones
381 	 * (the equivalent in one's complement arithmetic).
382 	 */
383 	if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP)
384 		cksum = 0xffff;
385 
386 	return (uint16_t)cksum;
387 }
388 
389 /**
390  * IPv6 Header
391  */
392 struct rte_ipv6_hdr {
393 	rte_be32_t vtc_flow;	/**< IP version, traffic class & flow label. */
394 	rte_be16_t payload_len;	/**< IP payload size, including ext. headers */
395 	uint8_t  proto;		/**< Protocol, next header. */
396 	uint8_t  hop_limits;	/**< Hop limits. */
397 	uint8_t  src_addr[16];	/**< IP address of source host. */
398 	uint8_t  dst_addr[16];	/**< IP address of destination host(s). */
399 } __rte_packed;
400 
401 /* IPv6 vtc_flow: IPv / TC / flow_label */
402 #define RTE_IPV6_HDR_FL_SHIFT 0
403 #define RTE_IPV6_HDR_TC_SHIFT 20
404 #define RTE_IPV6_HDR_FL_MASK	((1u << RTE_IPV6_HDR_TC_SHIFT) - 1)
405 #define RTE_IPV6_HDR_TC_MASK	(0xff << RTE_IPV6_HDR_TC_SHIFT)
406 #define RTE_IPV6_HDR_DSCP_MASK	(0xfc << RTE_IPV6_HDR_TC_SHIFT)
407 #define RTE_IPV6_HDR_ECN_MASK	(0x03 << RTE_IPV6_HDR_TC_SHIFT)
408 #define RTE_IPV6_HDR_ECN_CE	RTE_IPV6_HDR_ECN_MASK
409 
410 #define RTE_IPV6_MIN_MTU 1280 /**< Minimum MTU for IPv6, see RFC 8200. */
411 
412 /**
413  * Process the pseudo-header checksum of an IPv6 header.
414  *
415  * Depending on the ol_flags, the pseudo-header checksum expected by the
416  * drivers is not the same. For instance, when TSO is enabled, the IPv6
417  * payload length must not be included in the packet.
418  *
419  * When ol_flags is 0, it computes the standard pseudo-header checksum.
420  *
421  * @param ipv6_hdr
422  *   The pointer to the contiguous IPv6 header.
423  * @param ol_flags
424  *   The ol_flags of the associated mbuf.
425  * @return
426  *   The non-complemented checksum to set in the L4 header.
427  */
428 static inline uint16_t
429 rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
430 {
431 	uint32_t sum;
432 	struct {
433 		rte_be32_t len;   /* L4 length. */
434 		rte_be32_t proto; /* L4 protocol - top 3 bytes must be zero */
435 	} psd_hdr;
436 
437 	psd_hdr.proto = (uint32_t)(ipv6_hdr->proto << 24);
438 	if (ol_flags & PKT_TX_TCP_SEG) {
439 		psd_hdr.len = 0;
440 	} else {
441 		psd_hdr.len = ipv6_hdr->payload_len;
442 	}
443 
444 	sum = __rte_raw_cksum(ipv6_hdr->src_addr,
445 		sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr),
446 		0);
447 	sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
448 	return __rte_raw_cksum_reduce(sum);
449 }
450 
451 /**
452  * Process the IPv6 UDP or TCP checksum.
453  *
454  * The IPv4 header should not contains options. The layer 4 checksum
455  * must be set to 0 in the packet by the caller.
456  *
457  * @param ipv6_hdr
458  *   The pointer to the contiguous IPv6 header.
459  * @param l4_hdr
460  *   The pointer to the beginning of the L4 header.
461  * @return
462  *   The complemented checksum to set in the IP packet.
463  */
464 static inline uint16_t
465 rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
466 {
467 	uint32_t cksum;
468 	uint32_t l4_len;
469 
470 	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
471 
472 	cksum = rte_raw_cksum(l4_hdr, l4_len);
473 	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
474 
475 	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
476 	cksum = (~cksum) & 0xffff;
477 	/*
478 	 * Per RFC 768: If the computed checksum is zero for UDP,
479 	 * it is transmitted as all ones
480 	 * (the equivalent in one's complement arithmetic).
481 	 */
482 	if (cksum == 0 && ipv6_hdr->proto == IPPROTO_UDP)
483 		cksum = 0xffff;
484 
485 	return (uint16_t)cksum;
486 }
487 
488 /** IPv6 fragment extension header. */
489 #define	RTE_IPV6_EHDR_MF_SHIFT	0
490 #define	RTE_IPV6_EHDR_MF_MASK	1
491 #define	RTE_IPV6_EHDR_FO_SHIFT	3
492 #define	RTE_IPV6_EHDR_FO_MASK	(~((1 << RTE_IPV6_EHDR_FO_SHIFT) - 1))
493 #define	RTE_IPV6_EHDR_FO_ALIGN	(1 << RTE_IPV6_EHDR_FO_SHIFT)
494 
495 #define RTE_IPV6_FRAG_USED_MASK	(RTE_IPV6_EHDR_MF_MASK | RTE_IPV6_EHDR_FO_MASK)
496 
497 #define RTE_IPV6_GET_MF(x)	((x) & RTE_IPV6_EHDR_MF_MASK)
498 #define RTE_IPV6_GET_FO(x)	((x) >> RTE_IPV6_EHDR_FO_SHIFT)
499 
500 #define RTE_IPV6_SET_FRAG_DATA(fo, mf)	\
501 	(((fo) & RTE_IPV6_EHDR_FO_MASK) | ((mf) & RTE_IPV6_EHDR_MF_MASK))
502 
503 struct rte_ipv6_fragment_ext {
504 	uint8_t next_header;	/**< Next header type */
505 	uint8_t reserved;	/**< Reserved */
506 	rte_be16_t frag_data;	/**< All fragmentation data */
507 	rte_be32_t id;		/**< Packet ID */
508 } __rte_packed;
509 
510 /* IPv6 fragment extension header size */
511 #define RTE_IPV6_FRAG_HDR_SIZE	sizeof(struct rte_ipv6_fragment_ext)
512 
513 /**
514  * Parse next IPv6 header extension
515  *
516  * This function checks if proto number is an IPv6 extensions and parses its
517  * data if so, providing information on next header and extension length.
518  *
519  * @param p
520  *   Pointer to an extension raw data.
521  * @param proto
522  *   Protocol number extracted from the "next header" field from
523  *   the IPv6 header or the previous extension.
524  * @param ext_len
525  *   Extension data length.
526  * @return
527  *   next protocol number if proto is an IPv6 extension, -EINVAL otherwise
528  */
529 __rte_experimental
530 static inline int
531 rte_ipv6_get_next_ext(const uint8_t *p, int proto, size_t *ext_len)
532 {
533 	int next_proto;
534 
535 	switch (proto) {
536 	case IPPROTO_AH:
537 		next_proto = *p++;
538 		*ext_len = (*p + 2) * sizeof(uint32_t);
539 		break;
540 
541 	case IPPROTO_HOPOPTS:
542 	case IPPROTO_ROUTING:
543 	case IPPROTO_DSTOPTS:
544 		next_proto = *p++;
545 		*ext_len = (*p + 1) * sizeof(uint64_t);
546 		break;
547 
548 	case IPPROTO_FRAGMENT:
549 		next_proto = *p;
550 		*ext_len = RTE_IPV6_FRAG_HDR_SIZE;
551 		break;
552 
553 	default:
554 		return -EINVAL;
555 	}
556 
557 	return next_proto;
558 }
559 
560 #ifdef __cplusplus
561 }
562 #endif
563 
564 #endif /* _RTE_IP_H_ */
565