xref: /dpdk/lib/ip_frag/rte_ipv4_fragmentation.c (revision f12c41bf4074efb438fc21ab7db13f011f5a1e84)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stddef.h>
6 #include <errno.h>
7 
8 #include <rte_memcpy.h>
9 #include <rte_ether.h>
10 
11 #include "ip_frag_common.h"
12 
/*
 * Layout helpers for the 16-bit flags/fragment-offset field of the IPv4
 * header, expressed for the value in CPU byte order.
 */
/* Bit position of the flag tested by the "Don't Fragment" check below */
#define	RTE_IPV4_HDR_DF_SHIFT			14
/* Bit position where the "more fragments" indication is OR-ed in */
#define	RTE_IPV4_HDR_MF_SHIFT			13
/* Byte offsets are shifted right by this amount (8-byte units) */
#define	RTE_IPV4_HDR_FO_SHIFT			3

/* Single-bit masks derived from the shifts above */
#define	IPV4_HDR_DF_MASK			(1 << RTE_IPV4_HDR_DF_SHIFT)
#define	IPV4_HDR_MF_MASK			(1 << RTE_IPV4_HDR_MF_SHIFT)

/* Fragment payload sizes are rounded down to a multiple of this (8 bytes) */
#define	IPV4_HDR_FO_ALIGN			(1 << RTE_IPV4_HDR_FO_SHIFT)

/* Size in bytes of the scratch buffer holding a rebuilt header with options */
#define IPV4_HDR_MAX_LEN			60
24 
25 static inline void __fill_ipv4hdr_frag(struct rte_ipv4_hdr *dst,
26 		const struct rte_ipv4_hdr *src, uint16_t header_len,
27 		uint16_t len, uint16_t fofs, uint16_t dofs, uint32_t mf)
28 {
29 	rte_memcpy(dst, src, header_len);
30 	fofs = (uint16_t)(fofs + (dofs >> RTE_IPV4_HDR_FO_SHIFT));
31 	fofs = (uint16_t)(fofs | mf << RTE_IPV4_HDR_MF_SHIFT);
32 	dst->fragment_offset = rte_cpu_to_be_16(fofs);
33 	dst->total_length = rte_cpu_to_be_16(len);
34 	dst->hdr_checksum = 0;
35 }
36 
/* Release the first num mbufs of mb[], in ascending index order. */
static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num)
{
	struct rte_mbuf **cur = mb;

	for (; num != 0; num--, cur++)
		rte_pktmbuf_free(*cur);
}
43 
44 static inline uint16_t __create_ipopt_frag_hdr(uint8_t *iph,
45 	uint16_t ipopt_len, uint8_t *ipopt_frag_hdr)
46 {
47 	uint16_t len = ipopt_len;
48 	struct rte_ipv4_hdr *iph_opt = (struct rte_ipv4_hdr *)ipopt_frag_hdr;
49 
50 	ipopt_len = 0;
51 	rte_memcpy(ipopt_frag_hdr, iph, sizeof(struct rte_ipv4_hdr));
52 	ipopt_frag_hdr += sizeof(struct rte_ipv4_hdr);
53 
54 	uint8_t *p_opt = iph + sizeof(struct rte_ipv4_hdr);
55 
56 	while (len > 0) {
57 		if (unlikely(*p_opt == RTE_IPV4_HDR_OPT_NOP)) {
58 			len--;
59 			p_opt++;
60 			continue;
61 		} else if (unlikely(*p_opt == RTE_IPV4_HDR_OPT_EOL))
62 			break;
63 
64 		if (unlikely(p_opt[1] < 2 || p_opt[1] > len))
65 			break;
66 
67 		if (RTE_IPV4_HDR_OPT_COPIED(*p_opt)) {
68 			rte_memcpy(ipopt_frag_hdr + ipopt_len,
69 				p_opt, p_opt[1]);
70 			ipopt_len += p_opt[1];
71 		}
72 
73 		len -= p_opt[1];
74 		p_opt += p_opt[1];
75 	}
76 
77 	len = RTE_ALIGN_CEIL(ipopt_len, RTE_IPV4_IHL_MULTIPLIER);
78 	memset(ipopt_frag_hdr + ipopt_len,
79 		RTE_IPV4_HDR_OPT_EOL, len - ipopt_len);
80 	ipopt_len = len;
81 	iph_opt->ihl = (sizeof(struct rte_ipv4_hdr) + ipopt_len) /
82 		RTE_IPV4_IHL_MULTIPLIER;
83 
84 	return ipopt_len;
85 }
86 
87 /**
88  * IPv4 fragmentation.
89  *
90  * This function implements the fragmentation of IPv4 packets.
91  *
92  * @param pkt_in
93  *   The input packet.
94  * @param pkts_out
95  *   Array storing the output fragments.
96  * @param mtu_size
97  *   Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4
98  *   datagrams. This value includes the size of the IPv4 header.
99  * @param pool_direct
100  *   MBUF pool used for allocating direct buffers for the output fragments.
101  * @param pool_indirect
102  *   MBUF pool used for allocating indirect buffers for the output fragments.
103  * @return
104  *   Upon successful completion - number of output fragments placed
105  *   in the pkts_out array.
106  *   Otherwise - (-1) * <errno>.
107  */
108 int32_t
109 rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
110 	struct rte_mbuf **pkts_out,
111 	uint16_t nb_pkts_out,
112 	uint16_t mtu_size,
113 	struct rte_mempool *pool_direct,
114 	struct rte_mempool *pool_indirect)
115 {
116 	struct rte_mbuf *in_seg = NULL;
117 	struct rte_ipv4_hdr *in_hdr;
118 	uint32_t out_pkt_pos, in_seg_data_pos;
119 	uint32_t more_in_segs;
120 	uint16_t fragment_offset, flag_offset, frag_size, header_len;
121 	uint16_t frag_bytes_remaining;
122 	uint8_t ipopt_frag_hdr[IPV4_HDR_MAX_LEN];
123 	uint16_t ipopt_len;
124 
125 	/*
126 	 * Formal parameter checking.
127 	 */
128 	if (unlikely(pkt_in == NULL) || unlikely(pkts_out == NULL) ||
129 	    unlikely(nb_pkts_out == 0) ||
130 	    unlikely(pool_direct == NULL) || unlikely(pool_indirect == NULL) ||
131 	    unlikely(mtu_size < RTE_ETHER_MIN_MTU))
132 		return -EINVAL;
133 
134 	in_hdr = rte_pktmbuf_mtod(pkt_in, struct rte_ipv4_hdr *);
135 	header_len = (in_hdr->version_ihl & RTE_IPV4_HDR_IHL_MASK) *
136 	    RTE_IPV4_IHL_MULTIPLIER;
137 
138 	/* Check IP header length */
139 	if (unlikely(pkt_in->data_len < header_len) ||
140 	    unlikely(mtu_size < header_len))
141 		return -EINVAL;
142 
143 	/*
144 	 * Ensure the IP payload length of all fragments is aligned to a
145 	 * multiple of 8 bytes as per RFC791 section 2.3.
146 	 */
147 	frag_size = RTE_ALIGN_FLOOR((mtu_size - header_len),
148 				    IPV4_HDR_FO_ALIGN);
149 
150 	flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset);
151 
152 	/* If Don't Fragment flag is set */
153 	if (unlikely ((flag_offset & IPV4_HDR_DF_MASK) != 0))
154 		return -ENOTSUP;
155 
156 	/* Check that pkts_out is big enough to hold all fragments */
157 	if (unlikely(frag_size * nb_pkts_out <
158 	    (uint16_t)(pkt_in->pkt_len - header_len)))
159 		return -EINVAL;
160 
161 	in_seg = pkt_in;
162 	in_seg_data_pos = header_len;
163 	out_pkt_pos = 0;
164 	fragment_offset = 0;
165 
166 	ipopt_len = header_len - sizeof(struct rte_ipv4_hdr);
167 	if (unlikely(ipopt_len > RTE_IPV4_HDR_OPT_MAX_LEN))
168 		return -EINVAL;
169 
170 	more_in_segs = 1;
171 	while (likely(more_in_segs)) {
172 		struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL;
173 		uint32_t more_out_segs;
174 		struct rte_ipv4_hdr *out_hdr;
175 
176 		/* Allocate direct buffer */
177 		out_pkt = rte_pktmbuf_alloc(pool_direct);
178 		if (unlikely(out_pkt == NULL)) {
179 			__free_fragments(pkts_out, out_pkt_pos);
180 			return -ENOMEM;
181 		}
182 
183 		/* Reserve space for the IP header that will be built later */
184 		out_pkt->data_len = header_len;
185 		out_pkt->pkt_len = header_len;
186 		frag_bytes_remaining = frag_size;
187 
188 		out_seg_prev = out_pkt;
189 		more_out_segs = 1;
190 		while (likely(more_out_segs && more_in_segs)) {
191 			struct rte_mbuf *out_seg = NULL;
192 			uint32_t len;
193 
194 			/* Allocate indirect buffer */
195 			out_seg = rte_pktmbuf_alloc(pool_indirect);
196 			if (unlikely(out_seg == NULL)) {
197 				rte_pktmbuf_free(out_pkt);
198 				__free_fragments(pkts_out, out_pkt_pos);
199 				return -ENOMEM;
200 			}
201 			out_seg_prev->next = out_seg;
202 			out_seg_prev = out_seg;
203 
204 			/* Prepare indirect buffer */
205 			rte_pktmbuf_attach(out_seg, in_seg);
206 			len = frag_bytes_remaining;
207 			if (len > (in_seg->data_len - in_seg_data_pos)) {
208 				len = in_seg->data_len - in_seg_data_pos;
209 			}
210 			out_seg->data_off = in_seg->data_off + in_seg_data_pos;
211 			out_seg->data_len = (uint16_t)len;
212 			out_pkt->pkt_len = (uint16_t)(len +
213 			    out_pkt->pkt_len);
214 			out_pkt->nb_segs += 1;
215 			in_seg_data_pos += len;
216 			frag_bytes_remaining -= len;
217 
218 			/* Current output packet (i.e. fragment) done ? */
219 			if (unlikely(frag_bytes_remaining == 0))
220 				more_out_segs = 0;
221 
222 			/* Current input segment done ? */
223 			if (unlikely(in_seg_data_pos == in_seg->data_len)) {
224 				in_seg = in_seg->next;
225 				in_seg_data_pos = 0;
226 
227 				if (unlikely(in_seg == NULL))
228 					more_in_segs = 0;
229 			}
230 		}
231 
232 		/* Build the IP header */
233 
234 		out_hdr = rte_pktmbuf_mtod(out_pkt, struct rte_ipv4_hdr *);
235 
236 		__fill_ipv4hdr_frag(out_hdr, in_hdr, header_len,
237 		    (uint16_t)out_pkt->pkt_len,
238 		    flag_offset, fragment_offset, more_in_segs);
239 
240 		if (unlikely((fragment_offset == 0) && (ipopt_len) &&
241 			    ((flag_offset & RTE_IPV4_HDR_OFFSET_MASK) == 0))) {
242 			ipopt_len = __create_ipopt_frag_hdr((uint8_t *)in_hdr,
243 				ipopt_len, ipopt_frag_hdr);
244 			fragment_offset = (uint16_t)(fragment_offset +
245 				out_pkt->pkt_len - header_len);
246 			out_pkt->l3_len = header_len;
247 
248 			header_len = sizeof(struct rte_ipv4_hdr) + ipopt_len;
249 			in_hdr = (struct rte_ipv4_hdr *)ipopt_frag_hdr;
250 		} else {
251 			fragment_offset = (uint16_t)(fragment_offset +
252 				out_pkt->pkt_len - header_len);
253 			out_pkt->l3_len = header_len;
254 		}
255 
256 		/* Write the fragment to the output list */
257 		pkts_out[out_pkt_pos] = out_pkt;
258 		out_pkt_pos ++;
259 	}
260 
261 	return out_pkt_pos;
262 }
263