xref: /dpdk/lib/node/ip6_lookup.c (revision 2cfebc3f1046e4166e13b4f906e3ddc1c26c7eeb)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2023 Marvell.
3  */
4 
5 #include <arpa/inet.h>
6 #include <sys/socket.h>
7 
8 #include <rte_ethdev.h>
9 #include <rte_ether.h>
10 #include <rte_graph.h>
11 #include <rte_graph_worker.h>
12 #include <rte_ip.h>
13 #include <rte_lpm6.h>
14 
15 #include "rte_node_ip6_api.h"
16 
17 #include "node_private.h"
18 
/* Sizing of each per-socket LPM6 table */
#define IPV6_L3FWD_LPM_MAX_RULES 1024
#define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 8)

/* IP6 Lookup global data struct */
struct ip6_lookup_node_main {
	/* One LPM6 table per NUMA socket; index is the socket id */
	struct rte_lpm6 *lpm_tbl[RTE_MAX_NUMA_NODES];
};

/* Per-node context, overlaid on the opaque node->ctx byte array */
struct ip6_lookup_node_ctx {
	/* Socket's LPM table */
	struct rte_lpm6 *lpm6;
	/* Dynamic offset to mbuf priv1 */
	int mbuf_priv1_off;
};

static struct ip6_lookup_node_main ip6_lookup_nm;

/* Accessors for the fields of struct ip6_lookup_node_ctx inside node->ctx */
#define IP6_LOOKUP_NODE_LPM(ctx) \
	(((struct ip6_lookup_node_ctx *)ctx)->lpm6)

#define IP6_LOOKUP_NODE_PRIV1_OFF(ctx) \
	(((struct ip6_lookup_node_ctx *)ctx)->mbuf_priv1_off)
41 
/*
 * Scalar IPv6 LPM lookup node process function.
 *
 * For each mbuf: read the IPv6 header (assumed to sit right after a plain
 * Ethernet header — no VLAN/extension handling visible here), stash
 * hop_limits and the 16-bit next hop into the mbuf's priv1 dynfield, and
 * route the packet to the graph edge encoded in bits 16..23 of the LPM
 * result (see rte_node_ip6_route_add, which packs next_node into those bits).
 *
 * Enqueue strategy is speculative: all packets are assumed to go to
 * RTE_NODE_IP6_LOOKUP_NEXT_REWRITE. Matching packets are counted in
 * last_spec and copied in bulk only when a misprediction forces a flush;
 * mispredicted packets are enqueued one-by-one to their actual edge.
 *
 * Returns nb_objs (all packets are consumed).
 */
static uint16_t
ip6_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node *node,
			void **objs, uint16_t nb_objs)
{
	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
	struct rte_lpm6 *lpm6 = IP6_LOOKUP_NODE_LPM(node->ctx);
	const int dyn = IP6_LOOKUP_NODE_PRIV1_OFF(node->ctx);
	struct rte_ipv6_hdr *ipv6_hdr;
	void **to_next, **from;
	uint16_t last_spec = 0;	/* run length of correctly speculated pkts */
	rte_edge_t next_index;
	uint16_t n_left_from;
	uint16_t held = 0;	/* pkts already placed in the speculated stream */
	uint32_t drop_nh;
	int i, rc;

	/* Speculative next */
	next_index = RTE_NODE_IP6_LOOKUP_NEXT_REWRITE;
	/* Drop node: edge id in bits 16..23, next hop (low 16 bits) left 0 */
	drop_nh = ((uint32_t)RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP) << 16;

	pkts = (struct rte_mbuf **)objs;
	from = objs;
	n_left_from = nb_objs;

	/* Prefetch the remaining cache lines of the objs[] array itself */
	for (i = OBJS_PER_CLINE; i < RTE_GRAPH_BURST_SIZE; i += OBJS_PER_CLINE)
		rte_prefetch0(&objs[i]);

	for (i = 0; i < 4 && i < n_left_from; i++)
		rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *,
						sizeof(struct rte_ether_hdr)));

	/* Get stream for the speculated next node */
	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
	while (n_left_from >= 4) {
		struct rte_ipv6_addr ip_batch[4];
		int32_t next_hop[4];
		uint16_t next[4];

#if RTE_GRAPH_BURST_SIZE > 64
		/* Prefetch next-next mbufs */
		if (likely(n_left_from > 11)) {
			rte_prefetch0(pkts[8]);
			rte_prefetch0(pkts[9]);
			rte_prefetch0(pkts[10]);
			rte_prefetch0(pkts[11]);
		}
#endif
		/* Prefetch next mbuf data */
		if (likely(n_left_from > 7)) {
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *,
						sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *,
						sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *,
						sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *,
						sizeof(struct rte_ether_hdr)));
		}

		mbuf0 = pkts[0];
		mbuf1 = pkts[1];
		mbuf2 = pkts[2];
		mbuf3 = pkts[3];

		pkts += 4;
		n_left_from -= 4;

		/* Extract DIP of mbuf0 */
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv6_hdr *,
				sizeof(struct rte_ether_hdr));
		/* Extract hop_limits as ipv6 hdr is in cache */
		node_mbuf_priv1(mbuf0, dyn)->ttl = ipv6_hdr->hop_limits;
		ip_batch[0] = ipv6_hdr->dst_addr;

		/* Extract DIP of mbuf1 */
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv6_hdr *,
				sizeof(struct rte_ether_hdr));
		/* Extract hop_limits as ipv6 hdr is in cache */
		node_mbuf_priv1(mbuf1, dyn)->ttl = ipv6_hdr->hop_limits;
		ip_batch[1] = ipv6_hdr->dst_addr;

		/* Extract DIP of mbuf2 */
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv6_hdr *,
				sizeof(struct rte_ether_hdr));
		/* Extract hop_limits as ipv6 hdr is in cache */
		node_mbuf_priv1(mbuf2, dyn)->ttl = ipv6_hdr->hop_limits;
		ip_batch[2] = ipv6_hdr->dst_addr;

		/* Extract DIP of mbuf3 */
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv6_hdr *,
				sizeof(struct rte_ether_hdr));
		/* Extract hop_limits as ipv6 hdr is in cache */
		node_mbuf_priv1(mbuf3, dyn)->ttl = ipv6_hdr->hop_limits;
		ip_batch[3] = ipv6_hdr->dst_addr;

		rte_lpm6_lookup_bulk_func(lpm6, ip_batch, next_hop, 4);

		/* Negative lookup result means miss -> send to drop edge */
		next_hop[0] = (next_hop[0] < 0) ? (int32_t)drop_nh : next_hop[0];
		node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop[0];
		next[0] = (uint16_t)(next_hop[0] >> 16);

		next_hop[1] = (next_hop[1] < 0) ? (int32_t)drop_nh : next_hop[1];
		node_mbuf_priv1(mbuf1, dyn)->nh = (uint16_t)next_hop[1];
		next[1] = (uint16_t)(next_hop[1] >> 16);

		next_hop[2] = (next_hop[2] < 0) ? (int32_t)drop_nh : next_hop[2];
		node_mbuf_priv1(mbuf2, dyn)->nh = (uint16_t)next_hop[2];
		next[2] = (uint16_t)(next_hop[2] >> 16);

		next_hop[3] = (next_hop[3] < 0) ? (int32_t)drop_nh : next_hop[3];
		node_mbuf_priv1(mbuf3, dyn)->nh = (uint16_t)next_hop[3];
		next[3] = (uint16_t)(next_hop[3] >> 16);

		/*
		 * NOTE(review): despite the name, fix_spec is NONZERO when all
		 * four packets matched the speculated edge (no fixup needed);
		 * the fixup runs in the == 0 branch below.
		 */
		rte_edge_t fix_spec = ((next_index == next[0]) &&
					(next_index == next[1]) &&
					(next_index == next[2]) &&
					(next_index == next[3]));

		if (unlikely(fix_spec == 0)) {
			/* Copy things successfully speculated till now */
			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
			from += last_spec;
			to_next += last_spec;
			held += last_spec;
			last_spec = 0;

			/* Next0 */
			if (next_index == next[0]) {
				to_next[0] = from[0];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, next[0], from[0]);
			}

			/* Next1 */
			if (next_index == next[1]) {
				to_next[0] = from[1];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, next[1], from[1]);
			}

			/* Next2 */
			if (next_index == next[2]) {
				to_next[0] = from[2];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, next[2], from[2]);
			}

			/* Next3 */
			if (next_index == next[3]) {
				to_next[0] = from[3];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, next[3], from[3]);
			}

			from += 4;
		} else {
			last_spec += 4;
		}
	}

	/* Leftover (< 4) packets, one at a time */
	while (n_left_from > 0) {
		uint32_t next_hop;
		uint16_t next0;

		mbuf0 = pkts[0];

		pkts += 1;
		n_left_from -= 1;

		/* Extract DIP of mbuf0 */
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv6_hdr *,
						sizeof(struct rte_ether_hdr));
		/* Extract hop_limits as IPv6 hdr is in cache */
		node_mbuf_priv1(mbuf0, dyn)->ttl = ipv6_hdr->hop_limits;

		rc = rte_lpm6_lookup(lpm6, &ipv6_hdr->dst_addr, &next_hop);
		next_hop = (rc == 0) ? next_hop : drop_nh;

		node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop;
		next_hop = next_hop >> 16;
		next0 = (uint16_t)next_hop;

		/* XOR is nonzero iff the speculated edge was wrong */
		if (unlikely(next_index ^ next0)) {
			/* Copy things successfully speculated till now */
			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
			from += last_spec;
			to_next += last_spec;
			held += last_spec;
			last_spec = 0;

			rte_node_enqueue_x1(graph, node, next0, from[0]);
			from += 1;
		} else {
			last_spec += 1;
		}
	}

	/* !!! Home run !!! All packets went to the speculated edge:
	 * hand the whole input stream over without copying. */
	if (likely(last_spec == nb_objs)) {
		rte_node_next_stream_move(graph, node, next_index);
		return nb_objs;
	}
	held += last_spec;
	rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
	rte_node_next_stream_put(graph, node, next_index, held);

	return nb_objs;
}
259 
260 int
261 rte_node_ip6_route_add(const struct rte_ipv6_addr *ip, uint8_t depth, uint16_t next_hop,
262 		       enum rte_node_ip6_lookup_next next_node)
263 {
264 	char abuf[INET6_ADDRSTRLEN];
265 	uint8_t socket;
266 	uint32_t val;
267 	int ret;
268 
269 	inet_ntop(AF_INET6, ip, abuf, sizeof(abuf));
270 	/* Embedded next node id into 24 bit next hop */
271 	val = ((next_node << 16) | next_hop) & ((1ull << 24) - 1);
272 	node_dbg("ip6_lookup", "LPM: Adding route %s / %d nh (0x%x)", abuf,
273 		 depth, val);
274 
275 	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
276 		if (!ip6_lookup_nm.lpm_tbl[socket])
277 			continue;
278 
279 		ret = rte_lpm6_add(ip6_lookup_nm.lpm_tbl[socket], ip, depth, val);
280 		if (ret < 0) {
281 			node_err("ip6_lookup",
282 				 "Unable to add entry %s / %d nh (%x) to LPM "
283 				 "table on sock %d, rc=%d",
284 				 abuf, depth, val, socket, ret);
285 			return ret;
286 		}
287 	}
288 
289 	return 0;
290 }
291 
292 static int
293 setup_lpm6(struct ip6_lookup_node_main *nm, int socket)
294 {
295 	struct rte_lpm6_config config_ipv6;
296 	char s[RTE_LPM6_NAMESIZE];
297 
298 	/* One LPM table per socket */
299 	if (nm->lpm_tbl[socket])
300 		return 0;
301 
302 	/* create the LPM table */
303 	config_ipv6.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
304 	config_ipv6.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
305 	config_ipv6.flags = 0;
306 	snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socket);
307 	nm->lpm_tbl[socket] = rte_lpm6_create(s, socket, &config_ipv6);
308 	if (nm->lpm_tbl[socket] == NULL)
309 		return -rte_errno;
310 
311 	return 0;
312 }
313 
314 static int
315 ip6_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
316 {
317 	uint16_t socket, lcore_id;
318 	static uint8_t init_once;
319 	int rc;
320 
321 	RTE_SET_USED(graph);
322 	RTE_BUILD_BUG_ON(sizeof(struct ip6_lookup_node_ctx) > RTE_NODE_CTX_SZ);
323 
324 	if (!init_once) {
325 		node_mbuf_priv1_dynfield_offset =
326 			rte_mbuf_dynfield_register(
327 				&node_mbuf_priv1_dynfield_desc);
328 		if (node_mbuf_priv1_dynfield_offset < 0)
329 			return -rte_errno;
330 
331 		/* Setup LPM tables for all sockets */
332 		RTE_LCORE_FOREACH(lcore_id)
333 		{
334 			socket = rte_lcore_to_socket_id(lcore_id);
335 			rc = setup_lpm6(&ip6_lookup_nm, socket);
336 			if (rc) {
337 				node_err("ip6_lookup",
338 					 "Failed to setup lpm6 tbl for "
339 					 "sock %u, rc=%d", socket, rc);
340 				return rc;
341 			}
342 		}
343 		init_once = 1;
344 	}
345 
346 	/* Update socket's LPM and mbuf dyn priv1 offset in node ctx */
347 	IP6_LOOKUP_NODE_LPM(node->ctx) = ip6_lookup_nm.lpm_tbl[graph->socket];
348 	IP6_LOOKUP_NODE_PRIV1_OFF(node->ctx) =
349 					node_mbuf_priv1_dynfield_offset;
350 
351 	node_dbg("ip6_lookup", "Initialized ip6_lookup node");
352 
353 	return 0;
354 }
355 
/* Node registration: scalar process fn, two next edges (rewrite, drop) */
static struct rte_node_register ip6_lookup_node = {
	.process = ip6_lookup_node_process_scalar,
	.name = "ip6_lookup",

	.init = ip6_lookup_node_init,

	/* Edge ids are dense, so the count is the highest edge id + 1 */
	.nb_edges = RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP + 1,
	.next_nodes = {
		[RTE_NODE_IP6_LOOKUP_NEXT_REWRITE] = "ip6_rewrite",
		[RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP] = "pkt_drop",
	},
};

/* Register the node with the graph library at load time */
RTE_NODE_REGISTER(ip6_lookup_node);
370