/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2023 Marvell.
 */

#include <arpa/inet.h>
#include <sys/socket.h>

#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_graph.h>
#include <rte_graph_worker.h>
#include <rte_ip.h>
#include <rte_lpm6.h>

#include "rte_node_ip6_api.h"

#include "node_private.h"

#define IPV6_L3FWD_LPM_MAX_RULES 1024
#define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 8)
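
/*
 * Sizing for the per-socket LPM6 tables created in setup_lpm6() below:
 * max_rules bounds how many routes rte_node_ip6_route_add() can install in
 * each table and number_tbl8s bounds the tbl8 extension groups available for
 * longer prefixes, so both may need raising for larger route tables.
 */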

/* IP6 Lookup global data struct */
struct ip6_lookup_node_main {
	struct rte_lpm6 *lpm_tbl[RTE_MAX_NUMA_NODES];
};

struct ip6_lookup_node_ctx {
	/* Socket's LPM table */
	struct rte_lpm6 *lpm6;
	/* Dynamic offset to mbuf priv1 */
	int mbuf_priv1_off;
};
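
/*
 * Per-node scratch area: ip6_lookup_node_init() stores the socket-local LPM6
 * table pointer and the mbuf dynfield offset in node->ctx, so the fast path
 * can reach both without any global lookup. The RTE_BUILD_BUG_ON() in the
 * init hook guarantees this struct fits within RTE_NODE_CTX_SZ.
 */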

static struct ip6_lookup_node_main ip6_lookup_nm;

#define IP6_LOOKUP_NODE_LPM(ctx) \
	(((struct ip6_lookup_node_ctx *)ctx)->lpm6)

#define IP6_LOOKUP_NODE_PRIV1_OFF(ctx) \
	(((struct ip6_lookup_node_ctx *)ctx)->mbuf_priv1_off)

static uint16_t
ip6_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node *node,
			void **objs, uint16_t nb_objs)
{
	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
	struct rte_lpm6 *lpm6 = IP6_LOOKUP_NODE_LPM(node->ctx);
	const int dyn = IP6_LOOKUP_NODE_PRIV1_OFF(node->ctx);
	struct rte_ipv6_hdr *ipv6_hdr;
	void **to_next, **from;
	uint16_t last_spec = 0;
	rte_edge_t next_index;
	uint16_t n_left_from;
	uint16_t held = 0;
	uint32_t drop_nh;
	int i, rc;

	/* Speculative next */
	next_index = RTE_NODE_IP6_LOOKUP_NEXT_REWRITE;
	/* Drop node */
	drop_nh = ((uint32_t)RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP) << 16;
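	/*
	 * LPM6 results are 24-bit values encoded by rte_node_ip6_route_add():
	 * the lower 16 bits hold the next hop id consumed by ip6_rewrite and
	 * the upper bits hold the next edge. drop_nh therefore steers a
	 * packet to the pkt_drop edge with a next hop id of zero.
	 */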

	pkts = (struct rte_mbuf **)objs;
	from = objs;
	n_left_from = nb_objs;

	for (i = OBJS_PER_CLINE; i < RTE_GRAPH_BURST_SIZE; i += OBJS_PER_CLINE)
		rte_prefetch0(&objs[i]);

	for (i = 0; i < 4 && i < n_left_from; i++)
		rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *,
						sizeof(struct rte_ether_hdr)));

	/* Get stream for the speculated next node */
	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
	while (n_left_from >= 4) {
		uint8_t ip_batch[4][16];
		int32_t next_hop[4];
		uint16_t next[4];

#if RTE_GRAPH_BURST_SIZE > 64
		/* Prefetch next-next mbufs */
		if (likely(n_left_from > 11)) {
			rte_prefetch0(pkts[8]);
			rte_prefetch0(pkts[9]);
			rte_prefetch0(pkts[10]);
			rte_prefetch0(pkts[11]);
		}
#endif
		/* Prefetch next mbuf data */
		if (likely(n_left_from > 7)) {
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *,
						sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *,
						sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *,
						sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *,
						sizeof(struct rte_ether_hdr)));
		}

		mbuf0 = pkts[0];
		mbuf1 = pkts[1];
		mbuf2 = pkts[2];
		mbuf3 = pkts[3];

		pkts += 4;
		n_left_from -= 4;

		/* Extract DIP of mbuf0 */
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv6_hdr *,
				sizeof(struct rte_ether_hdr));
		/* Extract hop_limits as ipv6 hdr is in cache */
		node_mbuf_priv1(mbuf0, dyn)->ttl = ipv6_hdr->hop_limits;
		rte_memcpy(ip_batch[0], ipv6_hdr->dst_addr, 16);

		/* Extract DIP of mbuf1 */
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv6_hdr *,
				sizeof(struct rte_ether_hdr));
		/* Extract hop_limits as ipv6 hdr is in cache */
		node_mbuf_priv1(mbuf1, dyn)->ttl = ipv6_hdr->hop_limits;
		rte_memcpy(ip_batch[1], ipv6_hdr->dst_addr, 16);

		/* Extract DIP of mbuf2 */
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv6_hdr *,
				sizeof(struct rte_ether_hdr));
		/* Extract hop_limits as ipv6 hdr is in cache */
		node_mbuf_priv1(mbuf2, dyn)->ttl = ipv6_hdr->hop_limits;
		rte_memcpy(ip_batch[2], ipv6_hdr->dst_addr, 16);

		/* Extract DIP of mbuf3 */
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv6_hdr *,
				sizeof(struct rte_ether_hdr));
		/* Extract hop_limits as ipv6 hdr is in cache */
		node_mbuf_priv1(mbuf3, dyn)->ttl = ipv6_hdr->hop_limits;
		rte_memcpy(ip_batch[3], ipv6_hdr->dst_addr, 16);

		rte_lpm6_lookup_bulk_func(lpm6, ip_batch, next_hop, 4);

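		/*
		 * A negative bulk-lookup result means no matching route:
		 * substitute drop_nh so that packet takes the pkt_drop edge.
		 * Each result is then split into the per-mbuf next hop id
		 * (low 16 bits) and the next edge (bits 16 and up).
		 */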
		next_hop[0] = (next_hop[0] < 0) ? (int32_t)drop_nh : next_hop[0];
		node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop[0];
		next[0] = (uint16_t)(next_hop[0] >> 16);

		next_hop[1] = (next_hop[1] < 0) ? (int32_t)drop_nh : next_hop[1];
		node_mbuf_priv1(mbuf1, dyn)->nh = (uint16_t)next_hop[1];
		next[1] = (uint16_t)(next_hop[1] >> 16);

		next_hop[2] = (next_hop[2] < 0) ? (int32_t)drop_nh : next_hop[2];
		node_mbuf_priv1(mbuf2, dyn)->nh = (uint16_t)next_hop[2];
		next[2] = (uint16_t)(next_hop[2] >> 16);

		next_hop[3] = (next_hop[3] < 0) ? (int32_t)drop_nh : next_hop[3];
		node_mbuf_priv1(mbuf3, dyn)->nh = (uint16_t)next_hop[3];
		next[3] = (uint16_t)(next_hop[3] >> 16);

		rte_edge_t fix_spec = ((next_index == next[0]) &&
					(next_index == next[1]) &&
					(next_index == next[2]) &&
					(next_index == next[3]));

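		/*
		 * fix_spec is non-zero only when all four packets take the
		 * speculated edge (next_index). In that case just extend the
		 * speculated run; otherwise flush what has been speculated so
		 * far and enqueue each of the four packets to its real edge.
		 */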
		if (unlikely(fix_spec == 0)) {
			/* Copy things successfully speculated till now */
			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
			from += last_spec;
			to_next += last_spec;
			held += last_spec;
			last_spec = 0;

			/* Next0 */
			if (next_index == next[0]) {
				to_next[0] = from[0];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, next[0], from[0]);
			}

			/* Next1 */
			if (next_index == next[1]) {
				to_next[0] = from[1];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, next[1], from[1]);
			}

			/* Next2 */
			if (next_index == next[2]) {
				to_next[0] = from[2];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, next[2], from[2]);
			}

			/* Next3 */
			if (next_index == next[3]) {
				to_next[0] = from[3];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, next[3], from[3]);
			}

			from += 4;
		} else {
			last_spec += 4;
		}
	}

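	/* Handle the remaining (fewer than four) packets one at a time. */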
	while (n_left_from > 0) {
		uint32_t next_hop;
		uint16_t next0;

		mbuf0 = pkts[0];

		pkts += 1;
		n_left_from -= 1;

		/* Extract DIP of mbuf0 */
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv6_hdr *,
						sizeof(struct rte_ether_hdr));
		/* Extract hop_limits as ipv6 hdr is in cache */
		node_mbuf_priv1(mbuf0, dyn)->ttl = ipv6_hdr->hop_limits;

		rc = rte_lpm6_lookup(lpm6, ipv6_hdr->dst_addr, &next_hop);
		next_hop = (rc == 0) ? next_hop : drop_nh;

		node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop;
		next_hop = next_hop >> 16;
		next0 = (uint16_t)next_hop;

		if (unlikely(next_index ^ next0)) {
			/* Copy things successfully speculated till now */
			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
			from += last_spec;
			to_next += last_spec;
			held += last_spec;
			last_spec = 0;

			rte_node_enqueue_x1(graph, node, next0, from[0]);
			from += 1;
		} else {
			last_spec += 1;
		}
	}

	/* !!! Home run !!! Every packet took the speculated edge. */
	if (likely(last_spec == nb_objs)) {
		rte_node_next_stream_move(graph, node, next_index);
		return nb_objs;
	}
	held += last_spec;
	rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
	rte_node_next_stream_put(graph, node, next_index, held);

	return nb_objs;
}

int
rte_node_ip6_route_add(const uint8_t *ip, uint8_t depth, uint16_t next_hop,
		       enum rte_node_ip6_lookup_next next_node)
{
	char abuf[INET6_ADDRSTRLEN];
	struct in6_addr in6;
	uint8_t socket;
	uint32_t val;
	int ret;

	memcpy(in6.s6_addr, ip, RTE_LPM6_IPV6_ADDR_SIZE);
	inet_ntop(AF_INET6, &in6, abuf, sizeof(abuf));
	/* Embed the next node id into the 24-bit next hop value */
	val = ((next_node << 16) | next_hop) & ((1ull << 24) - 1);
	node_dbg("ip6_lookup", "LPM: Adding route %s / %d nh (0x%x)", abuf,
		 depth, val);

	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
		if (!ip6_lookup_nm.lpm_tbl[socket])
			continue;

		ret = rte_lpm6_add(ip6_lookup_nm.lpm_tbl[socket], ip, depth,
				   val);
		if (ret < 0) {
			node_err("ip6_lookup",
				 "Unable to add entry %s / %d nh (%x) to LPM "
				 "table on sock %d, rc=%d",
				 abuf, depth, val, socket, ret);
			return ret;
		}
	}

	return 0;
}
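
/*
 * Usage sketch (illustrative only, not part of this file): routes should be
 * installed after the ip6_lookup node has been initialized (i.e. after graph
 * creation), since this function only populates LPM tables that already
 * exist. For example:
 *
 *	static const uint8_t dst[RTE_LPM6_IPV6_ADDR_SIZE] = {
 *		0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 *
 *	rte_node_ip6_route_add(dst, 64, 5, RTE_NODE_IP6_LOOKUP_NEXT_REWRITE);
 *
 * where 64 is the prefix depth and 5 is a made-up next hop id that the
 * ip6_rewrite node would use to pick its rewrite data.
 */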

static int
setup_lpm6(struct ip6_lookup_node_main *nm, int socket)
{
	struct rte_lpm6_config config_ipv6;
	char s[RTE_LPM6_NAMESIZE];

	/* One LPM table per socket */
	if (nm->lpm_tbl[socket])
		return 0;

	/* create the LPM table */
	config_ipv6.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
	config_ipv6.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
	config_ipv6.flags = 0;
	snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socket);
	nm->lpm_tbl[socket] = rte_lpm6_create(s, socket, &config_ipv6);
	if (nm->lpm_tbl[socket] == NULL)
		return -rte_errno;

	return 0;
}

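/*
 * Node init hook: runs for each node instance at graph creation. On first
 * use it registers the mbuf priv1 dynfield and creates one LPM6 table per
 * NUMA socket that has an lcore; it then caches the socket-local table
 * pointer and the dynfield offset in this node's context.
 */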
static int
ip6_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
{
	uint16_t socket, lcore_id;
	static uint8_t init_once;
	int rc;

	RTE_SET_USED(graph);
	RTE_BUILD_BUG_ON(sizeof(struct ip6_lookup_node_ctx) > RTE_NODE_CTX_SZ);

	if (!init_once) {
		node_mbuf_priv1_dynfield_offset =
			rte_mbuf_dynfield_register(
				&node_mbuf_priv1_dynfield_desc);
		if (node_mbuf_priv1_dynfield_offset < 0)
			return -rte_errno;

		/* Setup LPM tables for all sockets */
		RTE_LCORE_FOREACH(lcore_id)
		{
			socket = rte_lcore_to_socket_id(lcore_id);
			rc = setup_lpm6(&ip6_lookup_nm, socket);
			if (rc) {
				node_err("ip6_lookup",
					 "Failed to setup lpm6 tbl for "
					 "sock %u, rc=%d", socket, rc);
				return rc;
			}
		}
		init_once = 1;
	}

	/* Update socket's LPM and mbuf dyn priv1 offset in node ctx */
	IP6_LOOKUP_NODE_LPM(node->ctx) = ip6_lookup_nm.lpm_tbl[graph->socket];
	IP6_LOOKUP_NODE_PRIV1_OFF(node->ctx) =
					node_mbuf_priv1_dynfield_offset;

	node_dbg("ip6_lookup", "Initialized ip6_lookup node");

	return 0;
}

static struct rte_node_register ip6_lookup_node = {
	.process = ip6_lookup_node_process_scalar,
	.name = "ip6_lookup",

	.init = ip6_lookup_node_init,

	.nb_edges = RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP + 1,
	.next_nodes = {
		[RTE_NODE_IP6_LOOKUP_NEXT_REWRITE] = "ip6_rewrite",
		[RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP] = "pkt_drop",
	},
};

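/*
 * Constructor-time registration: RTE_NODE_REGISTER() adds "ip6_lookup" to the
 * node library before main() runs, with edges to "ip6_rewrite" and "pkt_drop".
 * An application typically pulls it into a graph by naming it (directly or via
 * a pattern) in rte_graph_param::node_patterns when calling rte_graph_create().
 */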
RTE_NODE_REGISTER(ip6_lookup_node);