xref: /dpdk/lib/rib/rte_rib6.c (revision 59b993151ff57e9e8b0fdb1d4b57913243b605fa)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <limits.h> /* CHAR_BIT */
#include <stdbool.h>
#include <stdint.h>
#include <sys/queue.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib6.h>

#include "rib_log.h"

#define RTE_RIB_VALID_NODE	1
/* Maximum length of a RIB6 name. */
#define RTE_RIB6_NAMESIZE	64

TAILQ_HEAD(rte_rib6_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib6_tailq = {
	.name = "RTE_RIB6",
};
EAL_REGISTER_TAILQ(rte_rib6_tailq)

struct rte_rib6_node {
	struct rte_rib6_node	*left;
	struct rte_rib6_node	*right;
	struct rte_rib6_node	*parent;
	uint64_t		nh;
	struct rte_ipv6_addr	ip;
	uint8_t			depth;
	uint8_t			flag;
	uint64_t		ext[];
};

struct rte_rib6 {
	char			name[RTE_RIB6_NAMESIZE];
	struct rte_rib6_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t		cur_nodes;
	uint32_t		cur_routes;
	int			max_nodes;
};

static inline bool
is_valid_node(const struct rte_rib6_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(const struct rte_rib6_node *node)
{
	return node->parent->right == node;
}

static inline int
get_dir(const struct rte_ipv6_addr *ip, uint8_t depth)
{
	uint8_t index, msk;

	/*
	 * depth & 127 clamps depth to values that will not
	 * read off the end of ip.
	 * depth is the number of bits deep into ip to traverse, and
	 * is incremented in blocks of 8 (1 byte). This means the last
	 * 3 bits are irrelevant to what the index of ip should be.
	 */
	index = (depth & INT8_MAX) / CHAR_BIT;

	/*
	 * msk is the bitmask used to extract the bit used to decide the
	 * direction of the next step of the binary search.
	 */
	msk = 1 << (7 - (depth & 7));

	return (ip->a[index] & msk) != 0;
}
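
/*
 * Worked example (illustrative): for depth == 10, index = (10 & 127) / 8
 * == 1 and msk = 1 << (7 - (10 & 7)) == 0x20, so the direction is taken
 * from bit 2 (counting from the most significant bit) of the address's
 * second byte, i.e. bit 10 of the prefix.
 */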

static inline struct rte_rib6_node *
get_nxt_node(struct rte_rib6_node *node,
	const struct rte_ipv6_addr *ip)
{
	if (node->depth == RTE_IPV6_MAX_DEPTH)
		return NULL;

	return (get_dir(ip, node->depth)) ? node->right : node->left;
}

static struct rte_rib6_node *
node_alloc(struct rte_rib6 *rib)
{
	struct rte_rib6_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib6 *rib, struct rte_rib6_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib6_node *
rte_rib6_lookup(struct rte_rib6 *rib,
	const struct rte_ipv6_addr *ip)
{
	struct rte_rib6_node *cur;
	struct rte_rib6_node *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	while ((cur != NULL) && rte_ipv6_addr_eq_prefix(ip, &cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}
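
/*
 * Illustrative sketch (hypothetical helper, not part of the library):
 * longest-prefix lookup of a destination address, fetching the next hop
 * of the best matching route via rte_rib6_get_nh().
 */
static __rte_unused int
demo_lookup_nh(struct rte_rib6 *rib, const struct rte_ipv6_addr *dst,
	uint64_t *nh)
{
	struct rte_rib6_node *node;

	node = rte_rib6_lookup(rib, dst); /* longest matching prefix */
	if (node == NULL)
		return -1; /* no covering route (or invalid arguments) */
	return rte_rib6_get_nh(node, nh);
}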

struct rte_rib6_node *
rte_rib6_lookup_parent(struct rte_rib6_node *ent)
{
	struct rte_rib6_node *tmp;

	if (ent == NULL)
		return NULL;

	tmp = ent->parent;
	while ((tmp != NULL) && (!is_valid_node(tmp)))
		tmp = tmp->parent;

	return tmp;
}

struct rte_rib6_node *
rte_rib6_lookup_exact(struct rte_rib6 *rib,
	const struct rte_ipv6_addr *ip, uint8_t depth)
{
	struct rte_rib6_node *cur;
	struct rte_ipv6_addr tmp_ip;

	if (unlikely(rib == NULL || ip == NULL || depth > RTE_IPV6_MAX_DEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	tmp_ip = *ip;
	rte_ipv6_addr_mask(&tmp_ip, depth);

	while (cur != NULL) {
		if (rte_ipv6_addr_eq(&cur->ip, &tmp_ip) &&
				(cur->depth == depth) &&
				is_valid_node(cur))
			return cur;

		if (!rte_ipv6_addr_eq_prefix(&tmp_ip, &cur->ip, cur->depth) ||
				(cur->depth >= depth))
			break;

		cur = get_nxt_node(cur, &tmp_ip);
	}

	return NULL;
}

/*
 * Traverses a subtree and retrieves more specific routes
 * for a given ip/depth prefix.
 * last == NULL means this is the first invocation.
 */
struct rte_rib6_node *
rte_rib6_get_nxt(struct rte_rib6 *rib,
	const struct rte_ipv6_addr *ip,
	uint8_t depth, struct rte_rib6_node *last, int flag)
{
	struct rte_rib6_node *tmp, *prev = NULL;
	struct rte_ipv6_addr tmp_ip;

	if (unlikely(rib == NULL || ip == NULL || depth > RTE_IPV6_MAX_DEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp_ip = *ip;
	rte_ipv6_addr_mask(&tmp_ip, depth);

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, &tmp_ip);
	} else {
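		/*
		 * Resume after 'last': climb while the current node is a
		 * right child or has no right sibling, reporting any valid
		 * node on the way up that is still more specific than
		 * ip/depth, then step into the first unvisited right subtree.
		 */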
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(rte_ipv6_addr_eq_prefix(&tmp->ip, &tmp_ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent != NULL) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(rte_ipv6_addr_eq_prefix(&tmp->ip, &tmp_ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB6_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left != NULL) ? tmp->left : tmp->right;
	}
	return prev;
}
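
/*
 * Illustrative sketch (hypothetical helper, not part of the library):
 * count every valid route that is more specific than prefix/depth by
 * walking the subtree with rte_rib6_get_nxt(), starting with last == NULL.
 */
static __rte_unused uint32_t
demo_count_subtree(struct rte_rib6 *rib, const struct rte_ipv6_addr *prefix,
	uint8_t depth)
{
	struct rte_rib6_node *node = NULL;
	uint32_t n = 0;

	while ((node = rte_rib6_get_nxt(rib, prefix, depth, node,
			RTE_RIB6_GET_NXT_ALL)) != NULL)
		++n;
	return n;
}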

void
rte_rib6_remove(struct rte_rib6 *rib,
	const struct rte_ipv6_addr *ip, uint8_t depth)
{
	struct rte_rib6_node *cur, *prev, *child;

	cur = rte_rib6_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
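	/*
	 * The route is now invalid. Collapse the chain of invalid nodes
	 * with at most one child: splice the child (possibly NULL) into
	 * the grandparent and free each node, stopping at the first node
	 * that is still a valid route or still has two children.
	 */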
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}

struct rte_rib6_node *
rte_rib6_insert(struct rte_rib6 *rib,
	const struct rte_ipv6_addr *ip, uint8_t depth)
{
	struct rte_rib6_node **tmp;
	struct rte_rib6_node *prev = NULL;
	struct rte_rib6_node *new_node = NULL;
	struct rte_rib6_node *common_node = NULL;
	struct rte_ipv6_addr common_prefix;
	struct rte_ipv6_addr tmp_ip;
	int i, d;
	uint8_t common_depth, ip_xor;

	if (unlikely(rib == NULL || ip == NULL || depth > RTE_IPV6_MAX_DEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;

	tmp_ip = *ip;
	rte_ipv6_addr_mask(&tmp_ip, depth);

	new_node = rte_rib6_lookup_exact(rib, &tmp_ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	new_node->ip = tmp_ip;
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find a matching or closest matching node */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * The previous rte_rib6_lookup_exact() returned NULL,
		 * but a node matching the search criteria is found.
		 * Validate the intermediate node and return.
		 */
		if (rte_ipv6_addr_eq(&tmp_ip, &(*tmp)->ip) && (depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}

		if (!rte_ipv6_addr_eq_prefix(&tmp_ip, &(*tmp)->ip, (*tmp)->depth) ||
				((*tmp)->depth >= depth)) {
			break;
		}
		prev = *tmp;

		tmp = (get_dir(&tmp_ip, (*tmp)->depth)) ? &(*tmp)->right :
				&(*tmp)->left;
	}

	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	for (i = 0, d = 0; i < RTE_IPV6_ADDR_SIZE; i++) {
		ip_xor = tmp_ip.a[i] ^ (*tmp)->ip.a[i];
		if (ip_xor == 0)
			d += 8;
		else {
			d += rte_clz32(ip_xor << 24);
			break;
		}
	}
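
	/*
	 * d now holds the number of leading bits shared by tmp_ip and the
	 * conflicting node's address. For example, if the first differing
	 * bytes XOR to 0x10, rte_clz32(0x10 << 24) == 3, so those bytes
	 * agree only in their top 3 bits.
	 */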

	common_depth = RTE_MIN(d, common_depth);

	common_prefix = tmp_ip;
	rte_ipv6_addr_mask(&common_prefix, common_depth);

	if (rte_ipv6_addr_eq(&common_prefix, &tmp_ip) &&
			(common_depth == depth)) {
		/* insert as a parent */
		if (get_dir(&(*tmp)->ip, depth))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		common_node->ip = common_prefix;
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if (get_dir(&(*tmp)->ip, common_depth) == 1) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
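
/*
 * Illustrative sketch (hypothetical helper, not part of the library):
 * insert a route and attach its next hop in one step.
 */
static __rte_unused struct rte_rib6_node *
demo_add_route(struct rte_rib6 *rib, const struct rte_ipv6_addr *prefix,
	uint8_t depth, uint64_t nh)
{
	struct rte_rib6_node *node;

	node = rte_rib6_insert(rib, prefix, depth);
	if (node == NULL)
		return NULL; /* rte_errno is EINVAL, EEXIST or ENOMEM */
	rte_rib6_set_nh(node, nh);
	return node;
}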

int
rte_rib6_get_ip(const struct rte_rib6_node *node,
		struct rte_ipv6_addr *ip)
{
	if (unlikely(node == NULL || ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*ip = node->ip;
	return 0;
}

int
rte_rib6_get_depth(const struct rte_rib6_node *node, uint8_t *depth)
{
	if (unlikely(node == NULL || depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib6_get_ext(struct rte_rib6_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}
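
/*
 * Illustrative sketch: if the RIB was created with
 * conf->ext_sz == sizeof(struct demo_route_meta), each node carries one
 * such object after its header. Both demo_route_meta and demo_tag_route
 * are hypothetical names, not part of the library.
 */
struct demo_route_meta {
	uint32_t ifindex;
	uint32_t metric;
};

static __rte_unused void
demo_tag_route(struct rte_rib6_node *node, uint32_t ifindex, uint32_t metric)
{
	struct demo_route_meta *meta = rte_rib6_get_ext(node);

	if (meta == NULL)
		return;
	meta->ifindex = ifindex;
	meta->metric = metric;
}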

int
rte_rib6_get_nh(const struct rte_rib6_node *node, uint64_t *nh)
{
	if (unlikely(node == NULL || nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib6_set_nh(struct rte_rib6_node *node, uint64_t nh)
{
	if (unlikely(node == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

struct rte_rib6 *
rte_rib6_create(const char *name, int socket_id,
		const struct rte_rib6_conf *conf)
{
	char mem_name[RTE_RIB6_NAMESIZE];
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (unlikely(name == NULL || conf == NULL || conf->max_nodes <= 0)) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib6_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RIB_LOG(ERR,
			"Cannot allocate mempool for RIB6 %s", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB6_%s", name);
	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* make sure a RIB with this name does not already exist */
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB6_TAILQ_ENTRY", sizeof(*te), 0);
	if (unlikely(te == NULL)) {
		RIB_LOG(ERR,
			"Cannot allocate tailq entry for RIB6 %s", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB6 data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib6), RTE_CACHE_LINE_SIZE, socket_id);
	if (unlikely(rib == NULL)) {
		RIB_LOG(ERR, "RIB6 %s memory allocation failed", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;

	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}
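
/*
 * Illustrative sketch (hypothetical values): create a RIB6 sized for
 * 64K routes with no per-node extension, letting the allocator pick
 * the NUMA node via SOCKET_ID_ANY.
 */
static __rte_unused struct rte_rib6 *
demo_create_rib6(void)
{
	struct rte_rib6_conf conf = {
		.ext_sz = 0,
		.max_nodes = 1 << 16,
	};

	return rte_rib6_create("demo_rib6", SOCKET_ID_ANY, &conf);
}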

struct rte_rib6 *
rte_rib6_find_existing(const char *name)
{
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;

	if (unlikely(name == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib6_free(struct rte_rib6 *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_rib6_node *tmp = NULL;
	struct rte_ipv6_addr zero_ip = { 0 };

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib6_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	/*
	 * Walk every remaining route starting from ::/0 and withdraw it;
	 * rte_rib6_get_nxt() requires a non-NULL start address.
	 */
	while ((tmp = rte_rib6_get_nxt(rib, &zero_ip, 0, tmp,
			RTE_RIB6_GET_NXT_ALL)) != NULL)
		rte_rib6_remove(rib, &tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);

	rte_free(rib);
	rte_free(te);
}
594