/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <limits.h> /* CHAR_BIT */
#include <stdbool.h>
#include <stdint.h>
#include <sys/queue.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib6.h>

#include "rib_log.h"

#define RTE_RIB_VALID_NODE	1
#define RIB6_MAXDEPTH		128
/* Maximum length of a RIB6 name. */
#define RTE_RIB6_NAMESIZE	64

TAILQ_HEAD(rte_rib6_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib6_tailq = {
	.name = "RTE_RIB6",
};
EAL_REGISTER_TAILQ(rte_rib6_tailq)

struct rte_rib6_node {
	struct rte_rib6_node	*left;
	struct rte_rib6_node	*right;
	struct rte_rib6_node	*parent;
	uint64_t		nh;
	uint8_t			ip[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t			depth;
	uint8_t			flag;
	__extension__ uint64_t ext[];
};
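
/*
 * Note: the ext[] flexible array member holds per-route user data of
 * conf->ext_sz bytes; rte_rib6_create() sizes each mempool element as
 * sizeof(struct rte_rib6_node) + ext_sz, so the extension lives in the
 * same allocation as the node. A minimal usage sketch (the struct and
 * sizing here are hypothetical, not part of the library):
 *
 *	struct my_route_data { uint32_t flags; };
 *	struct rte_rib6_conf conf = {
 *		.ext_sz = sizeof(struct my_route_data),
 *		.max_nodes = 1 << 16,
 *	};
 *	// after create + insert:
 *	struct my_route_data *d = rte_rib6_get_ext(node);
 */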

struct rte_rib6 {
	char		name[RTE_RIB6_NAMESIZE];
	struct rte_rib6_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t		cur_nodes;
	uint32_t		cur_routes;
	int			max_nodes;
};

static inline bool
is_valid_node(const struct rte_rib6_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(const struct rte_rib6_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by ip2/depth prefix
 */
static inline bool
is_covered(const uint8_t ip1[RTE_RIB6_IPV6_ADDR_SIZE],
		const uint8_t ip2[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	int i;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		if ((ip1[i] ^ ip2[i]) & get_msk_part(depth, i))
			return false;

	return true;
}
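
/*
 * Worked example: for ip2 = 2001:db8::/32, get_msk_part() yields 0xff
 * for bytes 0-3 and 0x00 for bytes 4-15, so only the first four bytes
 * are compared: 2001:db8:aaaa::1 is covered, 2001:db9:: is not.
 */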

static inline int
get_dir(const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	uint8_t index, msk;

	/*
	 * depth & INT8_MAX (i.e. depth & 127) clamps depth to values
	 * that will not read off the end of ip.
	 * depth is the number of bits deep into ip to traverse, and
	 * is incremented in blocks of 8 (1 byte). This means the last
	 * 3 bits are irrelevant to what the index of ip should be.
	 */
	index = (depth & INT8_MAX) / CHAR_BIT;

	/*
	 * msk is the bitmask used to extract the bit used to decide the
	 * direction of the next step of the binary search.
	 */
	msk = 1 << (7 - (depth & 7));

	return (ip[index] & msk) != 0;
}
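
/*
 * Example: for depth == 0, index == 0 and msk == 1 << 7, so the most
 * significant bit of ip[0] picks the branch; for depth == 10, index == 1
 * and msk == 1 << 5, i.e. the 11th bit of the address overall.
 */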

static inline struct rte_rib6_node *
get_nxt_node(struct rte_rib6_node *node,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	if (node->depth == RIB6_MAXDEPTH)
		return NULL;

	return (get_dir(ip, node->depth)) ? node->right : node->left;
}

static struct rte_rib6_node *
node_alloc(struct rte_rib6 *rib)
{
	struct rte_rib6_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib6 *rib, struct rte_rib6_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib6_node *
rte_rib6_lookup(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	struct rte_rib6_node *cur;
	struct rte_rib6_node *prev = NULL;

	/* check ip as well, matching the other public entry points */
	if (unlikely(rib == NULL || ip == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}
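
/*
 * Usage sketch (addresses hypothetical): with routes for 2001:db8::/32
 * and 2001:db8:1::/48 inserted, looking up 2001:db8:1::1 descends past
 * the /32 and returns the /48, since the loop remembers the last valid
 * node seen on the way down (longest prefix match):
 *
 *	uint8_t dst[RTE_RIB6_IPV6_ADDR_SIZE] = { 0x20, 0x01, 0x0d, 0xb8,
 *		0x00, 0x01, [15] = 0x01 };
 *	uint64_t nh;
 *	struct rte_rib6_node *n = rte_rib6_lookup(rib, dst);
 *	if (n != NULL && rte_rib6_get_nh(n, &nh) == 0)
 *		;	// nh is the next hop of 2001:db8:1::/48
 */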

struct rte_rib6_node *
rte_rib6_lookup_parent(struct rte_rib6_node *ent)
{
	struct rte_rib6_node *tmp;

	if (ent == NULL)
		return NULL;

	tmp = ent->parent;
	while ((tmp != NULL) && (!is_valid_node(tmp)))
		tmp = tmp->parent;

	return tmp;
}

struct rte_rib6_node *
rte_rib6_lookup_exact(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if (unlikely(rib == NULL || ip == NULL || depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	while (cur != NULL) {
		if (rte_rib6_is_equal(cur->ip, tmp_ip) &&
				(cur->depth == depth) &&
				is_valid_node(cur))
			return cur;

		if (!(is_covered(tmp_ip, cur->ip, cur->depth)) ||
				(cur->depth >= depth))
			break;

		cur = get_nxt_node(cur, tmp_ip);
	}

	return NULL;
}

/*
 * Traverse a subtree and retrieve more specific routes
 * for the given ip/depth prefix.
 * last == NULL means this is the first invocation.
 */
struct rte_rib6_node *
rte_rib6_get_nxt(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE],
	uint8_t depth, struct rte_rib6_node *last, int flag)
{
	struct rte_rib6_node *tmp, *prev = NULL;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if (unlikely(rib == NULL || ip == NULL || depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, tmp_ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, tmp_ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent != NULL) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, tmp_ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB6_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left != NULL) ? tmp->left : tmp->right;
	}
	return prev;
}
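
/*
 * Iteration sketch (prefix hypothetical): pass last == NULL on the first
 * call and feed each returned node back in. RTE_RIB6_GET_NXT_ALL visits
 * every more specific valid route; RTE_RIB6_GET_NXT_COVER additionally
 * skips routes nested under an already returned one:
 *
 *	uint8_t pfx[RTE_RIB6_IPV6_ADDR_SIZE] = { 0x20, 0x01, 0x0d, 0xb8 };
 *	struct rte_rib6_node *n = NULL;
 *	while ((n = rte_rib6_get_nxt(rib, pfx, 32, n,
 *			RTE_RIB6_GET_NXT_ALL)) != NULL) {
 *		// n is strictly more specific than 2001:db8::/32
 *	}
 */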

void
rte_rib6_remove(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur, *prev, *child;

	cur = rte_rib6_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}
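
/*
 * Removal example (prefix hypothetical): removing 2001:db8:1::/48 first
 * clears the node's valid flag; the loop above then unlinks and frees
 * the node unless it still has two children, and walks upward collapsing
 * any invalid intermediate nodes left with a single child. A later
 * lookup of 2001:db8:1::1 falls back to a covering route such as
 * 2001:db8::/32, if one exists.
 */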

struct rte_rib6_node *
rte_rib6_insert(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node **tmp;
	struct rte_rib6_node *prev = NULL;
	struct rte_rib6_node *new_node = NULL;
	struct rte_rib6_node *common_node = NULL;
	uint8_t common_prefix[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i, d;
	uint8_t common_depth, ip_xor;

	if (unlikely((rib == NULL || ip == NULL || depth > RIB6_MAXDEPTH))) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	new_node = rte_rib6_lookup_exact(rib, tmp_ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	rte_rib6_copy_addr(new_node->ip, tmp_ip);
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find a matching node or the closest one */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * The previous rte_rib6_lookup_exact() returned NULL
		 * but a node with the proper search criteria is found.
		 * Validate the intermediate node and return.
		 */
		if (rte_rib6_is_equal(tmp_ip, (*tmp)->ip) &&
				(depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}

		if (!is_covered(tmp_ip, (*tmp)->ip, (*tmp)->depth) ||
				((*tmp)->depth >= depth)) {
			break;
		}
		prev = *tmp;

		tmp = (get_dir(tmp_ip, (*tmp)->depth)) ? &(*tmp)->right :
				&(*tmp)->left;
	}

	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	for (i = 0, d = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++) {
		ip_xor = tmp_ip[i] ^ (*tmp)->ip[i];
		if (ip_xor == 0)
			d += 8;
		else {
			d += rte_clz32(ip_xor << 24);
			break;
		}
	}

	common_depth = RTE_MIN(d, common_depth);

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		common_prefix[i] = tmp_ip[i] & get_msk_part(common_depth, i);

	if (rte_rib6_is_equal(common_prefix, tmp_ip) &&
			(common_depth == depth)) {
		/* insert as a parent */
		if (get_dir((*tmp)->ip, depth))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		rte_rib6_copy_addr(common_node->ip, common_prefix);
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if (get_dir((*tmp)->ip, common_depth) == 1) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
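
/*
 * Branching example (prefixes hypothetical): inserting 2001:db8:2::/48
 * while 2001:db8:3::/48 is already present finds no equal or covering
 * node; byte 5 differs (0x02 ^ 0x03 == 0x01), giving a common depth of
 * 5 * 8 + 7 == 47 bits. An invalid (flag == 0) intermediate node for
 * 2001:db8:2::/47 is allocated with the two /48s as children; only
 * nodes carrying real routes have RTE_RIB_VALID_NODE set.
 */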

int
rte_rib6_get_ip(const struct rte_rib6_node *node,
		uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	if (unlikely(node == NULL || ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	rte_rib6_copy_addr(ip, node->ip);
	return 0;
}

int
rte_rib6_get_depth(const struct rte_rib6_node *node, uint8_t *depth)
{
	if (unlikely(node == NULL || depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib6_get_ext(struct rte_rib6_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib6_get_nh(const struct rte_rib6_node *node, uint64_t *nh)
{
	if (unlikely(node == NULL || nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib6_set_nh(struct rte_rib6_node *node, uint64_t nh)
{
	if (unlikely(node == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

struct rte_rib6 *
rte_rib6_create(const char *name, int socket_id,
		const struct rte_rib6_conf *conf)
{
	char mem_name[RTE_RIB6_NAMESIZE];
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (unlikely(name == NULL || conf == NULL || conf->max_nodes <= 0)) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib6_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RIB_LOG(ERR,
			"Can not allocate mempool for RIB6 %s", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB6_%s", name);
	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* guarantee there is no existing entry with the same name */
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB6_TAILQ_ENTRY", sizeof(*te), 0);
	if (unlikely(te == NULL)) {
		RIB_LOG(ERR,
			"Can not allocate tailq entry for RIB6 %s", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB6 data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib6), RTE_CACHE_LINE_SIZE, socket_id);
	if (unlikely(rib == NULL)) {
		RIB_LOG(ERR, "RIB6 %s memory allocation failed", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;

	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}
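
/*
 * Creation sketch (sizing hypothetical): the pool must hold every route
 * plus any invalid intermediate nodes, and a binary tree with N valid
 * leaves needs at most N - 1 internal nodes, so roughly twice the
 * expected route count is a safe max_nodes:
 *
 *	struct rte_rib6_conf conf = { .ext_sz = 0, .max_nodes = 1 << 17 };
 *	struct rte_rib6 *rib = rte_rib6_create("example_rib6",
 *		SOCKET_ID_ANY, &conf);
 *	if (rib == NULL)
 *		return;	// rte_errno: EINVAL, EEXIST or ENOMEM
 */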

struct rte_rib6 *
rte_rib6_find_existing(const char *name)
{
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;

	if (unlikely(name == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *) te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib6_free(struct rte_rib6 *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_rib6_node *tmp = NULL;
	uint8_t zero_ip[RTE_RIB6_IPV6_ADDR_SIZE] = { 0 };

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib6_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	/*
	 * Walk from the zero address at depth 0 so every remaining route
	 * is visited; passing a null ip here would fail the argument
	 * check in rte_rib6_get_nxt() and skip the cleanup loop.
	 */
	while ((tmp = rte_rib6_get_nxt(rib, zero_ip, 0, tmp,
			RTE_RIB6_GET_NXT_ALL)) != NULL)
		rte_rib6_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);

	rte_free(rib);
	rte_free(te);
}
614