/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>
#include <sys/queue.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib.h>

#include "rib_log.h"

RTE_LOG_REGISTER_DEFAULT(rib_logtype, INFO);

TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib_tailq = {
	.name = "RTE_RIB",
};
EAL_REGISTER_TAILQ(rte_rib_tailq)

#define RTE_RIB_VALID_NODE	1
/* Maximum depth value possible for IPv4 RIB. */
#define RIB_MAXDEPTH		32
/* Maximum length of a RIB name. */
#define RTE_RIB_NAMESIZE	64

struct rte_rib_node {
	struct rte_rib_node	*left;
	struct rte_rib_node	*right;
	struct rte_rib_node	*parent;
	uint32_t	ip;
	uint8_t		depth;
	uint8_t		flag;
	uint64_t	nh;
	__extension__ uint64_t ext[];
};

struct rte_rib {
	char		name[RTE_RIB_NAMESIZE];
	struct rte_rib_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t		cur_nodes;
	uint32_t		cur_routes;
	uint32_t		max_nodes;
};

static inline bool
is_valid_node(const struct rte_rib_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(const struct rte_rib_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by ip2/depth prefix
 */
static inline bool
is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth)
{
	return ((ip1 ^ ip2) & rte_rib_depth_to_mask(depth)) == 0;
}
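
/*
 * Worked example (editorial note, not from the original source): for the
 * prefix ip2/depth = 10.0.0.0/8, rte_rib_depth_to_mask(8) is 0xff000000,
 * so ip1 = 10.1.2.3 (0x0a010203) XORs with 0x0a000000 to 0x00010203,
 * which the mask clears to zero: covered. For ip1 = 11.0.0.1 the masked
 * XOR is 0x01000000, so it is not covered.
 */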

static inline struct rte_rib_node *
get_nxt_node(struct rte_rib_node *node, uint32_t ip)
{
	if (node->depth == RIB_MAXDEPTH)
		return NULL;
	return (ip & (1U << (31 - node->depth))) ? node->right : node->left;
}
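
/*
 * Descriptive note: a node of depth d owns the top d bits of its prefix,
 * so the first bit below it, bit (31 - d) counting from the least
 * significant bit, selects the child. At the root (d == 0) this is the
 * most significant bit: addresses at or above 128.0.0.0 descend right,
 * all others descend left.
 */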

static struct rte_rib_node *
node_alloc(struct rte_rib *rib)
{
	struct rte_rib_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib *rib, struct rte_rib_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib_node *
rte_rib_lookup(struct rte_rib *rib, uint32_t ip)
{
	struct rte_rib_node *cur, *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	cur = rib->tree;
	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}
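
/*
 * Usage sketch (illustrative, assuming an already populated rib handle):
 * longest-prefix match for a host address, then fetch its next hop.
 *
 *	uint64_t nh;
 *	struct rte_rib_node *ent;
 *
 *	ent = rte_rib_lookup(rib, RTE_IPV4(10, 1, 2, 3));
 *	if (ent != NULL && rte_rib_get_nh(ent, &nh) == 0)
 *		printf("next hop id: %" PRIu64 "\n", nh);
 */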

struct rte_rib_node *
rte_rib_lookup_parent(struct rte_rib_node *ent)
{
	struct rte_rib_node *tmp;

	if (ent == NULL)
		return NULL;
	tmp = ent->parent;
	while ((tmp != NULL) && !is_valid_node(tmp))
		tmp = tmp->parent;
	return tmp;
}

static struct rte_rib_node *
__rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur;

	cur = rib->tree;
	while (cur != NULL) {
		if ((cur->ip == ip) && (cur->depth == depth) &&
				is_valid_node(cur))
			return cur;
		if ((cur->depth > depth) ||
				!is_covered(ip, cur->ip, cur->depth))
			break;
		cur = get_nxt_node(cur, ip);
	}
	return NULL;
}
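
/*
 * Descriptive note: __rib_lookup_exact() only reports nodes whose valid
 * flag is set. A prefix that exists merely as an internal branching node
 * is treated as absent; rte_rib_insert() relies on this and simply marks
 * such a node valid when the same prefix is added later.
 */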

struct rte_rib_node *
rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	ip &= rte_rib_depth_to_mask(depth);

	return __rib_lookup_exact(rib, ip, depth);
}

/*
 * Traverse a subtree and retrieve the more specific routes
 * covered by the given ip/depth prefix.
 * last == NULL means the first invocation.
 */
struct rte_rib_node *
rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip,
	uint8_t depth, struct rte_rib_node *last, int flag)
{
	struct rte_rib_node *tmp, *prev = NULL;

	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left) ? tmp->left : tmp->right;
	}
	return prev;
}
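
/*
 * Usage sketch (illustrative): enumerate every route more specific than
 * 10.0.0.0/8 by feeding each returned node back in as "last";
 * handle_route() is a hypothetical callback.
 *
 *	struct rte_rib_node *ent = NULL;
 *
 *	while ((ent = rte_rib_get_nxt(rib, RTE_IPV4(10, 0, 0, 0), 8, ent,
 *			RTE_RIB_GET_NXT_ALL)) != NULL)
 *		handle_route(ent);
 */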

void
rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur, *prev, *child;

	cur = rte_rib_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}
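
/*
 * Descriptive note: after the route is invalidated, the loop above prunes
 * the chain of now-useless internal nodes. A node with two children is a
 * branching point and must stay; a node with at most one child is spliced
 * out and its child (if any) is re-linked to the grandparent. The walk
 * continues upward until it reaches a valid route or a branching node, so
 * no unreferenced intermediate nodes are left behind.
 */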

struct rte_rib_node *
rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node **tmp;
	struct rte_rib_node *prev = NULL;
	struct rte_rib_node *new_node = NULL;
	struct rte_rib_node *common_node = NULL;
	int d = 0;
	uint32_t common_prefix;
	uint8_t common_depth;

	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;
	ip &= rte_rib_depth_to_mask(depth);
	new_node = __rib_lookup_exact(rib, ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	new_node->ip = ip;
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find a matching or closest covering node */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * The earlier __rib_lookup_exact() returned NULL because
		 * this node is not marked valid, but it matches the
		 * requested ip/depth. Mark it valid and return it.
		 */
		if ((ip == (*tmp)->ip) && (depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}
		d = (*tmp)->depth;
		if ((d >= depth) || !is_covered(ip, (*tmp)->ip, d))
			break;
		prev = *tmp;
		tmp = (ip & (1U << (31 - d))) ? &(*tmp)->right : &(*tmp)->left;
	}
	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	common_prefix = ip ^ (*tmp)->ip;
	d = (common_prefix == 0) ? 32 : rte_clz32(common_prefix);

	common_depth = RTE_MIN(d, common_depth);
	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
	if ((common_prefix == ip) && (common_depth == depth)) {
		/* insert as a parent */
		if ((*tmp)->ip & (1U << (31 - depth)))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		common_node->ip = common_prefix;
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if ((new_node->ip & (1U << (31 - common_depth))) == 0) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
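
/*
 * Usage sketch (illustrative): add a route and attach a next hop id;
 * the rib handle and the next hop value 42 are example assumptions.
 *
 *	struct rte_rib_node *ent;
 *
 *	ent = rte_rib_insert(rib, RTE_IPV4(192, 168, 0, 0), 16);
 *	if (ent == NULL)
 *		return -rte_errno;
 *	rte_rib_set_nh(ent, 42);
 */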

int
rte_rib_get_ip(const struct rte_rib_node *node, uint32_t *ip)
{
	if (unlikely(node == NULL || ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*ip = node->ip;
	return 0;
}

int
rte_rib_get_depth(const struct rte_rib_node *node, uint8_t *depth)
{
	if (unlikely(node == NULL || depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib_get_ext(struct rte_rib_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib_get_nh(const struct rte_rib_node *node, uint64_t *nh)
{
	if (unlikely(node == NULL || nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh)
{
	if (unlikely(node == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

struct rte_rib *
rte_rib_create(const char *name, int socket_id, const struct rte_rib_conf *conf)
{
	char mem_name[RTE_RIB_NAMESIZE];
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (unlikely(name == NULL || conf == NULL || conf->max_nodes <= 0)) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RIB_LOG(ERR,
			"Cannot allocate mempool for RIB %s", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* guarantee there is no existing entry with the same name */
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
	if (unlikely(te == NULL)) {
		RIB_LOG(ERR,
			"Cannot allocate tailq entry for RIB %s", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
	if (unlikely(rib == NULL)) {
		RIB_LOG(ERR, "RIB %s memory allocation failed", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;
	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}
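
/*
 * Usage sketch (illustrative): create a RIB sized for 1 << 16 nodes with
 * no per-node user extension; the name and sizes are example values only.
 *
 *	struct rte_rib_conf conf = {
 *		.ext_sz = 0,
 *		.max_nodes = 1 << 16,
 *	};
 *	struct rte_rib *rib = rte_rib_create("example_rib",
 *		SOCKET_ID_ANY, &conf);
 *
 *	if (rib == NULL)
 *		rte_panic("RIB creation failed: %d\n", rte_errno);
 */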

struct rte_rib *
rte_rib_find_existing(const char *name)
{
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *) te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib_free(struct rte_rib *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_rib_node *tmp = NULL;

	if (rib == NULL)
		return;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	while ((tmp = rte_rib_get_nxt(rib, 0, 0, tmp,
			RTE_RIB_GET_NXT_ALL)) != NULL)
		rte_rib_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);
	rte_free(rib);
	rte_free(te);
}
537