xref: /dpdk/lib/rib/rte_rib.c (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
3  * Copyright(c) 2019 Intel Corporation
4  */
5 
6 #include <stdbool.h>
7 #include <stdint.h>
8 
9 #include <rte_eal.h>
10 #include <rte_eal_memconfig.h>
11 #include <rte_errno.h>
12 #include <rte_malloc.h>
13 #include <rte_mempool.h>
14 #include <rte_rwlock.h>
15 #include <rte_string_fns.h>
16 #include <rte_tailq.h>
17 
18 #include <rte_rib.h>
19 
20 TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
21 static struct rte_tailq_elem rte_rib_tailq = {
22 	.name = "RTE_RIB",
23 };
24 EAL_REGISTER_TAILQ(rte_rib_tailq)
25 
26 #define RTE_RIB_VALID_NODE	1
27 /* Maximum depth value possible for IPv4 RIB. */
28 #define RIB_MAXDEPTH		32
29 /* Maximum length of a RIB name. */
30 #define RTE_RIB_NAMESIZE	64
31 
32 struct rte_rib_node {
33 	struct rte_rib_node	*left;
34 	struct rte_rib_node	*right;
35 	struct rte_rib_node	*parent;
36 	uint32_t	ip;
37 	uint8_t		depth;
38 	uint8_t		flag;
39 	uint64_t	nh;
40 	__extension__ uint64_t	ext[0];
41 };
42 
43 struct rte_rib {
44 	char		name[RTE_RIB_NAMESIZE];
45 	struct rte_rib_node	*tree;
46 	struct rte_mempool	*node_pool;
47 	uint32_t		cur_nodes;
48 	uint32_t		cur_routes;
49 	uint32_t		max_nodes;
50 };
51 
52 static inline bool
53 is_valid_node(struct rte_rib_node *node)
54 {
55 	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
56 }
57 
58 static inline bool
59 is_right_node(struct rte_rib_node *node)
60 {
61 	return node->parent->right == node;
62 }
63 
64 /*
65  * Check if ip1 is covered by ip2/depth prefix
66  */
67 static inline bool
68 is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth)
69 {
70 	return ((ip1 ^ ip2) & rte_rib_depth_to_mask(depth)) == 0;
71 }
72 
73 static inline struct rte_rib_node *
74 get_nxt_node(struct rte_rib_node *node, uint32_t ip)
75 {
76 	return (ip & (1 << (31 - node->depth))) ? node->right : node->left;
77 }
78 
79 static struct rte_rib_node *
80 node_alloc(struct rte_rib *rib)
81 {
82 	struct rte_rib_node *ent;
83 	int ret;
84 
85 	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
86 	if (unlikely(ret != 0))
87 		return NULL;
88 	++rib->cur_nodes;
89 	return ent;
90 }
91 
92 static void
93 node_free(struct rte_rib *rib, struct rte_rib_node *ent)
94 {
95 	--rib->cur_nodes;
96 	rte_mempool_put(rib->node_pool, ent);
97 }
98 
99 struct rte_rib_node *
100 rte_rib_lookup(struct rte_rib *rib, uint32_t ip)
101 {
102 	struct rte_rib_node *cur, *prev = NULL;
103 
104 	if (rib == NULL) {
105 		rte_errno = EINVAL;
106 		return NULL;
107 	}
108 
109 	cur = rib->tree;
110 	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
111 		if (is_valid_node(cur))
112 			prev = cur;
113 		cur = get_nxt_node(cur, ip);
114 	}
115 	return prev;
116 }
117 
118 struct rte_rib_node *
119 rte_rib_lookup_parent(struct rte_rib_node *ent)
120 {
121 	struct rte_rib_node *tmp;
122 
123 	if (ent == NULL)
124 		return NULL;
125 	tmp = ent->parent;
126 	while ((tmp != NULL) &&	!is_valid_node(tmp))
127 		tmp = tmp->parent;
128 	return tmp;
129 }
130 
131 static struct rte_rib_node *
132 __rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
133 {
134 	struct rte_rib_node *cur;
135 
136 	cur = rib->tree;
137 	while (cur != NULL) {
138 		if ((cur->ip == ip) && (cur->depth == depth) &&
139 				is_valid_node(cur))
140 			return cur;
141 		if ((cur->depth > depth) ||
142 				!is_covered(ip, cur->ip, cur->depth))
143 			break;
144 		cur = get_nxt_node(cur, ip);
145 	}
146 	return NULL;
147 }
148 
149 struct rte_rib_node *
150 rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
151 {
152 	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
153 		rte_errno = EINVAL;
154 		return NULL;
155 	}
156 	ip &= rte_rib_depth_to_mask(depth);
157 
158 	return __rib_lookup_exact(rib, ip, depth);
159 }
160 
161 /*
162  *  Traverses on subtree and retrieves more specific routes
163  *  for a given in args ip/depth prefix
164  *  last = NULL means the first invocation
165  */
166 struct rte_rib_node *
167 rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip,
168 	uint8_t depth, struct rte_rib_node *last, int flag)
169 {
170 	struct rte_rib_node *tmp, *prev = NULL;
171 
172 	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
173 		rte_errno = EINVAL;
174 		return NULL;
175 	}
176 
177 	if (last == NULL) {
178 		tmp = rib->tree;
179 		while ((tmp) && (tmp->depth < depth))
180 			tmp = get_nxt_node(tmp, ip);
181 	} else {
182 		tmp = last;
183 		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
184 				(tmp->parent->right == NULL))) {
185 			tmp = tmp->parent;
186 			if (is_valid_node(tmp) &&
187 					(is_covered(tmp->ip, ip, depth) &&
188 					(tmp->depth > depth)))
189 				return tmp;
190 		}
191 		tmp = (tmp->parent) ? tmp->parent->right : NULL;
192 	}
193 	while (tmp) {
194 		if (is_valid_node(tmp) &&
195 				(is_covered(tmp->ip, ip, depth) &&
196 				(tmp->depth > depth))) {
197 			prev = tmp;
198 			if (flag == RTE_RIB_GET_NXT_COVER)
199 				return prev;
200 		}
201 		tmp = (tmp->left) ? tmp->left : tmp->right;
202 	}
203 	return prev;
204 }
205 
206 void
207 rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth)
208 {
209 	struct rte_rib_node *cur, *prev, *child;
210 
211 	cur = rte_rib_lookup_exact(rib, ip, depth);
212 	if (cur == NULL)
213 		return;
214 
215 	--rib->cur_routes;
216 	cur->flag &= ~RTE_RIB_VALID_NODE;
217 	while (!is_valid_node(cur)) {
218 		if ((cur->left != NULL) && (cur->right != NULL))
219 			return;
220 		child = (cur->left == NULL) ? cur->right : cur->left;
221 		if (child != NULL)
222 			child->parent = cur->parent;
223 		if (cur->parent == NULL) {
224 			rib->tree = child;
225 			node_free(rib, cur);
226 			return;
227 		}
228 		if (cur->parent->left == cur)
229 			cur->parent->left = child;
230 		else
231 			cur->parent->right = child;
232 		prev = cur;
233 		cur = cur->parent;
234 		node_free(rib, prev);
235 	}
236 }
237 
238 struct rte_rib_node *
239 rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
240 {
241 	struct rte_rib_node **tmp;
242 	struct rte_rib_node *prev = NULL;
243 	struct rte_rib_node *new_node = NULL;
244 	struct rte_rib_node *common_node = NULL;
245 	int d = 0;
246 	uint32_t common_prefix;
247 	uint8_t common_depth;
248 
249 	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
250 		rte_errno = EINVAL;
251 		return NULL;
252 	}
253 
254 	tmp = &rib->tree;
255 	ip &= rte_rib_depth_to_mask(depth);
256 	new_node = __rib_lookup_exact(rib, ip, depth);
257 	if (new_node != NULL) {
258 		rte_errno = EEXIST;
259 		return NULL;
260 	}
261 
262 	new_node = node_alloc(rib);
263 	if (new_node == NULL) {
264 		rte_errno = ENOMEM;
265 		return NULL;
266 	}
267 	new_node->left = NULL;
268 	new_node->right = NULL;
269 	new_node->parent = NULL;
270 	new_node->ip = ip;
271 	new_node->depth = depth;
272 	new_node->flag = RTE_RIB_VALID_NODE;
273 
274 	/* traverse down the tree to find matching node or closest matching */
275 	while (1) {
276 		/* insert as the last node in the branch */
277 		if (*tmp == NULL) {
278 			*tmp = new_node;
279 			new_node->parent = prev;
280 			++rib->cur_routes;
281 			return *tmp;
282 		}
283 		/*
284 		 * Intermediate node found.
285 		 * Previous rte_rib_lookup_exact() returned NULL
286 		 * but node with proper search criteria is found.
287 		 * Validate intermediate node and return.
288 		 */
289 		if ((ip == (*tmp)->ip) && (depth == (*tmp)->depth)) {
290 			node_free(rib, new_node);
291 			(*tmp)->flag |= RTE_RIB_VALID_NODE;
292 			++rib->cur_routes;
293 			return *tmp;
294 		}
295 		d = (*tmp)->depth;
296 		if ((d >= depth) || !is_covered(ip, (*tmp)->ip, d))
297 			break;
298 		prev = *tmp;
299 		tmp = (ip & (1 << (31 - d))) ? &(*tmp)->right : &(*tmp)->left;
300 	}
301 	/* closest node found, new_node should be inserted in the middle */
302 	common_depth = RTE_MIN(depth, (*tmp)->depth);
303 	common_prefix = ip ^ (*tmp)->ip;
304 	d = (common_prefix == 0) ? 32 : __builtin_clz(common_prefix);
305 
306 	common_depth = RTE_MIN(d, common_depth);
307 	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
308 	if ((common_prefix == ip) && (common_depth == depth)) {
309 		/* insert as a parent */
310 		if ((*tmp)->ip & (1 << (31 - depth)))
311 			new_node->right = *tmp;
312 		else
313 			new_node->left = *tmp;
314 		new_node->parent = (*tmp)->parent;
315 		(*tmp)->parent = new_node;
316 		*tmp = new_node;
317 	} else {
318 		/* create intermediate node */
319 		common_node = node_alloc(rib);
320 		if (common_node == NULL) {
321 			node_free(rib, new_node);
322 			rte_errno = ENOMEM;
323 			return NULL;
324 		}
325 		common_node->ip = common_prefix;
326 		common_node->depth = common_depth;
327 		common_node->flag = 0;
328 		common_node->parent = (*tmp)->parent;
329 		new_node->parent = common_node;
330 		(*tmp)->parent = common_node;
331 		if ((new_node->ip & (1 << (31 - common_depth))) == 0) {
332 			common_node->left = new_node;
333 			common_node->right = *tmp;
334 		} else {
335 			common_node->left = *tmp;
336 			common_node->right = new_node;
337 		}
338 		*tmp = common_node;
339 	}
340 	++rib->cur_routes;
341 	return new_node;
342 }
343 
344 int
345 rte_rib_get_ip(const struct rte_rib_node *node, uint32_t *ip)
346 {
347 	if ((node == NULL) || (ip == NULL)) {
348 		rte_errno = EINVAL;
349 		return -1;
350 	}
351 	*ip = node->ip;
352 	return 0;
353 }
354 
355 int
356 rte_rib_get_depth(const struct rte_rib_node *node, uint8_t *depth)
357 {
358 	if ((node == NULL) || (depth == NULL)) {
359 		rte_errno = EINVAL;
360 		return -1;
361 	}
362 	*depth = node->depth;
363 	return 0;
364 }
365 
366 void *
367 rte_rib_get_ext(struct rte_rib_node *node)
368 {
369 	return (node == NULL) ? NULL : &node->ext[0];
370 }
371 
372 int
373 rte_rib_get_nh(const struct rte_rib_node *node, uint64_t *nh)
374 {
375 	if ((node == NULL) || (nh == NULL)) {
376 		rte_errno = EINVAL;
377 		return -1;
378 	}
379 	*nh = node->nh;
380 	return 0;
381 }
382 
383 int
384 rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh)
385 {
386 	if (node == NULL) {
387 		rte_errno = EINVAL;
388 		return -1;
389 	}
390 	node->nh = nh;
391 	return 0;
392 }
393 
394 struct rte_rib *
395 rte_rib_create(const char *name, int socket_id, const struct rte_rib_conf *conf)
396 {
397 	char mem_name[RTE_RIB_NAMESIZE];
398 	struct rte_rib *rib = NULL;
399 	struct rte_tailq_entry *te;
400 	struct rte_rib_list *rib_list;
401 	struct rte_mempool *node_pool;
402 
403 	/* Check user arguments. */
404 	if (name == NULL || conf == NULL || conf->max_nodes <= 0) {
405 		rte_errno = EINVAL;
406 		return NULL;
407 	}
408 
409 	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
410 	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
411 		sizeof(struct rte_rib_node) + conf->ext_sz, 0, 0,
412 		NULL, NULL, NULL, NULL, socket_id, 0);
413 
414 	if (node_pool == NULL) {
415 		RTE_LOG(ERR, LPM,
416 			"Can not allocate mempool for RIB %s\n", name);
417 		return NULL;
418 	}
419 
420 	snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
421 	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
422 
423 	rte_mcfg_tailq_write_lock();
424 
425 	/* guarantee there's no existing */
426 	TAILQ_FOREACH(te, rib_list, next) {
427 		rib = (struct rte_rib *)te->data;
428 		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
429 			break;
430 	}
431 	rib = NULL;
432 	if (te != NULL) {
433 		rte_errno = EEXIST;
434 		goto exit;
435 	}
436 
437 	/* allocate tailq entry */
438 	te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
439 	if (te == NULL) {
440 		RTE_LOG(ERR, LPM,
441 			"Can not allocate tailq entry for RIB %s\n", name);
442 		rte_errno = ENOMEM;
443 		goto exit;
444 	}
445 
446 	/* Allocate memory to store the RIB data structures. */
447 	rib = rte_zmalloc_socket(mem_name,
448 		sizeof(struct rte_rib),	RTE_CACHE_LINE_SIZE, socket_id);
449 	if (rib == NULL) {
450 		RTE_LOG(ERR, LPM, "RIB %s memory allocation failed\n", name);
451 		rte_errno = ENOMEM;
452 		goto free_te;
453 	}
454 
455 	rte_strlcpy(rib->name, name, sizeof(rib->name));
456 	rib->tree = NULL;
457 	rib->max_nodes = conf->max_nodes;
458 	rib->node_pool = node_pool;
459 	te->data = (void *)rib;
460 	TAILQ_INSERT_TAIL(rib_list, te, next);
461 
462 	rte_mcfg_tailq_write_unlock();
463 
464 	return rib;
465 
466 free_te:
467 	rte_free(te);
468 exit:
469 	rte_mcfg_tailq_write_unlock();
470 	rte_mempool_free(node_pool);
471 
472 	return NULL;
473 }
474 
475 struct rte_rib *
476 rte_rib_find_existing(const char *name)
477 {
478 	struct rte_rib *rib = NULL;
479 	struct rte_tailq_entry *te;
480 	struct rte_rib_list *rib_list;
481 
482 	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
483 
484 	rte_mcfg_tailq_read_lock();
485 	TAILQ_FOREACH(te, rib_list, next) {
486 		rib = (struct rte_rib *) te->data;
487 		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
488 			break;
489 	}
490 	rte_mcfg_tailq_read_unlock();
491 
492 	if (te == NULL) {
493 		rte_errno = ENOENT;
494 		return NULL;
495 	}
496 
497 	return rib;
498 }
499 
500 void
501 rte_rib_free(struct rte_rib *rib)
502 {
503 	struct rte_tailq_entry *te;
504 	struct rte_rib_list *rib_list;
505 	struct rte_rib_node *tmp = NULL;
506 
507 	if (rib == NULL)
508 		return;
509 
510 	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
511 
512 	rte_mcfg_tailq_write_lock();
513 
514 	/* find our tailq entry */
515 	TAILQ_FOREACH(te, rib_list, next) {
516 		if (te->data == (void *)rib)
517 			break;
518 	}
519 	if (te != NULL)
520 		TAILQ_REMOVE(rib_list, te, next);
521 
522 	rte_mcfg_tailq_write_unlock();
523 
524 	while ((tmp = rte_rib_get_nxt(rib, 0, 0, tmp,
525 			RTE_RIB_GET_NXT_ALL)) != NULL)
526 		rte_rib_remove(rib, tmp->ip, tmp->depth);
527 
528 	rte_mempool_free(rib->node_pool);
529 	rte_free(rib);
530 	rte_free(te);
531 }
532