/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>
#include <sys/queue.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib.h>

TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib_tailq = {
	.name = "RTE_RIB",
};
EAL_REGISTER_TAILQ(rte_rib_tailq)

#define RTE_RIB_VALID_NODE	1
/* Maximum depth value possible for IPv4 RIB. */
#define RIB_MAXDEPTH		32
/* Maximum length of a RIB name. */
#define RTE_RIB_NAMESIZE	64
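
/*
 * The RIB is organized as a path-compressed binary trie keyed by the
 * IPv4 prefix: at a node of depth d, bit (31 - d) of the address
 * selects the left (0) or right (1) child. Nodes holding an actual
 * route have RTE_RIB_VALID_NODE set in their flag; intermediate nodes
 * created only to join two diverging branches keep flag == 0.
 */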

struct rte_rib_node {
	struct rte_rib_node	*left;
	struct rte_rib_node	*right;
	struct rte_rib_node	*parent;
	uint32_t	ip;
	uint8_t		depth;
	uint8_t		flag;
	uint64_t	nh;
	__extension__ uint64_t ext[];
};

struct rte_rib {
	char		name[RTE_RIB_NAMESIZE];
	struct rte_rib_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t		cur_nodes;
	uint32_t		cur_routes;
	uint32_t		max_nodes;
};

static inline bool
is_valid_node(const struct rte_rib_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(const struct rte_rib_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by the ip2/depth prefix, i.e. whether
 * the depth most significant bits of ip1 and ip2 are equal.
 */
static inline bool
is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth)
{
	return ((ip1 ^ ip2) & rte_rib_depth_to_mask(depth)) == 0;
}
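
/*
 * Example: with depth == 24 the mask is 0xffffff00, so 10.0.0.1 is
 * covered by 10.0.0.0/24 while 10.0.1.1 is not.
 */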

static inline struct rte_rib_node *
get_nxt_node(struct rte_rib_node *node, uint32_t ip)
{
	if (node->depth == RIB_MAXDEPTH)
		return NULL;
	/* bit (31 - depth) of the address selects the child branch */
	return (ip & (1 << (31 - node->depth))) ? node->right : node->left;
}

static struct rte_rib_node *
node_alloc(struct rte_rib *rib)
{
	struct rte_rib_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib *rib, struct rte_rib_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}
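
/*
 * Longest-prefix-match lookup: descend the trie while the current node
 * still covers the address, remembering the last valid node visited.
 */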

struct rte_rib_node *
rte_rib_lookup(struct rte_rib *rib, uint32_t ip)
{
	struct rte_rib_node *cur, *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	cur = rib->tree;
	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}

struct rte_rib_node *
rte_rib_lookup_parent(struct rte_rib_node *ent)
{
	struct rte_rib_node *tmp;

	if (ent == NULL)
		return NULL;
	tmp = ent->parent;
	while ((tmp != NULL) && !is_valid_node(tmp))
		tmp = tmp->parent;
	return tmp;
}

static struct rte_rib_node *
__rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur;

	cur = rib->tree;
	while (cur != NULL) {
		if ((cur->ip == ip) && (cur->depth == depth) &&
				is_valid_node(cur))
			return cur;
		if ((cur->depth > depth) ||
				!is_covered(ip, cur->ip, cur->depth))
			break;
		cur = get_nxt_node(cur, ip);
	}
	return NULL;
}

struct rte_rib_node *
rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	ip &= rte_rib_depth_to_mask(depth);

	return __rib_lookup_exact(rib, ip, depth);
}

/*
 * Traverse a subtree and retrieve more specific routes
 * for a given ip/depth prefix;
 * last == NULL means the first invocation.
 */
struct rte_rib_node *
rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip,
	uint8_t depth, struct rte_rib_node *last, int flag)
{
	struct rte_rib_node *tmp, *prev = NULL;

	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (last == NULL) {
		/* first invocation: descend to the subtree covering ip/depth */
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, ip);
	} else {
		/* resume: climb until an unvisited right branch is reachable */
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left) ? tmp->left : tmp->right;
	}
	return prev;
}
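
/*
 * A minimal usage sketch: iterate over all routes more specific than
 * 10.0.0.0/8 in an existing RIB (RTE_IPV4() comes from rte_ip.h;
 * the rib handle and process() are placeholders):
 *
 *	struct rte_rib_node *node = NULL;
 *
 *	while ((node = rte_rib_get_nxt(rib, RTE_IPV4(10, 0, 0, 0), 8,
 *			node, RTE_RIB_GET_NXT_ALL)) != NULL)
 *		process(node);
 */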

void
rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur, *prev, *child;

	cur = rte_rib_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	/* collapse invalid nodes that are left with fewer than two children */
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}
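
/*
 * Insertion handles three cases: the new prefix extends an existing
 * branch and is appended as a leaf, it matches an existing invalid
 * intermediate node which is simply marked valid, or it diverges
 * mid-branch, in which case it either becomes the parent of the
 * closest node or a new intermediate node is allocated to join the
 * two subtrees.
 */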

struct rte_rib_node *
rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node **tmp;
	struct rte_rib_node *prev = NULL;
	struct rte_rib_node *new_node = NULL;
	struct rte_rib_node *common_node = NULL;
	int d = 0;
	uint32_t common_prefix;
	uint8_t common_depth;

	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;
	ip &= rte_rib_depth_to_mask(depth);
	new_node = __rib_lookup_exact(rib, ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	new_node->ip = ip;
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find a matching node or the closest match */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * __rib_lookup_exact() above returned NULL because this node
		 * is not valid, but its ip/depth match the search criteria.
		 * Mark the intermediate node valid and return it.
		 */
		if ((ip == (*tmp)->ip) && (depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}
		d = (*tmp)->depth;
		if ((d >= depth) || !is_covered(ip, (*tmp)->ip, d))
			break;
		prev = *tmp;
		tmp = (ip & (1 << (31 - d))) ? &(*tmp)->right : &(*tmp)->left;
	}
	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	common_prefix = ip ^ (*tmp)->ip;
	d = (common_prefix == 0) ? 32 : rte_clz32(common_prefix);

	common_depth = RTE_MIN(d, common_depth);
	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
	if ((common_prefix == ip) && (common_depth == depth)) {
		/* insert as a parent */
		if ((*tmp)->ip & (1 << (31 - depth)))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		common_node->ip = common_prefix;
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if ((new_node->ip & (1 << (31 - common_depth))) == 0) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
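
/*
 * Worked example: inserting 10.0.0.0/24 and then 10.0.1.0/24 into an
 * empty RIB first places 10.0.0.0/24 at the root; the second insert
 * finds that the prefixes diverge at bit 23 (0x0a000000 ^ 0x0a000100
 * has 23 leading zeros), so an invalid intermediate node 10.0.0.0/23
 * is created with 10.0.0.0/24 as its left child and 10.0.1.0/24 as
 * its right child.
 */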

int
rte_rib_get_ip(const struct rte_rib_node *node, uint32_t *ip)
{
	if (unlikely(node == NULL || ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*ip = node->ip;
	return 0;
}

int
rte_rib_get_depth(const struct rte_rib_node *node, uint8_t *depth)
{
	if (unlikely(node == NULL || depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib_get_ext(struct rte_rib_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib_get_nh(const struct rte_rib_node *node, uint64_t *nh)
{
	if (unlikely(node == NULL || nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh)
{
	if (unlikely(node == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

struct rte_rib *
rte_rib_create(const char *name, int socket_id, const struct rte_rib_conf *conf)
{
	char mem_name[RTE_RIB_NAMESIZE];
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (unlikely(name == NULL || conf == NULL || conf->max_nodes <= 0)) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate mempool for RIB %s\n", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* make sure a RIB with this name does not already exist */
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
	if (unlikely(te == NULL)) {
		RTE_LOG(ERR, LPM,
			"Can not allocate tailq entry for RIB %s\n", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
	if (unlikely(rib == NULL)) {
		RTE_LOG(ERR, LPM, "RIB %s memory allocation failed\n", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;
	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}
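
/*
 * A minimal end-to-end sketch of the library's API (the name, node
 * limit and next hop value are illustrative; RTE_IPV4() comes from
 * rte_ip.h):
 *
 *	struct rte_rib_conf conf = { .ext_sz = 0, .max_nodes = 1024 };
 *	struct rte_rib *rib;
 *	struct rte_rib_node *node;
 *
 *	rib = rte_rib_create("example", SOCKET_ID_ANY, &conf);
 *	if (rib != NULL) {
 *		node = rte_rib_insert(rib, RTE_IPV4(10, 0, 0, 0), 24);
 *		if (node != NULL)
 *			rte_rib_set_nh(node, 1);
 *		node = rte_rib_lookup(rib, RTE_IPV4(10, 0, 0, 1));
 *		rte_rib_free(rib);
 *	}
 */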

struct rte_rib *
rte_rib_find_existing(const char *name)
{
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib_free(struct rte_rib *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_rib_node *tmp = NULL;

	if (rib == NULL)
		return;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	/* remove all remaining routes, freeing their nodes */
	while ((tmp = rte_rib_get_nxt(rib, 0, 0, tmp,
			RTE_RIB_GET_NXT_ALL)) != NULL)
		rte_rib_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);
	rte_free(rib);
	rte_free(te);
}