/* xref: /dpdk/lib/rib/rte_rib.c (revision 9ad3a41ab2a10db0059e1decdbf3ec038f348e08) */
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib.h>

TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib_tailq = {
	.name = "RTE_RIB",
};
EAL_REGISTER_TAILQ(rte_rib_tailq)

/* Node flag: the node carries an actual route, not only a branch point. */
#define RTE_RIB_VALID_NODE	1
/* Maximum depth value possible for IPv4 RIB. */
#define RIB_MAXDEPTH		32
/* Maximum length of a RIB name. */
#define RTE_RIB_NAMESIZE	64

struct rte_rib_node {
	struct rte_rib_node	*left;
	struct rte_rib_node	*right;
	struct rte_rib_node	*parent;
	uint32_t	ip;
	uint8_t		depth;
	uint8_t		flag;
	uint64_t	nh;
	/* Variable-size user extension area, conf->ext_sz bytes. */
	__extension__ uint64_t	ext[0];
};

struct rte_rib {
	char		name[RTE_RIB_NAMESIZE];
	struct rte_rib_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t		cur_nodes;	/* nodes allocated, incl. intermediate */
	uint32_t		cur_routes;	/* valid routes in the tree */
	uint32_t		max_nodes;
};

static inline bool
is_valid_node(struct rte_rib_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(struct rte_rib_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by ip2/depth prefix
 */
static inline bool
is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth)
{
	return ((ip1 ^ ip2) & rte_rib_depth_to_mask(depth)) == 0;
}
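
/*
 * Worked example: 10.0.0.42 (0x0A00002A) is covered by 10.0.0.0/24
 * (0x0A000000), since the two addresses agree in their top 24 bits:
 * (0x0A00002A ^ 0x0A000000) & 0xFFFFFF00 == 0x2A & 0xFFFFFF00 == 0.
 */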

/*
 * Get the child on the lookup path for ip. A /32 node cannot have a
 * child on the path; bail out early, which also avoids the undefined
 * shift by (31 - 32) below.
 */
static inline struct rte_rib_node *
get_nxt_node(struct rte_rib_node *node, uint32_t ip)
{
	if (node->depth == RIB_MAXDEPTH)
		return NULL;
	return (ip & (1 << (31 - node->depth))) ? node->right : node->left;
}

/* Allocate a tree node from the RIB's backing mempool. */
static struct rte_rib_node *
node_alloc(struct rte_rib *rib)
{
	struct rte_rib_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

/* Return a tree node to the RIB's backing mempool. */
static void
node_free(struct rte_rib *rib, struct rte_rib_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

/*
 * Longest prefix match: walk down while the current node's prefix
 * covers ip, remembering the last valid (route-carrying) node seen.
 */
struct rte_rib_node *
rte_rib_lookup(struct rte_rib *rib, uint32_t ip)
{
	struct rte_rib_node *cur, *prev = NULL;

	if (rib == NULL) {
		rte_errno = EINVAL;
		return NULL;
	}

	cur = rib->tree;
	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}
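
/*
 * Illustrative sketch, not part of the library: a longest-prefix-match
 * query followed by next-hop retrieval. Assumes "rib" was created with
 * rte_rib_create() and populated with rte_rib_insert().
 *
 *	uint64_t next_hop;
 *	struct rte_rib_node *ent = rte_rib_lookup(rib, ip);
 *
 *	if (ent != NULL && rte_rib_get_nh(ent, &next_hop) == 0) {
 *		// best matching route found; next_hop holds its next hop
 *	}
 */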

struct rte_rib_node *
rte_rib_lookup_parent(struct rte_rib_node *ent)
{
	struct rte_rib_node *tmp;

	if (ent == NULL)
		return NULL;
	tmp = ent->parent;
	while ((tmp != NULL) && !is_valid_node(tmp))
		tmp = tmp->parent;
	return tmp;
}

static struct rte_rib_node *
__rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur;

	cur = rib->tree;
	while (cur != NULL) {
		if ((cur->ip == ip) && (cur->depth == depth) &&
				is_valid_node(cur))
			return cur;
		if ((cur->depth > depth) ||
				!is_covered(ip, cur->ip, cur->depth))
			break;
		cur = get_nxt_node(cur, ip);
	}
	return NULL;
}

struct rte_rib_node *
rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	ip &= rte_rib_depth_to_mask(depth);

	return __rib_lookup_exact(rib, ip, depth);
}

/*
 * Traverse the subtree covered by the ip/depth prefix and retrieve
 * the more specific routes under it.
 * Pass last = NULL on the first invocation; pass the previously
 * returned node to resume the walk.
 */
struct rte_rib_node *
rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip,
	uint8_t depth, struct rte_rib_node *last, int flag)
{
	struct rte_rib_node *tmp, *prev = NULL;

	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left) ? tmp->left : tmp->right;
	}
	return prev;
}
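
/*
 * Illustrative sketch, not part of the library: enumerate every route
 * more specific than 192.0.2.0/24. RTE_IPV4() comes from rte_ip.h and
 * is used here only to keep the example readable.
 *
 *	struct rte_rib_node *ent = NULL;
 *	uint32_t prefix = RTE_IPV4(192, 0, 2, 0);
 *
 *	while ((ent = rte_rib_get_nxt(rib, prefix, 24, ent,
 *			RTE_RIB_GET_NXT_ALL)) != NULL) {
 *		// ent visits each valid route covered by 192.0.2.0/24
 *	}
 */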

/*
 * Invalidate the route, then collapse any chain of nodes that no
 * longer carry a route and have fewer than two children.
 */
void
rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur, *prev, *child;

	cur = rte_rib_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}
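
/*
 * Illustrative sketch, not part of the library: deleting one route.
 * More specific routes (e.g. 10.0.0.0/25) stay in the tree; nodes left
 * with no route and at most one child are collapsed automatically.
 *
 *	rte_rib_remove(rib, RTE_IPV4(10, 0, 0, 0), 24);
 */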

struct rte_rib_node *
rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node **tmp;
	struct rte_rib_node *prev = NULL;
	struct rte_rib_node *new_node = NULL;
	struct rte_rib_node *common_node = NULL;
	int d = 0;
	uint32_t common_prefix;
	uint8_t common_depth;

	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;
	ip &= rte_rib_depth_to_mask(depth);
	new_node = __rib_lookup_exact(rib, ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	new_node->ip = ip;
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;
	/* traverse down the tree to find a matching node or the closest covering one */
	while (1) {
		/* reached the end of a branch: insert as a leaf */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * An intermediate node matches exactly:
		 * __rib_lookup_exact() above returned NULL only because
		 * this node was not marked valid.
		 * Mark it valid and return it.
		 */
		if ((ip == (*tmp)->ip) && (depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}
		d = (*tmp)->depth;
		if ((d >= depth) || !is_covered(ip, (*tmp)->ip, d))
			break;
		prev = *tmp;
		tmp = (ip & (1 << (31 - d))) ? &(*tmp)->right : &(*tmp)->left;
	}
	/* the closest node was found; insert new_node on the path to it */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	common_prefix = ip ^ (*tmp)->ip;
	d = (common_prefix == 0) ? 32 : __builtin_clz(common_prefix);

	common_depth = RTE_MIN(d, common_depth);
	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
	if ((common_prefix == ip) && (common_depth == depth)) {
		/* insert as a parent */
		if ((*tmp)->ip & (1 << (31 - depth)))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create an intermediate node joining the two branches */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		common_node->ip = common_prefix;
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if ((new_node->ip & (1 << (31 - common_depth))) == 0) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
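
/*
 * Illustrative sketch, not part of the library: adding a route and
 * attaching a next hop; "nh" is an arbitrary example value.
 *
 *	struct rte_rib_node *ent;
 *
 *	ent = rte_rib_insert(rib, RTE_IPV4(10, 0, 0, 0), 24);
 *	if (ent == NULL) {
 *		// rte_errno is EEXIST, ENOMEM or EINVAL
 *	} else {
 *		rte_rib_set_nh(ent, nh);
 *	}
 */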

int
rte_rib_get_ip(const struct rte_rib_node *node, uint32_t *ip)
{
	if ((node == NULL) || (ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*ip = node->ip;
	return 0;
}

int
rte_rib_get_depth(const struct rte_rib_node *node, uint8_t *depth)
{
	if ((node == NULL) || (depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib_get_ext(struct rte_rib_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}
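
/*
 * Illustrative sketch, not part of the library: struct my_route_data is
 * a hypothetical user type. The extension area is usable only when the
 * RIB was created with conf.ext_sz = sizeof(struct my_route_data).
 *
 *	struct my_route_data { uint64_t packet_count; };
 *
 *	struct my_route_data *md = rte_rib_get_ext(node);
 *	if (md != NULL)
 *		md->packet_count++;
 */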

int
rte_rib_get_nh(const struct rte_rib_node *node, uint64_t *nh)
{
	if ((node == NULL) || (nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh)
{
	if (node == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

struct rte_rib *
rte_rib_create(const char *name, int socket_id, const struct rte_rib_conf *conf)
{
	char mem_name[RTE_RIB_NAMESIZE];
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (name == NULL || conf == NULL || conf->max_nodes <= 0) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RTE_LOG(ERR, LPM,
			"Cannot allocate mempool for RIB %s\n", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* make sure a RIB with this name does not already exist */
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		RTE_LOG(ERR, LPM,
			"Cannot allocate tailq entry for RIB %s\n", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
	if (rib == NULL) {
		RTE_LOG(ERR, LPM, "RIB %s memory allocation failed\n", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;
	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}
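
/*
 * Illustrative sketch, not part of the library: creating and destroying
 * a RIB. The name and max_nodes value are arbitrary examples.
 *
 *	struct rte_rib_conf conf = {
 *		.max_nodes = 1 << 16,
 *		.ext_sz = 0,
 *	};
 *	struct rte_rib *rib = rte_rib_create("example_rib",
 *		SOCKET_ID_ANY, &conf);
 *
 *	if (rib == NULL) {
 *		// rte_errno is EINVAL, EEXIST or ENOMEM
 *	}
 *	// ... use the rib ...
 *	rte_rib_free(rib);
 */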

struct rte_rib *
rte_rib_find_existing(const char *name)
{
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib_free(struct rte_rib *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_rib_node *tmp = NULL;

	if (rib == NULL)
		return;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	while ((tmp = rte_rib_get_nxt(rib, 0, 0, tmp,
			RTE_RIB_GET_NXT_ALL)) != NULL)
		rte_rib_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);
	rte_free(rib);
	rte_free(te);
}
530