/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>
#include <sys/queue.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib.h>

#include "rib_log.h"

RTE_LOG_REGISTER_DEFAULT(rib_logtype, INFO);

TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib_tailq = {
	.name = "RTE_RIB",
};
EAL_REGISTER_TAILQ(rte_rib_tailq)

#define RTE_RIB_VALID_NODE 1
/* Maximum depth value possible for IPv4 RIB. */
#define RIB_MAXDEPTH 32
/* Maximum length of a RIB name. */
#define RTE_RIB_NAMESIZE 64

struct rte_rib_node {
	struct rte_rib_node *left;
	struct rte_rib_node *right;
	struct rte_rib_node *parent;
	uint32_t ip;		/* prefix address, masked to depth bits */
	uint8_t depth;		/* prefix length */
	uint8_t flag;		/* RTE_RIB_VALID_NODE if this is a real route */
	uint64_t nh;		/* user-supplied next hop value */
	uint64_t ext[];		/* optional per-node user extension area */
};

struct rte_rib {
	char name[RTE_RIB_NAMESIZE];
	struct rte_rib_node *tree;	/* root of the binary trie */
	struct rte_mempool *node_pool;	/* backing storage for nodes */
	uint32_t cur_nodes;		/* nodes currently allocated */
	uint32_t cur_routes;		/* valid routes currently stored */
	uint32_t max_nodes;
};

static inline bool
is_valid_node(const struct rte_rib_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(const struct rte_rib_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by ip2/depth prefix
 */
static inline bool
is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth)
{
	return ((ip1 ^ ip2) & rte_rib_depth_to_mask(depth)) == 0;
}
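
/*
 * Worked example (values are illustrative, not from the original source):
 * 10.0.1.5 falls inside 10.0.0.0/8, so
 * is_covered(0x0a000105, 0x0a000000, 8) returns true; with depth 24 the
 * third octets differ (1 vs 0) and it returns false.
 */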

static inline struct rte_rib_node *
get_nxt_node(struct rte_rib_node *node, uint32_t ip)
{
	if (node->depth == RIB_MAXDEPTH)
		return NULL;
	/* the first bit of ip past this node's prefix picks the child:
	 * 0 descends left, 1 descends right.
	 */
	return (ip & (1 << (31 - node->depth))) ? node->right : node->left;
}

static struct rte_rib_node *
node_alloc(struct rte_rib *rib)
{
	struct rte_rib_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib *rib, struct rte_rib_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib_node *
rte_rib_lookup(struct rte_rib *rib, uint32_t ip)
{
	struct rte_rib_node *cur, *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	cur = rib->tree;
	/* descend while the node still covers ip, remembering the last
	 * valid node seen: that is the longest matching prefix.
	 */
	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}
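
/*
 * Usage sketch (illustrative, not part of the original file): perform a
 * longest-prefix-match lookup and fetch the route's next hop. The
 * function name and error convention are assumptions for the example.
 */
int
example_lpm_lookup(struct rte_rib *rib, uint32_t ip, uint64_t *next_hop)
{
	struct rte_rib_node *ent = rte_rib_lookup(rib, ip);

	if (ent == NULL)
		return -1;	/* no route covers ip */
	return rte_rib_get_nh(ent, next_hop);
}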

struct rte_rib_node *
rte_rib_lookup_parent(struct rte_rib_node *ent)
{
	struct rte_rib_node *tmp;

	if (ent == NULL)
		return NULL;
	tmp = ent->parent;
	while ((tmp != NULL) && !is_valid_node(tmp))
		tmp = tmp->parent;
	return tmp;
}

static struct rte_rib_node *
__rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur;

	cur = rib->tree;
	while (cur != NULL) {
		if ((cur->ip == ip) && (cur->depth == depth) &&
				is_valid_node(cur))
			return cur;
		if ((cur->depth > depth) ||
				!is_covered(ip, cur->ip, cur->depth))
			break;
		cur = get_nxt_node(cur, ip);
	}
	return NULL;
}

struct rte_rib_node *
rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	ip &= rte_rib_depth_to_mask(depth);

	return __rib_lookup_exact(rib, ip, depth);
}

/*
 * Traverse a subtree and retrieve the routes that are more specific
 * than the given ip/depth prefix.
 * Pass last == NULL on the first invocation; pass the previously
 * returned node to continue the traversal.
 */
struct rte_rib_node *
rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip,
	uint8_t depth, struct rte_rib_node *last, int flag)
{
	struct rte_rib_node *tmp, *prev = NULL;

	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left) ? tmp->left : tmp->right;
	}
	return prev;
}
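
/*
 * Usage sketch (illustrative assumption, not in the original source):
 * count every route that is more specific than a given prefix by
 * iterating with rte_rib_get_nxt(). The function name is hypothetical;
 * rte_rib_free() below uses the same iteration pattern.
 */
uint32_t
example_count_subroutes(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *ent = NULL;
	uint32_t cnt = 0;

	while ((ent = rte_rib_get_nxt(rib, ip, depth, ent,
			RTE_RIB_GET_NXT_ALL)) != NULL)
		cnt++;
	return cnt;
}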

void
rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur, *prev, *child;

	cur = rte_rib_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	/* walk up and free invalid nodes, splicing out any that are left
	 * with fewer than two children; a two-child node must stay as a
	 * fork point.
	 */
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}

struct rte_rib_node *
rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node **tmp;
	struct rte_rib_node *prev = NULL;
	struct rte_rib_node *new_node = NULL;
	struct rte_rib_node *common_node = NULL;
	int d = 0;
	uint32_t common_prefix;
	uint8_t common_depth;

	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;
	ip &= rte_rib_depth_to_mask(depth);
	new_node = __rib_lookup_exact(rib, ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	new_node->ip = ip;
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find a matching node or the closest match */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * An intermediate node with the exact ip/depth already
		 * exists: the earlier __rib_lookup_exact() returned NULL
		 * only because the node is not marked valid.
		 * Mark it valid and return it instead of the new node.
		 */
		if ((ip == (*tmp)->ip) && (depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}
		d = (*tmp)->depth;
		if ((d >= depth) || !is_covered(ip, (*tmp)->ip, d))
			break;
		prev = *tmp;
		tmp = (ip & (1 << (31 - d))) ? &(*tmp)->right : &(*tmp)->left;
	}
	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	common_prefix = ip ^ (*tmp)->ip;
	d = (common_prefix == 0) ? 32 : rte_clz32(common_prefix);

	common_depth = RTE_MIN(d, common_depth);
	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
	if ((common_prefix == ip) && (common_depth == depth)) {
		/* insert as a parent */
		if ((*tmp)->ip & (1 << (31 - depth)))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create an intermediate node covering both prefixes */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		common_node->ip = common_prefix;
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if ((new_node->ip & (1 << (31 - common_depth))) == 0) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
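
/*
 * Usage sketch (illustrative, not part of the original file): insert a
 * route and attach a next hop in one step. The function name and the
 * negative-errno return convention are assumptions for the example.
 */
int
example_add_route(struct rte_rib *rib, uint32_t ip, uint8_t depth,
	uint64_t next_hop)
{
	struct rte_rib_node *ent = rte_rib_insert(rib, ip, depth);

	if (ent == NULL)
		return -rte_errno;	/* EEXIST, ENOMEM or EINVAL */
	return rte_rib_set_nh(ent, next_hop);
}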

int
rte_rib_get_ip(const struct rte_rib_node *node, uint32_t *ip)
{
	if (unlikely(node == NULL || ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*ip = node->ip;
	return 0;
}

int
rte_rib_get_depth(const struct rte_rib_node *node, uint8_t *depth)
{
	if (unlikely(node == NULL || depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib_get_ext(struct rte_rib_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib_get_nh(const struct rte_rib_node *node, uint64_t *nh)
{
	if (unlikely(node == NULL || nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh)
{
	if (unlikely(node == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

struct rte_rib *
rte_rib_create(const char *name, int socket_id, const struct rte_rib_conf *conf)
{
	char mem_name[RTE_RIB_NAMESIZE];
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (unlikely(name == NULL || conf == NULL || conf->max_nodes <= 0)) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RIB_LOG(ERR,
			"Can not allocate mempool for RIB %s", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* guarantee there's no existing RIB with the same name */
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
	if (unlikely(te == NULL)) {
		RIB_LOG(ERR,
			"Can not allocate tailq entry for RIB %s", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
	if (unlikely(rib == NULL)) {
		RIB_LOG(ERR, "RIB %s memory allocation failed", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;
	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}
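
/*
 * Usage sketch (illustrative assumption, not in the original source):
 * create a RIB sized for up to 1 << 16 nodes with no per-node
 * extension data. The table name and sizes are example values.
 */
struct rte_rib *
example_create_rib(void)
{
	struct rte_rib_conf conf = {
		.ext_sz = 0,		/* no extra bytes per node */
		.max_nodes = 1 << 16,	/* capacity of the node mempool */
	};

	return rte_rib_create("example_rib", SOCKET_ID_ANY, &conf);
}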

struct rte_rib *
rte_rib_find_existing(const char *name)
{
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib_free(struct rte_rib *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_rib_node *tmp = NULL;

	if (rib == NULL)
		return;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	while ((tmp = rte_rib_get_nxt(rib, 0, 0, tmp,
			RTE_RIB_GET_NXT_ALL)) != NULL)
		rte_rib_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);
	rte_free(rib);
	rte_free(te);
}