/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>

#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_rwlock.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib.h>

TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib_tailq = {
	.name = "RTE_RIB",
};
EAL_REGISTER_TAILQ(rte_rib_tailq)

#define RTE_RIB_VALID_NODE	1
/* Maximum depth value possible for IPv4 RIB. */
#define RIB_MAXDEPTH		32
/* Maximum length of a RIB name. */
#define RTE_RIB_NAMESIZE	64

struct rte_rib_node {
	struct rte_rib_node	*left;
	struct rte_rib_node	*right;
	struct rte_rib_node	*parent;
	uint32_t	ip;
	uint8_t		depth;
	uint8_t		flag;
	uint64_t	nh;
	__extension__ uint64_t	ext[0];
};

struct rte_rib {
	char		name[RTE_RIB_NAMESIZE];
	struct rte_rib_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t	cur_nodes;
	uint32_t	cur_routes;
	uint32_t	max_nodes;
};

static inline bool
is_valid_node(struct rte_rib_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(struct rte_rib_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by ip2/depth prefix
 */
static inline bool
is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth)
{
	return ((ip1 ^ ip2) & rte_rib_depth_to_mask(depth)) == 0;
}

static inline struct rte_rib_node *
get_nxt_node(struct rte_rib_node *node, uint32_t ip)
{
	/*
	 * A /32 node cannot have children; stop the traversal here to
	 * avoid shifting by a negative amount below.
	 */
	if (node->depth == RIB_MAXDEPTH)
		return NULL;

	return (ip & (1 << (31 - node->depth))) ? node->right : node->left;
}

static struct rte_rib_node *
node_alloc(struct rte_rib *rib)
{
	struct rte_rib_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib *rib, struct rte_rib_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib_node *
rte_rib_lookup(struct rte_rib *rib, uint32_t ip)
{
	struct rte_rib_node *cur, *prev = NULL;

	if (rib == NULL) {
		rte_errno = EINVAL;
		return NULL;
	}

	cur = rib->tree;
	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}

struct rte_rib_node *
rte_rib_lookup_parent(struct rte_rib_node *ent)
{
	struct rte_rib_node *tmp;

	if (ent == NULL)
		return NULL;
	tmp = ent->parent;
	while ((tmp != NULL) && !is_valid_node(tmp))
		tmp = tmp->parent;
	return tmp;
}

static struct rte_rib_node *
__rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur;

	cur = rib->tree;
	while (cur != NULL) {
		if ((cur->ip == ip) && (cur->depth == depth) &&
				is_valid_node(cur))
			return cur;
		if ((cur->depth > depth) ||
				!is_covered(ip, cur->ip, cur->depth))
			break;
		cur = get_nxt_node(cur, ip);
	}
	return NULL;
}

struct rte_rib_node *
rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	ip &= rte_rib_depth_to_mask(depth);

	return __rib_lookup_exact(rib, ip, depth);
}

/*
 * Traverse the subtree and retrieve more specific routes
 * for the given ip/depth prefix.
 * last == NULL means this is the first invocation.
 */
struct rte_rib_node *
rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip,
	uint8_t depth, struct rte_rib_node *last, int flag)
{
	struct rte_rib_node *tmp, *prev = NULL;

	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left) ? tmp->left : tmp->right;
	}
	return prev;
}

void
rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur, *prev, *child;

	cur = rte_rib_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}

struct rte_rib_node *
rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node **tmp;
	struct rte_rib_node *prev = NULL;
	struct rte_rib_node *new_node = NULL;
	struct rte_rib_node *common_node = NULL;
	int d = 0;
	uint32_t common_prefix;
	uint8_t common_depth;

	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;
	ip &= rte_rib_depth_to_mask(depth);
	new_node = __rib_lookup_exact(rib, ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	new_node->ip = ip;
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find matching node or closest matching */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * Previous rte_rib_lookup_exact() returned NULL
		 * but node with proper search criteria is found.
		 * Validate intermediate node and return.
		 */
		if ((ip == (*tmp)->ip) && (depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}
		d = (*tmp)->depth;
		if ((d >= depth) || !is_covered(ip, (*tmp)->ip, d))
			break;
		prev = *tmp;
		tmp = (ip & (1 << (31 - d))) ? &(*tmp)->right : &(*tmp)->left;
	}
	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	common_prefix = ip ^ (*tmp)->ip;
	d = (common_prefix == 0) ? 32 : __builtin_clz(common_prefix);

	common_depth = RTE_MIN(d, common_depth);
	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
	if ((common_prefix == ip) && (common_depth == depth)) {
		/* insert as a parent */
		if ((*tmp)->ip & (1 << (31 - depth)))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		common_node->ip = common_prefix;
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if ((new_node->ip & (1 << (31 - common_depth))) == 0) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}

int
rte_rib_get_ip(const struct rte_rib_node *node, uint32_t *ip)
{
	if ((node == NULL) || (ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*ip = node->ip;
	return 0;
}

int
rte_rib_get_depth(const struct rte_rib_node *node, uint8_t *depth)
{
	if ((node == NULL) || (depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib_get_ext(struct rte_rib_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib_get_nh(const struct rte_rib_node *node, uint64_t *nh)
{
	if ((node == NULL) || (nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh)
{
	if (node == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

struct rte_rib *
rte_rib_create(const char *name, int socket_id, const struct rte_rib_conf *conf)
{
	char mem_name[RTE_RIB_NAMESIZE];
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (name == NULL || conf == NULL || conf->max_nodes <= 0) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate mempool for RIB %s\n", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* make sure a RIB with this name does not already exist */
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate tailq entry for RIB %s\n", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
	if (rib == NULL) {
		RTE_LOG(ERR, LPM, "RIB %s memory allocation failed\n", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;
	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}

struct rte_rib *
rte_rib_find_existing(const char *name)
{
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *) te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib_free(struct rte_rib *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_rib_node *tmp = NULL;

	if (rib == NULL)
		return;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	while ((tmp = rte_rib_get_nxt(rib, 0, 0, tmp,
			RTE_RIB_GET_NXT_ALL)) != NULL)
		rte_rib_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);
	rte_free(rib);
	rte_free(te);
}
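
/*
 * Usage sketch (not part of the library): a minimal illustration of how the
 * API above fits together -- create a RIB, insert a prefix, attach a next
 * hop, perform a longest-prefix-match lookup, and free everything.
 * Assumptions: EAL is already initialized, and the guard macro
 * RIB_USAGE_EXAMPLE, the function name, the RIB name "example_rib" and the
 * 10.0.0.0/8 prefix are illustrative only.
 */
#ifdef RIB_USAGE_EXAMPLE
static int
rib_usage_example(void)
{
	struct rte_rib_conf conf = {
		.ext_sz = 0,		/* no per-node extension block */
		.max_nodes = 1 << 10,	/* capacity of the node mempool */
	};
	struct rte_rib *rib;
	struct rte_rib_node *node;
	uint64_t nh;
	int ret = -1;

	rib = rte_rib_create("example_rib", SOCKET_ID_ANY, &conf);
	if (rib == NULL)
		return -rte_errno;

	/* Insert 10.0.0.0/8 and attach next hop id 1. */
	node = rte_rib_insert(rib, 0x0a000000u, 8);
	if (node == NULL) {
		rte_rib_free(rib);
		return -rte_errno;
	}
	rte_rib_set_nh(node, 1);

	/* LPM lookup for 10.1.2.3 returns the covering 10.0.0.0/8 node. */
	node = rte_rib_lookup(rib, 0x0a010203u);
	if (node != NULL && rte_rib_get_nh(node, &nh) == 0)
		ret = (int)nh;	/* expected: 1, the next hop set above */

	rte_rib_free(rib);
	return ret;
}
#endif /* RIB_USAGE_EXAMPLE */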