/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <sys/queue.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib6.h>

#include "rib_log.h"

#define RTE_RIB_VALID_NODE	1
#define RIB6_MAXDEPTH		128
/* Maximum length of a RIB6 name. */
#define RTE_RIB6_NAMESIZE	64

TAILQ_HEAD(rte_rib6_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib6_tailq = {
	.name = "RTE_RIB6",
};
EAL_REGISTER_TAILQ(rte_rib6_tailq)

struct rte_rib6_node {
	struct rte_rib6_node	*left;
	struct rte_rib6_node	*right;
	struct rte_rib6_node	*parent;
	uint64_t		nh;
	uint8_t			ip[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t			depth;
	uint8_t			flag;
	__extension__ uint64_t	ext[];
};

struct rte_rib6 {
	char			name[RTE_RIB6_NAMESIZE];
	struct rte_rib6_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t		cur_nodes;
	uint32_t		cur_routes;
	int			max_nodes;
};

static inline bool
is_valid_node(const struct rte_rib6_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(const struct rte_rib6_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by ip2/depth prefix
 */
static inline bool
is_covered(const uint8_t ip1[RTE_RIB6_IPV6_ADDR_SIZE],
		const uint8_t ip2[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	int i;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		if ((ip1[i] ^ ip2[i]) & get_msk_part(depth, i))
			return false;

	return true;
}

static inline int
get_dir(const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	uint8_t index, msk;

	/*
	 * depth & 127 clamps depth to values that will not
	 * read off the end of ip.
	 * depth is the number of bits deep into ip at which to branch;
	 * the byte index advances once per 8 bits of depth, so the low
	 * 3 bits of depth do not affect which byte of ip is read.
	 */
	index = (depth & INT8_MAX) / CHAR_BIT;

	/*
	 * msk is the bitmask used to extract the bit used to decide the
	 * direction of the next step of the binary search.
	 */
	msk = 1 << (7 - (depth & 7));

	return (ip[index] & msk) != 0;
}
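
/*
 * Worked example for get_dir() above (illustrative): with depth == 10,
 * index = (10 & 127) / 8 = 1 and msk = 1 << (7 - (10 & 7)) = 0x20,
 * so the branch direction is taken from bit 0x20 of ip[1], i.e. the
 * 11th most significant bit of the address.
 */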

static inline struct rte_rib6_node *
get_nxt_node(struct rte_rib6_node *node,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	if (node->depth == RIB6_MAXDEPTH)
		return NULL;

	return (get_dir(ip, node->depth)) ? node->right : node->left;
}

static struct rte_rib6_node *
node_alloc(struct rte_rib6 *rib)
{
	struct rte_rib6_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib6 *rib, struct rte_rib6_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib6_node *
rte_rib6_lookup(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	struct rte_rib6_node *cur;
	struct rte_rib6_node *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}

struct rte_rib6_node *
rte_rib6_lookup_parent(struct rte_rib6_node *ent)
{
	struct rte_rib6_node *tmp;

	if (ent == NULL)
		return NULL;

	tmp = ent->parent;
	while ((tmp != NULL) && (!is_valid_node(tmp)))
		tmp = tmp->parent;

	return tmp;
}

struct rte_rib6_node *
rte_rib6_lookup_exact(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if (unlikely(rib == NULL || ip == NULL || depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	while (cur != NULL) {
		if (rte_rib6_is_equal(cur->ip, tmp_ip) &&
				(cur->depth == depth) &&
				is_valid_node(cur))
			return cur;

		if (!(is_covered(tmp_ip, cur->ip, cur->depth)) ||
				(cur->depth >= depth))
			break;

		cur = get_nxt_node(cur, tmp_ip);
	}

	return NULL;
}
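
/*
 * Usage sketch (illustrative; the 'rib' handle and the address are
 * hypothetical): rte_rib6_lookup() is a longest prefix match, while
 * rte_rib6_lookup_exact() matches only a prefix of exactly the given
 * depth. Trailing bytes of the initializer are zero-filled.
 *
 *	uint8_t dst[RTE_RIB6_IPV6_ADDR_SIZE] = { 0x20, 0x01, 0x0d, 0xb8 };
 *	struct rte_rib6_node *best, *exact;
 *	uint64_t nh;
 *
 *	best = rte_rib6_lookup(rib, dst);	// most specific covering route
 *	exact = rte_rib6_lookup_exact(rib, dst, 32);	// 2001:db8::/32 only
 *	if (best != NULL && rte_rib6_get_nh(best, &nh) == 0)
 *		; // forward using nh
 */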

/*
 * Traverse the subtree of the given ip/depth prefix and
 * retrieve the more specific routes it covers.
 * last == NULL means the first invocation.
 */
struct rte_rib6_node *
rte_rib6_get_nxt(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE],
	uint8_t depth, struct rte_rib6_node *last, int flag)
{
	struct rte_rib6_node *tmp, *prev = NULL;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if (unlikely(rib == NULL || ip == NULL || depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, tmp_ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, tmp_ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent != NULL) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, tmp_ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB6_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left != NULL) ? tmp->left : tmp->right;
	}
	return prev;
}
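
/*
 * Usage sketch (illustrative; the 'rib' handle is hypothetical):
 * enumerate all routes strictly more specific than 2001:db8::/32.
 * NULL as 'last' starts the walk; each returned node is fed back in
 * to resume it.
 *
 *	uint8_t pfx[RTE_RIB6_IPV6_ADDR_SIZE] = { 0x20, 0x01, 0x0d, 0xb8 };
 *	struct rte_rib6_node *tmp = NULL;
 *	uint8_t depth;
 *
 *	while ((tmp = rte_rib6_get_nxt(rib, pfx, 32, tmp,
 *			RTE_RIB6_GET_NXT_ALL)) != NULL) {
 *		rte_rib6_get_depth(tmp, &depth);
 *		// ... process the sub-route of that depth
 *	}
 */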

void
rte_rib6_remove(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur, *prev, *child;

	cur = rte_rib6_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}

struct rte_rib6_node *
rte_rib6_insert(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node **tmp;
	struct rte_rib6_node *prev = NULL;
	struct rte_rib6_node *new_node = NULL;
	struct rte_rib6_node *common_node = NULL;
	uint8_t common_prefix[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i, d;
	uint8_t common_depth, ip_xor;

	if (unlikely(rib == NULL || ip == NULL || depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	new_node = rte_rib6_lookup_exact(rib, tmp_ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	rte_rib6_copy_addr(new_node->ip, tmp_ip);
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find matching node or closest matching */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * Previous rte_rib6_lookup_exact() returned NULL
		 * but node with proper search criteria is found.
		 * Validate intermediate node and return.
		 */
		if (rte_rib6_is_equal(tmp_ip, (*tmp)->ip) &&
				(depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}

		if (!is_covered(tmp_ip, (*tmp)->ip, (*tmp)->depth) ||
				((*tmp)->depth >= depth)) {
			break;
		}
		prev = *tmp;

		tmp = (get_dir(tmp_ip, (*tmp)->depth)) ? &(*tmp)->right :
				&(*tmp)->left;
	}

	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	for (i = 0, d = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++) {
		ip_xor = tmp_ip[i] ^ (*tmp)->ip[i];
		if (ip_xor == 0)
			d += 8;
		else {
			d += rte_clz32(ip_xor << 24);
			break;
		}
	}

	common_depth = RTE_MIN(d, common_depth);

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		common_prefix[i] = tmp_ip[i] & get_msk_part(common_depth, i);

	if (rte_rib6_is_equal(common_prefix, tmp_ip) &&
			(common_depth == depth)) {
		/* insert as a parent */
		if (get_dir((*tmp)->ip, depth))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		rte_rib6_copy_addr(common_node->ip, common_prefix);
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if (get_dir((*tmp)->ip, common_depth) == 1) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}

int
rte_rib6_get_ip(const struct rte_rib6_node *node,
	uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	if (unlikely(node == NULL || ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	rte_rib6_copy_addr(ip, node->ip);
	return 0;
}

int
rte_rib6_get_depth(const struct rte_rib6_node *node, uint8_t *depth)
{
	if (unlikely(node == NULL || depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib6_get_ext(struct rte_rib6_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib6_get_nh(const struct rte_rib6_node *node, uint64_t *nh)
{
	if (unlikely(node == NULL || nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib6_set_nh(struct rte_rib6_node *node, uint64_t nh)
{
	if (unlikely(node == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}
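
/*
 * Usage sketch (illustrative; the 'rib' handle and the next hop value
 * are hypothetical): add 2001:db8::/32 and attach next hop id 5 to it.
 *
 *	uint8_t pfx[RTE_RIB6_IPV6_ADDR_SIZE] = { 0x20, 0x01, 0x0d, 0xb8 };
 *	struct rte_rib6_node *node;
 *
 *	node = rte_rib6_insert(rib, pfx, 32);
 *	if (node == NULL)
 *		; // rte_errno is EINVAL, EEXIST or ENOMEM
 *	else
 *		rte_rib6_set_nh(node, 5);
 */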

struct rte_rib6 *
rte_rib6_create(const char *name, int socket_id,
	const struct rte_rib6_conf *conf)
{
	char mem_name[RTE_RIB6_NAMESIZE];
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (unlikely(name == NULL || conf == NULL || conf->max_nodes <= 0)) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib6_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RIB_LOG(ERR,
			"Can not allocate mempool for RIB6 %s", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB6_%s", name);
	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* guarantee there is no existing RIB6 with this name */
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB6_TAILQ_ENTRY", sizeof(*te), 0);
	if (unlikely(te == NULL)) {
		RIB_LOG(ERR,
			"Can not allocate tailq entry for RIB6 %s", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB6 data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib6), RTE_CACHE_LINE_SIZE, socket_id);
	if (unlikely(rib == NULL)) {
		RIB_LOG(ERR, "RIB6 %s memory allocation failed", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;

	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}

struct rte_rib6 *
rte_rib6_find_existing(const char *name)
{
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;

	if (unlikely(name == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib6_free(struct rte_rib6 *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_rib6_node *tmp = NULL;
	uint8_t zero_ip[RTE_RIB6_IPV6_ADDR_SIZE] = { 0 };

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib6_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	/*
	 * Walk the whole tree from the zero address at depth 0. A literal
	 * 0 here would be rejected by the ip == NULL check in
	 * rte_rib6_get_nxt(), leaving the walk as dead code.
	 */
	while ((tmp = rte_rib6_get_nxt(rib, zero_ip, 0, tmp,
			RTE_RIB6_GET_NXT_ALL)) != NULL)
		rte_rib6_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);

	rte_free(rib);
	rte_free(te);
}
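
/*
 * Lifecycle sketch (illustrative; the name and sizing below are
 * hypothetical, not part of this library):
 *
 *	struct rte_rib6_conf conf = {
 *		.ext_sz = 0,		// no per-node user extension
 *		.max_nodes = 1 << 16,	// capacity of the node mempool
 *	};
 *	struct rte_rib6 *rib = rte_rib6_create("example", SOCKET_ID_ANY,
 *		&conf);
 *
 *	if (rib != NULL) {
 *		// ... rte_rib6_insert()/rte_rib6_lookup()/rte_rib6_remove()
 *		rte_rib6_free(rib);
 *	}
 */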