/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/queue.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib6.h>

#include "rib_log.h"

#define RTE_RIB_VALID_NODE	1
/* Maximum length of a RIB6 name. */
#define RTE_RIB6_NAMESIZE	64

TAILQ_HEAD(rte_rib6_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib6_tailq = {
	.name = "RTE_RIB6",
};
EAL_REGISTER_TAILQ(rte_rib6_tailq)

struct rte_rib6_node {
	struct rte_rib6_node *left;
	struct rte_rib6_node *right;
	struct rte_rib6_node *parent;
	uint64_t nh;
	struct rte_ipv6_addr ip;
	uint8_t depth;
	uint8_t flag;
	uint64_t ext[];
};

struct rte_rib6 {
	char name[RTE_RIB6_NAMESIZE];
	struct rte_rib6_node *tree;
	struct rte_mempool *node_pool;
	uint32_t cur_nodes;
	uint32_t cur_routes;
	int max_nodes;
};

static inline bool
is_valid_node(const struct rte_rib6_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(const struct rte_rib6_node *node)
{
	return node->parent->right == node;
}

static inline int
get_dir(const struct rte_ipv6_addr *ip, uint8_t depth)
{
	uint8_t index, msk;

	/*
	 * depth & 127 (INT8_MAX) clamps depth to values that will not
	 * read off the end of ip.
	 * depth is the number of bits deep into ip to traverse; each
	 * byte of ip covers 8 of those bits, so the last 3 bits of
	 * depth are irrelevant to the byte index into ip.
	 */
	index = (depth & INT8_MAX) / CHAR_BIT;

	/*
	 * msk is the bitmask used to extract the bit that decides the
	 * direction of the next step of the binary search.
	 */
	msk = 1 << (7 - (depth & 7));

	return (ip->a[index] & msk) != 0;
}

static inline struct rte_rib6_node *
get_nxt_node(struct rte_rib6_node *node,
	const struct rte_ipv6_addr *ip)
{
	if (node->depth == RTE_IPV6_MAX_DEPTH)
		return NULL;

	return (get_dir(ip, node->depth)) ? node->right : node->left;
}
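/*
 * Worked example for get_dir() above (illustrative only, not part of
 * the library): for depth == 10, index = 10 / 8 = 1 and
 * msk = 1 << (7 - (10 & 7)) = 0x20, so the decision bit is bit 10 of
 * the address, counting from the most significant bit of byte 0.
 * A set bit steers the trie walk to the right child, a clear bit to
 * the left.
 */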
static struct rte_rib6_node *
node_alloc(struct rte_rib6 *rib)
{
	struct rte_rib6_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib6 *rib, struct rte_rib6_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib6_node *
rte_rib6_lookup(struct rte_rib6 *rib,
	const struct rte_ipv6_addr *ip)
{
	struct rte_rib6_node *cur;
	struct rte_rib6_node *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	while ((cur != NULL) && rte_ipv6_addr_eq_prefix(ip, &cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}

struct rte_rib6_node *
rte_rib6_lookup_parent(struct rte_rib6_node *ent)
{
	struct rte_rib6_node *tmp;

	if (ent == NULL)
		return NULL;

	tmp = ent->parent;
	while ((tmp != NULL) && (!is_valid_node(tmp)))
		tmp = tmp->parent;

	return tmp;
}

struct rte_rib6_node *
rte_rib6_lookup_exact(struct rte_rib6 *rib,
	const struct rte_ipv6_addr *ip, uint8_t depth)
{
	struct rte_rib6_node *cur;
	struct rte_ipv6_addr tmp_ip;

	if (unlikely(rib == NULL || ip == NULL || depth > RTE_IPV6_MAX_DEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	tmp_ip = *ip;
	rte_ipv6_addr_mask(&tmp_ip, depth);

	while (cur != NULL) {
		if (rte_ipv6_addr_eq(&cur->ip, &tmp_ip) &&
				(cur->depth == depth) &&
				is_valid_node(cur))
			return cur;

		if (!rte_ipv6_addr_eq_prefix(&tmp_ip, &cur->ip, cur->depth) ||
				(cur->depth >= depth))
			break;

		cur = get_nxt_node(cur, &tmp_ip);
	}

	return NULL;
}

/*
 * Traverses a subtree to retrieve the routes that are more specific
 * than the ip/depth prefix passed in the arguments.
 * last == NULL means the first invocation.
 */
struct rte_rib6_node *
rte_rib6_get_nxt(struct rte_rib6 *rib,
	const struct rte_ipv6_addr *ip,
	uint8_t depth, struct rte_rib6_node *last, int flag)
{
	struct rte_rib6_node *tmp, *prev = NULL;
	struct rte_ipv6_addr tmp_ip;

	if (unlikely(rib == NULL || ip == NULL || depth > RTE_IPV6_MAX_DEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp_ip = *ip;
	rte_ipv6_addr_mask(&tmp_ip, depth);

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, &tmp_ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(rte_ipv6_addr_eq_prefix(&tmp->ip, &tmp_ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent != NULL) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(rte_ipv6_addr_eq_prefix(&tmp->ip, &tmp_ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB6_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left != NULL) ? tmp->left : tmp->right;
	}
	return prev;
}
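/*
 * Usage sketch for rte_rib6_get_nxt() (illustrative only, not part of
 * the library): enumerating every route more specific than a given
 * prefix. rib, prefix and depth are placeholders supplied by the
 * caller; rte_rib6_free() below uses the same idiom.
 *
 *	struct rte_rib6_node *node = NULL;
 *
 *	while ((node = rte_rib6_get_nxt(rib, &prefix, depth, node,
 *			RTE_RIB6_GET_NXT_ALL)) != NULL) {
 *		// process node here
 *	}
 */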
void
rte_rib6_remove(struct rte_rib6 *rib,
	const struct rte_ipv6_addr *ip, uint8_t depth)
{
	struct rte_rib6_node *cur, *prev, *child;

	cur = rte_rib6_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}

struct rte_rib6_node *
rte_rib6_insert(struct rte_rib6 *rib,
	const struct rte_ipv6_addr *ip, uint8_t depth)
{
	struct rte_rib6_node **tmp;
	struct rte_rib6_node *prev = NULL;
	struct rte_rib6_node *new_node = NULL;
	struct rte_rib6_node *common_node = NULL;
	struct rte_ipv6_addr common_prefix;
	struct rte_ipv6_addr tmp_ip;
	int i, d;
	uint8_t common_depth, ip_xor;

	if (unlikely((rib == NULL || ip == NULL || depth > RTE_IPV6_MAX_DEPTH))) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;

	tmp_ip = *ip;
	rte_ipv6_addr_mask(&tmp_ip, depth);

	new_node = rte_rib6_lookup_exact(rib, &tmp_ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	new_node->ip = tmp_ip;
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find a matching node or the closest match */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * The previous rte_rib6_lookup_exact() returned NULL,
		 * but a node matching the search criteria is found.
		 * Validate the intermediate node and return.
		 */
		if (rte_ipv6_addr_eq(&tmp_ip, &(*tmp)->ip) && (depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}

		if (!rte_ipv6_addr_eq_prefix(&tmp_ip, &(*tmp)->ip, (*tmp)->depth) ||
				((*tmp)->depth >= depth)) {
			break;
		}
		prev = *tmp;

		tmp = (get_dir(&tmp_ip, (*tmp)->depth)) ? &(*tmp)->right :
				&(*tmp)->left;
	}

	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	for (i = 0, d = 0; i < RTE_IPV6_ADDR_SIZE; i++) {
		ip_xor = tmp_ip.a[i] ^ (*tmp)->ip.a[i];
		if (ip_xor == 0)
			d += 8;
		else {
			d += rte_clz32(ip_xor << 24);
			break;
		}
	}

	common_depth = RTE_MIN(d, common_depth);

	common_prefix = tmp_ip;
	rte_ipv6_addr_mask(&common_prefix, common_depth);

	if (rte_ipv6_addr_eq(&common_prefix, &tmp_ip) &&
			(common_depth == depth)) {
		/* insert as a parent */
		if (get_dir(&(*tmp)->ip, depth))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create an intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		common_node->ip = common_prefix;
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if (get_dir(&(*tmp)->ip, common_depth) == 1) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
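/*
 * Usage sketch for rte_rib6_insert() (illustrative only, not part of
 * the library): adding a route and attaching a next hop to it with
 * the setter defined below. rib, prefix and next_hop_id are
 * placeholders supplied by the caller.
 *
 *	struct rte_rib6_node *node;
 *
 *	node = rte_rib6_insert(rib, &prefix, 48);
 *	if (node != NULL)
 *		rte_rib6_set_nh(node, next_hop_id);
 *	else if (rte_errno == EEXIST)
 *		; // the prefix is already in the table
 */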
int
rte_rib6_get_ip(const struct rte_rib6_node *node,
	struct rte_ipv6_addr *ip)
{
	if (unlikely(node == NULL || ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*ip = node->ip;
	return 0;
}

int
rte_rib6_get_depth(const struct rte_rib6_node *node, uint8_t *depth)
{
	if (unlikely(node == NULL || depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib6_get_ext(struct rte_rib6_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib6_get_nh(const struct rte_rib6_node *node, uint64_t *nh)
{
	if (unlikely(node == NULL || nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib6_set_nh(struct rte_rib6_node *node, uint64_t nh)
{
	if (unlikely(node == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

struct rte_rib6 *
rte_rib6_create(const char *name, int socket_id,
	const struct rte_rib6_conf *conf)
{
	char mem_name[RTE_RIB6_NAMESIZE];
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (unlikely(name == NULL || conf == NULL || conf->max_nodes <= 0)) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib6_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RIB_LOG(ERR,
			"Can not allocate mempool for RIB6 %s", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB6_%s", name);
	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* make sure a RIB6 with this name does not already exist */
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB6_TAILQ_ENTRY", sizeof(*te), 0);
	if (unlikely(te == NULL)) {
		RIB_LOG(ERR,
			"Can not allocate tailq entry for RIB6 %s", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB6 data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib6), RTE_CACHE_LINE_SIZE, socket_id);
	if (unlikely(rib == NULL)) {
		RIB_LOG(ERR, "RIB6 %s memory allocation failed", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;

	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}
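/*
 * Usage sketch for rte_rib6_create() (illustrative only, not part of
 * the library): creating a table, installing ::/0 as a default route
 * and looking an address up. The table name, pool size and next-hop
 * value are arbitrary placeholders, and dst stands for a destination
 * address filled in by the caller.
 *
 *	struct rte_rib6_conf conf = { .ext_sz = 0, .max_nodes = 1 << 16 };
 *	struct rte_ipv6_addr any = { 0 };
 *	struct rte_rib6_node *node;
 *	struct rte_rib6 *rib;
 *	uint64_t nh;
 *
 *	rib = rte_rib6_create("example", SOCKET_ID_ANY, &conf);
 *	if (rib == NULL)
 *		return;
 *	node = rte_rib6_insert(rib, &any, 0);
 *	if (node != NULL)
 *		rte_rib6_set_nh(node, 1);
 *	node = rte_rib6_lookup(rib, &dst);
 *	if (node != NULL)
 *		rte_rib6_get_nh(node, &nh);
 *	rte_rib6_free(rib);
 */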
struct rte_rib6 *
rte_rib6_find_existing(const char *name)
{
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;

	if (unlikely(name == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib6_free(struct rte_rib6 *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_rib6_node *tmp = NULL;
	struct rte_ipv6_addr zero_ip = { 0 };

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib6_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	/*
	 * Walk all routes starting from ::/0. Passing a NULL ip here
	 * would make rte_rib6_get_nxt() fail with EINVAL, so pass a
	 * zeroed address instead.
	 */
	while ((tmp = rte_rib6_get_nxt(rib, &zero_ip, 0, tmp,
			RTE_RIB6_GET_NXT_ALL)) != NULL)
		rte_rib6_remove(rib, &tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);

	rte_free(rib);
	rte_free(te);
}
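/*
 * Usage sketch for rte_rib6_find_existing() (illustrative only, not
 * part of the library): attaching to a table created elsewhere by
 * name; "example" is a placeholder.
 *
 *	struct rte_rib6 *rib = rte_rib6_find_existing("example");
 *
 *	if (rib == NULL)
 *		return; // rte_errno is ENOENT if no such table exists
 */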