/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>
#include <sys/queue.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib.h>

#include "rib_log.h"

RTE_LOG_REGISTER_DEFAULT(rib_logtype, INFO);

TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib_tailq = {
	.name = "RTE_RIB",
};
EAL_REGISTER_TAILQ(rte_rib_tailq)

#define RTE_RIB_VALID_NODE	1
/* Maximum depth value possible for IPv4 RIB. */
#define RIB_MAXDEPTH		32
/* Maximum length of a RIB name. */
#define RTE_RIB_NAMESIZE	64

struct rte_rib_node {
	struct rte_rib_node	*left;
	struct rte_rib_node	*right;
	struct rte_rib_node	*parent;
	uint32_t		ip;
	uint8_t			depth;
	uint8_t			flag;
	uint64_t		nh;
	__extension__ uint64_t	ext[];
};

struct rte_rib {
	char			name[RTE_RIB_NAMESIZE];
	struct rte_rib_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t		cur_nodes;
	uint32_t		cur_routes;
	uint32_t		max_nodes;
};

static inline bool
is_valid_node(const struct rte_rib_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(const struct rte_rib_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by the ip2/depth prefix.
 */
static inline bool
is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth)
{
	return ((ip1 ^ ip2) & rte_rib_depth_to_mask(depth)) == 0;
}

static inline struct rte_rib_node *
get_nxt_node(struct rte_rib_node *node, uint32_t ip)
{
	if (node->depth == RIB_MAXDEPTH)
		return NULL;
	return (ip & (1 << (31 - node->depth))) ?
		node->right : node->left;
}
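
/*
 * Worked example for the helpers above (illustrative, not part of the
 * library): with ip1 = 10.0.0.42 (0x0a00002a) and the prefix
 * 10.0.0.0/24, rte_rib_depth_to_mask(24) yields 0xffffff00,
 * (ip1 ^ ip2) is 0x0000002a, and the masked XOR is 0, so is_covered()
 * returns true. get_nxt_node() then branches on bit (31 - depth) of the
 * key, i.e. the first bit just below the node's own prefix length.
 */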

static struct rte_rib_node *
node_alloc(struct rte_rib *rib)
{
	struct rte_rib_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib *rib, struct rte_rib_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib_node *
rte_rib_lookup(struct rte_rib *rib, uint32_t ip)
{
	struct rte_rib_node *cur, *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	cur = rib->tree;
	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}

struct rte_rib_node *
rte_rib_lookup_parent(struct rte_rib_node *ent)
{
	struct rte_rib_node *tmp;

	if (ent == NULL)
		return NULL;
	tmp = ent->parent;
	while ((tmp != NULL) && !is_valid_node(tmp))
		tmp = tmp->parent;
	return tmp;
}

static struct rte_rib_node *
__rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur;

	cur = rib->tree;
	while (cur != NULL) {
		if ((cur->ip == ip) && (cur->depth == depth) &&
				is_valid_node(cur))
			return cur;
		if ((cur->depth > depth) ||
				!is_covered(ip, cur->ip, cur->depth))
			break;
		cur = get_nxt_node(cur, ip);
	}
	return NULL;
}

struct rte_rib_node *
rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	ip &= rte_rib_depth_to_mask(depth);

	return __rib_lookup_exact(rib, ip, depth);
}

/*
 * Traverses a subtree and retrieves the routes that are more specific
 * than the given ip/depth prefix.
 * last == NULL means the first invocation.
 */
struct rte_rib_node *
rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip,
	uint8_t depth, struct rte_rib_node *last, int flag)
{
	struct rte_rib_node *tmp, *prev = NULL;

	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left) ? tmp->left : tmp->right;
	}
	return prev;
}

void
rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur, *prev, *child;

	cur = rte_rib_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ?
			cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}
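
/*
 * Illustrative sketch of a longest-prefix-match lookup with the API
 * above (not part of the library; "rib" is assumed to be a populated
 * handle). 0xc0000201 is 192.0.2.1 in host byte order, which is how
 * the RIB stores addresses:
 *
 *	struct rte_rib_node *ent;
 *	uint64_t next_hop;
 *
 *	ent = rte_rib_lookup(rib, 0xc0000201);
 *	if (ent != NULL && rte_rib_get_nh(ent, &next_hop) == 0)
 *		printf("next hop id: %" PRIu64 "\n", next_hop);
 */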

struct rte_rib_node *
rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node **tmp;
	struct rte_rib_node *prev = NULL;
	struct rte_rib_node *new_node = NULL;
	struct rte_rib_node *common_node = NULL;
	int d = 0;
	uint32_t common_prefix;
	uint8_t common_depth;

	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;
	ip &= rte_rib_depth_to_mask(depth);
	new_node = __rib_lookup_exact(rib, ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	new_node->ip = ip;
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find a matching node or the closest match */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * The earlier __rib_lookup_exact() returned NULL because
		 * this node is not marked valid, but it matches the
		 * search criteria. Mark it valid and return it.
		 */
		if ((ip == (*tmp)->ip) && (depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}
		d = (*tmp)->depth;
		if ((d >= depth) || !is_covered(ip, (*tmp)->ip, d))
			break;
		prev = *tmp;
		tmp = (ip & (1 << (31 - d))) ? &(*tmp)->right : &(*tmp)->left;
	}
	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	common_prefix = ip ^ (*tmp)->ip;
	d = (common_prefix == 0) ?
		32 : rte_clz32(common_prefix);

	common_depth = RTE_MIN(d, common_depth);
	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
	if ((common_prefix == ip) && (common_depth == depth)) {
		/* insert as a parent */
		if ((*tmp)->ip & (1 << (31 - depth)))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create an intermediate node on the common prefix */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		common_node->ip = common_prefix;
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if ((new_node->ip & (1 << (31 - common_depth))) == 0) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
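
/*
 * Illustrative sketch of adding a route (not part of the library;
 * "rib" and "next_hop_id" are assumed to exist). Inserting 10.0.0.0/24
 * (0x0a000000 in host byte order) and attaching a next hop:
 *
 *	struct rte_rib_node *ent;
 *
 *	ent = rte_rib_insert(rib, 0x0a000000, 24);
 *	if (ent != NULL)
 *		rte_rib_set_nh(ent, next_hop_id);
 *	else if (rte_errno == EEXIST)
 *		ent = rte_rib_lookup_exact(rib, 0x0a000000, 24);
 */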

int
rte_rib_get_ip(const struct rte_rib_node *node, uint32_t *ip)
{
	if (unlikely(node == NULL || ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*ip = node->ip;
	return 0;
}

int
rte_rib_get_depth(const struct rte_rib_node *node, uint8_t *depth)
{
	if (unlikely(node == NULL || depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib_get_ext(struct rte_rib_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib_get_nh(const struct rte_rib_node *node, uint64_t *nh)
{
	if (unlikely(node == NULL || nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh)
{
	if (unlikely(node == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}
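
/*
 * Illustrative sketch of the per-node extension area (not part of the
 * library): if the RIB was created with conf->ext_sz =
 * sizeof(struct my_route_data), rte_rib_get_ext() returns a pointer to
 * that many bytes stored after the node. "struct my_route_data" and
 * "pkt_len" are hypothetical application names:
 *
 *	struct my_route_data { uint32_t flags; uint64_t bytes; };
 *
 *	struct my_route_data *md = rte_rib_get_ext(ent);
 *	if (md != NULL)
 *		md->bytes += pkt_len;
 */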

struct rte_rib *
rte_rib_create(const char *name, int socket_id, const struct rte_rib_conf *conf)
{
	char mem_name[RTE_RIB_NAMESIZE];
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (unlikely(name == NULL || conf == NULL || conf->max_nodes <= 0)) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RIB_LOG(ERR,
			"Can not allocate mempool for RIB %s", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* guarantee there's no existing RIB with the same name */
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
	if (unlikely(te == NULL)) {
		RIB_LOG(ERR,
			"Can not allocate tailq entry for RIB %s", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
	if (unlikely(rib == NULL)) {
		RIB_LOG(ERR, "RIB %s memory allocation failed", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;
	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}

struct rte_rib *
rte_rib_find_existing(const char *name)
{
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib_free(struct rte_rib *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_rib_node *tmp = NULL;

	if (rib == NULL)
		return;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	while ((tmp = rte_rib_get_nxt(rib, 0, 0, tmp,
			RTE_RIB_GET_NXT_ALL)) != NULL)
		rte_rib_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);
	rte_free(rib);
	rte_free(te);
}
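
/*
 * Illustrative lifecycle sketch (not part of the library): create a RIB
 * sized for 1 << 16 nodes with no per-node extension, populate it, then
 * release it. The name and sizes are arbitrary:
 *
 *	struct rte_rib_conf conf = {
 *		.ext_sz = 0,
 *		.max_nodes = 1 << 16,
 *	};
 *	struct rte_rib *rib = rte_rib_create("example_rib",
 *		SOCKET_ID_ANY, &conf);
 *	if (rib == NULL)
 *		rte_panic("cannot create rib: %d\n", rte_errno);
 *	...
 *	rte_rib_free(rib);
 */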