/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib.h>

TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib_tailq = {
	.name = "RTE_RIB",
};
EAL_REGISTER_TAILQ(rte_rib_tailq)

#define RTE_RIB_VALID_NODE	1
/* Maximum depth value possible for IPv4 RIB. */
#define RIB_MAXDEPTH		32
/* Maximum length of a RIB name. */
#define RTE_RIB_NAMESIZE	64

struct rte_rib_node {
	struct rte_rib_node	*left;
	struct rte_rib_node	*right;
	struct rte_rib_node	*parent;
	uint32_t	ip;
	uint8_t		depth;
	uint8_t		flag;
	uint64_t	nh;
	__extension__ uint64_t	ext[0];
};

struct rte_rib {
	char		name[RTE_RIB_NAMESIZE];
	struct rte_rib_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t	cur_nodes;
	uint32_t	cur_routes;
	uint32_t	max_nodes;
};

static inline bool
is_valid_node(struct rte_rib_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(struct rte_rib_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by the ip2/depth prefix.
 */
static inline bool
is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth)
{
	return ((ip1 ^ ip2) & rte_rib_depth_to_mask(depth)) == 0;
}

static inline struct rte_rib_node *
get_nxt_node(struct rte_rib_node *node, uint32_t ip)
{
	/* a /32 node has no children; this also avoids an undefined
	 * negative shift below when node->depth == 32.
	 */
	if (node->depth == RIB_MAXDEPTH)
		return NULL;
	return (ip & (1 << (31 - node->depth))) ? node->right : node->left;
}
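/*
 * Illustrative worked example (not part of the library): get_nxt_node()
 * branches on the first bit *after* the node's own prefix. For a node
 * holding 10.0.0.0/8 (depth 8), the decisive bit is bit 31 - 8 = 23.
 * Looking up 10.128.0.1 (0x0A800001), bit 23 is set, so the traversal
 * descends into node->right; for 10.1.2.3 (0x0A010203) that bit is
 * clear, so the traversal descends into node->left.
 */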
static struct rte_rib_node *
node_alloc(struct rte_rib *rib)
{
	struct rte_rib_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib *rib, struct rte_rib_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib_node *
rte_rib_lookup(struct rte_rib *rib, uint32_t ip)
{
	struct rte_rib_node *cur, *prev = NULL;

	if (rib == NULL) {
		rte_errno = EINVAL;
		return NULL;
	}

	cur = rib->tree;
	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}

struct rte_rib_node *
rte_rib_lookup_parent(struct rte_rib_node *ent)
{
	struct rte_rib_node *tmp;

	if (ent == NULL)
		return NULL;
	tmp = ent->parent;
	while ((tmp != NULL) && !is_valid_node(tmp))
		tmp = tmp->parent;
	return tmp;
}

static struct rte_rib_node *
__rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur;

	cur = rib->tree;
	while (cur != NULL) {
		if ((cur->ip == ip) && (cur->depth == depth) &&
				is_valid_node(cur))
			return cur;
		if ((cur->depth > depth) ||
				!is_covered(ip, cur->ip, cur->depth))
			break;
		cur = get_nxt_node(cur, ip);
	}
	return NULL;
}

struct rte_rib_node *
rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	ip &= rte_rib_depth_to_mask(depth);

	return __rib_lookup_exact(rib, ip, depth);
}

/*
 * Traverse a subtree and retrieve the routes that are more specific than
 * the given ip/depth prefix.
 * last == NULL means this is the first invocation.
 */
struct rte_rib_node *
rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip,
	uint8_t depth, struct rte_rib_node *last, int flag)
{
	struct rte_rib_node *tmp, *prev = NULL;

	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left) ? tmp->left : tmp->right;
	}
	return prev;
}
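/*
 * Usage sketch (illustrative only, not part of the library): walking all
 * routes more specific than 10.0.0.0/8 with rte_rib_get_nxt(), passing
 * the previously returned node back in as "last". The variable names
 * below are hypothetical; RTE_IPV4() comes from <rte_ip.h>.
 *
 *	struct rte_rib_node *ent = NULL;
 *	uint32_t ip;
 *	uint8_t depth;
 *
 *	while ((ent = rte_rib_get_nxt(rib, RTE_IPV4(10, 0, 0, 0), 8, ent,
 *			RTE_RIB_GET_NXT_ALL)) != NULL) {
 *		rte_rib_get_ip(ent, &ip);
 *		rte_rib_get_depth(ent, &depth);
 *		// process ip/depth here
 *	}
 */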
void
rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur, *prev, *child;

	cur = rte_rib_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}

struct rte_rib_node *
rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node **tmp;
	struct rte_rib_node *prev = NULL;
	struct rte_rib_node *new_node = NULL;
	struct rte_rib_node *common_node = NULL;
	int d = 0;
	uint32_t common_prefix;
	uint8_t common_depth;

	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;
	ip &= rte_rib_depth_to_mask(depth);
	new_node = __rib_lookup_exact(rib, ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	new_node->ip = ip;
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find the matching or closest node */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * An intermediate node was found.
		 * The previous __rib_lookup_exact() returned NULL
		 * because this node is not marked valid, but it matches
		 * the search criteria. Validate it and return.
		 */
		if ((ip == (*tmp)->ip) && (depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}
		d = (*tmp)->depth;
		if ((d >= depth) || !is_covered(ip, (*tmp)->ip, d))
			break;
		prev = *tmp;
		tmp = (ip & (1 << (31 - d))) ? &(*tmp)->right : &(*tmp)->left;
	}
	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	common_prefix = ip ^ (*tmp)->ip;
	d = (common_prefix == 0) ? 32 : __builtin_clz(common_prefix);

	common_depth = RTE_MIN(d, common_depth);
	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
	if ((common_prefix == ip) && (common_depth == depth)) {
		/* insert as a parent */
		if ((*tmp)->ip & (1 << (31 - depth)))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create an intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		common_node->ip = common_prefix;
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if ((new_node->ip & (1 << (31 - common_depth))) == 0) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
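/*
 * Worked example for the intermediate-node path above (illustrative
 * only): with 10.0.0.0/16 already in the tree, inserting 10.1.0.0/16
 * yields common_prefix = 0x0A000000 ^ 0x0A010000 = 0x00010000, so
 * __builtin_clz() gives d = 15. The two /16 nodes then become the
 * children of an invalid (glue) node holding 10.0.0.0/15, with the new
 * 10.1.0.0/16 on the right since its bit 31 - 15 = 16 is set.
 */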
int
rte_rib_get_ip(const struct rte_rib_node *node, uint32_t *ip)
{
	if ((node == NULL) || (ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*ip = node->ip;
	return 0;
}

int
rte_rib_get_depth(const struct rte_rib_node *node, uint8_t *depth)
{
	if ((node == NULL) || (depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib_get_ext(struct rte_rib_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib_get_nh(const struct rte_rib_node *node, uint64_t *nh)
{
	if ((node == NULL) || (nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh)
{
	if (node == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}
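/*
 * Usage sketch for the per-node extension area (illustrative only): an
 * application that sets conf->ext_sz = sizeof(struct my_route_data) at
 * rte_rib_create() time can keep private data next to each route and
 * reach it through rte_rib_get_ext(). "struct my_route_data" and "md"
 * are hypothetical application names.
 *
 *	struct my_route_data *md = rte_rib_get_ext(ent);
 *	if (md != NULL)
 *		md->refcount++;
 */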
struct rte_rib *
rte_rib_create(const char *name, int socket_id, const struct rte_rib_conf *conf)
{
	char mem_name[RTE_RIB_NAMESIZE];
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (name == NULL || conf == NULL || conf->max_nodes <= 0) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RTE_LOG(ERR, LPM,
			"Cannot allocate mempool for RIB %s\n", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* make sure a RIB with this name does not already exist */
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		RTE_LOG(ERR, LPM,
			"Cannot allocate tailq entry for RIB %s\n", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
	if (rib == NULL) {
		RTE_LOG(ERR, LPM, "RIB %s memory allocation failed\n", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;
	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}

struct rte_rib *
rte_rib_find_existing(const char *name)
{
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib_free(struct rte_rib *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_rib_node *tmp = NULL;

	if (rib == NULL)
		return;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	while ((tmp = rte_rib_get_nxt(rib, 0, 0, tmp,
			RTE_RIB_GET_NXT_ALL)) != NULL)
		rte_rib_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);
	rte_free(rib);
	rte_free(te);
}
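/*
 * End-to-end usage sketch (illustrative only, not part of the library):
 * the RIB name, pool size, and next-hop value below are hypothetical;
 * RTE_IPV4() comes from <rte_ip.h> and error checks are omitted.
 *
 *	struct rte_rib_conf conf = { .ext_sz = 0, .max_nodes = 1 << 16 };
 *	struct rte_rib *rib = rte_rib_create("example", SOCKET_ID_ANY, &conf);
 *	struct rte_rib_node *ent = rte_rib_insert(rib, RTE_IPV4(10, 0, 0, 0), 8);
 *
 *	rte_rib_set_nh(ent, 42);
 *	ent = rte_rib_lookup(rib, RTE_IPV4(10, 1, 2, 3)); // finds 10.0.0.0/8
 *	rte_rib_free(rib);
 */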