/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>
#include <sys/queue.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib.h>

TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib_tailq = {
	.name = "RTE_RIB",
};
EAL_REGISTER_TAILQ(rte_rib_tailq)

#define RTE_RIB_VALID_NODE	1
/* Maximum depth value possible for IPv4 RIB. */
#define RIB_MAXDEPTH		32
/* Maximum length of a RIB name. */
#define RTE_RIB_NAMESIZE	64

struct rte_rib_node {
	struct rte_rib_node	*left;
	struct rte_rib_node	*right;
	struct rte_rib_node	*parent;
	uint32_t	ip;
	uint8_t		depth;
	uint8_t		flag;
	uint64_t	nh;
	__extension__ uint64_t	ext[];
};

struct rte_rib {
	char		name[RTE_RIB_NAMESIZE];
	struct rte_rib_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t	cur_nodes;
	uint32_t	cur_routes;
	uint32_t	max_nodes;
};

static inline bool
is_valid_node(const struct rte_rib_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(const struct rte_rib_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by ip2/depth prefix
 */
static inline bool
is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth)
{
	return ((ip1 ^ ip2) & rte_rib_depth_to_mask(depth)) == 0;
}

static inline struct rte_rib_node *
get_nxt_node(struct rte_rib_node *node, uint32_t ip)
{
	if (node->depth == RIB_MAXDEPTH)
		return NULL;
	return (ip & (1 << (31 - node->depth))) ? node->right : node->left;
}

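/*
 * Worked example for the two helpers above (illustrative only, not part
 * of the original sources): for the prefix 10.0.0.0/8, ip2 is 0x0a000000
 * and rte_rib_depth_to_mask(8) is 0xff000000, so is_covered(0x0a010203,
 * 0x0a000000, 8) holds because (0x0a010203 ^ 0x0a000000) & 0xff000000 == 0.
 * get_nxt_node() then picks the branch by testing bit (31 - depth): a node
 * at depth 8 examines bit 23 of the address, i.e. the first bit after the
 * /8 prefix.
 */
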
static struct rte_rib_node *
node_alloc(struct rte_rib *rib)
{
	struct rte_rib_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib *rib, struct rte_rib_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

/*
 * Longest prefix match: walk down while the node's prefix covers ip,
 * remembering the last valid (route-carrying) node seen.
 */
struct rte_rib_node *
rte_rib_lookup(struct rte_rib *rib, uint32_t ip)
{
	struct rte_rib_node *cur, *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	cur = rib->tree;
	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}

struct rte_rib_node *
rte_rib_lookup_parent(struct rte_rib_node *ent)
{
	struct rte_rib_node *tmp;

	if (ent == NULL)
		return NULL;
	tmp = ent->parent;
	while ((tmp != NULL) && !is_valid_node(tmp))
		tmp = tmp->parent;
	return tmp;
}

static struct rte_rib_node *
__rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur;

	cur = rib->tree;
	while (cur != NULL) {
		if ((cur->ip == ip) && (cur->depth == depth) &&
				is_valid_node(cur))
			return cur;
		if ((cur->depth > depth) ||
				!is_covered(ip, cur->ip, cur->depth))
			break;
		cur = get_nxt_node(cur, ip);
	}
	return NULL;
}

struct rte_rib_node *
rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	ip &= rte_rib_depth_to_mask(depth);

	return __rib_lookup_exact(rib, ip, depth);
}

/*
 * Traverse the subtree and retrieve more specific routes
 * for the ip/depth prefix given in the arguments.
 * last == NULL means the first invocation.
 */
struct rte_rib_node *
rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip,
	uint8_t depth, struct rte_rib_node *last, int flag)
{
	struct rte_rib_node *tmp, *prev = NULL;

	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left) ? tmp->left : tmp->right;
	}
	return prev;
}

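/*
 * A minimal usage sketch for rte_rib_get_nxt() (illustrative only; the
 * prefix is made up and RTE_IPV4() is assumed to come from rte_ip.h):
 * enumerate every route more specific than 192.0.2.0/24 by feeding the
 * previously returned node back in as "last" until NULL comes back.
 *
 *	struct rte_rib_node *ent = NULL;
 *	uint32_t ip;
 *	uint8_t depth;
 *
 *	while ((ent = rte_rib_get_nxt(rib, RTE_IPV4(192, 0, 2, 0), 24,
 *			ent, RTE_RIB_GET_NXT_ALL)) != NULL) {
 *		rte_rib_get_ip(ent, &ip);
 *		rte_rib_get_depth(ent, &depth);
 *		// process ip/depth here
 *	}
 */
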
void
rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur, *prev, *child;

	cur = rte_rib_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	/* prune now-invalid nodes that have at most one child */
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}

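/*
 * Worked example for the split logic in rte_rib_insert() below
 * (illustrative only, not from the original sources): inserting
 * 10.0.0.0/8 into a tree that already holds 11.0.0.0/8 gives
 * common_prefix = 0x0a000000 ^ 0x0b000000 = 0x01000000, so rte_clz32()
 * returns 7 and an invalid (non-route) intermediate node 10.0.0.0/7 is
 * created with the two /8 routes as its left and right children.
 */
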
struct rte_rib_node *
rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node **tmp;
	struct rte_rib_node *prev = NULL;
	struct rte_rib_node *new_node = NULL;
	struct rte_rib_node *common_node = NULL;
	int d = 0;
	uint32_t common_prefix;
	uint8_t common_depth;

	if (unlikely(rib == NULL || depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;
	ip &= rte_rib_depth_to_mask(depth);
	new_node = __rib_lookup_exact(rib, ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	new_node->ip = ip;
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find matching node or closest matching */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * Previous rte_rib_lookup_exact() returned NULL
		 * but node with proper search criteria is found.
		 * Validate intermediate node and return.
		 */
		if ((ip == (*tmp)->ip) && (depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}
		d = (*tmp)->depth;
		if ((d >= depth) || !is_covered(ip, (*tmp)->ip, d))
			break;
		prev = *tmp;
		tmp = (ip & (1 << (31 - d))) ? &(*tmp)->right : &(*tmp)->left;
	}
	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	common_prefix = ip ^ (*tmp)->ip;
	d = (common_prefix == 0) ? 32 : rte_clz32(common_prefix);

	common_depth = RTE_MIN(d, common_depth);
	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
	if ((common_prefix == ip) && (common_depth == depth)) {
		/* insert as a parent */
		if ((*tmp)->ip & (1 << (31 - depth)))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		common_node->ip = common_prefix;
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if ((new_node->ip & (1 << (31 - common_depth))) == 0) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}

int
rte_rib_get_ip(const struct rte_rib_node *node, uint32_t *ip)
{
	if (unlikely(node == NULL || ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*ip = node->ip;
	return 0;
}

int
rte_rib_get_depth(const struct rte_rib_node *node, uint8_t *depth)
{
	if (unlikely(node == NULL || depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib_get_ext(struct rte_rib_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib_get_nh(const struct rte_rib_node *node, uint64_t *nh)
{
	if (unlikely(node == NULL || nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh)
{
	if (unlikely(node == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

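/*
 * A minimal end-to-end sketch of the API implemented in this file
 * (illustrative only; the table name and sizes are made up, RTE_IPV4()
 * is assumed to come from rte_ip.h, and error checks are omitted):
 *
 *	struct rte_rib_conf conf = { .ext_sz = 0, .max_nodes = 1 << 10 };
 *	struct rte_rib *rib;
 *	struct rte_rib_node *ent;
 *	uint64_t nh;
 *
 *	rib = rte_rib_create("example_rib", SOCKET_ID_ANY, &conf);
 *	ent = rte_rib_insert(rib, RTE_IPV4(10, 0, 0, 0), 8);
 *	rte_rib_set_nh(ent, 1);
 *	ent = rte_rib_lookup(rib, RTE_IPV4(10, 1, 2, 3));
 *	if (ent != NULL)
 *		rte_rib_get_nh(ent, &nh);	// nh == 1, best match is 10.0.0.0/8
 *	rte_rib_free(rib);
 */
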
struct rte_rib *
rte_rib_create(const char *name, int socket_id, const struct rte_rib_conf *conf)
{
	char mem_name[RTE_RIB_NAMESIZE];
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (unlikely(name == NULL || conf == NULL || conf->max_nodes <= 0)) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate mempool for RIB %s\n", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* guarantee there is no existing entry with the same name */
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
	if (unlikely(te == NULL)) {
		RTE_LOG(ERR, LPM,
			"Can not allocate tailq entry for RIB %s\n", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
	if (unlikely(rib == NULL)) {
		RTE_LOG(ERR, LPM, "RIB %s memory allocation failed\n", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;
	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}

struct rte_rib *
rte_rib_find_existing(const char *name)
{
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib_free(struct rte_rib *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_rib_node *tmp = NULL;

	if (rib == NULL)
		return;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	while ((tmp = rte_rib_get_nxt(rib, 0, 0, tmp,
			RTE_RIB_GET_NXT_ALL)) != NULL)
		rte_rib_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);
	rte_free(rib);
	rte_free(te);
}

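/*
 * Lookup-by-name sketch for rte_rib_find_existing() above (illustrative
 * only; "example_rib" matches the creation sketch earlier and
 * handle_missing_table() is a hypothetical caller-side helper):
 *
 *	struct rte_rib *rib = rte_rib_find_existing("example_rib");
 *
 *	if (rib == NULL)
 *		handle_missing_table();	// rte_errno was set to ENOENT
 */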