/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 * Copyright(c) 2020 Arm Limited
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <math.h>
#include <pthread.h>

#include <rte_cycles.h>
#include <rte_random.h>
#include <rte_branch_prediction.h>
#include <rte_malloc.h>
#include <rte_ip.h>
#include <rte_lpm.h>

#include "test.h"
#include "test_xmmt_ops.h"

static struct rte_lpm *lpm;
static struct rte_rcu_qsbr *rv;
static volatile uint8_t writer_done;
static volatile uint32_t thr_id;
static uint64_t gwrite_cycles;
static uint64_t gwrites;
/* LPM APIs are not thread safe, use a mutex to provide thread safety */
static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Report quiescent state every 1024 lookups. Larger critical sections
 * in the reader will result in the writer polling multiple times.
 */
#define QSBR_REPORTING_INTERVAL 1024

#define TEST_LPM_ASSERT(cond) do { \
	if (!(cond)) { \
		printf("Error at line %d:\n", __LINE__); \
		return -1; \
	} \
} while (0)

#define ITERATIONS (1 << 10)
#define RCU_ITERATIONS 10
#define BATCH_SIZE (1 << 12)
#define BULK_SIZE 32

#define MAX_RULE_NUM (1200000)

struct route_rule {
	uint32_t ip;
	uint8_t depth;
};

static struct route_rule large_route_table[MAX_RULE_NUM];
/* Route table for routes with depth > 24 */
static struct route_rule large_ldepth_route_table[MAX_RULE_NUM];

static uint32_t num_route_entries;
static uint32_t num_ldepth_route_entries;
#define NUM_ROUTE_ENTRIES num_route_entries
#define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries

enum {
	IP_CLASS_A,
	IP_CLASS_B,
	IP_CLASS_C
};

/* struct route_rule_count defines the total number of rules per IP class.
 * Each item in a[]/b[]/c[] is the number of rules of that depth for common
 * class A/B/C addresses, not including the ones for private local networks.
 */
struct route_rule_count {
	uint32_t a[RTE_LPM_MAX_DEPTH];
	uint32_t b[RTE_LPM_MAX_DEPTH];
	uint32_t c[RTE_LPM_MAX_DEPTH];
};

/* The following per-depth counts for each common IP class were taken from
 * the previous large constant table in app/test/test_lpm_routes.h .
 * To keep the performance comparable, they preserve the same depth and IP
 * address coverage as that constant table. These numbers don't include any
 * private local IP addresses. As the previous large constant rule table was
 * dumped from a real router, there are no addresses in class D or E.
 */
static struct route_rule_count rule_count = {
	.a = { /* IP class A in which the most significant bit is 0 */
		0, /* depth = 1 */
		0, /* depth = 2 */
		1, /* depth = 3 */
		0, /* depth = 4 */
		2, /* depth = 5 */
		1, /* depth = 6 */
		3, /* depth = 7 */
		185, /* depth = 8 */
		26, /* depth = 9 */
		16, /* depth = 10 */
		39, /* depth = 11 */
		144, /* depth = 12 */
		233, /* depth = 13 */
		528, /* depth = 14 */
		866, /* depth = 15 */
		3856, /* depth = 16 */
		3268, /* depth = 17 */
		5662, /* depth = 18 */
		17301, /* depth = 19 */
		22226, /* depth = 20 */
		11147, /* depth = 21 */
		16746, /* depth = 22 */
		17120, /* depth = 23 */
		77578, /* depth = 24 */
		401, /* depth = 25 */
		656, /* depth = 26 */
		1107, /* depth = 27 */
		1121, /* depth = 28 */
		2316, /* depth = 29 */
		717, /* depth = 30 */
		10, /* depth = 31 */
		66 /* depth = 32 */
	},
	.b = { /* IP class B in which the most significant 2 bits are 10 */
		0, /* depth = 1 */
		0, /* depth = 2 */
		0, /* depth = 3 */
		0, /* depth = 4 */
		1, /* depth = 5 */
		1, /* depth = 6 */
		1, /* depth = 7 */
		3, /* depth = 8 */
		3, /* depth = 9 */
		30, /* depth = 10 */
		25, /* depth = 11 */
		168, /* depth = 12 */
		305, /* depth = 13 */
		569, /* depth = 14 */
		1129, /* depth = 15 */
		50800, /* depth = 16 */
		1645, /* depth = 17 */
		1820, /* depth = 18 */
		3506, /* depth = 19 */
		3258, /* depth = 20 */
		3424, /* depth = 21 */
		4971, /* depth = 22 */
		6885, /* depth = 23 */
		39771, /* depth = 24 */
		424, /* depth = 25 */
		170, /* depth = 26 */
		433, /* depth = 27 */
		92, /* depth = 28 */
		366, /* depth = 29 */
		377, /* depth = 30 */
		2, /* depth = 31 */
		200 /* depth = 32 */
	},
	.c = { /* IP class C in which the most significant 3 bits are 110 */
		0, /* depth = 1 */
		0, /* depth = 2 */
		0, /* depth = 3 */
		0, /* depth = 4 */
		0, /* depth = 5 */
		0, /* depth = 6 */
		0, /* depth = 7 */
		12, /* depth = 8 */
		8, /* depth = 9 */
		9, /* depth = 10 */
		33, /* depth = 11 */
		69, /* depth = 12 */
		237, /* depth = 13 */
		1007, /* depth = 14 */
		1717, /* depth = 15 */
		14663, /* depth = 16 */
		8070, /* depth = 17 */
		16185, /* depth = 18 */
		48261, /* depth = 19 */
		36870, /* depth = 20 */
		33960, /* depth = 21 */
		50638, /* depth = 22 */
		61422, /* depth = 23 */
		466549, /* depth = 24 */
		1829, /* depth = 25 */
		4824, /* depth = 26 */
		4927, /* depth = 27 */
		5914, /* depth = 28 */
		10254, /* depth = 29 */
		4905, /* depth = 30 */
		1, /* depth = 31 */
		716 /* depth = 32 */
	}
};

/* Generate pseudo-random rules of the given depth for one IP class,
 * spreading the generated prefixes across the class's address range.
 */
static void generate_random_rule_prefix(uint32_t ip_class, uint8_t depth)
{
/* IP address class A, the most significant bit is 0 */
#define IP_HEAD_MASK_A		0x00000000
#define IP_HEAD_BIT_NUM_A	1

/* IP address class B, the most significant 2 bits are 10 */
#define IP_HEAD_MASK_B		0x80000000
#define IP_HEAD_BIT_NUM_B	2

/* IP address class C, the most significant 3 bits are 110 */
#define IP_HEAD_MASK_C		0xC0000000
#define IP_HEAD_BIT_NUM_C	3

	uint32_t class_depth;
	uint32_t range;
	uint32_t mask;
	uint32_t step;
	uint32_t start;
	uint32_t fixed_bit_num;
	uint32_t ip_head_mask;
	uint32_t rule_num;
	uint32_t k;
	struct route_rule *ptr_rule, *ptr_ldepth_rule;

	if (ip_class == IP_CLASS_A) { /* IP Address class A */
		fixed_bit_num = IP_HEAD_BIT_NUM_A;
		ip_head_mask = IP_HEAD_MASK_A;
		rule_num = rule_count.a[depth - 1];
	} else if (ip_class == IP_CLASS_B) { /* IP Address class B */
		fixed_bit_num = IP_HEAD_BIT_NUM_B;
		ip_head_mask = IP_HEAD_MASK_B;
		rule_num = rule_count.b[depth - 1];
	} else { /* IP Address class C */
		fixed_bit_num = IP_HEAD_BIT_NUM_C;
		ip_head_mask = IP_HEAD_MASK_C;
		rule_num = rule_count.c[depth - 1];
	}

	if (rule_num == 0)
		return;

	/* The number of remaining prefix bits, excluding the most
	 * significant fixed bits of this IP address class.
	 */
	class_depth = depth - fixed_bit_num;

	/* range is the maximum number of rules for this depth and
	 * this IP address class.
	 */
	range = 1 << class_depth;

	/* The mask covers only the class_depth generated bits, excluding
	 * the fixed bits of the IP address class.
	 */
	mask = range - 1;

	/* Widen the IP address coverage of the generated rules */
	if (range <= rule_num)
		step = 1;
	else
		step = round((double)range / rule_num);

	/* Only generate the remaining bits, excluding the most significant
	 * fixed bits of the IP address class.
	 */
	start = lrand48() & mask;
	ptr_rule = &large_route_table[num_route_entries];
	ptr_ldepth_rule = &large_ldepth_route_table[num_ldepth_route_entries];
	for (k = 0; k < rule_num; k++) {
		ptr_rule->ip = (start << (RTE_LPM_MAX_DEPTH - depth))
			| ip_head_mask;
		ptr_rule->depth = depth;
		/* If the depth of the route is more than 24, store it
		 * in another table as well.
		 */
		if (depth > 24) {
			ptr_ldepth_rule->ip = ptr_rule->ip;
			ptr_ldepth_rule->depth = ptr_rule->depth;
			ptr_ldepth_rule++;
			num_ldepth_route_entries++;
		}
		ptr_rule++;
		start = (start + step) & mask;
	}
	num_route_entries += rule_num;
}

/* Insert a rule at a random position in large_route_table, moving the
 * displaced rule to the end of the table.
 */
static void insert_rule_in_random_pos(uint32_t ip, uint8_t depth)
{
	uint32_t pos;
	int try_count = 0;
	struct route_rule tmp;

	do {
		pos = lrand48();
		try_count++;
	} while ((try_count < 10) && (pos > num_route_entries));

	if ((pos > num_route_entries) || (pos >= MAX_RULE_NUM))
		pos = num_route_entries >> 1;

	tmp = large_route_table[pos];
	large_route_table[pos].ip = ip;
	large_route_table[pos].depth = depth;
	if (num_route_entries < MAX_RULE_NUM)
		large_route_table[num_route_entries++] = tmp;
}

/* Build the large route table from the per-class, per-depth rule counts. */
static void generate_large_route_rule_table(void)
{
	uint32_t ip_class;
	uint8_t depth;

	num_route_entries = 0;
	num_ldepth_route_entries = 0;
	memset(large_route_table, 0, sizeof(large_route_table));

	for (ip_class = IP_CLASS_A; ip_class <= IP_CLASS_C; ip_class++) {
		for (depth = 1; depth <= RTE_LPM_MAX_DEPTH; depth++) {
			generate_random_rule_prefix(ip_class, depth);
		}
	}

	/* Add the following rules to match the previous large constant
	 * table: 4 rules with private local IP addresses and 1 all-zeros
	 * prefix with depth = 8.
	 */
	insert_rule_in_random_pos(RTE_IPV4(0, 0, 0, 0), 8);
	insert_rule_in_random_pos(RTE_IPV4(10, 2, 23, 147), 32);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 100, 10), 24);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 25, 100), 24);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 129, 124), 32);
}

static void
print_route_distribution(const struct route_rule *table, uint32_t n)
{
	unsigned i, j;

	printf("Route distribution per prefix width:\n");
	printf("DEPTH    QUANTITY (PERCENT)\n");
	printf("---------------------------\n");

	/* Count depths. */
	for (i = 1; i <= 32; i++) {
		unsigned depth_counter = 0;
		double percent_hits;

		for (j = 0; j < n; j++)
			if (table[j].depth == (uint8_t) i)
				depth_counter++;

		percent_hits = ((double)depth_counter)/((double)n) * 100;
		printf("%.2u%15u (%.2f)\n", i, depth_counter, percent_hits);
	}
	printf("\n");
}

static uint16_t enabled_core_ids[RTE_MAX_LCORE];
static unsigned int num_cores;

/* Simple way to allocate thread ids in 0 to RTE_MAX_LCORE space */
static inline uint32_t
alloc_thread_id(void)
{
	uint32_t tmp_thr_id;

	tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED);
	if (tmp_thr_id >= RTE_MAX_LCORE)
		printf("Invalid thread id %u\n", tmp_thr_id);

	return tmp_thr_id;
}

/*
 * Reader thread using the rte_lpm data structure without RCU.
 */
static int
test_lpm_reader(void *arg)
{
	int i;
	uint32_t ip_batch[QSBR_REPORTING_INTERVAL];
	uint32_t next_hop_return = 0;

	RTE_SET_USED(arg);
	do {
		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			ip_batch[i] = rte_rand();

		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			rte_lpm_lookup(lpm, ip_batch[i], &next_hop_return);

	} while (!writer_done);

	return 0;
}

/*
 * Reader thread using the rte_lpm data structure with RCU.
 */
static int
test_lpm_rcu_qsbr_reader(void *arg)
{
	int i;
	uint32_t thread_id = alloc_thread_id();
	uint32_t ip_batch[QSBR_REPORTING_INTERVAL];
	uint32_t next_hop_return = 0;

	RTE_SET_USED(arg);
	/* Register this thread to report quiescent state */
	rte_rcu_qsbr_thread_register(rv, thread_id);
	rte_rcu_qsbr_thread_online(rv, thread_id);

	do {
		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			ip_batch[i] = rte_rand();

		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			rte_lpm_lookup(lpm, ip_batch[i], &next_hop_return);

		/* Update quiescent state */
		rte_rcu_qsbr_quiescent(rv, thread_id);
	} while (!writer_done);

	rte_rcu_qsbr_thread_offline(rv, thread_id);
	rte_rcu_qsbr_thread_unregister(rv, thread_id);

	return 0;
}

/*
 * Writer thread using the rte_lpm data structure with RCU.
 */
static int
test_lpm_rcu_qsbr_writer(void *arg)
{
	unsigned int i, j, si, ei;
	uint64_t begin, total_cycles;
	uint8_t core_id = (uint8_t)((uintptr_t)arg);
	uint32_t next_hop_add = 0xAA;

	/* 2 writer threads are used, each covering half of the entries */
	if (core_id % 2 == 0) {
		si = 0;
		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
	} else {
		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
		ei = NUM_LDEPTH_ROUTE_ENTRIES;
	}

	/* Measure add/delete. */
	begin = rte_rdtsc_precise();
	for (i = 0; i < RCU_ITERATIONS; i++) {
		/* Add all the entries */
		for (j = si; j < ei; j++) {
			pthread_mutex_lock(&lpm_mutex);
			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth,
					next_hop_add) != 0) {
				printf("Failed to add iteration %d, route# %d\n",
					i, j);
			}
			pthread_mutex_unlock(&lpm_mutex);
		}

		/* Delete all the entries */
		for (j = si; j < ei; j++) {
			pthread_mutex_lock(&lpm_mutex);
			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth) != 0) {
				printf("Failed to delete iteration %d, route# %d\n",
					i, j);
			}
			pthread_mutex_unlock(&lpm_mutex);
		}
	}

	total_cycles = rte_rdtsc_precise() - begin;

	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
	/* This writer performed (ei - si) adds and (ei - si) deletes
	 * per iteration.
	 */
	__atomic_fetch_add(&gwrites,
			2 * (ei - si) * RCU_ITERATIONS,
			__ATOMIC_RELAXED);

	return 0;
}

/*
 * Perf test:
 * 2 writers, rest are readers
 */
static int
test_lpm_rcu_perf_multi_writer(void)
{
	struct rte_lpm_config config;
	size_t sz;
	unsigned int i;
	uint16_t core_id;
	struct rte_lpm_rcu_config rcu_cfg = {0};

	if (rte_lcore_count() < 3) {
		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
		return TEST_SKIPPED;
	}

	num_cores = 0;
	RTE_LCORE_FOREACH_WORKER(core_id) {
		enabled_core_ids[num_cores] = core_id;
		num_cores++;
	}

	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
		num_cores - 2);

	/* Create LPM table */
	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
	config.flags = 0;
	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
	TEST_LPM_ASSERT(lpm != NULL);

	/* Init RCU variable */
	sz = rte_rcu_qsbr_get_memsize(num_cores);
	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
						RTE_CACHE_LINE_SIZE);
	rte_rcu_qsbr_init(rv, num_cores);

	rcu_cfg.v = rv;
	/* Assign the RCU variable to LPM */
	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
		printf("RCU variable assignment failed\n");
		goto error;
	}

	writer_done = 0;
	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);

	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

	/* Launch reader threads */
	for (i = 2; i < num_cores; i++)
		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
					enabled_core_ids[i]);

	/* Launch writer threads */
	for (i = 0; i < 2; i++)
		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
					(void *)(uintptr_t)i,
					enabled_core_ids[i]);

	/* Wait for writer threads */
	for (i = 0; i < 2; i++)
		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
			goto error;

	/* The 2 writers together add and delete every route once
	 * per iteration.
	 */
	printf("Total LPM Adds: %d\n",
		RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
	printf("Total LPM Deletes: %d\n",
		RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
		);

	/* Wait and check return value from reader threads */
	writer_done = 1;
	for (i = 2; i < num_cores; i++)
		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
			goto error;

	rte_lpm_free(lpm);
	rte_free(rv);
	lpm = NULL;
	rv = NULL;

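	/* Baseline run: the writers below are the same test_lpm_rcu_qsbr_writer
	 * threads, but the readers run test_lpm_reader(), which does not report
	 * quiescent states, and no RCU configuration is attached to the table.
	 */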
	/* Test without RCU integration */
	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
		num_cores - 2);

	/* Create LPM table */
	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
	config.flags = 0;
	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
	TEST_LPM_ASSERT(lpm != NULL);

	writer_done = 0;
	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

	/* Launch reader threads */
	for (i = 2; i < num_cores; i++)
		rte_eal_remote_launch(test_lpm_reader, NULL,
					enabled_core_ids[i]);

	/* Launch writer threads */
	for (i = 0; i < 2; i++)
		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
					(void *)(uintptr_t)i,
					enabled_core_ids[i]);

	/* Wait for writer threads */
	for (i = 0; i < 2; i++)
		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
			goto error;

	printf("Total LPM Adds: %d\n",
		RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
	printf("Total LPM Deletes: %d\n",
		RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
		);

	writer_done = 1;
	/* Wait and check return value from reader threads */
	for (i = 2; i < num_cores; i++)
		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
			goto error;

	rte_lpm_free(lpm);

	return 0;

error:
	writer_done = 1;
	/* Wait until all readers have exited */
	rte_eal_mp_wait_lcore();

	rte_lpm_free(lpm);
	rte_free(rv);

	return -1;
}

/*
 * Perf test:
 * Single writer, rest are readers
 */
static int
test_lpm_rcu_perf(void)
{
	struct rte_lpm_config config;
	uint64_t begin, total_cycles;
	size_t sz;
	unsigned int i, j;
	uint16_t core_id;
	uint32_t next_hop_add = 0xAA;
	struct rte_lpm_rcu_config rcu_cfg = {0};

	if (rte_lcore_count() < 2) {
		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
		return TEST_SKIPPED;
	}

	num_cores = 0;
	RTE_LCORE_FOREACH_WORKER(core_id) {
		enabled_core_ids[num_cores] = core_id;
		num_cores++;
	}

	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
		num_cores);

	/* Create LPM table */
	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
	config.flags = 0;
	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
	TEST_LPM_ASSERT(lpm != NULL);

	/* Init RCU variable */
	sz = rte_rcu_qsbr_get_memsize(num_cores);
	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
						RTE_CACHE_LINE_SIZE);
	rte_rcu_qsbr_init(rv, num_cores);

	rcu_cfg.v = rv;
	/* Assign the RCU variable to LPM */
	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
		printf("RCU variable assignment failed\n");
		goto error;
	}

	writer_done = 0;
	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

	/* Launch reader threads */
	for (i = 0; i < num_cores; i++)
		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
					enabled_core_ids[i]);

	/* Measure add/delete. */
	begin = rte_rdtsc_precise();
	for (i = 0; i < RCU_ITERATIONS; i++) {
		/* Add all the entries */
		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth,
					next_hop_add) != 0) {
				printf("Failed to add iteration %d, route# %d\n",
					i, j);
				goto error;
			}

		/* Delete all the entries */
		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth) != 0) {
				printf("Failed to delete iteration %d, route# %d\n",
					i, j);
				goto error;
			}
	}
	total_cycles = rte_rdtsc_precise() - begin;

	printf("Total LPM Adds: %d\n",
		RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
	printf("Total LPM Deletes: %d\n",
		RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
	printf("Average LPM Add/Del: %g cycles\n",
		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS));

	writer_done = 1;
	/* Wait and check return value from reader threads */
	for (i = 0; i < num_cores; i++)
		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
			goto error;

	rte_lpm_free(lpm);
	rte_free(rv);
	lpm = NULL;
	rv = NULL;

	/* Test without RCU integration */
	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
		num_cores);

	/* Create LPM table */
	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
	config.flags = 0;
	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
	TEST_LPM_ASSERT(lpm != NULL);

	writer_done = 0;
	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

	/* Launch reader threads */
	for (i = 0; i < num_cores; i++)
		rte_eal_remote_launch(test_lpm_reader, NULL,
					enabled_core_ids[i]);

	/* Measure add/delete. */
	begin = rte_rdtsc_precise();
	for (i = 0; i < RCU_ITERATIONS; i++) {
		/* Add all the entries */
		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth,
					next_hop_add) != 0) {
				printf("Failed to add iteration %d, route# %d\n",
					i, j);
				goto error;
			}

		/* Delete all the entries */
		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth) != 0) {
				printf("Failed to delete iteration %d, route# %d\n",
					i, j);
				goto error;
			}
	}
	total_cycles = rte_rdtsc_precise() - begin;

	printf("Total LPM Adds: %d\n",
		RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
	printf("Total LPM Deletes: %d\n",
		RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
	printf("Average LPM Add/Del: %g cycles\n",
		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS));

	writer_done = 1;
	/* Wait and check return value from reader threads */
	for (i = 0; i < num_cores; i++)
		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
			printf("Warning: lcore %u not finished.\n",
				enabled_core_ids[i]);

	rte_lpm_free(lpm);

	return 0;

error:
	writer_done = 1;
	/* Wait until all readers have exited */
	rte_eal_mp_wait_lcore();

	rte_lpm_free(lpm);
	rte_free(rv);

	return -1;
}

/* Single-threaded add/lookup/delete performance test on the large table */
static int
test_lpm_perf(void)
{
	struct rte_lpm_config config;
	uint64_t begin, total_time, lpm_used_entries = 0;
	unsigned i, j;
	uint32_t next_hop_add = 0xAA, next_hop_return = 0;
	int status = 0;
	uint64_t cache_line_counter = 0;
	int64_t count = 0;

	config.max_rules = 2000000;
	config.number_tbl8s = 2048;
	config.flags = 0;

	rte_srand(rte_rdtsc());

	generate_large_route_rule_table();

	printf("No. routes = %u\n", (unsigned) NUM_ROUTE_ENTRIES);

	print_route_distribution(large_route_table, (uint32_t) NUM_ROUTE_ENTRIES);

	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
	TEST_LPM_ASSERT(lpm != NULL);

	/* Measure add. */
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		if (rte_lpm_add(lpm, large_route_table[i].ip,
				large_route_table[i].depth, next_hop_add) == 0)
			status++;
	}
	/* End Timer. */
	total_time = rte_rdtsc() - begin;

	printf("Unique added entries = %d\n", status);
	/* Obtain add statistics. */
	for (i = 0; i < RTE_LPM_TBL24_NUM_ENTRIES; i++) {
		if (lpm->tbl24[i].valid)
			lpm_used_entries++;

		if (i % 32 == 0) {
			if ((uint64_t)count < lpm_used_entries) {
				cache_line_counter++;
				count = lpm_used_entries;
			}
		}
	}

	printf("Used table 24 entries = %u (%g%%)\n",
		(unsigned) lpm_used_entries,
		(lpm_used_entries * 100.0) / RTE_LPM_TBL24_NUM_ENTRIES);
	printf("64 byte Cache entries used = %u (%u bytes)\n",
		(unsigned) cache_line_counter, (unsigned) cache_line_counter * 64);

	printf("Average LPM Add: %g cycles\n",
		(double)total_time / NUM_ROUTE_ENTRIES);

	/* Measure single Lookup */
	total_time = 0;
	count = 0;

	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];

		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();

		for (j = 0; j < BATCH_SIZE; j++) {
			if (rte_lpm_lookup(lpm, ip_batch[j], &next_hop_return) != 0)
				count++;
		}

		total_time += rte_rdtsc() - begin;

	}
	printf("Average LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
		(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
		(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Measure bulk Lookup */
	total_time = 0;
	count = 0;
	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];
		uint32_t next_hops[BULK_SIZE];

		/* Create array of random IP addresses */
		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();
		for (j = 0; j < BATCH_SIZE; j += BULK_SIZE) {
			unsigned k;
			rte_lpm_lookup_bulk(lpm, &ip_batch[j], next_hops, BULK_SIZE);
			for (k = 0; k < BULK_SIZE; k++)
				if (unlikely(!(next_hops[k] & RTE_LPM_LOOKUP_SUCCESS)))
					count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("BULK LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
		(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
		(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Measure LookupX4 */
	total_time = 0;
	count = 0;
	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];
		uint32_t next_hops[4];

		/* Create array of random IP addresses */
		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();
		for (j = 0; j < BATCH_SIZE; j += RTE_DIM(next_hops)) {
			unsigned k;
			xmm_t ipx4;

			/* Unaligned vector load of 4 IPv4 addresses */
			ipx4 = vect_loadu_sil128((xmm_t *)(ip_batch + j));
			rte_lpm_lookupx4(lpm, ipx4, next_hops, UINT32_MAX);
			for (k = 0; k < RTE_DIM(next_hops); k++)
				if (unlikely(next_hops[k] == UINT32_MAX))
					count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("LPM LookupX4: %.1f cycles (fails = %.1f%%)\n",
		(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
		(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Measure Delete */
	status = 0;
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		/* rte_lpm_delete(lpm, ip, depth) */
		status += rte_lpm_delete(lpm, large_route_table[i].ip,
				large_route_table[i].depth);
	}

	total_time = rte_rdtsc() - begin;

	printf("Average LPM Delete: %g cycles\n",
		(double)total_time / NUM_ROUTE_ENTRIES);

	rte_lpm_delete_all(lpm);
	rte_lpm_free(lpm);

	if (test_lpm_rcu_perf() < 0)
		return -1;

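	/* Repeat the RCU perf measurement with two concurrent writers */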
	if (test_lpm_rcu_perf_multi_writer() < 0)
		return -1;

	return 0;
}

REGISTER_TEST_COMMAND(lpm_perf_autotest, test_lpm_perf);