/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 * Copyright(c) 2020 Arm Limited
 */

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <pthread.h>

#include <rte_cycles.h>
#include <rte_random.h>
#include <rte_branch_prediction.h>
#include <rte_malloc.h>
#include <rte_ip.h>
#include <rte_lpm.h>

#include "test.h"
#include "test_xmmt_ops.h"

struct rte_lpm *lpm;
static struct rte_rcu_qsbr *rv;
static volatile uint8_t writer_done;
static volatile uint32_t thr_id;
static uint64_t gwrite_cycles;
static uint32_t num_writers;
/* LPM APIs are not thread safe, use a mutex to provide thread safety */
static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Report quiescent state interval every 1024 lookups. Larger critical
 * sections in the reader will result in the writer polling multiple times.
 */
#define QSBR_REPORTING_INTERVAL 1024

#define TEST_LPM_ASSERT(cond) do {				\
	if (!(cond)) {						\
		printf("Error at line %d:\n", __LINE__);	\
		return -1;					\
	}							\
} while (0)

#define ITERATIONS (1 << 10)
#define RCU_ITERATIONS 10
#define BATCH_SIZE (1 << 12)
#define BULK_SIZE 32

#define MAX_RULE_NUM (1200000)

struct route_rule {
	uint32_t ip;
	uint8_t depth;
};

static struct route_rule large_route_table[MAX_RULE_NUM];
/* Route table for routes with depth > 24 */
static struct route_rule large_ldepth_route_table[MAX_RULE_NUM];

static uint32_t num_route_entries;
static uint32_t num_ldepth_route_entries;
#define NUM_ROUTE_ENTRIES num_route_entries
#define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries

#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)

enum {
	IP_CLASS_A,
	IP_CLASS_B,
	IP_CLASS_C
};

/* struct route_rule_count defines the total number of rules in the following
 * a/b/c arrays; each item in a[]/b[]/c[] is the number of common class A/B/C
 * IP addresses at that depth, not including the ones for private local
 * networks.
 */
struct route_rule_count {
	uint32_t a[RTE_LPM_MAX_DEPTH];
	uint32_t b[RTE_LPM_MAX_DEPTH];
	uint32_t c[RTE_LPM_MAX_DEPTH];
};

/* The following per-depth counts for each common IP class were taken from the
 * previous large constant table in app/test/test_lpm_routes.h. In order to
 * match similar performance, they keep the same depth and IP address coverage
 * as that constant table. These numbers don't include any private local IP
 * addresses. As the previous large constant rule table was dumped from a real
 * router, there are no IP addresses in class D or E.
 */
static struct route_rule_count rule_count = {
	.a = { /* IP class A in which the most significant bit is 0 */
		0,      /* depth = 1 */
		0,      /* depth = 2 */
		1,      /* depth = 3 */
		0,      /* depth = 4 */
		2,      /* depth = 5 */
		1,      /* depth = 6 */
		3,      /* depth = 7 */
		185,    /* depth = 8 */
		26,     /* depth = 9 */
		16,     /* depth = 10 */
		39,     /* depth = 11 */
		144,    /* depth = 12 */
		233,    /* depth = 13 */
		528,    /* depth = 14 */
		866,    /* depth = 15 */
		3856,   /* depth = 16 */
		3268,   /* depth = 17 */
		5662,   /* depth = 18 */
		17301,  /* depth = 19 */
		22226,  /* depth = 20 */
		11147,  /* depth = 21 */
		16746,  /* depth = 22 */
		17120,  /* depth = 23 */
		77578,  /* depth = 24 */
		401,    /* depth = 25 */
		656,    /* depth = 26 */
		1107,   /* depth = 27 */
		1121,   /* depth = 28 */
		2316,   /* depth = 29 */
		717,    /* depth = 30 */
		10,     /* depth = 31 */
		66      /* depth = 32 */
	},
	.b = { /* IP class B in which the most significant 2 bits are 10 */
		0,      /* depth = 1 */
		0,      /* depth = 2 */
		0,      /* depth = 3 */
		0,      /* depth = 4 */
		1,      /* depth = 5 */
		1,      /* depth = 6 */
		1,      /* depth = 7 */
		3,      /* depth = 8 */
		3,      /* depth = 9 */
		30,     /* depth = 10 */
		25,     /* depth = 11 */
		168,    /* depth = 12 */
		305,    /* depth = 13 */
		569,    /* depth = 14 */
		1129,   /* depth = 15 */
		50800,  /* depth = 16 */
		1645,   /* depth = 17 */
		1820,   /* depth = 18 */
		3506,   /* depth = 19 */
		3258,   /* depth = 20 */
		3424,   /* depth = 21 */
		4971,   /* depth = 22 */
		6885,   /* depth = 23 */
		39771,  /* depth = 24 */
		424,    /* depth = 25 */
		170,    /* depth = 26 */
		433,    /* depth = 27 */
		92,     /* depth = 28 */
		366,    /* depth = 29 */
		377,    /* depth = 30 */
		2,      /* depth = 31 */
		200     /* depth = 32 */
	},
	.c = { /* IP class C in which the most significant 3 bits are 110 */
		0,      /* depth = 1 */
		0,      /* depth = 2 */
		0,      /* depth = 3 */
		0,      /* depth = 4 */
		0,      /* depth = 5 */
		0,      /* depth = 6 */
		0,      /* depth = 7 */
		12,     /* depth = 8 */
		8,      /* depth = 9 */
		9,      /* depth = 10 */
		33,     /* depth = 11 */
		69,     /* depth = 12 */
		237,    /* depth = 13 */
		1007,   /* depth = 14 */
		1717,   /* depth = 15 */
		14663,  /* depth = 16 */
		8070,   /* depth = 17 */
		16185,  /* depth = 18 */
		48261,  /* depth = 19 */
		36870,  /* depth = 20 */
		33960,  /* depth = 21 */
		50638,  /* depth = 22 */
		61422,  /* depth = 23 */
		466549, /* depth = 24 */
		1829,   /* depth = 25 */
		4824,   /* depth = 26 */
		4927,   /* depth = 27 */
		5914,   /* depth = 28 */
		10254,  /* depth = 29 */
		4905,   /* depth = 30 */
		1,      /* depth = 31 */
		716     /* depth = 32 */
	}
};

static void generate_random_rule_prefix(uint32_t ip_class, uint8_t depth)
{
/* IP address class A, the most significant bit is 0 */
#define IP_HEAD_MASK_A		0x00000000
#define IP_HEAD_BIT_NUM_A	1

/* IP address class B, the most significant 2 bits are 10 */
#define IP_HEAD_MASK_B		0x80000000
#define IP_HEAD_BIT_NUM_B	2

/* IP address class C, the most significant 3 bits are 110 */
#define IP_HEAD_MASK_C		0xC0000000
#define IP_HEAD_BIT_NUM_C	3

	uint32_t class_depth;
	uint32_t range;
	uint32_t mask;
	uint32_t step;
	uint32_t start;
	uint32_t fixed_bit_num;
	uint32_t ip_head_mask;
	uint32_t rule_num;
	uint32_t k;
	struct route_rule *ptr_rule, *ptr_ldepth_rule;

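	/*
	 * Illustrative example (derived from the rule_count table above):
	 * for class A at depth 16, rule_num = 3856 while class_depth = 15
	 * and range = 1 << 15 = 32768, so step = round(32768 / 3856) = 8.
	 * Starting from a random offset, the generation loop below then
	 * emits 3856 prefixes spaced 8 apart, spreading them roughly evenly
	 * over all 2^15 possible class A /16 prefixes instead of clustering
	 * them together.
	 */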
	if (ip_class == IP_CLASS_A) {        /* IP Address class A */
		fixed_bit_num = IP_HEAD_BIT_NUM_A;
		ip_head_mask = IP_HEAD_MASK_A;
		rule_num = rule_count.a[depth - 1];
	} else if (ip_class == IP_CLASS_B) { /* IP Address class B */
		fixed_bit_num = IP_HEAD_BIT_NUM_B;
		ip_head_mask = IP_HEAD_MASK_B;
		rule_num = rule_count.b[depth - 1];
	} else {                             /* IP Address class C */
		fixed_bit_num = IP_HEAD_BIT_NUM_C;
		ip_head_mask = IP_HEAD_MASK_C;
		rule_num = rule_count.c[depth - 1];
	}

	if (rule_num == 0)
		return;

	/* the number of remaining bits, i.e. those not part of the most
	 * significant fixed bits for this IP address class
	 */
	class_depth = depth - fixed_bit_num;

	/* range is the maximum number of rules for this depth and
	 * this IP address class
	 */
	range = 1 << class_depth;

	/* only mask the generated bits of this depth, excluding the
	 * fixed bits for the IP address class
	 */
	mask = range - 1;

	/* Widen coverage of IP address in generated rules */
	if (range <= rule_num)
		step = 1;
	else
		step = round((double)range / rule_num);

	/* Only generate the remaining bits, excluding the most significant
	 * fixed bits for the IP address class
	 */
	start = lrand48() & mask;
	ptr_rule = &large_route_table[num_route_entries];
	ptr_ldepth_rule = &large_ldepth_route_table[num_ldepth_route_entries];
	for (k = 0; k < rule_num; k++) {
		ptr_rule->ip = (start << (RTE_LPM_MAX_DEPTH - depth))
			| ip_head_mask;
		ptr_rule->depth = depth;
		/* If the depth of the route is more than 24, store it
		 * in another table as well.
		 */
		if (depth > 24) {
			ptr_ldepth_rule->ip = ptr_rule->ip;
			ptr_ldepth_rule->depth = ptr_rule->depth;
			ptr_ldepth_rule++;
			num_ldepth_route_entries++;
		}
		ptr_rule++;
		start = (start + step) & mask;
	}
	num_route_entries += rule_num;
}

static void insert_rule_in_random_pos(uint32_t ip, uint8_t depth)
{
	uint32_t pos;
	int try_count = 0;
	struct route_rule tmp;

	do {
		pos = lrand48();
		try_count++;
	} while ((try_count < 10) && (pos > num_route_entries));

	if ((pos > num_route_entries) || (pos >= MAX_RULE_NUM))
		pos = num_route_entries >> 1;

	tmp = large_route_table[pos];
	large_route_table[pos].ip = ip;
	large_route_table[pos].depth = depth;
	if (num_route_entries < MAX_RULE_NUM)
		large_route_table[num_route_entries++] = tmp;
}

static void generate_large_route_rule_table(void)
{
	uint32_t ip_class;
	uint8_t depth;

	num_route_entries = 0;
	num_ldepth_route_entries = 0;
	memset(large_route_table, 0, sizeof(large_route_table));

	for (ip_class = IP_CLASS_A; ip_class <= IP_CLASS_C; ip_class++) {
		for (depth = 1; depth <= RTE_LPM_MAX_DEPTH; depth++) {
			generate_random_rule_prefix(ip_class, depth);
		}
	}

	/* Add the following rules to match the previous large constant
	 * table: 4 rules with private local IP addresses and 1 all-zeros
	 * prefix with depth = 8.
	 */
	insert_rule_in_random_pos(RTE_IPV4(0, 0, 0, 0), 8);
	insert_rule_in_random_pos(RTE_IPV4(10, 2, 23, 147), 32);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 100, 10), 24);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 25, 100), 24);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 129, 124), 32);
}

static void
print_route_distribution(const struct route_rule *table, uint32_t n)
{
	unsigned int i, j;

	printf("Route distribution per prefix width:\n");
	printf("DEPTH    QUANTITY (PERCENT)\n");
	printf("---------------------------\n");

	/* Count depths. */
	for (i = 1; i <= 32; i++) {
		unsigned int depth_counter = 0;
		double percent_hits;

		for (j = 0; j < n; j++)
			if (table[j].depth == (uint8_t) i)
				depth_counter++;

		percent_hits = ((double)depth_counter)/((double)n) * 100;
		printf("%.2u%15u (%.2f)\n", i, depth_counter, percent_hits);
	}
	printf("\n");
}

static uint16_t enabled_core_ids[RTE_MAX_LCORE];
static unsigned int num_cores;

/* Simple way to allocate thread ids in 0 to RTE_MAX_LCORE space */
static inline uint32_t
alloc_thread_id(void)
{
	uint32_t tmp_thr_id;

	tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED);
	if (tmp_thr_id >= RTE_MAX_LCORE)
		printf("Invalid thread id %u\n", tmp_thr_id);

	return tmp_thr_id;
}

/*
 * Reader thread using rte_lpm data structure without RCU.
 */
static int
test_lpm_reader(void *arg)
{
	int i;
	uint32_t ip_batch[QSBR_REPORTING_INTERVAL];
	uint32_t next_hop_return = 0;

	RTE_SET_USED(arg);
	do {
		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			ip_batch[i] = rte_rand();

		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			rte_lpm_lookup(lpm, ip_batch[i], &next_hop_return);

	} while (!writer_done);

	return 0;
}

/*
 * Reader thread using rte_lpm data structure with RCU.
 */
static int
test_lpm_rcu_qsbr_reader(void *arg)
{
	int i;
	uint32_t thread_id = alloc_thread_id();
	uint32_t ip_batch[QSBR_REPORTING_INTERVAL];
	uint32_t next_hop_return = 0;

	RTE_SET_USED(arg);
	/* Register this thread to report quiescent state */
	rte_rcu_qsbr_thread_register(rv, thread_id);
	rte_rcu_qsbr_thread_online(rv, thread_id);

	do {
		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			ip_batch[i] = rte_rand();

		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			rte_lpm_lookup(lpm, ip_batch[i], &next_hop_return);

		/* Update quiescent state */
		rte_rcu_qsbr_quiescent(rv, thread_id);
	} while (!writer_done);

	rte_rcu_qsbr_thread_offline(rv, thread_id);
	rte_rcu_qsbr_thread_unregister(rv, thread_id);

	return 0;
}

/*
 * Writer thread using rte_lpm data structure with RCU.
 */
static int
test_lpm_rcu_qsbr_writer(void *arg)
{
	unsigned int i, j, si, ei;
	uint64_t begin, total_cycles;
	uint32_t next_hop_add = 0xAA;
	uint8_t pos_core = (uint8_t)((uintptr_t)arg);

	si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
	ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;

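	/*
	 * Illustrative example of the split above: with num_writers = 2 and,
	 * say, NUM_LDEPTH_ROUTE_ENTRIES = 1000, writer 0 (pos_core = 0) works
	 * on j = 0..499 and writer 1 on j = 500..999, so concurrent writers
	 * add and delete disjoint slices of the depth > 24 route table.
	 */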
	/* Measure add/delete. */
	begin = rte_rdtsc_precise();
	for (i = 0; i < RCU_ITERATIONS; i++) {
		/* Add all the entries */
		for (j = si; j < ei; j++) {
			if (num_writers > 1)
				pthread_mutex_lock(&lpm_mutex);
			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth,
					next_hop_add) != 0) {
				printf("Failed to add iteration %d, route# %d\n",
					i, j);
				goto error;
			}
			if (num_writers > 1)
				pthread_mutex_unlock(&lpm_mutex);
		}

		/* Delete all the entries */
		for (j = si; j < ei; j++) {
			if (num_writers > 1)
				pthread_mutex_lock(&lpm_mutex);
			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth) != 0) {
				printf("Failed to delete iteration %d, route# %d\n",
					i, j);
				goto error;
			}
			if (num_writers > 1)
				pthread_mutex_unlock(&lpm_mutex);
		}
	}

	total_cycles = rte_rdtsc_precise() - begin;

	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);

	return 0;

error:
	if (num_writers > 1)
		pthread_mutex_unlock(&lpm_mutex);
	return -1;
}

/*
 * Perf test:
 * 1/2 writers, rest are readers
 */
static int
test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
{
	struct rte_lpm_config config;
	size_t sz;
	unsigned int i, j;
	uint16_t core_id;
	struct rte_lpm_rcu_config rcu_cfg = {0};
	int (*reader_f)(void *arg) = NULL;

	if (rte_lcore_count() < 3) {
		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
		return TEST_SKIPPED;
	}

	num_cores = 0;
	RTE_LCORE_FOREACH_WORKER(core_id) {
		enabled_core_ids[num_cores] = core_id;
		num_cores++;
	}

	for (j = 1; j < 3; j++) {
		if (use_rcu)
			printf("\nPerf test: %d writer(s), %d reader(s),"
				" RCU integration enabled\n", j, num_cores - j);
		else
			printf("\nPerf test: %d writer(s), %d reader(s),"
				" RCU integration disabled\n", j, num_cores - j);

		num_writers = j;

		/* Create LPM table */
		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
		config.flags = 0;
		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
		TEST_LPM_ASSERT(lpm != NULL);

		/* Init RCU variable */
		if (use_rcu) {
			sz = rte_rcu_qsbr_get_memsize(num_cores);
			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
							RTE_CACHE_LINE_SIZE);
			rte_rcu_qsbr_init(rv, num_cores);

			rcu_cfg.v = rv;
			/* Assign the RCU variable to LPM */
			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
				printf("RCU variable assignment failed\n");
				goto error;
			}

			reader_f = test_lpm_rcu_qsbr_reader;
		} else
			reader_f = test_lpm_reader;

		writer_done = 0;
		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);

		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

		/* Launch reader threads */
		for (i = j; i < num_cores; i++)
			rte_eal_remote_launch(reader_f, NULL,
						enabled_core_ids[i]);

		/* Launch writer threads */
		for (i = 0; i < j; i++)
			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
						(void *)(uintptr_t)i,
						enabled_core_ids[i]);

		/* Wait for writer threads */
		for (i = 0; i < j; i++)
			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
				goto error;

		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
			/ TOTAL_WRITES);

		writer_done = 1;
		/* Wait until all readers have exited */
		for (i = j; i < num_cores; i++)
			rte_eal_wait_lcore(enabled_core_ids[i]);

		rte_lpm_free(lpm);
		rte_free(rv);
		lpm = NULL;
		rv = NULL;
	}

	return 0;

error:
	writer_done = 1;
	/* Wait until all readers have exited */
	rte_eal_mp_wait_lcore();

	rte_lpm_free(lpm);
	rte_free(rv);

	return -1;
}

static int
test_lpm_perf(void)
{
	struct rte_lpm_config config;

	config.max_rules = 2000000;
	config.number_tbl8s = 2048;
	config.flags = 0;
	uint64_t begin, total_time, lpm_used_entries = 0;
	unsigned int i, j;
	uint32_t next_hop_add = 0xAA, next_hop_return = 0;
	int status = 0;
	uint64_t cache_line_counter = 0;
	int64_t count = 0;

	rte_srand(rte_rdtsc());

	generate_large_route_rule_table();

	printf("No. routes = %u\n", (unsigned int) NUM_ROUTE_ENTRIES);

	print_route_distribution(large_route_table, (uint32_t) NUM_ROUTE_ENTRIES);

	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
	TEST_LPM_ASSERT(lpm != NULL);

	/* Measure add. */
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		if (rte_lpm_add(lpm, large_route_table[i].ip,
				large_route_table[i].depth, next_hop_add) == 0)
			status++;
	}
	/* End Timer. */
	total_time = rte_rdtsc() - begin;

	printf("Unique added entries = %d\n", status);
	/* Obtain add statistics. */
	for (i = 0; i < RTE_LPM_TBL24_NUM_ENTRIES; i++) {
		if (lpm->tbl24[i].valid)
			lpm_used_entries++;

		if (i % 32 == 0) {
			if ((uint64_t)count < lpm_used_entries) {
				cache_line_counter++;
				count = lpm_used_entries;
			}
		}
	}

	printf("Used table 24 entries = %u (%g%%)\n",
		(unsigned int) lpm_used_entries,
		(lpm_used_entries * 100.0) / RTE_LPM_TBL24_NUM_ENTRIES);
	printf("64 byte Cache entries used = %u (%u bytes)\n",
		(unsigned int) cache_line_counter,
		(unsigned int) cache_line_counter * 64);

	printf("Average LPM Add: %g cycles\n",
		(double)total_time / NUM_ROUTE_ENTRIES);

	/* Measure single Lookup */
	total_time = 0;
	count = 0;

	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];

		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();

		for (j = 0; j < BATCH_SIZE; j++) {
			if (rte_lpm_lookup(lpm, ip_batch[j], &next_hop_return) != 0)
				count++;
		}

		total_time += rte_rdtsc() - begin;

	}
	printf("Average LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
		(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
		(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Measure bulk Lookup */
	total_time = 0;
	count = 0;
	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];
		uint32_t next_hops[BULK_SIZE];

		/* Create array of random IP addresses */
		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();
		for (j = 0; j < BATCH_SIZE; j += BULK_SIZE) {
			unsigned int k;
			rte_lpm_lookup_bulk(lpm, &ip_batch[j], next_hops, BULK_SIZE);
			for (k = 0; k < BULK_SIZE; k++)
				if (unlikely(!(next_hops[k] & RTE_LPM_LOOKUP_SUCCESS)))
					count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("BULK LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
		(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
		(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

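	/*
	 * Note for the LookupX4 measurement below: rte_lpm_lookupx4() takes a
	 * default next-hop value (UINT32_MAX here) which it writes for any of
	 * the four addresses without a matching route, so comparing the
	 * results against UINT32_MAX counts the lookup misses.
	 */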
	/* Measure LookupX4 */
	total_time = 0;
	count = 0;
	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];
		uint32_t next_hops[4];

		/* Create array of random IP addresses */
		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();
		for (j = 0; j < BATCH_SIZE; j += RTE_DIM(next_hops)) {
			unsigned int k;
			xmm_t ipx4;

			ipx4 = vect_loadu_sil128((xmm_t *)(ip_batch + j));
			rte_lpm_lookupx4(lpm, ipx4, next_hops, UINT32_MAX);
			for (k = 0; k < RTE_DIM(next_hops); k++)
				if (unlikely(next_hops[k] == UINT32_MAX))
					count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("LPM LookupX4: %.1f cycles (fails = %.1f%%)\n",
		(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
		(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Measure Delete */
	status = 0;
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		/* rte_lpm_delete(lpm, ip, depth) */
		status += rte_lpm_delete(lpm, large_route_table[i].ip,
				large_route_table[i].depth);
	}

	total_time = rte_rdtsc() - begin;

	printf("Average LPM Delete: %g cycles\n",
		(double)total_time / NUM_ROUTE_ENTRIES);

	rte_lpm_delete_all(lpm);
	rte_lpm_free(lpm);

	if (test_lpm_rcu_perf_multi_writer(0) < 0)
		return -1;

	if (test_lpm_rcu_perf_multi_writer(1) < 0)
		return -1;

	return 0;
}

REGISTER_TEST_COMMAND(lpm_perf_autotest, test_lpm_perf);