/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 * Copyright(c) 2020 Arm Limited
 */

#include "test.h"

#ifdef RTE_EXEC_ENV_WINDOWS
static int
test_lpm_perf(void)
{
	printf("lpm_perf not supported on Windows, skipping test\n");
	return TEST_SKIPPED;
}

#else
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <math.h>
#include <pthread.h>

#include <rte_cycles.h>
#include <rte_random.h>
#include <rte_branch_prediction.h>
#include <rte_malloc.h>
#include <rte_ip.h>
#include <rte_lpm.h>

#include "test_xmmt_ops.h"

static struct rte_lpm *lpm;
static struct rte_rcu_qsbr *rv;
static volatile uint8_t writer_done;
static volatile uint32_t thr_id;
static uint64_t gwrite_cycles;
static uint32_t num_writers;
/* LPM APIs are not thread safe, use a mutex to provide thread safety */
static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Report quiescent state every 1024 lookups. Larger critical sections
 * in the reader will result in the writer polling multiple times.
 */
#define QSBR_REPORTING_INTERVAL 1024

#define TEST_LPM_ASSERT(cond) do {				\
	if (!(cond)) {						\
		printf("Error at line %d:\n", __LINE__);	\
		return -1;					\
	}							\
} while (0)

#define ITERATIONS (1 << 10)
#define RCU_ITERATIONS 10
#define BATCH_SIZE (1 << 12)
#define BULK_SIZE 32

#define MAX_RULE_NUM (1200000)

struct route_rule {
	uint32_t ip;
	uint8_t depth;
};

static struct route_rule large_route_table[MAX_RULE_NUM];
/* Route table for routes with depth > 24 */
static struct route_rule large_ldepth_route_table[MAX_RULE_NUM];

static uint32_t num_route_entries;
static uint32_t num_ldepth_route_entries;
#define NUM_ROUTE_ENTRIES num_route_entries
#define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries

#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
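
/* TOTAL_WRITES is the per-run workload of the RCU perf test: every writer
 * iteration adds and then deletes each depth > 24 route exactly once, so a
 * run performs TOTAL_WRITES adds and TOTAL_WRITES deletes in total.
 */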

enum {
	IP_CLASS_A,
	IP_CLASS_B,
	IP_CLASS_C
};

/* struct route_rule_count defines the total number of rules per depth in the
 * following a/b/c arrays. Each entry in a[]/b[]/c[] is the number of common
 * IP addresses of class A/B/C respectively, not including the ones for
 * private local networks.
 */
struct route_rule_count {
	uint32_t a[RTE_LPM_MAX_DEPTH];
	uint32_t b[RTE_LPM_MAX_DEPTH];
	uint32_t c[RTE_LPM_MAX_DEPTH];
};

/* The per-depth counts below for each common IP class were taken from the
 * previous large constant table in app/test/test_lpm_routes.h. To keep the
 * performance comparable, they preserve the same depth and IP address
 * coverage as that constant table. These numbers do not include any private
 * local IP addresses. As the previous large constant rule table was dumped
 * from a real router, there are no addresses in class D or E.
 */
static struct route_rule_count rule_count = {
	.a = { /* IP class A in which the most significant bit is 0 */
		0, /* depth = 1 */
		0, /* depth = 2 */
		1, /* depth = 3 */
		0, /* depth = 4 */
		2, /* depth = 5 */
		1, /* depth = 6 */
		3, /* depth = 7 */
		185, /* depth = 8 */
		26, /* depth = 9 */
		16, /* depth = 10 */
		39, /* depth = 11 */
		144, /* depth = 12 */
		233, /* depth = 13 */
		528, /* depth = 14 */
		866, /* depth = 15 */
		3856, /* depth = 16 */
		3268, /* depth = 17 */
		5662, /* depth = 18 */
		17301, /* depth = 19 */
		22226, /* depth = 20 */
		11147, /* depth = 21 */
		16746, /* depth = 22 */
		17120, /* depth = 23 */
		77578, /* depth = 24 */
		401, /* depth = 25 */
		656, /* depth = 26 */
		1107, /* depth = 27 */
		1121, /* depth = 28 */
		2316, /* depth = 29 */
		717, /* depth = 30 */
		10, /* depth = 31 */
		66 /* depth = 32 */
	},
	.b = { /* IP class B in which the most significant 2 bits are 10 */
		0, /* depth = 1 */
		0, /* depth = 2 */
		0, /* depth = 3 */
		0, /* depth = 4 */
		1, /* depth = 5 */
		1, /* depth = 6 */
		1, /* depth = 7 */
		3, /* depth = 8 */
		3, /* depth = 9 */
		30, /* depth = 10 */
		25, /* depth = 11 */
		168, /* depth = 12 */
		305, /* depth = 13 */
		569, /* depth = 14 */
		1129, /* depth = 15 */
		50800, /* depth = 16 */
		1645, /* depth = 17 */
		1820, /* depth = 18 */
		3506, /* depth = 19 */
		3258, /* depth = 20 */
		3424, /* depth = 21 */
		4971, /* depth = 22 */
		6885, /* depth = 23 */
		39771, /* depth = 24 */
		424, /* depth = 25 */
		170, /* depth = 26 */
		433, /* depth = 27 */
		92, /* depth = 28 */
		366, /* depth = 29 */
		377, /* depth = 30 */
		2, /* depth = 31 */
		200 /* depth = 32 */
	},
	.c = { /* IP class C in which the most significant 3 bits are 110 */
		0, /* depth = 1 */
		0, /* depth = 2 */
		0, /* depth = 3 */
		0, /* depth = 4 */
		0, /* depth = 5 */
		0, /* depth = 6 */
		0, /* depth = 7 */
		12, /* depth = 8 */
		8, /* depth = 9 */
		9, /* depth = 10 */
		33, /* depth = 11 */
		69, /* depth = 12 */
		237, /* depth = 13 */
		1007, /* depth = 14 */
		1717, /* depth = 15 */
		14663, /* depth = 16 */
		8070, /* depth = 17 */
		16185, /* depth = 18 */
		48261, /* depth = 19 */
		36870, /* depth = 20 */
		33960, /* depth = 21 */
		50638, /* depth = 22 */
		61422, /* depth = 23 */
		466549, /* depth = 24 */
		1829, /* depth = 25 */
		4824, /* depth = 26 */
		4927, /* depth = 27 */
		5914, /* depth = 28 */
		10254, /* depth = 29 */
		4905, /* depth = 30 */
		1, /* depth = 31 */
		716 /* depth = 32 */
	}
};
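
/* Rule generation scheme used below: for a given class and depth, the
 * rule_count entry says how many prefixes to create. The variable (non-class)
 * prefix bits are walked from a random start with a stride of
 * round(range / rule_num), so the generated prefixes spread evenly over the
 * address space of that class. Worked example (class A, depth 16):
 * class_depth = 16 - 1 = 15, range = 1 << 15 = 32768, rule_num = 3856,
 * step = round(32768 / 3856) = 8, i.e. prefixes are spaced 8 apart in the
 * 15 variable bits, wrapping modulo 32768.
 */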

static void generate_random_rule_prefix(uint32_t ip_class, uint8_t depth)
{
/* IP address class A, the most significant bit is 0 */
#define IP_HEAD_MASK_A		0x00000000
#define IP_HEAD_BIT_NUM_A	1

/* IP address class B, the most significant 2 bits are 10 */
#define IP_HEAD_MASK_B		0x80000000
#define IP_HEAD_BIT_NUM_B	2

/* IP address class C, the most significant 3 bits are 110 */
#define IP_HEAD_MASK_C		0xC0000000
#define IP_HEAD_BIT_NUM_C	3

	uint32_t class_depth;
	uint32_t range;
	uint32_t mask;
	uint32_t step;
	uint32_t start;
	uint32_t fixed_bit_num;
	uint32_t ip_head_mask;
	uint32_t rule_num;
	uint32_t k;
	struct route_rule *ptr_rule, *ptr_ldepth_rule;

	if (ip_class == IP_CLASS_A) {        /* IP Address class A */
		fixed_bit_num = IP_HEAD_BIT_NUM_A;
		ip_head_mask = IP_HEAD_MASK_A;
		rule_num = rule_count.a[depth - 1];
	} else if (ip_class == IP_CLASS_B) { /* IP Address class B */
		fixed_bit_num = IP_HEAD_BIT_NUM_B;
		ip_head_mask = IP_HEAD_MASK_B;
		rule_num = rule_count.b[depth - 1];
	} else {                             /* IP Address class C */
		fixed_bit_num = IP_HEAD_BIT_NUM_C;
		ip_head_mask = IP_HEAD_MASK_C;
		rule_num = rule_count.c[depth - 1];
	}

	if (rule_num == 0)
		return;

	/* class_depth is the number of prefix bits left after removing the
	 * most significant fixed bits of this IP address class
	 */
	class_depth = depth - fixed_bit_num;

	/* range is the maximum number of rules for this depth and
	 * this IP address class
	 */
	range = 1 << class_depth;

	/* mask covers only the generated bits, i.e. the depth excluding the
	 * fixed bits of the IP address class
	 */
	mask = range - 1;

	/* Widen the IP address coverage of the generated rules */
	if (range <= rule_num)
		step = 1;
	else
		step = round((double)range / rule_num);

	/* Only generate the remaining bits; the most significant fixed bits
	 * of the IP address class are ORed in below.
	 */
	start = lrand48() & mask;
	ptr_rule = &large_route_table[num_route_entries];
	ptr_ldepth_rule = &large_ldepth_route_table[num_ldepth_route_entries];
	for (k = 0; k < rule_num; k++) {
		ptr_rule->ip = (start << (RTE_LPM_MAX_DEPTH - depth))
			| ip_head_mask;
		ptr_rule->depth = depth;
		/* If the depth of the route is more than 24, store it
		 * in another table as well.
		 */
		if (depth > 24) {
			ptr_ldepth_rule->ip = ptr_rule->ip;
			ptr_ldepth_rule->depth = ptr_rule->depth;
			ptr_ldepth_rule++;
			num_ldepth_route_entries++;
		}
		ptr_rule++;
		start = (start + step) & mask;
	}
	num_route_entries += rule_num;
}

static void insert_rule_in_random_pos(uint32_t ip, uint8_t depth)
{
	uint32_t pos;
	int try_count = 0;
	struct route_rule tmp;

	do {
		pos = lrand48();
		try_count++;
	} while ((try_count < 10) && (pos > num_route_entries));

	if ((pos > num_route_entries) || (pos >= MAX_RULE_NUM))
		pos = num_route_entries >> 1;

	tmp = large_route_table[pos];
	large_route_table[pos].ip = ip;
	large_route_table[pos].depth = depth;
	if (num_route_entries < MAX_RULE_NUM)
		large_route_table[num_route_entries++] = tmp;
}

static void generate_large_route_rule_table(void)
{
	uint32_t ip_class;
	uint8_t depth;

	num_route_entries = 0;
	num_ldepth_route_entries = 0;
	memset(large_route_table, 0, sizeof(large_route_table));

	for (ip_class = IP_CLASS_A; ip_class <= IP_CLASS_C; ip_class++) {
		for (depth = 1; depth <= RTE_LPM_MAX_DEPTH; depth++) {
			generate_random_rule_prefix(ip_class, depth);
		}
	}

	/* Add the following rules to stay in line with the previous large
	 * constant table: four rules with private local IP addresses and
	 * one all-zeros prefix with depth = 8.
	 */
	insert_rule_in_random_pos(RTE_IPV4(0, 0, 0, 0), 8);
	insert_rule_in_random_pos(RTE_IPV4(10, 2, 23, 147), 32);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 100, 10), 24);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 25, 100), 24);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 129, 124), 32);
}

static void
print_route_distribution(const struct route_rule *table, uint32_t n)
{
	unsigned i, j;

	printf("Route distribution per prefix width:\n");
	printf("DEPTH    QUANTITY (PERCENT)\n");
	printf("---------------------------\n");

	/* Count depths. */
	for (i = 1; i <= 32; i++) {
		unsigned depth_counter = 0;
		double percent_hits;

		for (j = 0; j < n; j++)
			if (table[j].depth == (uint8_t) i)
				depth_counter++;

		percent_hits = ((double)depth_counter)/((double)n) * 100;
		printf("%.2u%15u (%.2f)\n", i, depth_counter, percent_hits);
	}
	printf("\n");
}

/* Worker cores enabled for this test. */
static uint16_t enabled_core_ids[RTE_MAX_LCORE];
static unsigned int num_cores;

/* Simple way to allocate thread ids in 0 to RTE_MAX_LCORE space */
static inline uint32_t
alloc_thread_id(void)
{
	uint32_t tmp_thr_id;

	tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED);
	if (tmp_thr_id >= RTE_MAX_LCORE)
		printf("Invalid thread id %u\n", tmp_thr_id);

	return tmp_thr_id;
}

/*
 * Reader thread using the rte_lpm data structure without RCU.
 */
static int
test_lpm_reader(void *arg)
{
	int i;
	uint32_t ip_batch[QSBR_REPORTING_INTERVAL];
	uint32_t next_hop_return = 0;

	RTE_SET_USED(arg);
	do {
		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			ip_batch[i] = rte_rand();

		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			rte_lpm_lookup(lpm, ip_batch[i], &next_hop_return);

	} while (!writer_done);

	return 0;
}

/*
 * Reader thread using the rte_lpm data structure with RCU.
 */
static int
test_lpm_rcu_qsbr_reader(void *arg)
{
	int i;
	uint32_t thread_id = alloc_thread_id();
	uint32_t ip_batch[QSBR_REPORTING_INTERVAL];
	uint32_t next_hop_return = 0;

	RTE_SET_USED(arg);
	/* Register this thread to report quiescent state */
	rte_rcu_qsbr_thread_register(rv, thread_id);
	rte_rcu_qsbr_thread_online(rv, thread_id);

	do {
		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			ip_batch[i] = rte_rand();

		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			rte_lpm_lookup(lpm, ip_batch[i], &next_hop_return);

		/* Update quiescent state */
		rte_rcu_qsbr_quiescent(rv, thread_id);
	} while (!writer_done);

	rte_rcu_qsbr_thread_offline(rv, thread_id);
	rte_rcu_qsbr_thread_unregister(rv, thread_id);

	return 0;
}
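
/* Writers split the depth > 24 route table into contiguous, equally sized
 * slices: writer p of num_writers handles indices
 * [p * N / num_writers, (p + 1) * N / num_writers), where
 * N = NUM_LDEPTH_ROUTE_ENTRIES. rte_lpm_add()/rte_lpm_delete() are not
 * thread safe, so when more than one writer runs, each update is serialized
 * with lpm_mutex.
 */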

/*
 * Writer thread using the rte_lpm data structure with RCU.
 */
static int
test_lpm_rcu_qsbr_writer(void *arg)
{
	unsigned int i, j, si, ei;
	uint64_t begin, total_cycles;
	uint32_t next_hop_add = 0xAA;
	uint8_t pos_core = (uint8_t)((uintptr_t)arg);

	si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
	ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;

	/* Measure add/delete. */
	begin = rte_rdtsc_precise();
	for (i = 0; i < RCU_ITERATIONS; i++) {
		/* Add all the entries */
		for (j = si; j < ei; j++) {
			if (num_writers > 1)
				pthread_mutex_lock(&lpm_mutex);
			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth,
					next_hop_add) != 0) {
				printf("Failed to add iteration %d, route# %d\n",
					i, j);
				goto error;
			}
			if (num_writers > 1)
				pthread_mutex_unlock(&lpm_mutex);
		}

		/* Delete all the entries */
		for (j = si; j < ei; j++) {
			if (num_writers > 1)
				pthread_mutex_lock(&lpm_mutex);
			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth) != 0) {
				printf("Failed to delete iteration %d, route# %d\n",
					i, j);
				goto error;
			}
			if (num_writers > 1)
				pthread_mutex_unlock(&lpm_mutex);
		}
	}

	total_cycles = rte_rdtsc_precise() - begin;

	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);

	return 0;

error:
	if (num_writers > 1)
		pthread_mutex_unlock(&lpm_mutex);
	return -1;
}

/*
 * Performance test:
 * 1 or 2 writers, the remaining cores are readers.
 */
static int
test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
{
	struct rte_lpm_config config;
	size_t sz;
	unsigned int i, j;
	uint16_t core_id;
	struct rte_lpm_rcu_config rcu_cfg = {0};
	int (*reader_f)(void *arg) = NULL;

	if (rte_lcore_count() < 3) {
		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
		return TEST_SKIPPED;
	}

	num_cores = 0;
	RTE_LCORE_FOREACH_WORKER(core_id) {
		enabled_core_ids[num_cores] = core_id;
		num_cores++;
	}

	for (j = 1; j < 3; j++) {
		if (use_rcu)
			printf("\nPerf test: %d writer(s), %d reader(s),"
				" RCU integration enabled\n", j, num_cores - j);
		else
			printf("\nPerf test: %d writer(s), %d reader(s),"
				" RCU integration disabled\n", j, num_cores - j);

		num_writers = j;

		/* Create LPM table */
		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
		config.flags = 0;
		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
		TEST_LPM_ASSERT(lpm != NULL);

		/* Init RCU variable */
		if (use_rcu) {
			sz = rte_rcu_qsbr_get_memsize(num_cores);
			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
							RTE_CACHE_LINE_SIZE);
			if (rv == NULL) {
				printf("RCU QSBR variable allocation failed\n");
				goto error;
			}
			rte_rcu_qsbr_init(rv, num_cores);

			rcu_cfg.v = rv;
			/* Assign the RCU variable to LPM */
			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
				printf("RCU variable assignment failed\n");
				goto error;
			}

			reader_f = test_lpm_rcu_qsbr_reader;
		} else
			reader_f = test_lpm_reader;

		writer_done = 0;
		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);

		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

		/* Launch reader threads */
		for (i = j; i < num_cores; i++)
			rte_eal_remote_launch(reader_f, NULL,
						enabled_core_ids[i]);

		/* Launch writer threads */
		for (i = 0; i < j; i++)
			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
						(void *)(uintptr_t)i,
						enabled_core_ids[i]);

		/* Wait for writer threads */
		for (i = 0; i < j; i++)
			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
				goto error;
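
		/* gwrite_cycles accumulates each writer's cycle count for its
		 * whole slice across all iterations; dividing by TOTAL_WRITES
		 * below therefore reports the average CPU cycles per
		 * add/delete pair, not the wall-clock time of the run.
		 */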
		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
			/ TOTAL_WRITES);

		writer_done = 1;
		/* Wait until all readers have exited */
		for (i = j; i < num_cores; i++)
			rte_eal_wait_lcore(enabled_core_ids[i]);

		rte_lpm_free(lpm);
		rte_free(rv);
		lpm = NULL;
		rv = NULL;
	}

	return 0;

error:
	writer_done = 1;
	/* Wait until all readers have exited */
	rte_eal_mp_wait_lcore();

	rte_lpm_free(lpm);
	rte_free(rv);

	return -1;
}

static int
test_lpm_perf(void)
{
	struct rte_lpm_config config;

	config.max_rules = 2000000;
	config.number_tbl8s = 2048;
	config.flags = 0;
	uint64_t begin, total_time, lpm_used_entries = 0;
	unsigned i, j;
	uint32_t next_hop_add = 0xAA, next_hop_return = 0;
	int status = 0;
	uint64_t cache_line_counter = 0;
	int64_t count = 0;

	rte_srand(rte_rdtsc());

	generate_large_route_rule_table();

	printf("No. routes = %u\n", (unsigned) NUM_ROUTE_ENTRIES);

	print_route_distribution(large_route_table, (uint32_t) NUM_ROUTE_ENTRIES);

	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
	TEST_LPM_ASSERT(lpm != NULL);

	/* Measure add. */
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		if (rte_lpm_add(lpm, large_route_table[i].ip,
				large_route_table[i].depth, next_hop_add) == 0)
			status++;
	}
	/* End Timer. */
	total_time = rte_rdtsc() - begin;

	printf("Unique added entries = %d\n", status);
	/* Obtain add statistics. */
	for (i = 0; i < RTE_LPM_TBL24_NUM_ENTRIES; i++) {
		if (lpm->tbl24[i].valid)
			lpm_used_entries++;

		if (i % 32 == 0) {
			if ((uint64_t)count < lpm_used_entries) {
				cache_line_counter++;
				count = lpm_used_entries;
			}
		}
	}

	printf("Used table 24 entries = %u (%g%%)\n",
			(unsigned) lpm_used_entries,
			(lpm_used_entries * 100.0) / RTE_LPM_TBL24_NUM_ENTRIES);
	printf("64 byte Cache entries used = %u (%u bytes)\n",
			(unsigned) cache_line_counter, (unsigned) cache_line_counter * 64);

	printf("Average LPM Add: %g cycles\n",
			(double)total_time / NUM_ROUTE_ENTRIES);

	/* Measure single Lookup */
	total_time = 0;
	count = 0;

	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];

		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();

		for (j = 0; j < BATCH_SIZE; j++) {
			if (rte_lpm_lookup(lpm, ip_batch[j], &next_hop_return) != 0)
				count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("Average LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
			(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
			(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Measure bulk Lookup */
	total_time = 0;
	count = 0;
	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];
		uint32_t next_hops[BULK_SIZE];

		/* Create array of random IP addresses */
		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();
		for (j = 0; j < BATCH_SIZE; j += BULK_SIZE) {
			unsigned k;
			rte_lpm_lookup_bulk(lpm, &ip_batch[j], next_hops, BULK_SIZE);
			for (k = 0; k < BULK_SIZE; k++)
				if (unlikely(!(next_hops[k] & RTE_LPM_LOOKUP_SUCCESS)))
					count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("BULK LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
			(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
			(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));
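
	/* rte_lpm_lookupx4() looks up four IPv4 addresses packed into one
	 * xmm_t vector. Its last argument is the default next hop written for
	 * addresses that miss, so passing UINT32_MAX lets the loop below
	 * count failures by checking for that value.
	 */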
	/* Measure LookupX4 */
	total_time = 0;
	count = 0;
	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];
		uint32_t next_hops[4];

		/* Create array of random IP addresses */
		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();
		for (j = 0; j < BATCH_SIZE; j += RTE_DIM(next_hops)) {
			unsigned k;
			xmm_t ipx4;

			/* Use an unaligned vector load; ip_batch is only
			 * guaranteed to be 4-byte aligned.
			 */
			ipx4 = vect_loadu_sil128((xmm_t *)(ip_batch + j));
			rte_lpm_lookupx4(lpm, ipx4, next_hops, UINT32_MAX);
			for (k = 0; k < RTE_DIM(next_hops); k++)
				if (unlikely(next_hops[k] == UINT32_MAX))
					count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("LPM LookupX4: %.1f cycles (fails = %.1f%%)\n",
			(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
			(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Measure Delete */
	status = 0;
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		/* rte_lpm_delete(lpm, ip, depth) */
		status += rte_lpm_delete(lpm, large_route_table[i].ip,
				large_route_table[i].depth);
	}

	total_time = rte_rdtsc() - begin;

	printf("Average LPM Delete: %g cycles\n",
			(double)total_time / NUM_ROUTE_ENTRIES);

	rte_lpm_delete_all(lpm);
	rte_lpm_free(lpm);

	if (test_lpm_rcu_perf_multi_writer(0) < 0)
		return -1;

	if (test_lpm_rcu_perf_multi_writer(1) < 0)
		return -1;

	return 0;
}

#endif /* !RTE_EXEC_ENV_WINDOWS */

REGISTER_TEST_COMMAND(lpm_perf_autotest, test_lpm_perf);
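
/* This perf test is registered as "lpm_perf_autotest"; it is typically run
 * from the dpdk-test application, e.g. by entering lpm_perf_autotest at the
 * RTE>> prompt or passing the name via the DPDK_TEST environment variable.
 */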