/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 * Copyright(c) 2020 Arm Limited
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <math.h>

#include <rte_cycles.h>
#include <rte_random.h>
#include <rte_branch_prediction.h>
#include <rte_malloc.h>
#include <rte_ip.h>
#include <rte_lpm.h>
#include <rte_spinlock.h>

#include "test.h"
#include "test_xmmt_ops.h"

static struct rte_lpm *lpm;
static struct rte_rcu_qsbr *rv;
static volatile uint8_t writer_done;
static volatile RTE_ATOMIC(uint32_t) thr_id;
static RTE_ATOMIC(uint64_t) gwrite_cycles;
static uint32_t num_writers;

/* LPM APIs are not thread safe, use spinlock */
static rte_spinlock_t lpm_lock = RTE_SPINLOCK_INITIALIZER;

/* Report quiescent state every 1024 lookups. Larger critical sections
 * in the reader will result in the writer polling multiple times.
 */
#define QSBR_REPORTING_INTERVAL 1024

#define TEST_LPM_ASSERT(cond) do {				\
	if (!(cond)) {						\
		printf("Error at line %d:\n", __LINE__);	\
		return -1;					\
	}							\
} while (0)

#define ITERATIONS (1 << 10)
#define RCU_ITERATIONS 10
#define BATCH_SIZE (1 << 12)
#define BULK_SIZE 32

#define MAX_RULE_NUM (1200000)

struct route_rule {
	uint32_t ip;
	uint8_t depth;
};

static struct route_rule large_route_table[MAX_RULE_NUM];
/* Route table for routes with depth > 24 */
static struct route_rule large_ldepth_route_table[MAX_RULE_NUM];

static uint32_t num_route_entries;
static uint32_t num_ldepth_route_entries;
#define NUM_ROUTE_ENTRIES num_route_entries
#define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries

#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)

enum {
	IP_CLASS_A,
	IP_CLASS_B,
	IP_CLASS_C
};

/* struct route_rule_count defines the total number of rules per depth in
 * the a/b/c arrays below. Each entry in a[]/b[]/c[] is the number of common
 * class A/B/C IP addresses at that depth, not including the ones for
 * private local networks.
 */
struct route_rule_count {
	uint32_t a[RTE_LPM_MAX_DEPTH];
	uint32_t b[RTE_LPM_MAX_DEPTH];
	uint32_t c[RTE_LPM_MAX_DEPTH];
};

/* The following rule counts for each depth of each common IP class were
 * taken from the previous large constant table in app/test/test_lpm_routes.h.
 * In order to give similar performance, they keep the same depth and IP
 * address coverage as the previous constant table. These numbers don't
 * include any private local IP addresses. As the previous large constant
 * rule table was dumped from a real router, there are no IP addresses
 * in class D or E.
 */
static struct route_rule_count rule_count = {
	.a = { /* IP class A in which the most significant bit is 0 */
		0, /* depth = 1 */
		0, /* depth = 2 */
		1, /* depth = 3 */
		0, /* depth = 4 */
		2, /* depth = 5 */
		1, /* depth = 6 */
		3, /* depth = 7 */
		185, /* depth = 8 */
		26, /* depth = 9 */
		16, /* depth = 10 */
		39, /* depth = 11 */
		144, /* depth = 12 */
		233, /* depth = 13 */
		528, /* depth = 14 */
		866, /* depth = 15 */
		3856, /* depth = 16 */
		3268, /* depth = 17 */
		5662, /* depth = 18 */
		17301, /* depth = 19 */
		22226, /* depth = 20 */
		11147, /* depth = 21 */
		16746, /* depth = 22 */
		17120, /* depth = 23 */
		77578, /* depth = 24 */
		401, /* depth = 25 */
		656, /* depth = 26 */
		1107, /* depth = 27 */
		1121, /* depth = 28 */
		2316, /* depth = 29 */
		717, /* depth = 30 */
		10, /* depth = 31 */
		66 /* depth = 32 */
	},
	.b = { /* IP class B in which the most significant 2 bits are 10 */
		0, /* depth = 1 */
		0, /* depth = 2 */
		0, /* depth = 3 */
		0, /* depth = 4 */
		1, /* depth = 5 */
		1, /* depth = 6 */
		1, /* depth = 7 */
		3, /* depth = 8 */
		3, /* depth = 9 */
		30, /* depth = 10 */
		25, /* depth = 11 */
		168, /* depth = 12 */
		305, /* depth = 13 */
		569, /* depth = 14 */
		1129, /* depth = 15 */
		50800, /* depth = 16 */
		1645, /* depth = 17 */
		1820, /* depth = 18 */
		3506, /* depth = 19 */
		3258, /* depth = 20 */
		3424, /* depth = 21 */
		4971, /* depth = 22 */
		6885, /* depth = 23 */
		39771, /* depth = 24 */
		424, /* depth = 25 */
		170, /* depth = 26 */
		433, /* depth = 27 */
		92, /* depth = 28 */
		366, /* depth = 29 */
		377, /* depth = 30 */
		2, /* depth = 31 */
		200 /* depth = 32 */
	},
	.c = { /* IP class C in which the most significant 3 bits are 110 */
		0, /* depth = 1 */
		0, /* depth = 2 */
		0, /* depth = 3 */
		0, /* depth = 4 */
		0, /* depth = 5 */
		0, /* depth = 6 */
		0, /* depth = 7 */
		12, /* depth = 8 */
		8, /* depth = 9 */
		9, /* depth = 10 */
		33, /* depth = 11 */
		69, /* depth = 12 */
		237, /* depth = 13 */
		1007, /* depth = 14 */
		1717, /* depth = 15 */
		14663, /* depth = 16 */
		8070, /* depth = 17 */
		16185, /* depth = 18 */
		48261, /* depth = 19 */
		36870, /* depth = 20 */
		33960, /* depth = 21 */
		50638, /* depth = 22 */
		61422, /* depth = 23 */
		466549, /* depth = 24 */
		1829, /* depth = 25 */
		4824, /* depth = 26 */
		4927, /* depth = 27 */
		5914, /* depth = 28 */
		10254, /* depth = 29 */
		4905, /* depth = 30 */
		1, /* depth = 31 */
		716 /* depth = 32 */
	}
};

static void generate_random_rule_prefix(uint32_t ip_class, uint8_t depth)
{
/* IP address class A, the most significant bit is 0 */
#define IP_HEAD_MASK_A		0x00000000
#define IP_HEAD_BIT_NUM_A	1

/* IP address class B, the most significant 2 bits are 10 */
#define IP_HEAD_MASK_B		0x80000000
#define IP_HEAD_BIT_NUM_B	2

/* IP address class C, the most significant 3 bits are 110 */
#define IP_HEAD_MASK_C		0xC0000000
#define IP_HEAD_BIT_NUM_C	3

	uint32_t class_depth;
	uint32_t range;
	uint32_t mask;
	uint32_t step;
	uint32_t start;
	uint32_t fixed_bit_num;
	uint32_t ip_head_mask;
	uint32_t rule_num;
	uint32_t k;
	struct route_rule *ptr_rule, *ptr_ldepth_rule;

	if (ip_class == IP_CLASS_A) {		/* IP Address class A */
		fixed_bit_num = IP_HEAD_BIT_NUM_A;
		ip_head_mask = IP_HEAD_MASK_A;
		rule_num = rule_count.a[depth - 1];
	} else if (ip_class == IP_CLASS_B) {	/* IP Address class B */
		fixed_bit_num = IP_HEAD_BIT_NUM_B;
		ip_head_mask = IP_HEAD_MASK_B;
		rule_num = rule_count.b[depth - 1];
	} else {				/* IP Address class C */
		fixed_bit_num = IP_HEAD_BIT_NUM_C;
		ip_head_mask = IP_HEAD_MASK_C;
		rule_num = rule_count.c[depth - 1];
	}

	if (rule_num == 0)
		return;

	/* the number of remaining bits, not counting the most significant
	 * fixed bits of this IP address class
	 */
	class_depth = depth - fixed_bit_num;

	/* range is the maximum number of rules for this depth and
	 * this IP address class
	 */
	range = 1 << class_depth;

	/* mask covers only the generated bits of this depth, excluding
	 * the fixed bits of the IP address class
	 */
	mask = range - 1;

	/* Widen the IP address coverage of the generated rules */
	if (range <= rule_num)
		step = 1;
	else
		step = round((double)range / rule_num);

	/* Only generate the remaining bits, excluding the most significant
	 * fixed bits of the IP address class
	 */
	start = rte_rand() & mask;
	ptr_rule = &large_route_table[num_route_entries];
	ptr_ldepth_rule = &large_ldepth_route_table[num_ldepth_route_entries];
	for (k = 0; k < rule_num; k++) {
		ptr_rule->ip = (start << (RTE_LPM_MAX_DEPTH - depth))
			| ip_head_mask;
		ptr_rule->depth = depth;
		/* If the depth of the route is more than 24, store it
		 * in another table as well.
		 */
		if (depth > 24) {
			ptr_ldepth_rule->ip = ptr_rule->ip;
			ptr_ldepth_rule->depth = ptr_rule->depth;
			ptr_ldepth_rule++;
			num_ldepth_route_entries++;
		}
		ptr_rule++;
		start = (start + step) & mask;
	}
	num_route_entries += rule_num;
}

static void insert_rule_in_random_pos(uint32_t ip, uint8_t depth)
{
	uint32_t pos;
	int try_count = 0;
	struct route_rule tmp;

	do {
		pos = rte_rand();
		try_count++;
	} while ((try_count < 10) && (pos > num_route_entries));

	if ((pos > num_route_entries) || (pos >= MAX_RULE_NUM))
		pos = num_route_entries >> 1;

	tmp = large_route_table[pos];
	large_route_table[pos].ip = ip;
	large_route_table[pos].depth = depth;
	if (num_route_entries < MAX_RULE_NUM)
		large_route_table[num_route_entries++] = tmp;
}

static void generate_large_route_rule_table(void)
{
	uint32_t ip_class;
	uint8_t depth;

	num_route_entries = 0;
	num_ldepth_route_entries = 0;
	memset(large_route_table, 0, sizeof(large_route_table));

	for (ip_class = IP_CLASS_A; ip_class <= IP_CLASS_C; ip_class++) {
		for (depth = 1; depth <= RTE_LPM_MAX_DEPTH; depth++) {
			generate_random_rule_prefix(ip_class, depth);
		}
	}

	/* Add the following rules to keep the table the same as the previous
	 * large constant table: four rules with private local IP addresses
	 * and one all-zeros prefix with depth = 8.
	 */
	insert_rule_in_random_pos(RTE_IPV4(0, 0, 0, 0), 8);
	insert_rule_in_random_pos(RTE_IPV4(10, 2, 23, 147), 32);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 100, 10), 24);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 25, 100), 24);
	insert_rule_in_random_pos(RTE_IPV4(192, 168, 129, 124), 32);
}

static void
print_route_distribution(const struct route_rule *table, uint32_t n)
{
	unsigned int i, j;

	printf("Route distribution per prefix width:\n");
	printf("DEPTH    QUANTITY (PERCENT)\n");
	printf("---------------------------\n");

	/* Count depths. */
	for (i = 1; i <= 32; i++) {
		unsigned int depth_counter = 0;
		double percent_hits;

		for (j = 0; j < n; j++)
			if (table[j].depth == (uint8_t) i)
				depth_counter++;

		percent_hits = ((double)depth_counter)/((double)n) * 100;
		printf("%.2u%15u (%.2f)\n", i, depth_counter, percent_hits);
	}
	printf("\n");
}

/* Worker cores available for the reader and writer threads */
static uint16_t enabled_core_ids[RTE_MAX_LCORE];
static unsigned int num_cores;

/* Simple way to allocate thread ids in 0 to RTE_MAX_LCORE space */
static inline uint32_t
alloc_thread_id(void)
{
	uint32_t tmp_thr_id;

	tmp_thr_id = rte_atomic_fetch_add_explicit(&thr_id, 1, rte_memory_order_relaxed);
	if (tmp_thr_id >= RTE_MAX_LCORE)
		printf("Invalid thread id %u\n", tmp_thr_id);

	return tmp_thr_id;
}

/*
 * Reader thread using rte_lpm data structure without RCU.
 */
static int
test_lpm_reader(void *arg)
{
	int i;
	uint32_t ip_batch[QSBR_REPORTING_INTERVAL];
	uint32_t next_hop_return = 0;

	RTE_SET_USED(arg);
	do {
		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			ip_batch[i] = rte_rand();

		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			rte_lpm_lookup(lpm, ip_batch[i], &next_hop_return);

	} while (!writer_done);

	return 0;
}

/*
 * Reader thread using rte_lpm data structure with RCU.
 */
static int
test_lpm_rcu_qsbr_reader(void *arg)
{
	int i;
	uint32_t thread_id = alloc_thread_id();
	uint32_t ip_batch[QSBR_REPORTING_INTERVAL];
	uint32_t next_hop_return = 0;

	RTE_SET_USED(arg);
	/* Register this thread to report quiescent state */
	rte_rcu_qsbr_thread_register(rv, thread_id);
	rte_rcu_qsbr_thread_online(rv, thread_id);

	do {
		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			ip_batch[i] = rte_rand();

		for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
			rte_lpm_lookup(lpm, ip_batch[i], &next_hop_return);

		/* Update quiescent state */
		rte_rcu_qsbr_quiescent(rv, thread_id);
	} while (!writer_done);

	rte_rcu_qsbr_thread_offline(rv, thread_id);
	rte_rcu_qsbr_thread_unregister(rv, thread_id);

	return 0;
}

/*
 * Writer thread adding and deleting routes in the rte_lpm data structure,
 * used for both the RCU and non-RCU runs.
 */
static int
test_lpm_rcu_qsbr_writer(void *arg)
{
	unsigned int i, j, si, ei;
	uint64_t begin, total_cycles;
	uint32_t next_hop_add = 0xAA;
	uint8_t pos_core = (uint8_t)((uintptr_t)arg);

	/* Each writer operates on its own slice of the depth > 24 routes */
	si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
	ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;

	/* Measure add/delete. */
	begin = rte_rdtsc_precise();
	for (i = 0; i < RCU_ITERATIONS; i++) {
		/* Add all the entries */
		for (j = si; j < ei; j++) {
			rte_spinlock_lock(&lpm_lock);
			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth,
					next_hop_add) != 0) {
				printf("Failed to add iteration %d, route# %d\n",
					i, j);
				goto error;
			}
			rte_spinlock_unlock(&lpm_lock);
		}

		/* Delete all the entries */
		for (j = si; j < ei; j++) {
			rte_spinlock_lock(&lpm_lock);
			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
					large_ldepth_route_table[j].depth) != 0) {
				printf("Failed to delete iteration %d, route# %d\n",
					i, j);
				goto error;
			}
			rte_spinlock_unlock(&lpm_lock);
		}
	}

	total_cycles = rte_rdtsc_precise() - begin;

	rte_atomic_fetch_add_explicit(&gwrite_cycles, total_cycles, rte_memory_order_relaxed);

	return 0;

error:
	rte_spinlock_unlock(&lpm_lock);
	return -1;
}

/*
 * Perf test:
 * 1/2 writers, rest are readers
 */
static int
test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
{
	struct rte_lpm_config config;
	size_t sz;
	unsigned int i, j;
	uint16_t core_id;
	struct rte_lpm_rcu_config rcu_cfg = {0};
	int (*reader_f)(void *arg) = NULL;

	if (rte_lcore_count() < 3) {
		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
		return TEST_SKIPPED;
	}

	num_cores = 0;
	RTE_LCORE_FOREACH_WORKER(core_id) {
		enabled_core_ids[num_cores] = core_id;
		num_cores++;
	}

	for (j = 1; j < 3; j++) {
		if (use_rcu)
			printf("\nPerf test: %d writer(s), %d reader(s),"
			       " RCU integration enabled\n", j, num_cores - j);
		else
			printf("\nPerf test: %d writer(s), %d reader(s),"
			       " RCU integration disabled\n", j, num_cores - j);

		num_writers = j;

		/* Create LPM table */
		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
		config.flags = 0;
		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
		TEST_LPM_ASSERT(lpm != NULL);

		/* Init RCU variable */
		if (use_rcu) {
			sz = rte_rcu_qsbr_get_memsize(num_cores);
			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
							RTE_CACHE_LINE_SIZE);
			rte_rcu_qsbr_init(rv, num_cores);

			rcu_cfg.v = rv;
			/* Assign the RCU variable to LPM */
			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
				printf("RCU variable assignment failed\n");
				goto error;
			}

			reader_f = test_lpm_rcu_qsbr_reader;
		} else
			reader_f = test_lpm_reader;

		writer_done = 0;
		rte_atomic_store_explicit(&gwrite_cycles, 0, rte_memory_order_relaxed);

		rte_atomic_store_explicit(&thr_id, 0, rte_memory_order_seq_cst);

		/* Launch reader threads */
		for (i = j; i < num_cores; i++)
			rte_eal_remote_launch(reader_f, NULL,
						enabled_core_ids[i]);

		/* Launch writer threads */
		for (i = 0; i < j; i++)
			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
						(void *)(uintptr_t)i,
						enabled_core_ids[i]);

		/* Wait for writer threads */
		for (i = 0; i < j; i++)
			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
				goto error;

		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
			rte_atomic_load_explicit(&gwrite_cycles, rte_memory_order_relaxed)
			/ TOTAL_WRITES);

		writer_done = 1;
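		/* Readers check writer_done once per lookup batch, so they
		 * stop shortly after this store becomes visible to them.
		 */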
		/* Wait until all readers have exited */
		for (i = j; i < num_cores; i++)
			rte_eal_wait_lcore(enabled_core_ids[i]);

		rte_lpm_free(lpm);
		rte_free(rv);
		lpm = NULL;
		rv = NULL;
	}

	return 0;

error:
	writer_done = 1;
	/* Wait until all readers have exited */
	rte_eal_mp_wait_lcore();

	rte_lpm_free(lpm);
	rte_free(rv);

	return -1;
}

static int
test_lpm_perf(void)
{
	struct rte_lpm_config config;

	config.max_rules = 2000000;
	config.number_tbl8s = 2048;
	config.flags = 0;
	uint64_t begin, total_time, lpm_used_entries = 0;
	unsigned int i, j;
	uint32_t next_hop_add = 0xAA, next_hop_return = 0;
	int status = 0;
	uint64_t cache_line_counter = 0;
	int64_t count = 0;

	generate_large_route_rule_table();

	printf("No. routes = %u\n", (unsigned int) NUM_ROUTE_ENTRIES);

	print_route_distribution(large_route_table, (uint32_t) NUM_ROUTE_ENTRIES);

	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
	TEST_LPM_ASSERT(lpm != NULL);

	/* Measure add. */
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		if (rte_lpm_add(lpm, large_route_table[i].ip,
				large_route_table[i].depth, next_hop_add) == 0)
			status++;
	}
	/* End Timer. */
	total_time = rte_rdtsc() - begin;

	printf("Unique added entries = %d\n", status);
	/* Obtain add statistics. */
	for (i = 0; i < RTE_LPM_TBL24_NUM_ENTRIES; i++) {
		if (lpm->tbl24[i].valid)
			lpm_used_entries++;

		if (i % 32 == 0) {
			if ((uint64_t)count < lpm_used_entries) {
				cache_line_counter++;
				count = lpm_used_entries;
			}
		}
	}

	printf("Used table 24 entries = %u (%g%%)\n",
			(unsigned int) lpm_used_entries,
			(lpm_used_entries * 100.0) / RTE_LPM_TBL24_NUM_ENTRIES);
	printf("64 byte Cache entries used = %u (%u bytes)\n",
			(unsigned int) cache_line_counter,
			(unsigned int) cache_line_counter * 64);

	printf("Average LPM Add: %g cycles\n",
			(double)total_time / NUM_ROUTE_ENTRIES);

	/* Measure single Lookup */
	total_time = 0;
	count = 0;

	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];

		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();

		for (j = 0; j < BATCH_SIZE; j++) {
			if (rte_lpm_lookup(lpm, ip_batch[j], &next_hop_return) != 0)
				count++;
		}

		total_time += rte_rdtsc() - begin;

	}
	printf("Average LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
			(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
			(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Measure bulk Lookup */
	total_time = 0;
	count = 0;
	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];
		uint32_t next_hops[BULK_SIZE];

		/* Create array of random IP addresses */
		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();
		for (j = 0; j < BATCH_SIZE; j += BULK_SIZE) {
			unsigned int k;
			rte_lpm_lookup_bulk(lpm, &ip_batch[j], next_hops, BULK_SIZE);
			for (k = 0; k < BULK_SIZE; k++)
				if (unlikely(!(next_hops[k] & RTE_LPM_LOOKUP_SUCCESS)))
					count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("BULK LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
			(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
			(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));
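
	/*
	 * rte_lpm_lookupx4() looks up four IPv4 addresses per call and stores
	 * the supplied default value (UINT32_MAX here) for any address that
	 * has no matching route; the loop below counts those as failures.
	 */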
	/* Measure LookupX4 */
	total_time = 0;
	count = 0;
	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];
		uint32_t next_hops[4];

		/* Create array of random IP addresses */
		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();
		for (j = 0; j < BATCH_SIZE; j += RTE_DIM(next_hops)) {
			unsigned int k;
			xmm_t ipx4;

			/* Unaligned vector load of the next four addresses */
			ipx4 = vect_loadu_sil128((xmm_t *)(ip_batch + j));
			rte_lpm_lookupx4(lpm, ipx4, next_hops, UINT32_MAX);
			for (k = 0; k < RTE_DIM(next_hops); k++)
				if (unlikely(next_hops[k] == UINT32_MAX))
					count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("LPM LookupX4: %.1f cycles (fails = %.1f%%)\n",
			(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
			(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Measure Delete */
	status = 0;
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		/* rte_lpm_delete(lpm, ip, depth) */
		status += rte_lpm_delete(lpm, large_route_table[i].ip,
				large_route_table[i].depth);
	}

	total_time = rte_rdtsc() - begin;

	printf("Average LPM Delete: %g cycles\n",
			(double)total_time / NUM_ROUTE_ENTRIES);

	rte_lpm_delete_all(lpm);
	rte_lpm_free(lpm);

	if (test_lpm_rcu_perf_multi_writer(0) < 0)
		return -1;

	if (test_lpm_rcu_perf_multi_writer(1) < 0)
		return -1;

	return 0;
}

REGISTER_PERF_TEST(lpm_perf_autotest, test_lpm_perf);