1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2018 Arm Limited 3 */ 4 5 #include <stdio.h> 6 #include <stdbool.h> 7 #include <inttypes.h> 8 #include <rte_pause.h> 9 #include <rte_rcu_qsbr.h> 10 #include <rte_hash.h> 11 #include <rte_hash_crc.h> 12 #include <rte_malloc.h> 13 #include <rte_cycles.h> 14 #include <unistd.h> 15 16 #include "test.h" 17 18 /* Check condition and return an error if true. */ 19 static uint16_t enabled_core_ids[RTE_MAX_LCORE]; 20 static unsigned int num_cores; 21 22 static uint32_t *keys; 23 #define TOTAL_ENTRY (1024 * 8) 24 #define COUNTER_VALUE 4096 25 static uint32_t *hash_data[TOTAL_ENTRY]; 26 static volatile uint8_t writer_done; 27 static volatile uint8_t all_registered; 28 static volatile uint32_t thr_id; 29 30 static struct rte_rcu_qsbr *t[RTE_MAX_LCORE]; 31 static struct rte_hash *h; 32 static char hash_name[8]; 33 static rte_atomic64_t updates, checks; 34 static rte_atomic64_t update_cycles, check_cycles; 35 36 /* Scale down results to 1000 operations to support lower 37 * granularity clocks. 38 */ 39 #define RCU_SCALE_DOWN 1000 40 41 /* Simple way to allocate thread ids in 0 to RTE_MAX_LCORE space */ 42 static inline uint32_t 43 alloc_thread_id(void) 44 { 45 uint32_t tmp_thr_id; 46 47 tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED); 48 if (tmp_thr_id >= RTE_MAX_LCORE) 49 printf("Invalid thread id %u\n", tmp_thr_id); 50 51 return tmp_thr_id; 52 } 53 54 static int 55 test_rcu_qsbr_reader_perf(void *arg) 56 { 57 bool writer_present = (bool)arg; 58 uint32_t thread_id = alloc_thread_id(); 59 uint64_t loop_cnt = 0; 60 uint64_t begin, cycles; 61 62 /* Register for report QS */ 63 rte_rcu_qsbr_thread_register(t[0], thread_id); 64 /* Make the thread online */ 65 rte_rcu_qsbr_thread_online(t[0], thread_id); 66 67 begin = rte_rdtsc_precise(); 68 69 if (writer_present) { 70 while (!writer_done) { 71 /* Update quiescent state counter */ 72 rte_rcu_qsbr_quiescent(t[0], thread_id); 73 loop_cnt++; 74 } 75 } else { 76 while (loop_cnt < 100000000) { 77 /* Update quiescent state counter */ 78 rte_rcu_qsbr_quiescent(t[0], thread_id); 79 loop_cnt++; 80 } 81 } 82 83 cycles = rte_rdtsc_precise() - begin; 84 rte_atomic64_add(&update_cycles, cycles); 85 rte_atomic64_add(&updates, loop_cnt); 86 87 /* Make the thread offline */ 88 rte_rcu_qsbr_thread_offline(t[0], thread_id); 89 /* Unregister before exiting to avoid writer from waiting */ 90 rte_rcu_qsbr_thread_unregister(t[0], thread_id); 91 92 return 0; 93 } 94 95 static int 96 test_rcu_qsbr_writer_perf(void *arg) 97 { 98 bool wait = (bool)arg; 99 uint64_t token = 0; 100 uint64_t loop_cnt = 0; 101 uint64_t begin, cycles; 102 103 begin = rte_rdtsc_precise(); 104 105 do { 106 /* Start the quiescent state query process */ 107 if (wait) 108 token = rte_rcu_qsbr_start(t[0]); 109 110 /* Check quiescent state status */ 111 rte_rcu_qsbr_check(t[0], token, wait); 112 loop_cnt++; 113 } while (loop_cnt < 20000000); 114 115 cycles = rte_rdtsc_precise() - begin; 116 rte_atomic64_add(&check_cycles, cycles); 117 rte_atomic64_add(&checks, loop_cnt); 118 return 0; 119 } 120 121 /* 122 * Perf test: Reader/writer 123 * Single writer, Multiple Readers, Single QS var, Non-Blocking rcu_qsbr_check 124 */ 125 static int 126 test_rcu_qsbr_perf(void) 127 { 128 int sz; 129 unsigned int i, tmp_num_cores; 130 131 writer_done = 0; 132 133 rte_atomic64_clear(&updates); 134 rte_atomic64_clear(&update_cycles); 135 rte_atomic64_clear(&checks); 136 rte_atomic64_clear(&check_cycles); 137 138 printf("\nPerf Test: %d Readers/1 Writer('wait' in qsbr_check == true)\n", 139 num_cores - 1); 140 141 __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); 142 143 if (all_registered == 1) 144 tmp_num_cores = num_cores - 1; 145 else 146 tmp_num_cores = RTE_MAX_LCORE; 147 148 sz = rte_rcu_qsbr_get_memsize(tmp_num_cores); 149 t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, 150 RTE_CACHE_LINE_SIZE); 151 /* QS variable is initialized */ 152 rte_rcu_qsbr_init(t[0], tmp_num_cores); 153 154 /* Reader threads are launched */ 155 for (i = 0; i < num_cores - 1; i++) 156 rte_eal_remote_launch(test_rcu_qsbr_reader_perf, (void *)1, 157 enabled_core_ids[i]); 158 159 /* Writer thread is launched */ 160 rte_eal_remote_launch(test_rcu_qsbr_writer_perf, 161 (void *)1, enabled_core_ids[i]); 162 163 /* Wait for the writer thread */ 164 rte_eal_wait_lcore(enabled_core_ids[i]); 165 writer_done = 1; 166 167 /* Wait until all readers have exited */ 168 rte_eal_mp_wait_lcore(); 169 170 printf("Total RCU updates = %"PRIi64"\n", rte_atomic64_read(&updates)); 171 printf("Cycles per %d updates: %"PRIi64"\n", RCU_SCALE_DOWN, 172 rte_atomic64_read(&update_cycles) / 173 (rte_atomic64_read(&updates) / RCU_SCALE_DOWN)); 174 printf("Total RCU checks = %"PRIi64"\n", rte_atomic64_read(&checks)); 175 printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN, 176 rte_atomic64_read(&check_cycles) / 177 (rte_atomic64_read(&checks) / RCU_SCALE_DOWN)); 178 179 rte_free(t[0]); 180 181 return 0; 182 } 183 184 /* 185 * Perf test: Readers 186 * Single writer, Multiple readers, Single QS variable 187 */ 188 static int 189 test_rcu_qsbr_rperf(void) 190 { 191 int sz; 192 unsigned int i, tmp_num_cores; 193 194 rte_atomic64_clear(&updates); 195 rte_atomic64_clear(&update_cycles); 196 197 __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); 198 199 printf("\nPerf Test: %d Readers\n", num_cores); 200 201 if (all_registered == 1) 202 tmp_num_cores = num_cores; 203 else 204 tmp_num_cores = RTE_MAX_LCORE; 205 206 sz = rte_rcu_qsbr_get_memsize(tmp_num_cores); 207 t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, 208 RTE_CACHE_LINE_SIZE); 209 /* QS variable is initialized */ 210 rte_rcu_qsbr_init(t[0], tmp_num_cores); 211 212 /* Reader threads are launched */ 213 for (i = 0; i < num_cores; i++) 214 rte_eal_remote_launch(test_rcu_qsbr_reader_perf, NULL, 215 enabled_core_ids[i]); 216 217 /* Wait until all readers have exited */ 218 rte_eal_mp_wait_lcore(); 219 220 printf("Total RCU updates = %"PRIi64"\n", rte_atomic64_read(&updates)); 221 printf("Cycles per %d updates: %"PRIi64"\n", RCU_SCALE_DOWN, 222 rte_atomic64_read(&update_cycles) / 223 (rte_atomic64_read(&updates) / RCU_SCALE_DOWN)); 224 225 rte_free(t[0]); 226 227 return 0; 228 } 229 230 /* 231 * Perf test: 232 * Multiple writer, Single QS variable, Non-blocking rcu_qsbr_check 233 */ 234 static int 235 test_rcu_qsbr_wperf(void) 236 { 237 int sz; 238 unsigned int i; 239 240 rte_atomic64_clear(&checks); 241 rte_atomic64_clear(&check_cycles); 242 243 __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); 244 245 printf("\nPerf test: %d Writers ('wait' in qsbr_check == false)\n", 246 num_cores); 247 248 /* Number of readers does not matter for QS variable in this test 249 * case as no reader will be registered. 250 */ 251 sz = rte_rcu_qsbr_get_memsize(RTE_MAX_LCORE); 252 t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, 253 RTE_CACHE_LINE_SIZE); 254 /* QS variable is initialized */ 255 rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE); 256 257 /* Writer threads are launched */ 258 for (i = 0; i < num_cores; i++) 259 rte_eal_remote_launch(test_rcu_qsbr_writer_perf, 260 (void *)0, enabled_core_ids[i]); 261 262 /* Wait until all readers have exited */ 263 rte_eal_mp_wait_lcore(); 264 265 printf("Total RCU checks = %"PRIi64"\n", rte_atomic64_read(&checks)); 266 printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN, 267 rte_atomic64_read(&check_cycles) / 268 (rte_atomic64_read(&checks) / RCU_SCALE_DOWN)); 269 270 rte_free(t[0]); 271 272 return 0; 273 } 274 275 /* 276 * RCU test cases using rte_hash data structure. 277 */ 278 static int 279 test_rcu_qsbr_hash_reader(void *arg) 280 { 281 struct rte_rcu_qsbr *temp; 282 struct rte_hash *hash = NULL; 283 int i; 284 uint64_t loop_cnt = 0; 285 uint64_t begin, cycles; 286 uint32_t thread_id = alloc_thread_id(); 287 uint8_t read_type = (uint8_t)((uintptr_t)arg); 288 uint32_t *pdata; 289 290 temp = t[read_type]; 291 hash = h; 292 293 rte_rcu_qsbr_thread_register(temp, thread_id); 294 295 begin = rte_rdtsc_precise(); 296 297 do { 298 rte_rcu_qsbr_thread_online(temp, thread_id); 299 for (i = 0; i < TOTAL_ENTRY; i++) { 300 rte_rcu_qsbr_lock(temp, thread_id); 301 if (rte_hash_lookup_data(hash, keys + i, 302 (void **)&pdata) != -ENOENT) { 303 pdata[thread_id] = 0; 304 while (pdata[thread_id] < COUNTER_VALUE) 305 pdata[thread_id]++; 306 } 307 rte_rcu_qsbr_unlock(temp, thread_id); 308 } 309 /* Update quiescent state counter */ 310 rte_rcu_qsbr_quiescent(temp, thread_id); 311 rte_rcu_qsbr_thread_offline(temp, thread_id); 312 loop_cnt++; 313 } while (!writer_done); 314 315 cycles = rte_rdtsc_precise() - begin; 316 rte_atomic64_add(&update_cycles, cycles); 317 rte_atomic64_add(&updates, loop_cnt); 318 319 rte_rcu_qsbr_thread_unregister(temp, thread_id); 320 321 return 0; 322 } 323 324 static struct rte_hash *init_hash(void) 325 { 326 int i; 327 struct rte_hash *hash = NULL; 328 329 snprintf(hash_name, 8, "hash"); 330 struct rte_hash_parameters hash_params = { 331 .entries = TOTAL_ENTRY, 332 .key_len = sizeof(uint32_t), 333 .hash_func_init_val = 0, 334 .socket_id = rte_socket_id(), 335 .hash_func = rte_hash_crc, 336 .extra_flag = 337 RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF, 338 .name = hash_name, 339 }; 340 341 hash = rte_hash_create(&hash_params); 342 if (hash == NULL) { 343 printf("Hash create Failed\n"); 344 return NULL; 345 } 346 347 for (i = 0; i < TOTAL_ENTRY; i++) { 348 hash_data[i] = rte_zmalloc(NULL, 349 sizeof(uint32_t) * RTE_MAX_LCORE, 0); 350 if (hash_data[i] == NULL) { 351 printf("No memory\n"); 352 return NULL; 353 } 354 } 355 keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0); 356 if (keys == NULL) { 357 printf("No memory\n"); 358 return NULL; 359 } 360 361 for (i = 0; i < TOTAL_ENTRY; i++) 362 keys[i] = i; 363 364 for (i = 0; i < TOTAL_ENTRY; i++) { 365 if (rte_hash_add_key_data(hash, keys + i, 366 (void *)((uintptr_t)hash_data[i])) < 0) { 367 printf("Hash key add Failed #%d\n", i); 368 return NULL; 369 } 370 } 371 return hash; 372 } 373 374 /* 375 * Functional test: 376 * Single writer, Single QS variable Single QSBR query, Blocking rcu_qsbr_check 377 */ 378 static int 379 test_rcu_qsbr_sw_sv_1qs(void) 380 { 381 uint64_t token, begin, cycles; 382 int sz; 383 unsigned int i, j, tmp_num_cores; 384 int32_t pos; 385 386 writer_done = 0; 387 388 rte_atomic64_clear(&updates); 389 rte_atomic64_clear(&update_cycles); 390 rte_atomic64_clear(&checks); 391 rte_atomic64_clear(&check_cycles); 392 393 __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); 394 395 printf("\nPerf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Blocking QSBR Check\n", num_cores); 396 397 if (all_registered == 1) 398 tmp_num_cores = num_cores; 399 else 400 tmp_num_cores = RTE_MAX_LCORE; 401 402 sz = rte_rcu_qsbr_get_memsize(tmp_num_cores); 403 t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, 404 RTE_CACHE_LINE_SIZE); 405 /* QS variable is initialized */ 406 rte_rcu_qsbr_init(t[0], tmp_num_cores); 407 408 /* Shared data structure created */ 409 h = init_hash(); 410 if (h == NULL) { 411 printf("Hash init failed\n"); 412 goto error; 413 } 414 415 /* Reader threads are launched */ 416 for (i = 0; i < num_cores; i++) 417 rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL, 418 enabled_core_ids[i]); 419 420 begin = rte_rdtsc_precise(); 421 422 for (i = 0; i < TOTAL_ENTRY; i++) { 423 /* Delete elements from the shared data structure */ 424 pos = rte_hash_del_key(h, keys + i); 425 if (pos < 0) { 426 printf("Delete key failed #%d\n", keys[i]); 427 goto error; 428 } 429 /* Start the quiescent state query process */ 430 token = rte_rcu_qsbr_start(t[0]); 431 432 /* Check the quiescent state status */ 433 rte_rcu_qsbr_check(t[0], token, true); 434 for (j = 0; j < tmp_num_cores; j++) { 435 if (hash_data[i][j] != COUNTER_VALUE && 436 hash_data[i][j] != 0) { 437 printf("Reader thread ID %u did not complete #%d = %d\n", 438 j, i, hash_data[i][j]); 439 goto error; 440 } 441 } 442 443 if (rte_hash_free_key_with_position(h, pos) < 0) { 444 printf("Failed to free the key #%d\n", keys[i]); 445 goto error; 446 } 447 rte_free(hash_data[i]); 448 hash_data[i] = NULL; 449 } 450 451 cycles = rte_rdtsc_precise() - begin; 452 rte_atomic64_add(&check_cycles, cycles); 453 rte_atomic64_add(&checks, i); 454 455 writer_done = 1; 456 457 /* Wait and check return value from reader threads */ 458 for (i = 0; i < num_cores; i++) 459 if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) 460 goto error; 461 rte_hash_free(h); 462 rte_free(keys); 463 464 printf("Following numbers include calls to rte_hash functions\n"); 465 printf("Cycles per 1 update(online/update/offline): %"PRIi64"\n", 466 rte_atomic64_read(&update_cycles) / 467 rte_atomic64_read(&updates)); 468 469 printf("Cycles per 1 check(start, check): %"PRIi64"\n\n", 470 rte_atomic64_read(&check_cycles) / 471 rte_atomic64_read(&checks)); 472 473 rte_free(t[0]); 474 475 return 0; 476 477 error: 478 writer_done = 1; 479 /* Wait until all readers have exited */ 480 rte_eal_mp_wait_lcore(); 481 482 rte_hash_free(h); 483 rte_free(keys); 484 for (i = 0; i < TOTAL_ENTRY; i++) 485 rte_free(hash_data[i]); 486 487 rte_free(t[0]); 488 489 return -1; 490 } 491 492 /* 493 * Functional test: 494 * Single writer, Single QS variable, Single QSBR query, 495 * Non-blocking rcu_qsbr_check 496 */ 497 static int 498 test_rcu_qsbr_sw_sv_1qs_non_blocking(void) 499 { 500 uint64_t token, begin, cycles; 501 int ret, sz; 502 unsigned int i, j, tmp_num_cores; 503 int32_t pos; 504 505 writer_done = 0; 506 507 printf("Perf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Non-Blocking QSBR check\n", num_cores); 508 509 __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); 510 511 if (all_registered == 1) 512 tmp_num_cores = num_cores; 513 else 514 tmp_num_cores = RTE_MAX_LCORE; 515 516 sz = rte_rcu_qsbr_get_memsize(tmp_num_cores); 517 t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, 518 RTE_CACHE_LINE_SIZE); 519 /* QS variable is initialized */ 520 rte_rcu_qsbr_init(t[0], tmp_num_cores); 521 522 /* Shared data structure created */ 523 h = init_hash(); 524 if (h == NULL) { 525 printf("Hash init failed\n"); 526 goto error; 527 } 528 529 /* Reader threads are launched */ 530 for (i = 0; i < num_cores; i++) 531 rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL, 532 enabled_core_ids[i]); 533 534 begin = rte_rdtsc_precise(); 535 536 for (i = 0; i < TOTAL_ENTRY; i++) { 537 /* Delete elements from the shared data structure */ 538 pos = rte_hash_del_key(h, keys + i); 539 if (pos < 0) { 540 printf("Delete key failed #%d\n", keys[i]); 541 goto error; 542 } 543 /* Start the quiescent state query process */ 544 token = rte_rcu_qsbr_start(t[0]); 545 546 /* Check the quiescent state status */ 547 do { 548 ret = rte_rcu_qsbr_check(t[0], token, false); 549 } while (ret == 0); 550 for (j = 0; j < tmp_num_cores; j++) { 551 if (hash_data[i][j] != COUNTER_VALUE && 552 hash_data[i][j] != 0) { 553 printf("Reader thread ID %u did not complete #%d = %d\n", 554 j, i, hash_data[i][j]); 555 goto error; 556 } 557 } 558 559 if (rte_hash_free_key_with_position(h, pos) < 0) { 560 printf("Failed to free the key #%d\n", keys[i]); 561 goto error; 562 } 563 rte_free(hash_data[i]); 564 hash_data[i] = NULL; 565 } 566 567 cycles = rte_rdtsc_precise() - begin; 568 rte_atomic64_add(&check_cycles, cycles); 569 rte_atomic64_add(&checks, i); 570 571 writer_done = 1; 572 /* Wait and check return value from reader threads */ 573 for (i = 0; i < num_cores; i++) 574 if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) 575 goto error; 576 rte_hash_free(h); 577 rte_free(keys); 578 579 printf("Following numbers include calls to rte_hash functions\n"); 580 printf("Cycles per 1 update(online/update/offline): %"PRIi64"\n", 581 rte_atomic64_read(&update_cycles) / 582 rte_atomic64_read(&updates)); 583 584 printf("Cycles per 1 check(start, check): %"PRIi64"\n\n", 585 rte_atomic64_read(&check_cycles) / 586 rte_atomic64_read(&checks)); 587 588 rte_free(t[0]); 589 590 return 0; 591 592 error: 593 writer_done = 1; 594 /* Wait until all readers have exited */ 595 rte_eal_mp_wait_lcore(); 596 597 rte_hash_free(h); 598 rte_free(keys); 599 for (i = 0; i < TOTAL_ENTRY; i++) 600 rte_free(hash_data[i]); 601 602 rte_free(t[0]); 603 604 return -1; 605 } 606 607 static int 608 test_rcu_qsbr_main(void) 609 { 610 uint16_t core_id; 611 612 if (rte_lcore_count() < 3) { 613 printf("Not enough cores for rcu_qsbr_perf_autotest, expecting at least 3\n"); 614 return TEST_SKIPPED; 615 } 616 617 rte_atomic64_init(&updates); 618 rte_atomic64_init(&update_cycles); 619 rte_atomic64_init(&checks); 620 rte_atomic64_init(&check_cycles); 621 622 num_cores = 0; 623 RTE_LCORE_FOREACH_SLAVE(core_id) { 624 enabled_core_ids[num_cores] = core_id; 625 num_cores++; 626 } 627 628 printf("Number of cores provided = %d\n", num_cores); 629 printf("Perf test with all reader threads registered\n"); 630 printf("--------------------------------------------\n"); 631 all_registered = 1; 632 633 if (test_rcu_qsbr_perf() < 0) 634 goto test_fail; 635 636 if (test_rcu_qsbr_rperf() < 0) 637 goto test_fail; 638 639 if (test_rcu_qsbr_wperf() < 0) 640 goto test_fail; 641 642 if (test_rcu_qsbr_sw_sv_1qs() < 0) 643 goto test_fail; 644 645 if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0) 646 goto test_fail; 647 648 /* Make sure the actual number of cores provided is less than 649 * RTE_MAX_LCORE. This will allow for some threads not 650 * to be registered on the QS variable. 651 */ 652 if (num_cores >= RTE_MAX_LCORE) { 653 printf("Test failed! number of cores provided should be less than %d\n", 654 RTE_MAX_LCORE); 655 goto test_fail; 656 } 657 658 printf("Perf test with some of reader threads registered\n"); 659 printf("------------------------------------------------\n"); 660 all_registered = 0; 661 662 if (test_rcu_qsbr_perf() < 0) 663 goto test_fail; 664 665 if (test_rcu_qsbr_rperf() < 0) 666 goto test_fail; 667 668 if (test_rcu_qsbr_wperf() < 0) 669 goto test_fail; 670 671 if (test_rcu_qsbr_sw_sv_1qs() < 0) 672 goto test_fail; 673 674 if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0) 675 goto test_fail; 676 677 printf("\n"); 678 679 return 0; 680 681 test_fail: 682 return -1; 683 } 684 685 REGISTER_TEST_COMMAND(rcu_qsbr_perf_autotest, test_rcu_qsbr_main); 686