1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(C) 2020 Marvell International Ltd. 3 */ 4 5 #include <rte_cycles.h> 6 #include <rte_debug.h> 7 #include <rte_eal.h> 8 #include <rte_eal_trace.h> 9 #include <rte_malloc.h> 10 #include <rte_lcore.h> 11 12 #include "test.h" 13 #include "test_trace.h" 14 15 struct test_data; 16 17 struct lcore_data { 18 volatile bool done; 19 volatile bool started; 20 uint64_t total_cycles; 21 uint64_t total_calls; 22 } __rte_cache_aligned; 23 24 struct test_data { 25 unsigned int nb_workers; 26 struct lcore_data ldata[]; 27 } __rte_cache_aligned; 28 29 #define STEP 100 30 #define CENT_OPS(OP) do { \ 31 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 32 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 33 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 34 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 35 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 36 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 37 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 38 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 39 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 40 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 41 } while (0) 42 43 static void 44 measure_perf(const char *str, struct test_data *data) 45 { 46 uint64_t hz = rte_get_timer_hz(); 47 uint64_t total_cycles = 0; 48 uint64_t total_calls = 0; 49 double cycles, ns; 50 unsigned int workers; 51 52 for (workers = 0; workers < data->nb_workers; workers++) { 53 total_cycles += data->ldata[workers].total_cycles; 54 total_calls += data->ldata[workers].total_calls; 55 } 56 57 cycles = total_calls ? (double)total_cycles / (double)total_calls : 0; 58 cycles /= STEP; 59 cycles /= 100; /* CENT_OPS */ 60 61 ns = (cycles / (double)hz) * 1E9; 62 printf("%16s: cycles=%f ns=%f\n", str, cycles, ns); 63 } 64 65 static void 66 wait_till_workers_are_ready(struct test_data *data) 67 { 68 unsigned int workers; 69 70 for (workers = 0; workers < data->nb_workers; workers++) 71 while (!data->ldata[workers].started) 72 rte_pause(); 73 } 74 75 static void 76 signal_workers_to_finish(struct test_data *data) 77 { 78 unsigned int workers; 79 80 for (workers = 0; workers < data->nb_workers; workers++) { 81 data->ldata[workers].done = 1; 82 } 83 } 84 85 #define WORKER_DEFINE(func) \ 86 static void __rte_noinline \ 87 __worker_##func(struct lcore_data *ldata) \ 88 { \ 89 uint64_t start; \ 90 int i; \ 91 while (!ldata->done) { \ 92 start = rte_get_timer_cycles(); \ 93 for (i = 0; i < STEP; i++) \ 94 CENT_OPS(func); \ 95 ldata->total_cycles += rte_get_timer_cycles() - start; \ 96 ldata->total_calls++; \ 97 } \ 98 } \ 99 static int \ 100 worker_fn_##func(void *arg) \ 101 { \ 102 struct lcore_data *ldata = arg; \ 103 ldata->started = 1; \ 104 __worker_##func(ldata); \ 105 return 0; \ 106 } 107 108 109 /* Test to find trace overhead */ 110 #define GENERIC_VOID rte_eal_trace_generic_void() 111 #define GENERIC_U64 rte_eal_trace_generic_u64(0x120000) 112 #define GENERIC_INT rte_eal_trace_generic_int(-34) 113 #define GENERIC_FLOAT rte_eal_trace_generic_float(3.3f) 114 #define GENERIC_DOUBLE rte_eal_trace_generic_double(3.66666) 115 #define GENERIC_STR rte_eal_trace_generic_str("hello world") 116 #define VOID_FP app_dpdk_test_fp() 117 118 WORKER_DEFINE(GENERIC_VOID) 119 WORKER_DEFINE(GENERIC_U64) 120 WORKER_DEFINE(GENERIC_INT) 121 WORKER_DEFINE(GENERIC_FLOAT) 122 WORKER_DEFINE(GENERIC_DOUBLE) 123 WORKER_DEFINE(GENERIC_STR) 124 WORKER_DEFINE(VOID_FP) 125 126 static void 127 run_test(const char *str, lcore_function_t f, struct test_data *data, size_t sz) 128 { 129 unsigned int id, worker = 0; 130 131 memset(data, 0, sz); 132 data->nb_workers = rte_lcore_count() - 1; 133 RTE_LCORE_FOREACH_WORKER(id) 134 rte_eal_remote_launch(f, &data->ldata[worker++], id); 135 136 wait_till_workers_are_ready(data); 137 rte_delay_ms(100); /* Wait for some time to accumulate the stats */ 138 signal_workers_to_finish(data); 139 140 RTE_LCORE_FOREACH_WORKER(id) 141 rte_eal_wait_lcore(id); 142 143 measure_perf(str, data); 144 } 145 146 static int 147 test_trace_perf(void) 148 { 149 unsigned int nb_cores, nb_workers; 150 struct test_data *data; 151 size_t sz; 152 153 nb_cores = rte_lcore_count(); 154 nb_workers = nb_cores - 1; 155 if (nb_cores < 2) { 156 printf("Need minimum two cores for testing\n"); 157 return TEST_SKIPPED; 158 } 159 160 printf("Timer running at %5.2fMHz\n", rte_get_timer_hz()/1E6); 161 sz = sizeof(struct test_data); 162 sz += nb_workers * sizeof(struct lcore_data); 163 164 data = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE); 165 if (data == NULL) { 166 printf("Failed to allocate memory\n"); 167 return TEST_FAILED; 168 } 169 170 run_test("void", worker_fn_GENERIC_VOID, data, sz); 171 run_test("u64", worker_fn_GENERIC_U64, data, sz); 172 run_test("int", worker_fn_GENERIC_INT, data, sz); 173 run_test("float", worker_fn_GENERIC_FLOAT, data, sz); 174 run_test("double", worker_fn_GENERIC_DOUBLE, data, sz); 175 run_test("string", worker_fn_GENERIC_STR, data, sz); 176 run_test("void_fp", worker_fn_VOID_FP, data, sz); 177 178 rte_free(data); 179 return TEST_SUCCESS; 180 } 181 182 REGISTER_PERF_TEST(trace_perf_autotest, test_trace_perf); 183