1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(C) 2020 Marvell International Ltd. 3 */ 4 5 #include <rte_cycles.h> 6 #include <rte_debug.h> 7 #include <rte_eal.h> 8 #include <rte_eal_trace.h> 9 #include <rte_malloc.h> 10 #include <rte_lcore.h> 11 12 #include "test.h" 13 #include "test_trace.h" 14 15 struct test_data; 16 17 struct lcore_data { 18 volatile bool done; 19 volatile bool started; 20 uint64_t total_cycles; 21 uint64_t total_calls; 22 } __rte_cache_aligned; 23 24 struct test_data { 25 unsigned int nb_workers; 26 struct lcore_data ldata[]; 27 } __rte_cache_aligned; 28 29 #define STEP 100 30 #define CENT_OPS(OP) do { \ 31 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 32 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 33 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 34 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 35 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 36 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 37 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 38 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 39 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 40 OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \ 41 } while (0) 42 43 static void 44 measure_perf(const char *str, struct test_data *data) 45 { 46 uint64_t hz = rte_get_timer_hz(); 47 uint64_t total_cycles = 0; 48 uint64_t total_calls = 0; 49 double cycles, ns; 50 unsigned int workers; 51 52 for (workers = 0; workers < data->nb_workers; workers++) { 53 total_cycles += data->ldata[workers].total_cycles; 54 total_calls += data->ldata[workers].total_calls; 55 } 56 57 cycles = total_calls ? (double)total_cycles / (double)total_calls : 0; 58 cycles /= STEP; 59 cycles /= 100; /* CENT_OPS */ 60 61 ns = (cycles / (double)hz) * 1E9; 62 printf("%16s: cycles=%f ns=%f\n", str, cycles, ns); 63 } 64 65 static void 66 wait_till_workers_are_ready(struct test_data *data) 67 { 68 unsigned int workers; 69 70 for (workers = 0; workers < data->nb_workers; workers++) 71 while (!data->ldata[workers].started) 72 rte_pause(); 73 } 74 75 static void 76 signal_workers_to_finish(struct test_data *data) 77 { 78 unsigned int workers; 79 80 for (workers = 0; workers < data->nb_workers; workers++) { 81 data->ldata[workers].done = 1; 82 rte_smp_wmb(); 83 } 84 } 85 86 #define WORKER_DEFINE(func) \ 87 static void __rte_noinline \ 88 __worker_##func(struct lcore_data *ldata) \ 89 { \ 90 uint64_t start; \ 91 int i; \ 92 while (!ldata->done) { \ 93 start = rte_get_timer_cycles(); \ 94 for (i = 0; i < STEP; i++) \ 95 CENT_OPS(func); \ 96 ldata->total_cycles += rte_get_timer_cycles() - start; \ 97 ldata->total_calls++; \ 98 } \ 99 } \ 100 static int \ 101 worker_fn_##func(void *arg) \ 102 { \ 103 struct lcore_data *ldata = arg; \ 104 ldata->started = 1; \ 105 rte_smp_wmb(); \ 106 __worker_##func(ldata); \ 107 return 0; \ 108 } 109 110 111 /* Test to find trace overhead */ 112 #define GENERIC_VOID rte_eal_trace_generic_void() 113 #define GENERIC_U64 rte_eal_trace_generic_u64(0x120000) 114 #define GENERIC_INT rte_eal_trace_generic_int(-34) 115 #define GENERIC_FLOAT rte_eal_trace_generic_float(3.3f) 116 #define GENERIC_DOUBLE rte_eal_trace_generic_double(3.66666) 117 #define GENERIC_STR rte_eal_trace_generic_str("hello world") 118 #define VOID_FP app_dpdk_test_fp() 119 120 WORKER_DEFINE(GENERIC_VOID) 121 WORKER_DEFINE(GENERIC_U64) 122 WORKER_DEFINE(GENERIC_INT) 123 WORKER_DEFINE(GENERIC_FLOAT) 124 WORKER_DEFINE(GENERIC_DOUBLE) 125 WORKER_DEFINE(GENERIC_STR) 126 WORKER_DEFINE(VOID_FP) 127 128 static void 129 run_test(const char *str, lcore_function_t f, struct test_data *data, size_t sz) 130 { 131 unsigned int id, worker = 0; 132 133 memset(data, 0, sz); 134 data->nb_workers = rte_lcore_count() - 1; 135 RTE_LCORE_FOREACH_WORKER(id) 136 rte_eal_remote_launch(f, &data->ldata[worker++], id); 137 138 wait_till_workers_are_ready(data); 139 rte_delay_ms(100); /* Wait for some time to accumulate the stats */ 140 measure_perf(str, data); 141 signal_workers_to_finish(data); 142 143 RTE_LCORE_FOREACH_WORKER(id) 144 rte_eal_wait_lcore(id); 145 } 146 147 static int 148 test_trace_perf(void) 149 { 150 unsigned int nb_cores, nb_workers; 151 struct test_data *data; 152 size_t sz; 153 154 nb_cores = rte_lcore_count(); 155 nb_workers = nb_cores - 1; 156 if (nb_cores < 2) { 157 printf("Need minimum two cores for testing\n"); 158 return TEST_SKIPPED; 159 } 160 161 printf("Timer running at %5.2fMHz\n", rte_get_timer_hz()/1E6); 162 sz = sizeof(struct test_data); 163 sz += nb_workers * sizeof(struct lcore_data); 164 165 data = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE); 166 if (data == NULL) { 167 printf("Failed to allocate memory\n"); 168 return TEST_FAILED; 169 } 170 171 run_test("void", worker_fn_GENERIC_VOID, data, sz); 172 run_test("u64", worker_fn_GENERIC_U64, data, sz); 173 run_test("int", worker_fn_GENERIC_INT, data, sz); 174 run_test("float", worker_fn_GENERIC_FLOAT, data, sz); 175 run_test("double", worker_fn_GENERIC_DOUBLE, data, sz); 176 run_test("string", worker_fn_GENERIC_STR, data, sz); 177 run_test("void_fp", worker_fn_VOID_FP, data, sz); 178 179 rte_free(data); 180 return TEST_SUCCESS; 181 } 182 183 REGISTER_TEST_COMMAND(trace_perf_autotest, test_trace_perf); 184