1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2021 NVIDIA Corporation & Affiliates 3 */ 4 5 #include <inttypes.h> 6 #include <string.h> 7 #include <rte_cycles.h> 8 #include <rte_errno.h> 9 #include <rte_malloc.h> 10 #include <rte_memzone.h> 11 12 #include "test.h" 13 14 #define TEST_LOG(level, ...) RTE_LOG(level, USER1, __VA_ARGS__) 15 16 typedef void * (alloc_t)(const char *name, size_t size, unsigned int align); 17 typedef void (free_t)(void *addr); 18 typedef void * (memset_t)(void *addr, int value, size_t size); 19 20 static const uint64_t KB = 1 << 10; 21 static const uint64_t GB = 1 << 30; 22 23 static double 24 tsc_to_us(uint64_t tsc, size_t runs) 25 { 26 return (double)tsc / rte_get_tsc_hz() * US_PER_S / runs; 27 } 28 29 static int 30 test_memset_perf(double *us_per_gb) 31 { 32 static const size_t RUNS = 20; 33 34 void *ptr; 35 size_t i; 36 uint64_t tsc; 37 38 TEST_LOG(INFO, "Reference: memset\n"); 39 40 ptr = rte_malloc(NULL, GB, 0); 41 if (ptr == NULL) { 42 TEST_LOG(ERR, "rte_malloc(size=%"PRIx64") failed\n", GB); 43 return -1; 44 } 45 46 tsc = rte_rdtsc_precise(); 47 for (i = 0; i < RUNS; i++) 48 memset(ptr, 0, GB); 49 tsc = rte_rdtsc_precise() - tsc; 50 51 *us_per_gb = tsc_to_us(tsc, RUNS); 52 TEST_LOG(INFO, "Result: %f.3 GiB/s <=> %.2f us/MiB\n", 53 US_PER_S / *us_per_gb, *us_per_gb / KB); 54 55 rte_free(ptr); 56 TEST_LOG(INFO, "\n"); 57 return 0; 58 } 59 60 static int 61 test_alloc_perf(const char *name, alloc_t *alloc_fn, free_t *free_fn, 62 memset_t *memset_fn, double memset_gb_us, size_t max_runs) 63 { 64 static const size_t SIZES[] = { 65 1 << 6, 1 << 7, 1 << 10, 1 << 12, 1 << 16, 1 << 20, 66 1 << 21, 1 << 22, 1 << 24, 1 << 30 }; 67 68 size_t i, j; 69 void **ptrs; 70 71 TEST_LOG(INFO, "Performance: %s\n", name); 72 73 ptrs = calloc(max_runs, sizeof(ptrs[0])); 74 if (ptrs == NULL) { 75 TEST_LOG(ERR, "Cannot allocate memory for pointers"); 76 return -1; 77 } 78 79 TEST_LOG(INFO, "%12s%8s%12s%12s%12s%17s\n", "Size (B)", "Runs", 80 "Alloc (us)", "Free (us)", "Total (us)", 81 memset_fn != NULL ? "memset (us)" : "est.memset (us)"); 82 for (i = 0; i < RTE_DIM(SIZES); i++) { 83 size_t size = SIZES[i]; 84 size_t runs_done; 85 uint64_t tsc_start, tsc_alloc, tsc_memset = 0, tsc_free; 86 double alloc_time, free_time, memset_time; 87 88 tsc_start = rte_rdtsc_precise(); 89 for (j = 0; j < max_runs; j++) { 90 ptrs[j] = alloc_fn(NULL, size, 0); 91 if (ptrs[j] == NULL) 92 break; 93 } 94 tsc_alloc = rte_rdtsc_precise() - tsc_start; 95 96 if (j == 0) { 97 TEST_LOG(INFO, "%12zu Interrupted: out of memory.\n", 98 size); 99 break; 100 } 101 runs_done = j; 102 103 if (memset_fn != NULL) { 104 tsc_start = rte_rdtsc_precise(); 105 for (j = 0; j < runs_done && ptrs[j] != NULL; j++) 106 memset_fn(ptrs[j], 0, size); 107 tsc_memset = rte_rdtsc_precise() - tsc_start; 108 } 109 110 tsc_start = rte_rdtsc_precise(); 111 for (j = 0; j < runs_done && ptrs[j] != NULL; j++) 112 free_fn(ptrs[j]); 113 tsc_free = rte_rdtsc_precise() - tsc_start; 114 115 alloc_time = tsc_to_us(tsc_alloc, runs_done); 116 free_time = tsc_to_us(tsc_free, runs_done); 117 memset_time = memset_fn != NULL ? 118 tsc_to_us(tsc_memset, runs_done) : 119 memset_gb_us * size / GB; 120 TEST_LOG(INFO, "%12zu%8zu%12.2f%12.2f%12.2f%17.2f\n", 121 size, runs_done, alloc_time, free_time, 122 alloc_time + free_time, memset_time); 123 124 memset(ptrs, 0, max_runs * sizeof(ptrs[0])); 125 } 126 127 free(ptrs); 128 TEST_LOG(INFO, "\n"); 129 return 0; 130 } 131 132 static void * 133 memzone_alloc(const char *name __rte_unused, size_t size, unsigned int align) 134 { 135 const struct rte_memzone *mz; 136 char gen_name[RTE_MEMZONE_NAMESIZE]; 137 138 snprintf(gen_name, sizeof(gen_name), "test-mz-%"PRIx64, rte_rdtsc()); 139 mz = rte_memzone_reserve_aligned(gen_name, size, SOCKET_ID_ANY, 140 RTE_MEMZONE_1GB | RTE_MEMZONE_SIZE_HINT_ONLY, align); 141 return (void *)(uintptr_t)mz; 142 } 143 144 static void 145 memzone_free(void *addr) 146 { 147 rte_memzone_free((struct rte_memzone *)addr); 148 } 149 150 static int 151 test_malloc_perf(void) 152 { 153 static const size_t MAX_RUNS = 10000; 154 155 double memset_us_gb = 0; 156 157 if (test_memset_perf(&memset_us_gb) < 0) 158 return -1; 159 160 if (test_alloc_perf("rte_malloc", rte_malloc, rte_free, memset, 161 memset_us_gb, MAX_RUNS) < 0) 162 return -1; 163 if (test_alloc_perf("rte_zmalloc", rte_zmalloc, rte_free, memset, 164 memset_us_gb, MAX_RUNS) < 0) 165 return -1; 166 167 if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free, 168 NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0) 169 return -1; 170 171 return 0; 172 } 173 174 REGISTER_PERF_TEST(malloc_perf_autotest, test_malloc_perf); 175