1*c62b318aSDmitry Kozlyuk /* SPDX-License-Identifier: BSD-3-Clause 2*c62b318aSDmitry Kozlyuk * Copyright (c) 2021 NVIDIA Corporation & Affiliates 3*c62b318aSDmitry Kozlyuk */ 4*c62b318aSDmitry Kozlyuk 5*c62b318aSDmitry Kozlyuk #include <inttypes.h> 6*c62b318aSDmitry Kozlyuk #include <string.h> 7*c62b318aSDmitry Kozlyuk #include <rte_cycles.h> 8*c62b318aSDmitry Kozlyuk #include <rte_errno.h> 9*c62b318aSDmitry Kozlyuk #include <rte_malloc.h> 10*c62b318aSDmitry Kozlyuk #include <rte_memzone.h> 11*c62b318aSDmitry Kozlyuk 12*c62b318aSDmitry Kozlyuk #include "test.h" 13*c62b318aSDmitry Kozlyuk 14*c62b318aSDmitry Kozlyuk #define TEST_LOG(level, ...) RTE_LOG(level, USER1, __VA_ARGS__) 15*c62b318aSDmitry Kozlyuk 16*c62b318aSDmitry Kozlyuk typedef void * (alloc_t)(const char *name, size_t size, unsigned int align); 17*c62b318aSDmitry Kozlyuk typedef void (free_t)(void *addr); 18*c62b318aSDmitry Kozlyuk typedef void * (memset_t)(void *addr, int value, size_t size); 19*c62b318aSDmitry Kozlyuk 20*c62b318aSDmitry Kozlyuk static const uint64_t KB = 1 << 10; 21*c62b318aSDmitry Kozlyuk static const uint64_t GB = 1 << 30; 22*c62b318aSDmitry Kozlyuk 23*c62b318aSDmitry Kozlyuk static double 24*c62b318aSDmitry Kozlyuk tsc_to_us(uint64_t tsc, size_t runs) 25*c62b318aSDmitry Kozlyuk { 26*c62b318aSDmitry Kozlyuk return (double)tsc / rte_get_tsc_hz() * US_PER_S / runs; 27*c62b318aSDmitry Kozlyuk } 28*c62b318aSDmitry Kozlyuk 29*c62b318aSDmitry Kozlyuk static int 30*c62b318aSDmitry Kozlyuk test_memset_perf(double *us_per_gb) 31*c62b318aSDmitry Kozlyuk { 32*c62b318aSDmitry Kozlyuk static const size_t RUNS = 20; 33*c62b318aSDmitry Kozlyuk 34*c62b318aSDmitry Kozlyuk void *ptr; 35*c62b318aSDmitry Kozlyuk size_t i; 36*c62b318aSDmitry Kozlyuk uint64_t tsc; 37*c62b318aSDmitry Kozlyuk 38*c62b318aSDmitry Kozlyuk TEST_LOG(INFO, "Reference: memset\n"); 39*c62b318aSDmitry Kozlyuk 40*c62b318aSDmitry Kozlyuk ptr = rte_malloc(NULL, GB, 0); 41*c62b318aSDmitry Kozlyuk if (ptr == NULL) { 42*c62b318aSDmitry Kozlyuk TEST_LOG(ERR, "rte_malloc(size=%"PRIx64") failed\n", GB); 43*c62b318aSDmitry Kozlyuk return -1; 44*c62b318aSDmitry Kozlyuk } 45*c62b318aSDmitry Kozlyuk 46*c62b318aSDmitry Kozlyuk tsc = rte_rdtsc_precise(); 47*c62b318aSDmitry Kozlyuk for (i = 0; i < RUNS; i++) 48*c62b318aSDmitry Kozlyuk memset(ptr, 0, GB); 49*c62b318aSDmitry Kozlyuk tsc = rte_rdtsc_precise() - tsc; 50*c62b318aSDmitry Kozlyuk 51*c62b318aSDmitry Kozlyuk *us_per_gb = tsc_to_us(tsc, RUNS); 52*c62b318aSDmitry Kozlyuk TEST_LOG(INFO, "Result: %f.3 GiB/s <=> %.2f us/MiB\n", 53*c62b318aSDmitry Kozlyuk US_PER_S / *us_per_gb, *us_per_gb / KB); 54*c62b318aSDmitry Kozlyuk 55*c62b318aSDmitry Kozlyuk rte_free(ptr); 56*c62b318aSDmitry Kozlyuk TEST_LOG(INFO, "\n"); 57*c62b318aSDmitry Kozlyuk return 0; 58*c62b318aSDmitry Kozlyuk } 59*c62b318aSDmitry Kozlyuk 60*c62b318aSDmitry Kozlyuk static int 61*c62b318aSDmitry Kozlyuk test_alloc_perf(const char *name, alloc_t *alloc_fn, free_t *free_fn, 62*c62b318aSDmitry Kozlyuk memset_t *memset_fn, double memset_gb_us, size_t max_runs) 63*c62b318aSDmitry Kozlyuk { 64*c62b318aSDmitry Kozlyuk static const size_t SIZES[] = { 65*c62b318aSDmitry Kozlyuk 1 << 6, 1 << 7, 1 << 10, 1 << 12, 1 << 16, 1 << 20, 66*c62b318aSDmitry Kozlyuk 1 << 21, 1 << 22, 1 << 24, 1 << 30 }; 67*c62b318aSDmitry Kozlyuk 68*c62b318aSDmitry Kozlyuk size_t i, j; 69*c62b318aSDmitry Kozlyuk void **ptrs; 70*c62b318aSDmitry Kozlyuk 71*c62b318aSDmitry Kozlyuk TEST_LOG(INFO, "Performance: %s\n", name); 72*c62b318aSDmitry Kozlyuk 73*c62b318aSDmitry Kozlyuk ptrs = calloc(max_runs, sizeof(ptrs[0])); 74*c62b318aSDmitry Kozlyuk if (ptrs == NULL) { 75*c62b318aSDmitry Kozlyuk TEST_LOG(ERR, "Cannot allocate memory for pointers"); 76*c62b318aSDmitry Kozlyuk return -1; 77*c62b318aSDmitry Kozlyuk } 78*c62b318aSDmitry Kozlyuk 79*c62b318aSDmitry Kozlyuk TEST_LOG(INFO, "%12s%8s%12s%12s%12s%17s\n", "Size (B)", "Runs", 80*c62b318aSDmitry Kozlyuk "Alloc (us)", "Free (us)", "Total (us)", 81*c62b318aSDmitry Kozlyuk memset_fn != NULL ? "memset (us)" : "est.memset (us)"); 82*c62b318aSDmitry Kozlyuk for (i = 0; i < RTE_DIM(SIZES); i++) { 83*c62b318aSDmitry Kozlyuk size_t size = SIZES[i]; 84*c62b318aSDmitry Kozlyuk size_t runs_done; 85*c62b318aSDmitry Kozlyuk uint64_t tsc_start, tsc_alloc, tsc_memset = 0, tsc_free; 86*c62b318aSDmitry Kozlyuk double alloc_time, free_time, memset_time; 87*c62b318aSDmitry Kozlyuk 88*c62b318aSDmitry Kozlyuk tsc_start = rte_rdtsc_precise(); 89*c62b318aSDmitry Kozlyuk for (j = 0; j < max_runs; j++) { 90*c62b318aSDmitry Kozlyuk ptrs[j] = alloc_fn(NULL, size, 0); 91*c62b318aSDmitry Kozlyuk if (ptrs[j] == NULL) 92*c62b318aSDmitry Kozlyuk break; 93*c62b318aSDmitry Kozlyuk } 94*c62b318aSDmitry Kozlyuk tsc_alloc = rte_rdtsc_precise() - tsc_start; 95*c62b318aSDmitry Kozlyuk 96*c62b318aSDmitry Kozlyuk if (j == 0) { 97*c62b318aSDmitry Kozlyuk TEST_LOG(INFO, "%12zu Interrupted: out of memory.\n", 98*c62b318aSDmitry Kozlyuk size); 99*c62b318aSDmitry Kozlyuk break; 100*c62b318aSDmitry Kozlyuk } 101*c62b318aSDmitry Kozlyuk runs_done = j; 102*c62b318aSDmitry Kozlyuk 103*c62b318aSDmitry Kozlyuk if (memset_fn != NULL) { 104*c62b318aSDmitry Kozlyuk tsc_start = rte_rdtsc_precise(); 105*c62b318aSDmitry Kozlyuk for (j = 0; j < runs_done && ptrs[j] != NULL; j++) 106*c62b318aSDmitry Kozlyuk memset_fn(ptrs[j], 0, size); 107*c62b318aSDmitry Kozlyuk tsc_memset = rte_rdtsc_precise() - tsc_start; 108*c62b318aSDmitry Kozlyuk } 109*c62b318aSDmitry Kozlyuk 110*c62b318aSDmitry Kozlyuk tsc_start = rte_rdtsc_precise(); 111*c62b318aSDmitry Kozlyuk for (j = 0; j < runs_done && ptrs[j] != NULL; j++) 112*c62b318aSDmitry Kozlyuk free_fn(ptrs[j]); 113*c62b318aSDmitry Kozlyuk tsc_free = rte_rdtsc_precise() - tsc_start; 114*c62b318aSDmitry Kozlyuk 115*c62b318aSDmitry Kozlyuk alloc_time = tsc_to_us(tsc_alloc, runs_done); 116*c62b318aSDmitry Kozlyuk free_time = tsc_to_us(tsc_free, runs_done); 117*c62b318aSDmitry Kozlyuk memset_time = memset_fn != NULL ? 118*c62b318aSDmitry Kozlyuk tsc_to_us(tsc_memset, runs_done) : 119*c62b318aSDmitry Kozlyuk memset_gb_us * size / GB; 120*c62b318aSDmitry Kozlyuk TEST_LOG(INFO, "%12zu%8zu%12.2f%12.2f%12.2f%17.2f\n", 121*c62b318aSDmitry Kozlyuk size, runs_done, alloc_time, free_time, 122*c62b318aSDmitry Kozlyuk alloc_time + free_time, memset_time); 123*c62b318aSDmitry Kozlyuk 124*c62b318aSDmitry Kozlyuk memset(ptrs, 0, max_runs * sizeof(ptrs[0])); 125*c62b318aSDmitry Kozlyuk } 126*c62b318aSDmitry Kozlyuk 127*c62b318aSDmitry Kozlyuk free(ptrs); 128*c62b318aSDmitry Kozlyuk TEST_LOG(INFO, "\n"); 129*c62b318aSDmitry Kozlyuk return 0; 130*c62b318aSDmitry Kozlyuk } 131*c62b318aSDmitry Kozlyuk 132*c62b318aSDmitry Kozlyuk static void * 133*c62b318aSDmitry Kozlyuk memzone_alloc(const char *name __rte_unused, size_t size, unsigned int align) 134*c62b318aSDmitry Kozlyuk { 135*c62b318aSDmitry Kozlyuk const struct rte_memzone *mz; 136*c62b318aSDmitry Kozlyuk char gen_name[RTE_MEMZONE_NAMESIZE]; 137*c62b318aSDmitry Kozlyuk 138*c62b318aSDmitry Kozlyuk snprintf(gen_name, sizeof(gen_name), "test-mz-%"PRIx64, rte_rdtsc()); 139*c62b318aSDmitry Kozlyuk mz = rte_memzone_reserve_aligned(gen_name, size, SOCKET_ID_ANY, 140*c62b318aSDmitry Kozlyuk RTE_MEMZONE_1GB | RTE_MEMZONE_SIZE_HINT_ONLY, align); 141*c62b318aSDmitry Kozlyuk return (void *)(uintptr_t)mz; 142*c62b318aSDmitry Kozlyuk } 143*c62b318aSDmitry Kozlyuk 144*c62b318aSDmitry Kozlyuk static void 145*c62b318aSDmitry Kozlyuk memzone_free(void *addr) 146*c62b318aSDmitry Kozlyuk { 147*c62b318aSDmitry Kozlyuk rte_memzone_free((struct rte_memzone *)addr); 148*c62b318aSDmitry Kozlyuk } 149*c62b318aSDmitry Kozlyuk 150*c62b318aSDmitry Kozlyuk static int 151*c62b318aSDmitry Kozlyuk test_malloc_perf(void) 152*c62b318aSDmitry Kozlyuk { 153*c62b318aSDmitry Kozlyuk static const size_t MAX_RUNS = 10000; 154*c62b318aSDmitry Kozlyuk 155*c62b318aSDmitry Kozlyuk double memset_us_gb = 0; 156*c62b318aSDmitry Kozlyuk 157*c62b318aSDmitry Kozlyuk if (test_memset_perf(&memset_us_gb) < 0) 158*c62b318aSDmitry Kozlyuk return -1; 159*c62b318aSDmitry Kozlyuk 160*c62b318aSDmitry Kozlyuk if (test_alloc_perf("rte_malloc", rte_malloc, rte_free, memset, 161*c62b318aSDmitry Kozlyuk memset_us_gb, MAX_RUNS) < 0) 162*c62b318aSDmitry Kozlyuk return -1; 163*c62b318aSDmitry Kozlyuk if (test_alloc_perf("rte_zmalloc", rte_zmalloc, rte_free, memset, 164*c62b318aSDmitry Kozlyuk memset_us_gb, MAX_RUNS) < 0) 165*c62b318aSDmitry Kozlyuk return -1; 166*c62b318aSDmitry Kozlyuk 167*c62b318aSDmitry Kozlyuk if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free, 168*c62b318aSDmitry Kozlyuk NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0) 169*c62b318aSDmitry Kozlyuk return -1; 170*c62b318aSDmitry Kozlyuk 171*c62b318aSDmitry Kozlyuk return 0; 172*c62b318aSDmitry Kozlyuk } 173*c62b318aSDmitry Kozlyuk 174*c62b318aSDmitry Kozlyuk REGISTER_TEST_COMMAND(malloc_perf_autotest, test_malloc_perf); 175