1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stdint.h> 35 #include <stdio.h> 36 #include <string.h> 37 #include <stdlib.h> 38 39 #include <rte_common.h> 40 #include <rte_cycles.h> 41 #include <rte_random.h> 42 #include <rte_malloc.h> 43 44 #include <rte_memcpy.h> 45 46 #include "test.h" 47 48 /* 49 * Set this to the maximum buffer size you want to test. If it is 0, then the 50 * values in the buf_sizes[] array below will be used. 51 */ 52 #define TEST_VALUE_RANGE 0 53 54 /* List of buffer sizes to test */ 55 #if TEST_VALUE_RANGE == 0 56 static size_t buf_sizes[] = { 57 0, 1, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129, 255, 58 256, 257, 320, 384, 511, 512, 513, 1023, 1024, 1025, 1518, 1522, 1600, 59 2048, 3072, 4096, 5120, 6144, 7168, 8192 60 }; 61 /* MUST be as large as largest packet size above */ 62 #define SMALL_BUFFER_SIZE 8192 63 #else /* TEST_VALUE_RANGE != 0 */ 64 static size_t buf_sizes[TEST_VALUE_RANGE]; 65 #define SMALL_BUFFER_SIZE TEST_VALUE_RANGE 66 #endif /* TEST_VALUE_RANGE == 0 */ 67 68 69 /* 70 * Arrays of this size are used for measuring uncached memory accesses by 71 * picking a random location within the buffer. Make this smaller if there are 72 * memory allocation errors. 73 */ 74 #define LARGE_BUFFER_SIZE (100 * 1024 * 1024) 75 76 /* How many times to run timing loop for performance tests */ 77 #define TEST_ITERATIONS 1000000 78 #define TEST_BATCH_SIZE 100 79 80 /* Data is aligned on this many bytes (power of 2) */ 81 #define ALIGNMENT_UNIT 16 82 83 /* 84 * Pointers used in performance tests. The two large buffers are for uncached 85 * access where random addresses within the buffer are used for each 86 * memcpy. The two small buffers are for cached access. 87 */ 88 static uint8_t *large_buf_read, *large_buf_write; 89 static uint8_t *small_buf_read, *small_buf_write; 90 91 /* Initialise data buffers. */ 92 static int 93 init_buffers(void) 94 { 95 unsigned i; 96 97 large_buf_read = rte_malloc("memcpy", LARGE_BUFFER_SIZE, ALIGNMENT_UNIT); 98 if (large_buf_read == NULL) 99 goto error_large_buf_read; 100 101 large_buf_write = rte_malloc("memcpy", LARGE_BUFFER_SIZE, ALIGNMENT_UNIT); 102 if (large_buf_write == NULL) 103 goto error_large_buf_write; 104 105 small_buf_read = rte_malloc("memcpy", SMALL_BUFFER_SIZE, ALIGNMENT_UNIT); 106 if (small_buf_read == NULL) 107 goto error_small_buf_read; 108 109 small_buf_write = rte_malloc("memcpy", SMALL_BUFFER_SIZE, ALIGNMENT_UNIT); 110 if (small_buf_write == NULL) 111 goto error_small_buf_write; 112 113 for (i = 0; i < LARGE_BUFFER_SIZE; i++) 114 large_buf_read[i] = rte_rand(); 115 for (i = 0; i < SMALL_BUFFER_SIZE; i++) 116 small_buf_read[i] = rte_rand(); 117 118 return 0; 119 120 error_small_buf_write: 121 rte_free(small_buf_read); 122 error_small_buf_read: 123 rte_free(large_buf_write); 124 error_large_buf_write: 125 rte_free(large_buf_read); 126 error_large_buf_read: 127 printf("ERROR: not enough memory\n"); 128 return -1; 129 } 130 131 /* Cleanup data buffers */ 132 static void 133 free_buffers(void) 134 { 135 rte_free(large_buf_read); 136 rte_free(large_buf_write); 137 rte_free(small_buf_read); 138 rte_free(small_buf_write); 139 } 140 141 /* 142 * Get a random offset into large array, with enough space needed to perform 143 * max copy size. Offset is aligned. 144 */ 145 static inline size_t 146 get_rand_offset(void) 147 { 148 return ((rte_rand() % (LARGE_BUFFER_SIZE - SMALL_BUFFER_SIZE)) & 149 ~(ALIGNMENT_UNIT - 1)); 150 } 151 152 /* Fill in source and destination addresses. */ 153 static inline void 154 fill_addr_arrays(size_t *dst_addr, int is_dst_cached, 155 size_t *src_addr, int is_src_cached) 156 { 157 unsigned int i; 158 159 for (i = 0; i < TEST_BATCH_SIZE; i++) { 160 dst_addr[i] = (is_dst_cached) ? 0 : get_rand_offset(); 161 src_addr[i] = (is_src_cached) ? 0 : get_rand_offset(); 162 } 163 } 164 165 /* 166 * WORKAROUND: For some reason the first test doing an uncached write 167 * takes a very long time (~25 times longer than is expected). So we do 168 * it once without timing. 169 */ 170 static void 171 do_uncached_write(uint8_t *dst, int is_dst_cached, 172 const uint8_t *src, int is_src_cached, size_t size) 173 { 174 unsigned i, j; 175 size_t dst_addrs[TEST_BATCH_SIZE], src_addrs[TEST_BATCH_SIZE]; 176 177 for (i = 0; i < (TEST_ITERATIONS / TEST_BATCH_SIZE); i++) { 178 fill_addr_arrays(dst_addrs, is_dst_cached, 179 src_addrs, is_src_cached); 180 for (j = 0; j < TEST_BATCH_SIZE; j++) 181 rte_memcpy(dst+dst_addrs[j], src+src_addrs[j], size); 182 } 183 } 184 185 /* 186 * Run a single memcpy performance test. This is a macro to ensure that if 187 * the "size" parameter is a constant it won't be converted to a variable. 188 */ 189 #define SINGLE_PERF_TEST(dst, is_dst_cached, src, is_src_cached, size) do { \ 190 unsigned int iter, t; \ 191 size_t dst_addrs[TEST_BATCH_SIZE], src_addrs[TEST_BATCH_SIZE]; \ 192 uint64_t start_time, total_time = 0; \ 193 uint64_t total_time2 = 0; \ 194 for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) { \ 195 fill_addr_arrays(dst_addrs, is_dst_cached, \ 196 src_addrs, is_src_cached); \ 197 start_time = rte_rdtsc(); \ 198 for (t = 0; t < TEST_BATCH_SIZE; t++) \ 199 rte_memcpy(dst+dst_addrs[t], src+src_addrs[t], size); \ 200 total_time += rte_rdtsc() - start_time; \ 201 } \ 202 for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) { \ 203 fill_addr_arrays(dst_addrs, is_dst_cached, \ 204 src_addrs, is_src_cached); \ 205 start_time = rte_rdtsc(); \ 206 for (t = 0; t < TEST_BATCH_SIZE; t++) \ 207 memcpy(dst+dst_addrs[t], src+src_addrs[t], size); \ 208 total_time2 += rte_rdtsc() - start_time; \ 209 } \ 210 printf("%8.0f -", (double)total_time /TEST_ITERATIONS); \ 211 printf("%5.0f", (double)total_time2 / TEST_ITERATIONS); \ 212 } while (0) 213 214 /* Run memcpy() tests for each cached/uncached permutation. */ 215 #define ALL_PERF_TESTS_FOR_SIZE(n) do { \ 216 if (__builtin_constant_p(n)) \ 217 printf("\nC%6u", (unsigned)n); \ 218 else \ 219 printf("\n%7u", (unsigned)n); \ 220 SINGLE_PERF_TEST(small_buf_write, 1, small_buf_read, 1, n); \ 221 SINGLE_PERF_TEST(large_buf_write, 0, small_buf_read, 1, n); \ 222 SINGLE_PERF_TEST(small_buf_write, 1, large_buf_read, 0, n); \ 223 SINGLE_PERF_TEST(large_buf_write, 0, large_buf_read, 0, n); \ 224 } while (0) 225 226 /* 227 * Run performance tests for a number of different sizes and cached/uncached 228 * permutations. 229 */ 230 static int 231 perf_test(void) 232 { 233 const unsigned num_buf_sizes = sizeof(buf_sizes) / sizeof(buf_sizes[0]); 234 unsigned i; 235 int ret; 236 237 ret = init_buffers(); 238 if (ret != 0) 239 return ret; 240 241 #if TEST_VALUE_RANGE != 0 242 /* Setup buf_sizes array, if required */ 243 for (i = 0; i < TEST_VALUE_RANGE; i++) 244 buf_sizes[i] = i; 245 #endif 246 247 /* See function comment */ 248 do_uncached_write(large_buf_write, 0, small_buf_read, 1, SMALL_BUFFER_SIZE); 249 250 printf("\n** rte_memcpy() - memcpy perf. tests (C = compile-time constant) **\n" 251 "======= ============== ============== ============== ==============\n" 252 " Size Cache to cache Cache to mem Mem to cache Mem to mem\n" 253 "(bytes) (ticks) (ticks) (ticks) (ticks)\n" 254 "------- -------------- -------------- -------------- --------------"); 255 256 /* Do tests where size is a variable */ 257 for (i = 0; i < num_buf_sizes; i++) { 258 ALL_PERF_TESTS_FOR_SIZE((size_t)buf_sizes[i]); 259 } 260 printf("\n------- -------------- -------------- -------------- --------------"); 261 /* Do tests where size is a compile-time constant */ 262 ALL_PERF_TESTS_FOR_SIZE(63U); 263 ALL_PERF_TESTS_FOR_SIZE(64U); 264 ALL_PERF_TESTS_FOR_SIZE(65U); 265 ALL_PERF_TESTS_FOR_SIZE(255U); 266 ALL_PERF_TESTS_FOR_SIZE(256U); 267 ALL_PERF_TESTS_FOR_SIZE(257U); 268 ALL_PERF_TESTS_FOR_SIZE(1023U); 269 ALL_PERF_TESTS_FOR_SIZE(1024U); 270 ALL_PERF_TESTS_FOR_SIZE(1025U); 271 ALL_PERF_TESTS_FOR_SIZE(1518U); 272 273 printf("\n======= ============== ============== ============== ==============\n\n"); 274 275 free_buffers(); 276 277 return 0; 278 } 279 280 281 static int 282 test_memcpy_perf(void) 283 { 284 int ret; 285 286 ret = perf_test(); 287 if (ret != 0) 288 return -1; 289 return 0; 290 } 291 292 static struct test_command memcpy_perf_cmd = { 293 .command = "memcpy_perf_autotest", 294 .callback = test_memcpy_perf, 295 }; 296 REGISTER_TEST_COMMAND(memcpy_perf_cmd); 297