1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stdint.h> 35 #include <stdio.h> 36 #include <string.h> 37 #include <stdlib.h> 38 39 #include <rte_common.h> 40 #include <cmdline_parse.h> 41 #include <rte_cycles.h> 42 #include <rte_random.h> 43 #include <rte_malloc.h> 44 45 #include <rte_memcpy.h> 46 47 #include "test.h" 48 49 /* 50 * Set this to the maximum buffer size you want to test. If it is 0, then the 51 * values in the buf_sizes[] array below will be used. 52 */ 53 #define TEST_VALUE_RANGE 0 54 55 /* List of buffer sizes to test */ 56 #if TEST_VALUE_RANGE == 0 57 static size_t buf_sizes[] = { 58 0, 1, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129, 255, 59 256, 257, 320, 384, 511, 512, 513, 1023, 1024, 1025, 1518, 1522, 1600, 60 2048, 3072, 4096, 5120, 6144, 7168, 8192 61 }; 62 /* MUST be as large as largest packet size above */ 63 #define SMALL_BUFFER_SIZE 8192 64 #else /* TEST_VALUE_RANGE != 0 */ 65 static size_t buf_sizes[TEST_VALUE_RANGE]; 66 #define SMALL_BUFFER_SIZE TEST_VALUE_RANGE 67 #endif /* TEST_VALUE_RANGE == 0 */ 68 69 70 /* 71 * Arrays of this size are used for measuring uncached memory accesses by 72 * picking a random location within the buffer. Make this smaller if there are 73 * memory allocation errors. 74 */ 75 #define LARGE_BUFFER_SIZE (100 * 1024 * 1024) 76 77 /* How many times to run timing loop for performance tests */ 78 #define TEST_ITERATIONS 1000000 79 #define TEST_BATCH_SIZE 100 80 81 /* Data is aligned on this many bytes (power of 2) */ 82 #define ALIGNMENT_UNIT 16 83 84 /* 85 * Pointers used in performance tests. The two large buffers are for uncached 86 * access where random addresses within the buffer are used for each 87 * memcpy. The two small buffers are for cached access. 88 */ 89 static uint8_t *large_buf_read, *large_buf_write; 90 static uint8_t *small_buf_read, *small_buf_write; 91 92 /* Initialise data buffers. */ 93 static int 94 init_buffers(void) 95 { 96 unsigned i; 97 98 large_buf_read = rte_malloc("memcpy", LARGE_BUFFER_SIZE, ALIGNMENT_UNIT); 99 if (large_buf_read == NULL) 100 goto error_large_buf_read; 101 102 large_buf_write = rte_malloc("memcpy", LARGE_BUFFER_SIZE, ALIGNMENT_UNIT); 103 if (large_buf_write == NULL) 104 goto error_large_buf_write; 105 106 small_buf_read = rte_malloc("memcpy", SMALL_BUFFER_SIZE, ALIGNMENT_UNIT); 107 if (small_buf_read == NULL) 108 goto error_small_buf_read; 109 110 small_buf_write = rte_malloc("memcpy", SMALL_BUFFER_SIZE, ALIGNMENT_UNIT); 111 if (small_buf_write == NULL) 112 goto error_small_buf_write; 113 114 for (i = 0; i < LARGE_BUFFER_SIZE; i++) 115 large_buf_read[i] = rte_rand(); 116 for (i = 0; i < SMALL_BUFFER_SIZE; i++) 117 small_buf_read[i] = rte_rand(); 118 119 return 0; 120 121 error_small_buf_write: 122 rte_free(small_buf_read); 123 error_small_buf_read: 124 rte_free(large_buf_write); 125 error_large_buf_write: 126 rte_free(large_buf_read); 127 error_large_buf_read: 128 printf("ERROR: not enough memory\n"); 129 return -1; 130 } 131 132 /* Cleanup data buffers */ 133 static void 134 free_buffers(void) 135 { 136 rte_free(large_buf_read); 137 rte_free(large_buf_write); 138 rte_free(small_buf_read); 139 rte_free(small_buf_write); 140 } 141 142 /* 143 * Get a random offset into large array, with enough space needed to perform 144 * max copy size. Offset is aligned. 145 */ 146 static inline size_t 147 get_rand_offset(void) 148 { 149 return ((rte_rand() % (LARGE_BUFFER_SIZE - SMALL_BUFFER_SIZE)) & 150 ~(ALIGNMENT_UNIT - 1)); 151 } 152 153 /* Fill in source and destination addresses. */ 154 static inline void 155 fill_addr_arrays(size_t *dst_addr, int is_dst_cached, 156 size_t *src_addr, int is_src_cached) 157 { 158 unsigned int i; 159 160 for (i = 0; i < TEST_BATCH_SIZE; i++) { 161 dst_addr[i] = (is_dst_cached) ? 0 : get_rand_offset(); 162 src_addr[i] = (is_src_cached) ? 0 : get_rand_offset(); 163 } 164 } 165 166 /* 167 * WORKAROUND: For some reason the first test doing an uncached write 168 * takes a very long time (~25 times longer than is expected). So we do 169 * it once without timing. 170 */ 171 static void 172 do_uncached_write(uint8_t *dst, int is_dst_cached, 173 const uint8_t *src, int is_src_cached, size_t size) 174 { 175 unsigned i, j; 176 size_t dst_addrs[TEST_BATCH_SIZE], src_addrs[TEST_BATCH_SIZE]; 177 178 for (i = 0; i < (TEST_ITERATIONS / TEST_BATCH_SIZE); i++) { 179 fill_addr_arrays(dst_addrs, is_dst_cached, 180 src_addrs, is_src_cached); 181 for (j = 0; j < TEST_BATCH_SIZE; j++) 182 rte_memcpy(dst+dst_addrs[j], src+src_addrs[j], size); 183 } 184 } 185 186 /* 187 * Run a single memcpy performance test. This is a macro to ensure that if 188 * the "size" parameter is a constant it won't be converted to a variable. 189 */ 190 #define SINGLE_PERF_TEST(dst, is_dst_cached, src, is_src_cached, size) do { \ 191 unsigned int iter, t; \ 192 size_t dst_addrs[TEST_BATCH_SIZE], src_addrs[TEST_BATCH_SIZE]; \ 193 uint64_t start_time, total_time = 0; \ 194 uint64_t total_time2 = 0; \ 195 for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) { \ 196 fill_addr_arrays(dst_addrs, is_dst_cached, \ 197 src_addrs, is_src_cached); \ 198 start_time = rte_rdtsc(); \ 199 for (t = 0; t < TEST_BATCH_SIZE; t++) \ 200 rte_memcpy(dst+dst_addrs[t], src+src_addrs[t], size); \ 201 total_time += rte_rdtsc() - start_time; \ 202 } \ 203 for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) { \ 204 fill_addr_arrays(dst_addrs, is_dst_cached, \ 205 src_addrs, is_src_cached); \ 206 start_time = rte_rdtsc(); \ 207 for (t = 0; t < TEST_BATCH_SIZE; t++) \ 208 memcpy(dst+dst_addrs[t], src+src_addrs[t], size); \ 209 total_time2 += rte_rdtsc() - start_time; \ 210 } \ 211 printf("%8.0f -", (double)total_time /TEST_ITERATIONS); \ 212 printf("%5.0f", (double)total_time2 / TEST_ITERATIONS); \ 213 } while (0) 214 215 /* Run memcpy() tests for each cached/uncached permutation. */ 216 #define ALL_PERF_TESTS_FOR_SIZE(n) do { \ 217 if (__builtin_constant_p(n)) \ 218 printf("\nC%6u", (unsigned)n); \ 219 else \ 220 printf("\n%7u", (unsigned)n); \ 221 SINGLE_PERF_TEST(small_buf_write, 1, small_buf_read, 1, n); \ 222 SINGLE_PERF_TEST(large_buf_write, 0, small_buf_read, 1, n); \ 223 SINGLE_PERF_TEST(small_buf_write, 1, large_buf_read, 0, n); \ 224 SINGLE_PERF_TEST(large_buf_write, 0, large_buf_read, 0, n); \ 225 } while (0) 226 227 /* 228 * Run performance tests for a number of different sizes and cached/uncached 229 * permutations. 230 */ 231 static int 232 perf_test(void) 233 { 234 const unsigned num_buf_sizes = sizeof(buf_sizes) / sizeof(buf_sizes[0]); 235 unsigned i; 236 int ret; 237 238 ret = init_buffers(); 239 if (ret != 0) 240 return ret; 241 242 #if TEST_VALUE_RANGE != 0 243 /* Setup buf_sizes array, if required */ 244 for (i = 0; i < TEST_VALUE_RANGE; i++) 245 buf_sizes[i] = i; 246 #endif 247 248 /* See function comment */ 249 do_uncached_write(large_buf_write, 0, small_buf_read, 1, SMALL_BUFFER_SIZE); 250 251 printf("\n** rte_memcpy() - memcpy perf. tests (C = compile-time constant) **\n" 252 "======= ============== ============== ============== ==============\n" 253 " Size Cache to cache Cache to mem Mem to cache Mem to mem\n" 254 "(bytes) (ticks) (ticks) (ticks) (ticks)\n" 255 "------- -------------- -------------- -------------- --------------"); 256 257 /* Do tests where size is a variable */ 258 for (i = 0; i < num_buf_sizes; i++) { 259 ALL_PERF_TESTS_FOR_SIZE((size_t)buf_sizes[i]); 260 } 261 printf("\n------- -------------- -------------- -------------- --------------"); 262 /* Do tests where size is a compile-time constant */ 263 ALL_PERF_TESTS_FOR_SIZE(63U); 264 ALL_PERF_TESTS_FOR_SIZE(64U); 265 ALL_PERF_TESTS_FOR_SIZE(65U); 266 ALL_PERF_TESTS_FOR_SIZE(255U); 267 ALL_PERF_TESTS_FOR_SIZE(256U); 268 ALL_PERF_TESTS_FOR_SIZE(257U); 269 ALL_PERF_TESTS_FOR_SIZE(1023U); 270 ALL_PERF_TESTS_FOR_SIZE(1024U); 271 ALL_PERF_TESTS_FOR_SIZE(1025U); 272 ALL_PERF_TESTS_FOR_SIZE(1518U); 273 274 printf("\n======= ============== ============== ============== ==============\n\n"); 275 276 free_buffers(); 277 278 return 0; 279 } 280 281 282 int 283 test_memcpy_perf(void) 284 { 285 int ret; 286 287 ret = perf_test(); 288 if (ret != 0) 289 return -1; 290 return 0; 291 } 292