/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
4a9de470cSBruce Richardson
5a9de470cSBruce Richardson #include <stdint.h>
6a9de470cSBruce Richardson #include <stdio.h>
7a9de470cSBruce Richardson #include <string.h>
8a9de470cSBruce Richardson #include <stdlib.h>
9987d40a0SJie Zhou #include <time.h>
10a9de470cSBruce Richardson
11a9de470cSBruce Richardson #include <rte_common.h>
12a9de470cSBruce Richardson #include <rte_cycles.h>
13987d40a0SJie Zhou #include <rte_os_shim.h>
14a9de470cSBruce Richardson #include <rte_random.h>
15a9de470cSBruce Richardson #include <rte_malloc.h>
16a9de470cSBruce Richardson
17a9de470cSBruce Richardson #include <rte_memcpy.h>
18a9de470cSBruce Richardson
19a9de470cSBruce Richardson #include "test.h"
20a9de470cSBruce Richardson
/*
 * Set this to a non-zero value N to test a contiguous range of sizes
 * (buf_sizes[] then has N entries). Leave it at 0 to test the explicit list
 * of sizes in buf_sizes[] below.
 */
#define TEST_VALUE_RANGE 0

#if TEST_VALUE_RANGE == 0
/* Copy sizes (in bytes) exercised by the variable-size tests */
static size_t buf_sizes[] = {
	1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 15, 16, 17,
	31, 32, 33,
	63, 64, 65,
	127, 128, 129,
	191, 192, 193,
	255, 256, 257,
	319, 320, 321,
	383, 384, 385,
	447, 448, 449,
	511, 512, 513,
	767, 768, 769,
	1023, 1024, 1025,
	1518, 1522, 1536, 1600,
	2048, 2560, 3072, 3584, 4096, 4608, 5120, 5632,
	6144, 6656, 7168, 7680, 8192
};
/* MUST be at least as large as the largest size listed above */
#define SMALL_BUFFER_SIZE 8192
#else /* TEST_VALUE_RANGE != 0 */
/* One slot per size in the range; filled in at run time by perf_test() */
static size_t buf_sizes[TEST_VALUE_RANGE];
#define SMALL_BUFFER_SIZE TEST_VALUE_RANGE
#endif /* TEST_VALUE_RANGE == 0 */
41a9de470cSBruce Richardson
42a9de470cSBruce Richardson
/*
 * Nominal size of each large buffer. Uncached accesses are measured by
 * copying to/from random locations inside a buffer of this size. Reduce it
 * if memory allocation fails.
 */
#define LARGE_BUFFER_SIZE (100 * 1024 * 1024)

/*
 * Number of copies timed per measurement, and the number of copies issued
 * per timed batch.
 */
#define TEST_ITERATIONS 1000000
#define TEST_BATCH_SIZE 100

/* Byte alignment of the test data (power of 2), chosen from the enabled ISA */
#if defined(__AVX512F__)
#define ALIGNMENT_UNIT 64
#elif defined(__AVX2__)
#define ALIGNMENT_UNIT 32
#else
#define ALIGNMENT_UNIT 16
#endif
62a9de470cSBruce Richardson
/*
 * Buffers used by the performance tests. The large pair is used for uncached
 * accesses, where each memcpy picks a random address inside the buffer. The
 * small pair is used for cached accesses.
 */
static uint8_t *large_buf_read;
static uint8_t *large_buf_write;
static uint8_t *small_buf_read;
static uint8_t *small_buf_write;
70a9de470cSBruce Richardson
71a9de470cSBruce Richardson /* Initialise data buffers. */
72a9de470cSBruce Richardson static int
init_buffers(void)73a9de470cSBruce Richardson init_buffers(void)
74a9de470cSBruce Richardson {
75a9de470cSBruce Richardson unsigned i;
76a9de470cSBruce Richardson
77a9de470cSBruce Richardson large_buf_read = rte_malloc("memcpy", LARGE_BUFFER_SIZE + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
78a9de470cSBruce Richardson if (large_buf_read == NULL)
79a9de470cSBruce Richardson goto error_large_buf_read;
80a9de470cSBruce Richardson
81a9de470cSBruce Richardson large_buf_write = rte_malloc("memcpy", LARGE_BUFFER_SIZE + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
82a9de470cSBruce Richardson if (large_buf_write == NULL)
83a9de470cSBruce Richardson goto error_large_buf_write;
84a9de470cSBruce Richardson
85a9de470cSBruce Richardson small_buf_read = rte_malloc("memcpy", SMALL_BUFFER_SIZE + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
86a9de470cSBruce Richardson if (small_buf_read == NULL)
87a9de470cSBruce Richardson goto error_small_buf_read;
88a9de470cSBruce Richardson
89a9de470cSBruce Richardson small_buf_write = rte_malloc("memcpy", SMALL_BUFFER_SIZE + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
90a9de470cSBruce Richardson if (small_buf_write == NULL)
91a9de470cSBruce Richardson goto error_small_buf_write;
92a9de470cSBruce Richardson
93a9de470cSBruce Richardson for (i = 0; i < LARGE_BUFFER_SIZE; i++)
94a9de470cSBruce Richardson large_buf_read[i] = rte_rand();
95a9de470cSBruce Richardson for (i = 0; i < SMALL_BUFFER_SIZE; i++)
96a9de470cSBruce Richardson small_buf_read[i] = rte_rand();
97a9de470cSBruce Richardson
98a9de470cSBruce Richardson return 0;
99a9de470cSBruce Richardson
100a9de470cSBruce Richardson error_small_buf_write:
101a9de470cSBruce Richardson rte_free(small_buf_read);
102a9de470cSBruce Richardson error_small_buf_read:
103a9de470cSBruce Richardson rte_free(large_buf_write);
104a9de470cSBruce Richardson error_large_buf_write:
105a9de470cSBruce Richardson rte_free(large_buf_read);
106a9de470cSBruce Richardson error_large_buf_read:
107a9de470cSBruce Richardson printf("ERROR: not enough memory\n");
108a9de470cSBruce Richardson return -1;
109a9de470cSBruce Richardson }
110a9de470cSBruce Richardson
111a9de470cSBruce Richardson /* Cleanup data buffers */
112a9de470cSBruce Richardson static void
free_buffers(void)113a9de470cSBruce Richardson free_buffers(void)
114a9de470cSBruce Richardson {
115a9de470cSBruce Richardson rte_free(large_buf_read);
116a9de470cSBruce Richardson rte_free(large_buf_write);
117a9de470cSBruce Richardson rte_free(small_buf_read);
118a9de470cSBruce Richardson rte_free(small_buf_write);
119a9de470cSBruce Richardson }
120a9de470cSBruce Richardson
121a9de470cSBruce Richardson /*
122a9de470cSBruce Richardson * Get a random offset into large array, with enough space needed to perform
123a9de470cSBruce Richardson * max copy size. Offset is aligned, uoffset is used for unalignment setting.
124a9de470cSBruce Richardson */
125a9de470cSBruce Richardson static inline size_t
get_rand_offset(size_t uoffset)126a9de470cSBruce Richardson get_rand_offset(size_t uoffset)
127a9de470cSBruce Richardson {
128a9de470cSBruce Richardson return ((rte_rand() % (LARGE_BUFFER_SIZE - SMALL_BUFFER_SIZE)) &
129a9de470cSBruce Richardson ~(ALIGNMENT_UNIT - 1)) + uoffset;
130a9de470cSBruce Richardson }
131a9de470cSBruce Richardson
132a9de470cSBruce Richardson /* Fill in source and destination addresses. */
133a9de470cSBruce Richardson static inline void
fill_addr_arrays(size_t * dst_addr,int is_dst_cached,size_t dst_uoffset,size_t * src_addr,int is_src_cached,size_t src_uoffset)134a9de470cSBruce Richardson fill_addr_arrays(size_t *dst_addr, int is_dst_cached, size_t dst_uoffset,
135a9de470cSBruce Richardson size_t *src_addr, int is_src_cached, size_t src_uoffset)
136a9de470cSBruce Richardson {
137a9de470cSBruce Richardson unsigned int i;
138a9de470cSBruce Richardson
139a9de470cSBruce Richardson for (i = 0; i < TEST_BATCH_SIZE; i++) {
140a9de470cSBruce Richardson dst_addr[i] = (is_dst_cached) ? dst_uoffset : get_rand_offset(dst_uoffset);
141a9de470cSBruce Richardson src_addr[i] = (is_src_cached) ? src_uoffset : get_rand_offset(src_uoffset);
142a9de470cSBruce Richardson }
143a9de470cSBruce Richardson }
144a9de470cSBruce Richardson
145a9de470cSBruce Richardson /*
146a9de470cSBruce Richardson * WORKAROUND: For some reason the first test doing an uncached write
147a9de470cSBruce Richardson * takes a very long time (~25 times longer than is expected). So we do
148a9de470cSBruce Richardson * it once without timing.
149a9de470cSBruce Richardson */
150a9de470cSBruce Richardson static void
do_uncached_write(uint8_t * dst,int is_dst_cached,const uint8_t * src,int is_src_cached,size_t size)151a9de470cSBruce Richardson do_uncached_write(uint8_t *dst, int is_dst_cached,
152a9de470cSBruce Richardson const uint8_t *src, int is_src_cached, size_t size)
153a9de470cSBruce Richardson {
154a9de470cSBruce Richardson unsigned i, j;
155a9de470cSBruce Richardson size_t dst_addrs[TEST_BATCH_SIZE], src_addrs[TEST_BATCH_SIZE];
156a9de470cSBruce Richardson
157a9de470cSBruce Richardson for (i = 0; i < (TEST_ITERATIONS / TEST_BATCH_SIZE); i++) {
158a9de470cSBruce Richardson fill_addr_arrays(dst_addrs, is_dst_cached, 0,
159a9de470cSBruce Richardson src_addrs, is_src_cached, 0);
160a9de470cSBruce Richardson for (j = 0; j < TEST_BATCH_SIZE; j++) {
161a9de470cSBruce Richardson rte_memcpy(dst+dst_addrs[j], src+src_addrs[j], size);
162a9de470cSBruce Richardson }
163a9de470cSBruce Richardson }
164a9de470cSBruce Richardson }
165a9de470cSBruce Richardson
/*
 * Run a single memcpy performance test and print its result.
 *
 * This is a macro rather than a function so that a compile-time constant
 * "size" argument stays a constant at the rte_memcpy()/memcpy() call sites.
 *
 * Two passes of TEST_ITERATIONS copies each are timed with rte_rdtsc(): the
 * first uses rte_memcpy(), the second uses memcpy(). Address generation is
 * done outside the timed region. The output is the average ticks per copy
 * for each, followed by the relative difference of rte_memcpy() versus
 * memcpy() in percent.
 *
 * Every parameter is parenthesized in the expansion so that compound
 * expressions can be passed safely. Arguments are evaluated more than once,
 * so they must be free of side effects.
 */
#define SINGLE_PERF_TEST(dst, is_dst_cached, dst_uoffset, \
			 src, is_src_cached, src_uoffset, size) \
do { \
	unsigned int iter, t; \
	size_t dst_addrs[TEST_BATCH_SIZE], src_addrs[TEST_BATCH_SIZE]; \
	uint64_t start_time, total_time = 0; \
	uint64_t total_time2 = 0; \
	for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) { \
		fill_addr_arrays(dst_addrs, (is_dst_cached), (dst_uoffset), \
				 src_addrs, (is_src_cached), (src_uoffset)); \
		start_time = rte_rdtsc(); \
		for (t = 0; t < TEST_BATCH_SIZE; t++) \
			rte_memcpy((dst) + dst_addrs[t], (src) + src_addrs[t], (size)); \
		total_time += rte_rdtsc() - start_time; \
	} \
	for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) { \
		fill_addr_arrays(dst_addrs, (is_dst_cached), (dst_uoffset), \
				 src_addrs, (is_src_cached), (src_uoffset)); \
		start_time = rte_rdtsc(); \
		for (t = 0; t < TEST_BATCH_SIZE; t++) \
			memcpy((dst) + dst_addrs[t], (src) + src_addrs[t], (size)); \
		total_time2 += rte_rdtsc() - start_time; \
	} \
	printf("%3.0f -", (double)total_time / TEST_ITERATIONS); \
	printf("%3.0f", (double)total_time2 / TEST_ITERATIONS); \
	printf("(%6.2f%%) ", ((double)total_time - total_time2)*100/total_time2); \
} while (0)
197a9de470cSBruce Richardson
/*
 * Run the aligned memcpy tests (both offsets 0) for copy size n, once for
 * each cached/uncached combination of destination and source, and print them
 * as one result row. The row label is prefixed with 'C' when n is a
 * compile-time constant.
 *
 * n is parenthesized wherever it is used so that a compound expression is
 * cast and forwarded as a whole.
 */
#define ALL_PERF_TESTS_FOR_SIZE(n) \
do { \
	if (__builtin_constant_p(n)) \
		printf("\nC%6u", (unsigned)(n)); \
	else \
		printf("\n%7u", (unsigned)(n)); \
	SINGLE_PERF_TEST(small_buf_write, 1, 0, small_buf_read, 1, 0, (n)); \
	SINGLE_PERF_TEST(large_buf_write, 0, 0, small_buf_read, 1, 0, (n)); \
	SINGLE_PERF_TEST(small_buf_write, 1, 0, large_buf_read, 0, 0, (n)); \
	SINGLE_PERF_TEST(large_buf_write, 0, 0, large_buf_read, 0, 0, (n)); \
} while (0)
210a9de470cSBruce Richardson
/*
 * Run the unaligned memcpy tests (destination offset 1, source offset 5) for
 * copy size n, once for each cached/uncached combination of destination and
 * source, and print them as one result row. The row label is prefixed with
 * 'C' when n is a compile-time constant.
 *
 * n is parenthesized wherever it is used so that a compound expression is
 * cast and forwarded as a whole.
 */
#define ALL_PERF_TESTS_FOR_SIZE_UNALIGNED(n) \
do { \
	if (__builtin_constant_p(n)) \
		printf("\nC%6u", (unsigned)(n)); \
	else \
		printf("\n%7u", (unsigned)(n)); \
	SINGLE_PERF_TEST(small_buf_write, 1, 1, small_buf_read, 1, 5, (n)); \
	SINGLE_PERF_TEST(large_buf_write, 0, 1, small_buf_read, 1, 5, (n)); \
	SINGLE_PERF_TEST(small_buf_write, 1, 1, large_buf_read, 0, 5, (n)); \
	SINGLE_PERF_TEST(large_buf_write, 0, 1, large_buf_read, 0, 5, (n)); \
} while (0)
223a9de470cSBruce Richardson
/*
 * Invoke TEST_CONSTANT() once for each compile-time constant copy size.
 * TEST_CONSTANT must be defined by the user of this macro.
 */
#define ALL_PERF_TEST_FOR_CONSTANT \
do { \
	TEST_CONSTANT(6U); \
	TEST_CONSTANT(64U); \
	TEST_CONSTANT(128U); \
	TEST_CONSTANT(192U); \
	TEST_CONSTANT(256U); \
	TEST_CONSTANT(512U); \
	TEST_CONSTANT(768U); \
	TEST_CONSTANT(1024U); \
	TEST_CONSTANT(1536U); \
} while (0)
231a9de470cSBruce Richardson
232a9de470cSBruce Richardson /* Run all memcpy tests for aligned constant cases */
233a9de470cSBruce Richardson static inline void
perf_test_constant_aligned(void)234a9de470cSBruce Richardson perf_test_constant_aligned(void)
235a9de470cSBruce Richardson {
236a9de470cSBruce Richardson #define TEST_CONSTANT ALL_PERF_TESTS_FOR_SIZE
237a9de470cSBruce Richardson ALL_PERF_TEST_FOR_CONSTANT;
238a9de470cSBruce Richardson #undef TEST_CONSTANT
239a9de470cSBruce Richardson }
240a9de470cSBruce Richardson
241a9de470cSBruce Richardson /* Run all memcpy tests for unaligned constant cases */
242a9de470cSBruce Richardson static inline void
perf_test_constant_unaligned(void)243a9de470cSBruce Richardson perf_test_constant_unaligned(void)
244a9de470cSBruce Richardson {
245a9de470cSBruce Richardson #define TEST_CONSTANT ALL_PERF_TESTS_FOR_SIZE_UNALIGNED
246a9de470cSBruce Richardson ALL_PERF_TEST_FOR_CONSTANT;
247a9de470cSBruce Richardson #undef TEST_CONSTANT
248a9de470cSBruce Richardson }
249a9de470cSBruce Richardson
250a9de470cSBruce Richardson /* Run all memcpy tests for aligned variable cases */
251a9de470cSBruce Richardson static inline void
perf_test_variable_aligned(void)252a9de470cSBruce Richardson perf_test_variable_aligned(void)
253a9de470cSBruce Richardson {
254a9de470cSBruce Richardson unsigned i;
25571bdd8a1SPavan Nikhilesh for (i = 0; i < RTE_DIM(buf_sizes); i++) {
256a9de470cSBruce Richardson ALL_PERF_TESTS_FOR_SIZE((size_t)buf_sizes[i]);
257a9de470cSBruce Richardson }
258a9de470cSBruce Richardson }
259a9de470cSBruce Richardson
260a9de470cSBruce Richardson /* Run all memcpy tests for unaligned variable cases */
261a9de470cSBruce Richardson static inline void
perf_test_variable_unaligned(void)262a9de470cSBruce Richardson perf_test_variable_unaligned(void)
263a9de470cSBruce Richardson {
264a9de470cSBruce Richardson unsigned i;
26571bdd8a1SPavan Nikhilesh for (i = 0; i < RTE_DIM(buf_sizes); i++) {
266a9de470cSBruce Richardson ALL_PERF_TESTS_FOR_SIZE_UNALIGNED((size_t)buf_sizes[i]);
267a9de470cSBruce Richardson }
268a9de470cSBruce Richardson }
269a9de470cSBruce Richardson
270a9de470cSBruce Richardson /* Run all memcpy tests */
271a9de470cSBruce Richardson static int
perf_test(void)272a9de470cSBruce Richardson perf_test(void)
273a9de470cSBruce Richardson {
274a9de470cSBruce Richardson int ret;
275987d40a0SJie Zhou struct timespec tv_begin, tv_end;
276a9de470cSBruce Richardson double time_aligned, time_unaligned;
277a9de470cSBruce Richardson double time_aligned_const, time_unaligned_const;
278a9de470cSBruce Richardson
279a9de470cSBruce Richardson ret = init_buffers();
280a9de470cSBruce Richardson if (ret != 0)
281a9de470cSBruce Richardson return ret;
282a9de470cSBruce Richardson
283a9de470cSBruce Richardson #if TEST_VALUE_RANGE != 0
284a9de470cSBruce Richardson /* Set up buf_sizes array, if required */
285a9de470cSBruce Richardson unsigned i;
286a9de470cSBruce Richardson for (i = 0; i < TEST_VALUE_RANGE; i++)
287a9de470cSBruce Richardson buf_sizes[i] = i;
288a9de470cSBruce Richardson #endif
289a9de470cSBruce Richardson
290a9de470cSBruce Richardson /* See function comment */
291a9de470cSBruce Richardson do_uncached_write(large_buf_write, 0, small_buf_read, 1, SMALL_BUFFER_SIZE);
292a9de470cSBruce Richardson
293a9de470cSBruce Richardson printf("\n** rte_memcpy() - memcpy perf. tests (C = compile-time constant) **\n"
294a9de470cSBruce Richardson "======= ================= ================= ================= =================\n"
295a9de470cSBruce Richardson " Size Cache to cache Cache to mem Mem to cache Mem to mem\n"
296a9de470cSBruce Richardson "(bytes) (ticks) (ticks) (ticks) (ticks)\n"
297a9de470cSBruce Richardson "------- ----------------- ----------------- ----------------- -----------------");
298a9de470cSBruce Richardson
299a9de470cSBruce Richardson printf("\n================================= %2dB aligned =================================",
300a9de470cSBruce Richardson ALIGNMENT_UNIT);
301a9de470cSBruce Richardson /* Do aligned tests where size is a variable */
302987d40a0SJie Zhou timespec_get(&tv_begin, TIME_UTC);
303a9de470cSBruce Richardson perf_test_variable_aligned();
304987d40a0SJie Zhou timespec_get(&tv_end, TIME_UTC);
305a9de470cSBruce Richardson time_aligned = (double)(tv_end.tv_sec - tv_begin.tv_sec)
306987d40a0SJie Zhou + ((double)tv_end.tv_nsec - tv_begin.tv_nsec) / NS_PER_S;
307a9de470cSBruce Richardson printf("\n------- ----------------- ----------------- ----------------- -----------------");
308a9de470cSBruce Richardson /* Do aligned tests where size is a compile-time constant */
309987d40a0SJie Zhou timespec_get(&tv_begin, TIME_UTC);
310a9de470cSBruce Richardson perf_test_constant_aligned();
311987d40a0SJie Zhou timespec_get(&tv_end, TIME_UTC);
312a9de470cSBruce Richardson time_aligned_const = (double)(tv_end.tv_sec - tv_begin.tv_sec)
313987d40a0SJie Zhou + ((double)tv_end.tv_nsec - tv_begin.tv_nsec) / NS_PER_S;
314a9de470cSBruce Richardson printf("\n================================== Unaligned ==================================");
315a9de470cSBruce Richardson /* Do unaligned tests where size is a variable */
316987d40a0SJie Zhou timespec_get(&tv_begin, TIME_UTC);
317a9de470cSBruce Richardson perf_test_variable_unaligned();
318987d40a0SJie Zhou timespec_get(&tv_end, TIME_UTC);
319a9de470cSBruce Richardson time_unaligned = (double)(tv_end.tv_sec - tv_begin.tv_sec)
320987d40a0SJie Zhou + ((double)tv_end.tv_nsec - tv_begin.tv_nsec) / NS_PER_S;
321a9de470cSBruce Richardson printf("\n------- ----------------- ----------------- ----------------- -----------------");
322a9de470cSBruce Richardson /* Do unaligned tests where size is a compile-time constant */
323987d40a0SJie Zhou timespec_get(&tv_begin, TIME_UTC);
324a9de470cSBruce Richardson perf_test_constant_unaligned();
325987d40a0SJie Zhou timespec_get(&tv_end, TIME_UTC);
326a9de470cSBruce Richardson time_unaligned_const = (double)(tv_end.tv_sec - tv_begin.tv_sec)
327987d40a0SJie Zhou + ((double)tv_end.tv_nsec - tv_begin.tv_nsec) / NS_PER_S;
328a9de470cSBruce Richardson printf("\n======= ================= ================= ================= =================\n\n");
329a9de470cSBruce Richardson
330a9de470cSBruce Richardson printf("Test Execution Time (seconds):\n");
331a9de470cSBruce Richardson printf("Aligned variable copy size = %8.3f\n", time_aligned);
332a9de470cSBruce Richardson printf("Aligned constant copy size = %8.3f\n", time_aligned_const);
333a9de470cSBruce Richardson printf("Unaligned variable copy size = %8.3f\n", time_unaligned);
334a9de470cSBruce Richardson printf("Unaligned constant copy size = %8.3f\n", time_unaligned_const);
335a9de470cSBruce Richardson free_buffers();
336a9de470cSBruce Richardson
337a9de470cSBruce Richardson return 0;
338a9de470cSBruce Richardson }
339a9de470cSBruce Richardson
/*
 * Test entry point: run the memcpy benchmark and map any non-zero result
 * from perf_test() to -1.
 */
static int
test_memcpy_perf(void)
{
	return (perf_test() != 0) ? -1 : 0;
}

REGISTER_PERF_TEST(memcpy_perf_autotest, test_memcpy_perf);
352