1 /**********************************************************************
2 Copyright(c) 2011-2015 Intel Corporation All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in
11 the documentation and/or other materials provided with the
12 distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
29
30 #ifndef _TEST_H
31 #define _TEST_H
32
33 /**
34 * @file test.h
35 * @brief Test helper include for common perf and test macros
36 *
37 * This is a helper file to enable short and simple tests. Not intended for use
38 * in library functions or production. Includes helper routines for alignment,
39 * benchmark timing, and filesize.
40 */
41
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
45
46 #include <stdio.h>
47 #include <stdint.h>
48
/* Under MSVC, map the `inline` keyword onto the compiler's `__inline`
 * spelling so the `static inline` helpers in this header can be used. */
#ifdef _MSC_VER
#define inline __inline
#endif
52
/*
 * OS-independent alignment attribute, aligned alloc and aligned free.
 *
 *  - DECLARE_ALIGNED(decl, alignval): declares `decl` with alignment
 *    `alignval`, using __attribute__((aligned)) on unix/Apple/MinGW and
 *    __declspec(align) on every other toolchain.
 *  - posix_memalign(p, algn, len): only defined here for the non-unix
 *    branches, where it is emulated with _aligned_malloc(). The pointer is
 *    stored through `p` and the expression evaluates to non-zero when the
 *    allocation returned NULL, zero otherwise. `len` and `algn` are pasted
 *    unparenthesized, so pass simple expressions.
 *  - aligned_free(x): free() on unix/Apple, _aligned_free() elsewhere.
 *  - __forceinline: on unix/Apple only, defined as `static inline`.
 *
 * NOTE(review): this header does not itself include the header declaring
 * free()/_aligned_malloc()/_aligned_free(); the including file presumably
 * has to provide it -- confirm.
 */
#if defined __unix__ || defined __APPLE__
#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
#define __forceinline static inline
#define aligned_free(x) free(x)
#else
#ifdef __MINGW32__
#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
#define posix_memalign(p, algn, len) \
        (NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn)))
#define aligned_free(x) _aligned_free(x)
#else
#define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
#define posix_memalign(p, algn, len) \
        (NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn)))
#define aligned_free(x) _aligned_free(x)
#endif
#endif
71
/*
 * DEBUG_PRINT(x): optional printf-style tracing.
 *
 * `x` is pasted directly after `printf`, so it must be a complete,
 * parenthesized argument list: DEBUG_PRINT(("value %d\n", v));
 * Without DEBUG defined the macro expands to an empty statement and the
 * arguments are not evaluated.
 */
#ifndef DEBUG
#define DEBUG_PRINT(x) do { } while (0)
#else
#define DEBUG_PRINT(x) printf x
#endif
79
/*
 * Decide whether the benchmark run time is treated as an approximation or as
 * a minimum. Fewer calls to the timer are required for the approximation
 * case.
 *
 * BENCHMARK_TYPE may be defined before including this header; it defaults to
 * BENCHMARK_MIN_TIME. PERFORMANCE_TEST() only runs its second, top-up pass
 * when BENCHMARK_TYPE == BENCHMARK_MIN_TIME.
 */
#define BENCHMARK_MIN_TIME 0
#define BENCHMARK_APPROX_TIME 1
#ifndef BENCHMARK_TYPE
#define BENCHMARK_TYPE BENCHMARK_MIN_TIME
#endif
87
88 #ifdef USE_RDTSC
/* The use of rdtsc is nuanced. On many processors it corresponds to a
 * standardized clock source. To obtain a meaningful result it may be
 * necessary to fix the CPU clock to match the rdtsc tick rate.
 */
93 #include <inttypes.h>
94 #include <x86intrin.h>
95 #define USE_CYCLES
96 #else
97 #include <time.h>
98 #define USE_SECONDS
99 #endif
100
101 #ifdef USE_RDTSC
/* Default benchmark duration for the rdtsc build; define BENCHMARK_TIME
 * before including this header to override it. Presumably passed as the
 * RUN_TIME argument of BENCHMARK(), which scales it by UNIT_SCALE. */
#ifndef BENCHMARK_TIME
#define BENCHMARK_TIME 6
#endif
/* Time base is raw counter ticks: UNIT_SCALE is the number of ticks that
 * get_time_elapsed() reports as 1.0, and CALIBRATE_TIME is the minimum
 * measured span (in ticks) that CALIBRATE() accepts. */
#define GHZ 1000000000
#define UNIT_SCALE (GHZ)
#define CALIBRATE_TIME (UNIT_SCALE / 2)
/**
 * @brief Read the current timestamp for the rdtsc build.
 *
 * @return The value returned by __rdtscp(), converted to long long. The
 *         auxiliary value the intrinsic writes back is discarded.
 */
static inline long long
get_time(void)
{
        unsigned int tsc_aux; /* output-only slot required by __rdtscp() */
        const unsigned long long ticks = __rdtscp(&tsc_aux);

        return (long long) ticks;
}
114
/**
 * @brief Timer resolution for the rdtsc build.
 *
 * @return Always 1, i.e. one counter tick.
 */
static inline long long
get_res(void)
{
        const long long one_tick = 1;

        return one_tick;
}
120 #else
/* Default benchmark duration for the non-rdtsc build; define BENCHMARK_TIME
 * before including this header to override it. */
#ifndef BENCHMARK_TIME
#define BENCHMARK_TIME 3
#endif
124 #ifdef _MSC_VER
/* MSVC build: UNIT_SCALE is whatever get_res() returns, so every use of
 * UNIT_SCALE is a function call. CALIBRATE_TIME is the minimum measured
 * span that CALIBRATE() accepts, a quarter of UNIT_SCALE. */
#define UNIT_SCALE get_res()
#define CALIBRATE_TIME (UNIT_SCALE / 4)
/**
 * @brief Read the current timestamp for the MSVC build.
 *
 * NOTE(review): the counter is written through a `long long *`; confirm this
 * matches the pointer type QueryPerformanceCounter() is declared with.
 *
 * @return The value stored by QueryPerformanceCounter(), or 0 if the call
 *         left the variable untouched.
 */
static inline long long
get_time(void)
{
        long long counter = 0;

        QueryPerformanceCounter(&counter);

        return counter;
}
134
/**
 * @brief Timer scale for the MSVC build (also used as UNIT_SCALE).
 *
 * NOTE(review): the frequency is written through a `long long *`; confirm
 * this matches the pointer type QueryPerformanceFrequency() is declared with.
 *
 * @return The value stored by QueryPerformanceFrequency(), or 0 if the call
 *         left the variable untouched.
 */
static inline long long
get_res(void)
{
        long long frequency = 0;

        QueryPerformanceFrequency(&frequency);

        return frequency;
}
142 #else
/* clock_gettime() build: timestamps are nanoseconds, so UNIT_SCALE (the
 * number of time units get_time_elapsed() reports as 1.0) is 1e9.
 * CALIBRATE_TIME is the minimum measured span that CALIBRATE() accepts. */
#define NANO_SCALE 1000000000
#define UNIT_SCALE NANO_SCALE
#define CALIBRATE_TIME (UNIT_SCALE / 4)
/* Clock passed to clock_gettime()/clock_getres(): CLOCK_MONOTONIC_PRECISE
 * on FreeBSD, CLOCK_MONOTONIC everywhere else. */
#ifdef __FreeBSD__
#define CLOCK_ID CLOCK_MONOTONIC_PRECISE
#else
#define CLOCK_ID CLOCK_MONOTONIC
#endif
151
152 static inline long long
153 get_time(void)
154 {
155 struct timespec time;
156 long long nano_total;
157 clock_gettime(CLOCK_ID, &time);
158 nano_total = time.tv_sec;
159 nano_total *= NANO_SCALE;
160 nano_total += time.tv_nsec;
161 return nano_total;
162 }
163
164 static inline long long
165 get_res(void)
166 {
167 struct timespec time;
168 long long nano_total;
169 clock_getres(CLOCK_ID, &time);
170 nano_total = time.tv_sec;
171 nano_total *= NANO_SCALE;
172 nano_total += time.tv_nsec;
173 return nano_total;
174 }
175 #endif
176 #endif
/* Stopwatch state shared by the perf_*() helpers and the benchmark macros.
 * All timestamps and totals are in get_time() units. */
struct perf {
        long long start;      /* timestamp set by perf_continue() and perf_pause() */
        long long stop;       /* timestamp taken by the most recent perf_pause() */
        long long run_total;  /* elapsed time accumulated by perf_pause() */
        long long iterations; /* iteration count written by the CALIBRATE /
                                 PERFORMANCE_TEST / BENCHMARK macros and read
                                 by perf_print(); not touched by perf_init() */
};
183
184 static inline void
perf_init(struct perf * p)185 perf_init(struct perf *p)
186 {
187 p->start = 0;
188 p->stop = 0;
189 p->run_total = 0;
190 }
191
192 static inline void
perf_continue(struct perf * p)193 perf_continue(struct perf *p)
194 {
195 p->start = get_time();
196 }
197
198 static inline void
perf_pause(struct perf * p)199 perf_pause(struct perf *p)
200 {
201 p->stop = get_time();
202 p->run_total = p->run_total + p->stop - p->start;
203 p->start = p->stop;
204 }
205
/**
 * @brief Start a fresh measurement.
 *
 * Resets the timing fields with perf_init() and then takes the start
 * timestamp with perf_continue().
 *
 * @param p  Record to restart.
 */
static inline void
perf_start(struct perf *p)
{
        perf_init(p);
        perf_continue(p);
}
212
/**
 * @brief Stop a measurement; currently identical to perf_pause().
 *
 * @param p  Record to update.
 */
static inline void
perf_stop(struct perf *p)
{
        perf_pause(p);
}
218
219 static inline double
get_time_elapsed(struct perf * p)220 get_time_elapsed(struct perf *p)
221 {
222 return 1.0 * p->run_total / UNIT_SCALE;
223 }
224
/**
 * @brief Accumulated run time in raw get_time() units.
 *
 * @param p  Record to read.
 * @return p->run_total, unscaled.
 */
static inline long long
get_base_elapsed(struct perf *p)
{
        return p->run_total;
}
230
/**
 * @brief Estimate how many iterations are needed to fill a target time.
 *
 * Computes ceil(total * runs / elapsed), where elapsed is the run time
 * already accumulated in @p p. When nothing has been accumulated yet
 * (elapsed <= 0) the timer resolution from get_res() is used as the divisor.
 *
 * @param p      Record holding the time measured for @p runs iterations.
 * @param runs   Number of iterations that produced the measured time.
 * @param total  Target time in get_time() units.
 * @return Estimated iteration count, rounded up.
 */
static inline unsigned long long
estimate_perf_iterations(struct perf *p, unsigned long long runs, unsigned long long total)
{
        const long long elapsed = get_base_elapsed(p);
        const unsigned long long scaled_target = total * runs;
        const unsigned long long divisor =
                (elapsed > 0) ? (unsigned long long) elapsed : (unsigned long long) get_res();

        /* Round up so the estimate never undershoots the target. */
        return (scaled_target + divisor - 1) / divisor;
}
240
/*
 * CALIBRATE(PERF, FUNC_CALL): find an iteration count whose timed run lasts
 * at least CALIBRATE_TIME.
 *
 * FUNC_CALL is first timed once. While the measured time is below
 * CALIBRATE_TIME, the iteration count is re-estimated to target
 * 2 * CALIBRATE_TIME and the loop is timed again from scratch. The final
 * count is stored in (PERF)->iterations, and PERF keeps the timing of the
 * last run.
 *
 * PERF is a `struct perf *` expression and is evaluated several times.
 * FUNC_CALL is pasted textually as a statement. The macro expands to a brace
 * block (not do/while), declaring the locals _i and _iter.
 */
#define CALIBRATE(PERF, FUNC_CALL) \
        { \
                unsigned long long _i, _iter = 1; \
                perf_start(PERF); \
                FUNC_CALL; \
                perf_pause(PERF); \
                \
                while (get_base_elapsed(PERF) < CALIBRATE_TIME) { \
                        _iter = estimate_perf_iterations(PERF, _iter, 2 * CALIBRATE_TIME); \
                        perf_start(PERF); \
                        for (_i = 0; _i < _iter; _i++) { \
                                FUNC_CALL; \
                        } \
                        perf_stop(PERF); \
                } \
                (PERF)->iterations = _iter; \
        }
258
/*
 * PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL): run FUNC_CALL repeatedly for
 * about RUN_TIME * UNIT_SCALE time units and record the result in PERF.
 *
 * On entry (PERF)->iterations and the time accumulated in PERF must describe
 * a previous run (as left by CALIBRATE); they are used to estimate the
 * iteration count for the first timed pass. If that pass finishes short of
 * the target and BENCHMARK_TYPE == BENCHMARK_MIN_TIME, a second pass is run
 * for the remaining time plus a UNIT_SCALE / 16 margin, continuing the same
 * running total. On exit (PERF)->iterations holds the total number of
 * iterations executed by this macro.
 *
 * PERF is a `struct perf *` expression and is evaluated several times.
 * FUNC_CALL is pasted textually in two places. The macro expands to a brace
 * block declaring the locals _i, _iter and _run_total.
 */
#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) \
        { \
                unsigned long long _i, _iter = (PERF)->iterations; \
                unsigned long long _run_total = RUN_TIME; \
                _run_total *= UNIT_SCALE; \
                _iter = estimate_perf_iterations(PERF, _iter, _run_total); \
                (PERF)->iterations = 0; \
                perf_start(PERF); \
                for (_i = 0; _i < _iter; _i++) { \
                        FUNC_CALL; \
                } \
                perf_pause(PERF); \
                (PERF)->iterations += _iter; \
                \
                if (get_base_elapsed(PERF) < _run_total && BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \
                        _iter = estimate_perf_iterations(PERF, _iter, \
                                                         _run_total - get_base_elapsed(PERF) + \
                                                                 (UNIT_SCALE / 16)); \
                        perf_continue(PERF); \
                        for (_i = 0; _i < _iter; _i++) { \
                                FUNC_CALL; \
                        } \
                        perf_pause(PERF); \
                        (PERF)->iterations += _iter; \
                } \
        }
285
/*
 * BENCHMARK(PERF, RUN_TIME, FUNC_CALL): top-level benchmark entry point.
 *
 * When RUN_TIME > 0, calibrates an iteration count with CALIBRATE() and then
 * measures with PERFORMANCE_TEST(). Otherwise FUNC_CALL is timed exactly
 * once and (PERF)->iterations is set to 1.
 *
 * RUN_TIME and PERF are evaluated more than once; FUNC_CALL is pasted
 * textually into each branch. Expands to a brace block, not do/while.
 */
#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL) \
        { \
                if ((RUN_TIME) > 0) { \
                        CALIBRATE(PERF, FUNC_CALL); \
                        PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL); \
                        \
                } else { \
                        (PERF)->iterations = 1; \
                        perf_start(PERF); \
                        FUNC_CALL; \
                        perf_stop(PERF); \
                } \
        }
299
300 #ifdef USE_CYCLES
301 static inline void
perf_print(struct perf p,long long unit_count)302 perf_print(struct perf p, long long unit_count)
303 {
304 long long total_units = p.iterations * unit_count;
305
306 printf("runtime = %10lld ticks", get_base_elapsed(&p));
307 if (total_units != 0) {
308 printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte", total_units / (1000000),
309 get_time_elapsed(&p), get_base_elapsed(&p) / (double) total_units);
310 }
311 printf("\n");
312 }
313 #else
314 static inline void
perf_print(struct perf p,double unit_count)315 perf_print(struct perf p, double unit_count)
316 {
317 long long total_units = p.iterations * unit_count;
318 long long usecs = (long long) (get_time_elapsed(&p) * 1000000);
319
320 printf("runtime = %10lld usecs", usecs);
321 if (total_units != 0) {
322 printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s", total_units / (1000000),
323 get_time_elapsed(&p),
324 ((double) total_units) / (1000000 * get_time_elapsed(&p)));
325 }
326 printf("\n");
327 }
328 #endif
329
/**
 * @brief Return the total size in bytes of an open, seekable stream.
 *
 * The current stream position is saved with fgetpos() and restored with
 * fsetpos(), so the caller's read/write offset is unchanged on return. The
 * size is read with the 64-bit tell function that matches the seek call,
 * rather than by reinterpreting the opaque fpos_t object.
 *
 * @param fp  Stream to measure.
 * @return File size in bytes, or 0 if the position could not be saved, the
 *         seek failed, or the end offset could not be read.
 */
static inline uint64_t
get_filesize(FILE *fp)
{
        uint64_t file_size = 0;
        fpos_t pos_curr;
        long long end_offset = -1;

        if (fgetpos(fp, &pos_curr) != 0) /* Save current position */
                return 0;

#if defined(_WIN32) || defined(_WIN64)
        if (_fseeki64(fp, 0, SEEK_END) == 0)
                end_offset = (long long) _ftelli64(fp);
#else
        if (fseeko(fp, 0, SEEK_END) == 0)
                end_offset = (long long) ftello(fp);
#endif
        if (end_offset > 0)
                file_size = (uint64_t) end_offset;

        fsetpos(fp, &pos_curr); /* Restore position */

        return file_size;
}
348
349 #ifdef __cplusplus
350 }
351 #endif
352
353 #endif // _TEST_H
354