/* xref: /isa-l/include/test.h (revision fa5b8baf84e6a18dbaad48a3fa0d1fa062ae2fe8) */
/**********************************************************************
  Copyright(c) 2011-2015 Intel Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
29 
30 #ifndef _TEST_H
31 #define _TEST_H
32 
/**
 *  @file  test.h
 *  @brief Test helper include for common perf and test macros
 *
 *  This is a helper file to enable short and simple tests. Not intended for use
 *  in library functions or production. Includes helper routines for alignment,
 *  benchmark timing, and filesize.
 */
41 
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
45 
46 #include <stdio.h>
47 #include <stdint.h>
48 
/* MSVC spells the keyword `__inline`; map `inline` onto it. */
#ifdef _MSC_VER
#define inline __inline
#endif

/*
 * Make os-independent alignment attribute, alloc and free.
 *
 *  - DECLARE_ALIGNED(decl, alignval): declare `decl` with `alignval` alignment.
 *  - posix_memalign(p, algn, len): only defined here for the non-unix branch,
 *    where it is emulated with _aligned_malloc(); the expression is 0 when the
 *    allocation succeeded and 1 when it returned NULL.
 *  - aligned_free(x): release a buffer obtained through posix_memalign().
 *
 * These are macros only; the including file must provide the declarations of
 * free() / _aligned_malloc() / _aligned_free().
 */
#if defined __unix__ || defined __APPLE__
#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
#define __forceinline                   static inline
#define aligned_free(x)                 free(x)
#else
#ifdef __MINGW32__
#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
#else
#define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
#endif
/* Store through void ** so the assignment is also valid when built as C++. */
#define posix_memalign(p, algn, len)                                           \
        (NULL == (*((void **) (p)) = (void *) _aligned_malloc((len), (algn))))
#define aligned_free(x) _aligned_free(x)
#endif
71 
/*
 * DEBUG_PRINT((fmt, ...)): printf-style trace output that is compiled in only
 * when DEBUG is defined. The argument carries its own parentheses because it is
 * pasted directly after `printf`. Without DEBUG the macro expands to an empty
 * statement, so the arguments are not evaluated.
 */
#ifdef DEBUG
#define DEBUG_PRINT(x) printf x
#else
#define DEBUG_PRINT(x)                                                         \
        do {                                                                   \
        } while (0)
#endif
79 
/* Decide whether to use benchmark time as an approximation or a minimum. Fewer
 * calls to the timer are required for the approximation case.
 *
 * BENCHMARK_TYPE may be defined before this point to pick one of the two
 * values; it defaults to BENCHMARK_MIN_TIME. */
#define BENCHMARK_MIN_TIME    0
#define BENCHMARK_APPROX_TIME 1
#ifndef BENCHMARK_TYPE
#define BENCHMARK_TYPE BENCHMARK_MIN_TIME
#endif
87 
/*
 * Select the timer source: with USE_RDTSC defined the x86 intrinsics header is
 * included and USE_CYCLES is defined; otherwise <time.h> is included and
 * USE_SECONDS is defined.
 */
#ifdef USE_RDTSC
/* The use of rdtsc is nuanced. On many processors it corresponds to a
 * standardized clock source. To obtain a meaningful result it may be
 * necessary to fix the CPU clock to match the rdtsc tick rate.
 */
#include <inttypes.h>
#include <x86intrin.h>
#define USE_CYCLES
#else
#include <time.h>
#define USE_SECONDS
#endif
100 
101 #ifdef USE_RDTSC
/* Default BENCHMARK_TIME for the rdtsc timer; define it earlier to override. */
#ifndef BENCHMARK_TIME
#define BENCHMARK_TIME 6
#endif
#define GHZ            1000000000
/* Divisor get_time_elapsed() applies to the accumulated tick count. */
#define UNIT_SCALE     (GHZ)
/* Elapsed-tick threshold at which the CALIBRATE loop stops. */
#define CALIBRATE_TIME (UNIT_SCALE / 2)
/**
 * @brief Read the current timer value through the __rdtscp() intrinsic.
 * @returns The counter value reported by __rdtscp(), converted to long long.
 */
static inline long long
get_time(void)
{
        unsigned int aux; /* output argument required by __rdtscp(); not used */

        return (long long) __rdtscp(&aux);
}
114 
/**
 * @brief Timer resolution for the rdtsc timer source.
 * @returns Always 1.
 */
static inline long long
get_res(void)
{
        return 1;
}
120 #else
/* Default BENCHMARK_TIME when rdtsc is not used; define it earlier to override. */
#ifndef BENCHMARK_TIME
#define BENCHMARK_TIME 3
#endif
124 #ifdef _MSC_VER
/* In the MSVC branch the scale is not a constant: it is whatever get_res()
 * returns at the point of use. */
#define UNIT_SCALE     get_res()
/* Elapsed-time threshold at which the CALIBRATE loop stops. */
#define CALIBRATE_TIME (UNIT_SCALE / 4)
/**
 * @brief Read the current timer value with QueryPerformanceCounter().
 * @returns The value written by QueryPerformanceCounter(); 0 if the call left
 *          the variable untouched.
 *
 * NOTE(review): the Win32 prototype presumably takes a LARGE_INTEGER *, while a
 * long long * is passed here; confirm this is accepted by the supported
 * compilers. The including file must provide the Win32 declaration.
 */
static inline long long
get_time(void)
{
        long long ret = 0;

        QueryPerformanceCounter(&ret);
        return ret;
}
134 
/**
 * @brief Query the timer scale with QueryPerformanceFrequency().
 * @returns The value written by QueryPerformanceFrequency(); 0 if the call left
 *          the variable untouched. UNIT_SCALE expands to this function in the
 *          MSVC branch.
 *
 * NOTE(review): the Win32 prototype presumably takes a LARGE_INTEGER *, while a
 * long long * is passed here; confirm this is accepted by the supported
 * compilers. The including file must provide the Win32 declaration.
 */
static inline long long
get_res(void)
{
        long long ret = 0;

        QueryPerformanceFrequency(&ret);
        return ret;
}
142 #else
/* Multiplier that converts the tv_sec field of a timespec to nanoseconds. */
#define NANO_SCALE     1000000000
/* Divisor get_time_elapsed() applies to the accumulated timer value. */
#define UNIT_SCALE     NANO_SCALE
/* Elapsed-time threshold at which the CALIBRATE loop stops. */
#define CALIBRATE_TIME (UNIT_SCALE / 4)
/* Clock handed to clock_gettime() and clock_getres(). */
#ifdef __FreeBSD__
#define CLOCK_ID CLOCK_MONOTONIC_PRECISE
#else
#define CLOCK_ID CLOCK_MONOTONIC
#endif
151 
152 static inline long long
153 get_time(void)
154 {
155         struct timespec time;
156         long long nano_total;
157         clock_gettime(CLOCK_ID, &time);
158         nano_total = time.tv_sec;
159         nano_total *= NANO_SCALE;
160         nano_total += time.tv_nsec;
161         return nano_total;
162 }
163 
164 static inline long long
165 get_res(void)
166 {
167         struct timespec time;
168         long long nano_total;
169         clock_getres(CLOCK_ID, &time);
170         nano_total = time.tv_sec;
171         nano_total *= NANO_SCALE;
172         nano_total += time.tv_nsec;
173         return nano_total;
174 }
175 #endif
176 #endif
/* Timing state shared by the perf_*() helpers and the benchmark macros. */
struct perf {
        long long start;      /* timer value set by perf_continue()/perf_pause() */
        long long stop;       /* timer value taken by the last perf_pause() */
        long long run_total;  /* elapsed timer units accumulated by perf_pause() */
        long long iterations; /* call count written by the benchmark macros */
};
183 
184 static inline void
perf_init(struct perf * p)185 perf_init(struct perf *p)
186 {
187         p->start = 0;
188         p->stop = 0;
189         p->run_total = 0;
190 }
191 
192 static inline void
perf_continue(struct perf * p)193 perf_continue(struct perf *p)
194 {
195         p->start = get_time();
196 }
197 
198 static inline void
perf_pause(struct perf * p)199 perf_pause(struct perf *p)
200 {
201         p->stop = get_time();
202         p->run_total = p->run_total + p->stop - p->start;
203         p->start = p->stop;
204 }
205 
/**
 * @brief Start a fresh measurement: perf_init() followed by perf_continue().
 * @param p Record to reset and start.
 */
static inline void
perf_start(struct perf *p)
{
        perf_init(p);
        perf_continue(p);
}
212 
/**
 * @brief Stop a measurement; identical to perf_pause().
 * @param p Record to update.
 */
static inline void
perf_stop(struct perf *p)
{
        perf_pause(p);
}
218 
219 static inline double
get_time_elapsed(struct perf * p)220 get_time_elapsed(struct perf *p)
221 {
222         return 1.0 * p->run_total / UNIT_SCALE;
223 }
224 
225 static inline long long
get_base_elapsed(struct perf * p)226 get_base_elapsed(struct perf *p)
227 {
228         return p->run_total;
229 }
230 
/**
 * @brief Estimate how many iterations are needed to fill a time budget.
 *
 * Computes ceil(total * runs / elapsed), where elapsed is the time already
 * recorded in @p p. When nothing has been recorded yet (elapsed <= 0) the timer
 * resolution from get_res() is used as the divisor instead.
 *
 * @param p     Record holding the elapsed time measured for @p runs iterations.
 * @param runs  Number of iterations that produced the recorded elapsed time.
 * @param total Time budget, in the same raw timer units as run_total.
 * @returns The estimated iteration count.
 */
static inline unsigned long long
estimate_perf_iterations(struct perf *p, unsigned long long runs, unsigned long long total)
{
        long long divisor = get_base_elapsed(p);

        if (divisor <= 0)
                divisor = get_res();
        if (divisor <= 0)
                divisor = 1; /* never divide by zero if the timer reports no resolution */

        total = total * runs;
        return (total + divisor - 1) / divisor;
}
240 
/**
 * @brief Find an iteration count for FUNC_CALL that runs for at least
 *        CALIBRATE_TIME timer units.
 *
 * Times a single FUNC_CALL, then repeatedly re-estimates the iteration count
 * (aiming at 2 * CALIBRATE_TIME) and re-times that many calls until the
 * measured time reaches CALIBRATE_TIME. The last count used is stored in
 * (PERF)->iterations; PERF keeps the timing of the last batch.
 *
 * PERF is a struct perf pointer and is evaluated several times. FUNC_CALL is
 * pasted into the loop body, where the macro's locals _i and _iter are visible.
 */
#define CALIBRATE(PERF, FUNC_CALL)                                             \
        {                                                                      \
                unsigned long long _i, _iter = 1;                              \
                perf_start(PERF);                                              \
                FUNC_CALL;                                                     \
                perf_pause(PERF);                                              \
                                                                               \
                while (get_base_elapsed(PERF) < CALIBRATE_TIME) {              \
                        _iter = estimate_perf_iterations(PERF, _iter,          \
                                                         2 * CALIBRATE_TIME);  \
                        perf_start(PERF);                                      \
                        for (_i = 0; _i < _iter; _i++) {                       \
                                FUNC_CALL;                                     \
                        }                                                      \
                        perf_stop(PERF);                                       \
                }                                                              \
                (PERF)->iterations = _iter;                                    \
        }
258 
/**
 * @brief Time FUNC_CALL for about RUN_TIME * UNIT_SCALE timer units.
 *
 * Starts from the iteration count and elapsed time already stored in PERF,
 * scales the count to the requested run time and times that many calls. If the
 * batch came in under the requested time and BENCHMARK_TYPE is
 * BENCHMARK_MIN_TIME, a second batch sized for the shortfall plus
 * UNIT_SCALE / 16 is timed on top of the first. On exit (PERF)->iterations
 * holds the total number of calls and PERF's run_total the time they took.
 *
 * PERF is a struct perf pointer and is evaluated several times. FUNC_CALL is
 * pasted into the loop bodies, where the macro's locals _i, _iter and
 * _run_total are visible.
 */
#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL)                            \
        {                                                                      \
                unsigned long long _i, _iter = (PERF)->iterations;             \
                unsigned long long _run_total = (RUN_TIME);                    \
                _run_total *= UNIT_SCALE;                                      \
                _iter = estimate_perf_iterations(PERF, _iter, _run_total);     \
                (PERF)->iterations = 0;                                        \
                perf_start(PERF);                                              \
                for (_i = 0; _i < _iter; _i++) {                               \
                        FUNC_CALL;                                             \
                }                                                              \
                perf_pause(PERF);                                              \
                (PERF)->iterations += _iter;                                   \
                                                                               \
                if (get_base_elapsed(PERF) < _run_total &&                     \
                    BENCHMARK_TYPE == BENCHMARK_MIN_TIME) {                    \
                        _iter = estimate_perf_iterations(                      \
                                PERF, _iter,                                   \
                                _run_total - get_base_elapsed(PERF) +          \
                                        (UNIT_SCALE / 16));                    \
                        perf_continue(PERF);                                   \
                        for (_i = 0; _i < _iter; _i++) {                       \
                                FUNC_CALL;                                     \
                        }                                                      \
                        perf_pause(PERF);                                      \
                        (PERF)->iterations += _iter;                           \
                }                                                              \
        }
285 
/**
 * @brief Benchmark FUNC_CALL and leave the result in PERF.
 *
 * With RUN_TIME > 0 the call is first calibrated (CALIBRATE) and then measured
 * (PERFORMANCE_TEST). Otherwise FUNC_CALL is timed exactly once and
 * (PERF)->iterations is set to 1.
 *
 * PERF is a struct perf pointer and is evaluated several times; FUNC_CALL is
 * pasted into the expansion more than once.
 */
#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL)                                   \
        {                                                                      \
                if ((RUN_TIME) > 0) {                                          \
                        CALIBRATE(PERF, FUNC_CALL);                            \
                        PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL);           \
                                                                               \
                } else {                                                       \
                        (PERF)->iterations = 1;                                \
                        perf_start(PERF);                                      \
                        FUNC_CALL;                                             \
                        perf_stop(PERF);                                       \
                }                                                              \
        }
299 
300 #ifdef USE_CYCLES
301 static inline void
perf_print(struct perf p,long long unit_count)302 perf_print(struct perf p, long long unit_count)
303 {
304         long long total_units = p.iterations * unit_count;
305 
306         printf("runtime = %10lld ticks", get_base_elapsed(&p));
307         if (total_units != 0) {
308                 printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte", total_units / (1000000),
309                        get_time_elapsed(&p), get_base_elapsed(&p) / (double) total_units);
310         }
311         printf("\n");
312 }
313 #else
314 static inline void
perf_print(struct perf p,double unit_count)315 perf_print(struct perf p, double unit_count)
316 {
317         long long total_units = p.iterations * unit_count;
318         long long usecs = (long long) (get_time_elapsed(&p) * 1000000);
319 
320         printf("runtime = %10lld usecs", usecs);
321         if (total_units != 0) {
322                 printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s", total_units / (1000000),
323                        get_time_elapsed(&p),
324                        ((double) total_units) / (1000000 * get_time_elapsed(&p)));
325         }
326         printf("\n");
327 }
328 #endif
329 
/**
 * @brief Return the size of an open file without disturbing its position.
 *
 * Saves the stream position, seeks to the end, reads the end offset with the
 * 64-bit tell function and restores the saved position.
 *
 * @param fp Open, seekable stream.
 * @returns File size in bytes, or 0 if the position could not be saved or the
 *          end offset could not be determined.
 */
static inline uint64_t
get_filesize(FILE *fp)
{
        uint64_t file_size = 0;
        long long end = -1;
        fpos_t pos_curr;

        if (fgetpos(fp, &pos_curr) != 0) /* Save current position */
                return 0;

        /* fpos_t is opaque, so the end offset is read with a tell function
         * rather than by reinterpreting an fpos_t as an integer. */
#if defined(_WIN32) || defined(_WIN64)
        if (_fseeki64(fp, 0, SEEK_END) == 0)
                end = (long long) _ftelli64(fp);
#else
        if (fseeko(fp, 0, SEEK_END) == 0)
                end = (long long) ftello(fp);
#endif
        if (end > 0)
                file_size = (uint64_t) end;

        fsetpos(fp, &pos_curr); /* Restore position */

        return file_size;
}
348 
349 #ifdef __cplusplus
350 }
351 #endif
352 
353 #endif // _TEST_H
354