1 /* [Description]
2
3 Copyright 2010-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #include <stdlib.h>
24 #include <time.h>
25
26 #define MPFR_NEED_LONGLONG_H
27 #include "mpfr-impl.h"
28
29 #undef _PROTO
30 #define _PROTO __GMP_PROTO
31 #include "speed.h"
32
33 /* Let f be a function for which we have several implementations f1, f2... */
34 /* We wish to have a quick overview of which implementation is the best */
35 /* in function of the point x where f(x) is computed and of the precision */
36 /* prec requested by the user. */
37 /* This is performed by drawing a 2D graphic with color indicating which */
38 /* method is the best. */
39 /* For building this graphic, the following structur is used (see the core */
40 /* of generate_2D_sample for an explanation of each field. */
41 struct speed_params2D
42 {
43 /* x-window: [min_x, max_x] or [2^min_x, 2^max_x] */
44 /* or [-2^(max_x), -2^(min_x)] U [2^min_x, 2^max_x] */
45 /* depending on the value of logarithmic_scale_x */
46 double min_x;
47 double max_x;
48
49 /* prec-window: [min_prec, max_prec] */
50 mpfr_prec_t min_prec;
51 mpfr_prec_t max_prec;
52
53 /* number of sample points for the x-axis and the prec-axis */
54 int nb_points_x;
55 int nb_points_prec;
56
57 /* should the sample points be logarithmically scaled or not */
58 int logarithmic_scale_x;
59 int logarithmic_scale_prec;
60
61 /* list of functions g1, g2... measuring the execution time of f1, f2... */
62 /* when given a point x and a precision prec stored in s. */
63 /* We use s->xp to store the significant of x, s->r to store its exponent */
64 /* s->align_xp to store its sign, and s->size to store prec. */
65 double (**speed_funcs) (struct speed_params *s);
66 };
67
68 /* Given an array t of nb_functions double indicating the timings of several */
69 /* implementations, return i, such that t[i] is the best timing. */
70 int
find_best(double * t,int nb_functions)71 find_best (double *t, int nb_functions)
72 {
73 int i, ibest;
74 double best;
75
76 if (nb_functions<=0)
77 {
78 fprintf (stderr, "There is no function\n");
79 abort ();
80 }
81
82 ibest = 0;
83 best = t[0];
84 for (i=1; i<nb_functions; i++)
85 {
86 if (t[i]<best)
87 {
88 best = t[i];
89 ibest = i;
90 }
91 }
92
93 return ibest;
94 }
95
generate_2D_sample(FILE * output,struct speed_params2D param)96 void generate_2D_sample (FILE *output, struct speed_params2D param)
97 {
98 mpfr_t temp;
99 double incr_prec;
100 mpfr_t incr_x;
101 mpfr_t x, x2;
102 double prec;
103 struct speed_params s;
104 int i;
105 int test;
106 int nb_functions;
107 double *t; /* store the timing of each implementation */
108
109 /* We first determine how many implementations we have */
110 nb_functions = 0;
111 while (param.speed_funcs[nb_functions] != NULL)
112 nb_functions++;
113
114 t = malloc (nb_functions * sizeof (double));
115 if (t == NULL)
116 {
117 fprintf (stderr, "Can't allocate memory.\n");
118 abort ();
119 }
120
121
122 mpfr_init2 (temp, MPFR_SMALL_PRECISION);
123
124 /* The precision is sampled from min_prec to max_prec with */
125 /* approximately nb_points_prec points. If logarithmic_scale_prec */
126 /* is true, the precision is multiplied by incr_prec at each */
127 /* step. Otherwise, incr_prec is added at each step. */
128 if (param.logarithmic_scale_prec)
129 {
130 mpfr_set_ui (temp, (unsigned long int)param.max_prec, MPFR_RNDU);
131 mpfr_div_ui (temp, temp, (unsigned long int)param.min_prec, MPFR_RNDU);
132 mpfr_root (temp, temp,
133 (unsigned long int)param.nb_points_prec, MPFR_RNDU);
134 incr_prec = mpfr_get_d (temp, MPFR_RNDU);
135 }
136 else
137 {
138 incr_prec = (double)param.max_prec - (double)param.min_prec;
139 incr_prec = incr_prec/((double)param.nb_points_prec);
140 }
141
142 /* The points x are sampled according to the following rule: */
143 /* If logarithmic_scale_x = 0: */
144 /* nb_points_x points are equally distributed between min_x and max_x */
145 /* If logarithmic_scale_x = 1: */
146 /* nb_points_x points are sampled from 2^(min_x) to 2^(max_x). At */
147 /* each step, the current point is multiplied by incr_x. */
148 /* If logarithmic_scale_x = -1: */
149 /* nb_points_x/2 points are sampled from -2^(max_x) to -2^(min_x) */
150 /* (at each step, the current point is divided by incr_x); and */
151 /* nb_points_x/2 points are sampled from 2^(min_x) to 2^(max_x) */
152 /* (at each step, the current point is multiplied by incr_x). */
153 mpfr_init2 (incr_x, param.max_prec);
154 if (param.logarithmic_scale_x == 0)
155 {
156 mpfr_set_d (incr_x,
157 (param.max_x - param.min_x)/(double)param.nb_points_x,
158 MPFR_RNDU);
159 }
160 else if (param.logarithmic_scale_x == -1)
161 {
162 mpfr_set_d (incr_x,
163 2.*(param.max_x - param.min_x)/(double)param.nb_points_x,
164 MPFR_RNDU);
165 mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
166 }
167 else
168 { /* other values of param.logarithmic_scale_x are considered as 1 */
169 mpfr_set_d (incr_x,
170 (param.max_x - param.min_x)/(double)param.nb_points_x,
171 MPFR_RNDU);
172 mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
173 }
174
175 /* Main loop */
176 mpfr_init2 (x, param.max_prec);
177 mpfr_init2 (x2, param.max_prec);
178 prec = (double)param.min_prec;
179 while (prec <= param.max_prec)
180 {
181 printf ("prec = %d\n", (int)prec);
182 if (param.logarithmic_scale_x == 0)
183 mpfr_set_d (temp, param.min_x, MPFR_RNDU);
184 else if (param.logarithmic_scale_x == -1)
185 {
186 mpfr_set_d (temp, param.max_x, MPFR_RNDD);
187 mpfr_exp2 (temp, temp, MPFR_RNDD);
188 mpfr_neg (temp, temp, MPFR_RNDU);
189 }
190 else
191 {
192 mpfr_set_d (temp, param.min_x, MPFR_RNDD);
193 mpfr_exp2 (temp, temp, MPFR_RNDD);
194 }
195
196 /* We perturb x a little bit, in order to avoid trailing zeros that */
197 /* might change the behavior of algorithms. */
198 mpfr_const_pi (x, MPFR_RNDN);
199 mpfr_div_2ui (x, x, 7, MPFR_RNDN);
200 mpfr_add_ui (x, x, 1, MPFR_RNDN);
201 mpfr_mul (x, x, temp, MPFR_RNDN);
202
203 test = 1;
204 while (test)
205 {
206 mpfr_fprintf (output, "%e\t", mpfr_get_d (x, MPFR_RNDN));
207 mpfr_fprintf (output, "%Pu\t", (mpfr_prec_t)prec);
208
209 s.r = (mp_limb_t)mpfr_get_exp (x);
210 s.size = (mpfr_prec_t)prec;
211 s.align_xp = (mpfr_sgn (x) > 0)?1:2;
212 mpfr_set_prec (x2, (mpfr_prec_t)prec);
213 mpfr_set (x2, x, MPFR_RNDU);
214 s.xp = x2->_mpfr_d;
215
216 for (i=0; i<nb_functions; i++)
217 {
218 t[i] = speed_measure (param.speed_funcs[i], &s);
219 mpfr_fprintf (output, "%e\t", t[i]);
220 }
221 fprintf (output, "%d\n", 1 + find_best (t, nb_functions));
222
223 if (param.logarithmic_scale_x == 0)
224 {
225 mpfr_add (x, x, incr_x, MPFR_RNDU);
226 if (mpfr_cmp_d (x, param.max_x) > 0)
227 test=0;
228 }
229 else
230 {
231 if (mpfr_sgn (x) < 0 )
232 { /* if x<0, it means that logarithmic_scale_x=-1 */
233 mpfr_div (x, x, incr_x, MPFR_RNDU);
234 mpfr_abs (temp, x, MPFR_RNDD);
235 mpfr_log2 (temp, temp, MPFR_RNDD);
236 if (mpfr_cmp_d (temp, param.min_x) < 0)
237 mpfr_neg (x, x, MPFR_RNDN);
238 }
239 else
240 {
241 mpfr_mul (x, x, incr_x, MPFR_RNDU);
242 mpfr_set (temp, x, MPFR_RNDD);
243 mpfr_log2 (temp, temp, MPFR_RNDD);
244 if (mpfr_cmp_d (temp, param.max_x) > 0)
245 test=0;
246 }
247 }
248 }
249
250 prec = ( (param.logarithmic_scale_prec) ? (prec * incr_prec)
251 : (prec + incr_prec) );
252 fprintf (output, "\n");
253 }
254
255 free (t);
256 mpfr_clear (incr_x);
257 mpfr_clear (x);
258 mpfr_clear (x2);
259 mpfr_clear (temp);
260
261 return;
262 }
263
264 #define SPEED_MPFR_FUNC_2D(mean_func) \
265 do \
266 { \
267 double t; \
268 unsigned i; \
269 mpfr_t w, x; \
270 mp_size_t size; \
271 \
272 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
273 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
274 \
275 size = (s->size-1)/GMP_NUMB_BITS+1; \
276 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
277 MPFR_TMP_INIT1 (s->xp, x, s->size); \
278 MPFR_SET_EXP (x, (mpfr_exp_t) s->r); \
279 if (s->align_xp == 2) MPFR_SET_NEG (x); \
280 \
281 mpfr_init2 (w, s->size); \
282 speed_starttime (); \
283 i = s->reps; \
284 \
285 do \
286 mean_func (w, x, MPFR_RNDN); \
287 while (--i != 0); \
288 t = speed_endtime (); \
289 \
290 mpfr_clear (w); \
291 return t; \
292 } \
293 while (0)
294
295 mpfr_prec_t mpfr_exp_2_threshold;
296 mpfr_prec_t old_threshold = MPFR_EXP_2_THRESHOLD;
297 #undef MPFR_EXP_2_THRESHOLD
298 #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
299 #include "exp_2.c"
300
301 double
timing_exp1(struct speed_params * s)302 timing_exp1 (struct speed_params *s)
303 {
304 mpfr_exp_2_threshold = s->size+1;
305 SPEED_MPFR_FUNC_2D (mpfr_exp_2);
306 }
307
308 double
timing_exp2(struct speed_params * s)309 timing_exp2 (struct speed_params *s)
310 {
311 mpfr_exp_2_threshold = s->size-1;
312 SPEED_MPFR_FUNC_2D (mpfr_exp_2);
313 }
314
315 double
timing_exp3(struct speed_params * s)316 timing_exp3 (struct speed_params *s)
317 {
318 SPEED_MPFR_FUNC_2D (mpfr_exp_3);
319 }
320
321
322 #include "ai.c"
323 double
timing_ai1(struct speed_params * s)324 timing_ai1 (struct speed_params *s)
325 {
326 SPEED_MPFR_FUNC_2D (mpfr_ai1);
327 }
328
329 double
timing_ai2(struct speed_params * s)330 timing_ai2 (struct speed_params *s)
331 {
332 SPEED_MPFR_FUNC_2D (mpfr_ai2);
333 }
334
335 /* These functions are for testing purpose only */
336 /* They are used to draw which method is actually used */
337 double
virtual_timing_ai1(struct speed_params * s)338 virtual_timing_ai1 (struct speed_params *s)
339 {
340 double t;
341 unsigned i;
342 mpfr_t w, x;
343 mp_size_t size;
344 mpfr_t temp1, temp2;
345
346 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
347 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
348
349 size = (s->size-1)/GMP_NUMB_BITS+1;
350 s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
351 MPFR_TMP_INIT1 (s->xp, x, s->size);
352 MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
353 if (s->align_xp == 2) MPFR_SET_NEG (x);
354
355 mpfr_init2 (w, s->size);
356 speed_starttime ();
357 i = s->reps;
358
359 mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
360 mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
361
362 mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
363 mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
364 mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
365
366 if (MPFR_IS_NEG (x))
367 mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
368 else
369 mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
370
371 mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
372
373 if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
374 t = 1000.;
375 else
376 t = 1.;
377
378 mpfr_clear (temp1);
379 mpfr_clear (temp2);
380
381 return t;
382 }
383
384 double
virtual_timing_ai2(struct speed_params * s)385 virtual_timing_ai2 (struct speed_params *s)
386 {
387 double t;
388 unsigned i;
389 mpfr_t w, x;
390 mp_size_t size;
391 mpfr_t temp1, temp2;
392
393 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
394 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
395
396 size = (s->size-1)/GMP_NUMB_BITS+1;
397 s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
398 MPFR_TMP_INIT1 (s->xp, x, s->size);
399 MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
400 if (s->align_xp == 2) MPFR_SET_NEG (x);
401
402 mpfr_init2 (w, s->size);
403 speed_starttime ();
404 i = s->reps;
405
406 mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
407 mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
408
409 mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
410 mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
411 mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
412
413 if (MPFR_IS_NEG (x))
414 mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
415 else
416 mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
417
418 mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
419
420 if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
421 t = 1.;
422 else
423 t = 1000.;
424
425 mpfr_clear (temp1);
426 mpfr_clear (temp2);
427
428 return t;
429 }
430
431 int
main(void)432 main (void)
433 {
434 FILE *output;
435 struct speed_params2D param;
436 double (*speed_funcs[3]) (struct speed_params *s);
437
438 /* char filename[256] = "virtual_timing_ai.dat"; */
439 /* speed_funcs[0] = virtual_timing_ai1; */
440 /* speed_funcs[1] = virtual_timing_ai2; */
441
442 char filename[256] = "airy.dat";
443 speed_funcs[0] = timing_ai1;
444 speed_funcs[1] = timing_ai2;
445
446 speed_funcs[2] = NULL;
447 output = fopen (filename, "w");
448 if (output == NULL)
449 {
450 fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
451 abort ();
452 }
453 param.min_x = -80;
454 param.max_x = 60;
455 param.min_prec = 50;
456 param.max_prec = 1500;
457 param.nb_points_x = 200;
458 param.nb_points_prec = 200;
459 param.logarithmic_scale_x = 0;
460 param.logarithmic_scale_prec = 0;
461 param.speed_funcs = speed_funcs;
462
463 generate_2D_sample (output, param);
464
465 fclose (output);
466 mpfr_free_cache ();
467 return 0;
468 }
469