xref: /netbsd-src/external/lgpl3/mpfr/dist/tune/bidimensional_sample.c (revision ba125506a622fe649968631a56eba5d42ff57863)
1 /* [Description]
2 
3 Copyright 2010-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include <stdlib.h>
24 #include <time.h>
25 
26 #define MPFR_NEED_LONGLONG_H
27 #include "mpfr-impl.h"
28 
29 #undef _PROTO
30 #define _PROTO __GMP_PROTO
31 #include "speed.h"
32 
33 /* Let f be a function for which we have several implementations f1, f2... */
34 /* We wish to have a quick overview of which implementation is the best    */
35 /* in function of the point x where f(x) is computed and of the precision */
36 /* prec requested by the user.                                             */
37 /* This is performed by drawing a 2D graphic with color indicating which   */
38 /* method is the best.                                                     */
39 /* For building this graphic, the following structur is used (see the core */
40 /* of generate_2D_sample for an explanation of each field.                 */
41 struct speed_params2D
42 {
43   /* x-window: [min_x, max_x] or [2^min_x, 2^max_x]                        */
44   /*           or [-2^(max_x), -2^(min_x)] U [2^min_x, 2^max_x]            */
45   /* depending on the value of logarithmic_scale_x                         */
46   double min_x;
47   double max_x;
48 
49   /* prec-window: [min_prec, max_prec] */
50   mpfr_prec_t min_prec;
51   mpfr_prec_t max_prec;
52 
53   /* number of sample points for the x-axis and the prec-axis */
54   int nb_points_x;
55   int nb_points_prec;
56 
57   /* should the sample points be logarithmically scaled or not */
58   int logarithmic_scale_x;
59   int logarithmic_scale_prec;
60 
61   /* list of functions g1, g2... measuring the execution time of f1, f2...  */
62   /* when given a point x and a precision prec stored in s.                 */
63   /* We use s->xp to store the significant of x, s->r to store its exponent */
64   /* s->align_xp to store its sign, and s->size to store prec.              */
65   double (**speed_funcs) (struct speed_params *s);
66 };
67 
68 /* Given an array t of nb_functions double indicating the timings of several */
69 /* implementations, return i, such that t[i] is the best timing.             */
70 int
find_best(double * t,int nb_functions)71 find_best (double *t, int nb_functions)
72 {
73   int i, ibest;
74   double best;
75 
76   if (nb_functions<=0)
77     {
78       fprintf (stderr, "There is no function\n");
79       abort ();
80     }
81 
82   ibest = 0;
83   best = t[0];
84   for (i=1; i<nb_functions; i++)
85     {
86       if (t[i]<best)
87         {
88           best = t[i];
89           ibest = i;
90         }
91     }
92 
93   return ibest;
94 }
95 
generate_2D_sample(FILE * output,struct speed_params2D param)96 void generate_2D_sample (FILE *output, struct speed_params2D param)
97 {
98   mpfr_t temp;
99   double incr_prec;
100   mpfr_t incr_x;
101   mpfr_t x, x2;
102   double prec;
103   struct speed_params s;
104   int i;
105   int test;
106   int nb_functions;
107   double *t; /* store the timing of each implementation */
108 
109   /* We first determine how many implementations we have */
110   nb_functions = 0;
111   while (param.speed_funcs[nb_functions] != NULL)
112     nb_functions++;
113 
114   t = malloc (nb_functions * sizeof (double));
115   if (t == NULL)
116     {
117       fprintf (stderr, "Can't allocate memory.\n");
118       abort ();
119     }
120 
121 
122   mpfr_init2 (temp, MPFR_SMALL_PRECISION);
123 
124   /* The precision is sampled from min_prec to max_prec with        */
125   /* approximately nb_points_prec points. If logarithmic_scale_prec */
126   /* is true, the precision is multiplied by incr_prec at each      */
127   /* step. Otherwise, incr_prec is added at each step.              */
128   if (param.logarithmic_scale_prec)
129     {
130       mpfr_set_ui (temp, (unsigned long int)param.max_prec, MPFR_RNDU);
131       mpfr_div_ui (temp, temp, (unsigned long int)param.min_prec, MPFR_RNDU);
132       mpfr_root (temp, temp,
133                  (unsigned long int)param.nb_points_prec, MPFR_RNDU);
134       incr_prec = mpfr_get_d (temp, MPFR_RNDU);
135     }
136   else
137     {
138       incr_prec = (double)param.max_prec - (double)param.min_prec;
139       incr_prec = incr_prec/((double)param.nb_points_prec);
140     }
141 
142   /* The points x are sampled according to the following rule:             */
143   /* If logarithmic_scale_x = 0:                                           */
144   /*    nb_points_x points are equally distributed between min_x and max_x */
145   /* If logarithmic_scale_x = 1:                                           */
146   /*    nb_points_x points are sampled from 2^(min_x) to 2^(max_x). At     */
147   /*    each step, the current point is multiplied by incr_x.              */
148   /* If logarithmic_scale_x = -1:                                          */
149   /*    nb_points_x/2 points are sampled from -2^(max_x) to -2^(min_x)     */
150   /*    (at each step, the current point is divided by incr_x);  and       */
151   /*    nb_points_x/2 points are sampled from 2^(min_x) to 2^(max_x)       */
152   /*    (at each step, the current point is multiplied by incr_x).         */
153   mpfr_init2 (incr_x, param.max_prec);
154   if (param.logarithmic_scale_x == 0)
155     {
156       mpfr_set_d (incr_x,
157                   (param.max_x - param.min_x)/(double)param.nb_points_x,
158                   MPFR_RNDU);
159     }
160   else if (param.logarithmic_scale_x == -1)
161     {
162       mpfr_set_d (incr_x,
163                   2.*(param.max_x - param.min_x)/(double)param.nb_points_x,
164                   MPFR_RNDU);
165       mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
166     }
167   else
168     { /* other values of param.logarithmic_scale_x are considered as 1 */
169       mpfr_set_d (incr_x,
170                   (param.max_x - param.min_x)/(double)param.nb_points_x,
171                   MPFR_RNDU);
172       mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
173     }
174 
175   /* Main loop */
176   mpfr_init2 (x, param.max_prec);
177   mpfr_init2 (x2, param.max_prec);
178   prec = (double)param.min_prec;
179   while (prec <= param.max_prec)
180     {
181       printf ("prec = %d\n", (int)prec);
182       if (param.logarithmic_scale_x == 0)
183         mpfr_set_d (temp, param.min_x, MPFR_RNDU);
184       else if (param.logarithmic_scale_x == -1)
185         {
186           mpfr_set_d (temp, param.max_x, MPFR_RNDD);
187           mpfr_exp2 (temp, temp, MPFR_RNDD);
188           mpfr_neg (temp, temp, MPFR_RNDU);
189         }
190       else
191         {
192           mpfr_set_d (temp, param.min_x, MPFR_RNDD);
193           mpfr_exp2 (temp, temp, MPFR_RNDD);
194         }
195 
196       /* We perturb x a little bit, in order to avoid trailing zeros that */
197       /* might change the behavior of algorithms.                         */
198       mpfr_const_pi (x, MPFR_RNDN);
199       mpfr_div_2ui (x, x, 7, MPFR_RNDN);
200       mpfr_add_ui (x, x, 1, MPFR_RNDN);
201       mpfr_mul (x, x, temp, MPFR_RNDN);
202 
203       test = 1;
204       while (test)
205         {
206           mpfr_fprintf (output, "%e\t", mpfr_get_d (x, MPFR_RNDN));
207           mpfr_fprintf (output, "%Pu\t", (mpfr_prec_t)prec);
208 
209           s.r = (mp_limb_t)mpfr_get_exp (x);
210           s.size = (mpfr_prec_t)prec;
211           s.align_xp = (mpfr_sgn (x) > 0)?1:2;
212           mpfr_set_prec (x2, (mpfr_prec_t)prec);
213           mpfr_set (x2, x, MPFR_RNDU);
214           s.xp = x2->_mpfr_d;
215 
216           for (i=0; i<nb_functions; i++)
217             {
218               t[i] = speed_measure (param.speed_funcs[i], &s);
219               mpfr_fprintf (output, "%e\t", t[i]);
220             }
221           fprintf (output, "%d\n", 1 + find_best (t, nb_functions));
222 
223           if (param.logarithmic_scale_x == 0)
224             {
225               mpfr_add (x, x, incr_x, MPFR_RNDU);
226               if (mpfr_cmp_d (x, param.max_x) > 0)
227                 test=0;
228             }
229           else
230             {
231               if (mpfr_sgn (x) < 0 )
232                 { /* if x<0, it means that logarithmic_scale_x=-1 */
233                   mpfr_div (x, x, incr_x, MPFR_RNDU);
234                   mpfr_abs (temp, x, MPFR_RNDD);
235                   mpfr_log2 (temp, temp, MPFR_RNDD);
236                   if (mpfr_cmp_d (temp, param.min_x) < 0)
237                     mpfr_neg (x, x, MPFR_RNDN);
238                 }
239               else
240                 {
241                   mpfr_mul (x, x, incr_x, MPFR_RNDU);
242                   mpfr_set (temp, x, MPFR_RNDD);
243                   mpfr_log2 (temp, temp, MPFR_RNDD);
244                   if (mpfr_cmp_d (temp, param.max_x) > 0)
245                     test=0;
246                 }
247             }
248         }
249 
250       prec = ( (param.logarithmic_scale_prec) ? (prec * incr_prec)
251                : (prec + incr_prec) );
252       fprintf (output, "\n");
253     }
254 
255   free (t);
256   mpfr_clear (incr_x);
257   mpfr_clear (x);
258   mpfr_clear (x2);
259   mpfr_clear (temp);
260 
261   return;
262 }
263 
264 #define SPEED_MPFR_FUNC_2D(mean_func)                   \
265   do                                                    \
266     {                                                   \
267       double t;                                         \
268       unsigned i;                                       \
269       mpfr_t w, x;                                      \
270       mp_size_t size;                                   \
271                                                         \
272       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
273       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
274                                                         \
275       size = (s->size-1)/GMP_NUMB_BITS+1;               \
276       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
277       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
278       MPFR_SET_EXP (x, (mpfr_exp_t) s->r);              \
279       if (s->align_xp == 2) MPFR_SET_NEG (x);           \
280                                                         \
281       mpfr_init2 (w, s->size);                          \
282       speed_starttime ();                               \
283       i = s->reps;                                      \
284                                                         \
285       do                                                \
286         mean_func (w, x, MPFR_RNDN);                    \
287       while (--i != 0);                                 \
288       t = speed_endtime ();                             \
289                                                         \
290       mpfr_clear (w);                                   \
291       return t;                                         \
292     }                                                   \
293   while (0)
294 
295 mpfr_prec_t mpfr_exp_2_threshold;
296 mpfr_prec_t old_threshold = MPFR_EXP_2_THRESHOLD;
297 #undef  MPFR_EXP_2_THRESHOLD
298 #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
299 #include "exp_2.c"
300 
301 double
timing_exp1(struct speed_params * s)302 timing_exp1 (struct speed_params *s)
303 {
304   mpfr_exp_2_threshold = s->size+1;
305   SPEED_MPFR_FUNC_2D (mpfr_exp_2);
306 }
307 
308 double
timing_exp2(struct speed_params * s)309 timing_exp2 (struct speed_params *s)
310 {
311   mpfr_exp_2_threshold = s->size-1;
312   SPEED_MPFR_FUNC_2D (mpfr_exp_2);
313 }
314 
315 double
timing_exp3(struct speed_params * s)316 timing_exp3 (struct speed_params *s)
317 {
318   SPEED_MPFR_FUNC_2D (mpfr_exp_3);
319 }
320 
321 
322 #include "ai.c"
323 double
timing_ai1(struct speed_params * s)324 timing_ai1 (struct speed_params *s)
325 {
326   SPEED_MPFR_FUNC_2D (mpfr_ai1);
327 }
328 
329 double
timing_ai2(struct speed_params * s)330 timing_ai2 (struct speed_params *s)
331 {
332   SPEED_MPFR_FUNC_2D (mpfr_ai2);
333 }
334 
335 /* These functions are for testing purpose only */
336 /* They are used to draw which method is actually used */
337 double
virtual_timing_ai1(struct speed_params * s)338 virtual_timing_ai1 (struct speed_params *s)
339 {
340   double t;
341   unsigned i;
342   mpfr_t w, x;
343   mp_size_t size;
344   mpfr_t temp1, temp2;
345 
346   SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
347   SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
348 
349   size = (s->size-1)/GMP_NUMB_BITS+1;
350   s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
351   MPFR_TMP_INIT1 (s->xp, x, s->size);
352   MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
353   if (s->align_xp == 2) MPFR_SET_NEG (x);
354 
355   mpfr_init2 (w, s->size);
356   speed_starttime ();
357   i = s->reps;
358 
359   mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
360   mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
361 
362   mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
363   mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
364   mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
365 
366   if (MPFR_IS_NEG (x))
367       mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
368   else
369       mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
370 
371   mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
372 
373   if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
374     t = 1000.;
375   else
376     t = 1.;
377 
378   mpfr_clear (temp1);
379   mpfr_clear (temp2);
380 
381   return t;
382 }
383 
384 double
virtual_timing_ai2(struct speed_params * s)385 virtual_timing_ai2 (struct speed_params *s)
386 {
387   double t;
388   unsigned i;
389   mpfr_t w, x;
390   mp_size_t size;
391   mpfr_t temp1, temp2;
392 
393   SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
394   SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
395 
396   size = (s->size-1)/GMP_NUMB_BITS+1;
397   s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
398   MPFR_TMP_INIT1 (s->xp, x, s->size);
399   MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
400   if (s->align_xp == 2) MPFR_SET_NEG (x);
401 
402   mpfr_init2 (w, s->size);
403   speed_starttime ();
404   i = s->reps;
405 
406   mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
407   mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
408 
409   mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
410   mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
411   mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
412 
413   if (MPFR_IS_NEG (x))
414       mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
415   else
416       mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
417 
418   mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
419 
420   if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
421     t = 1.;
422   else
423     t = 1000.;
424 
425   mpfr_clear (temp1);
426   mpfr_clear (temp2);
427 
428   return t;
429 }
430 
431 int
main(void)432 main (void)
433 {
434   FILE *output;
435   struct speed_params2D param;
436   double (*speed_funcs[3]) (struct speed_params *s);
437 
438   /* char filename[256] = "virtual_timing_ai.dat"; */
439   /* speed_funcs[0] = virtual_timing_ai1; */
440   /* speed_funcs[1] = virtual_timing_ai2; */
441 
442   char filename[256] = "airy.dat";
443   speed_funcs[0] = timing_ai1;
444   speed_funcs[1] = timing_ai2;
445 
446   speed_funcs[2] = NULL;
447   output = fopen (filename, "w");
448   if (output == NULL)
449     {
450       fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
451       abort ();
452     }
453   param.min_x = -80;
454   param.max_x = 60;
455   param.min_prec = 50;
456   param.max_prec = 1500;
457   param.nb_points_x = 200;
458   param.nb_points_prec = 200;
459   param.logarithmic_scale_x  = 0;
460   param.logarithmic_scale_prec = 0;
461   param.speed_funcs = speed_funcs;
462 
463   generate_2D_sample (output, param);
464 
465   fclose (output);
466   mpfr_free_cache ();
467   return 0;
468 }
469