xref: /netbsd-src/external/lgpl3/mpfr/dist/tune/tuneup.c (revision ba125506a622fe649968631a56eba5d42ff57863)
1 /* Tune various threshold of MPFR
2 
3 Copyright 2005-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include <stdlib.h>
24 #include <time.h>
25 
26 #define MPFR_NEED_LONGLONG_H
27 #include "mpfr-impl.h"
28 
29 #undef _PROTO
30 #define _PROTO __GMP_PROTO
31 #include "speed.h"
32 
33 /* Undefine static assertion system */
34 #undef MPFR_STAT_STATIC_ASSERT
35 #define MPFR_STAT_STATIC_ASSERT(a) MPFR_ASSERTN(a)
36 
37 int verbose;
38 
39 /* template for an unary function */
40 /* s->size: precision of both input and output
41    s->xp  : Mantissa of first input
42    s->yp  : mantissa of second input                    */
43 #define SPEED_MPFR_FUNC(mean_fun)                       \
44   do                                                    \
45     {                                                   \
46       unsigned  i;                                      \
47       mpfr_limb_ptr wp;                                 \
48       double    t;                                      \
49       mpfr_t    w, x;                                   \
50       mp_size_t size;                                   \
51       MPFR_TMP_DECL (marker);                           \
52                                                         \
53       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
54       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
55       MPFR_TMP_MARK (marker);                           \
56                                                         \
57       size = (s->size-1)/GMP_NUMB_BITS+1;               \
58       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
59       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
60       MPFR_SET_EXP (x, 0);                              \
61                                                         \
62       MPFR_TMP_INIT (wp, w, s->size, size);             \
63                                                         \
64       speed_operand_src (s, s->xp, size);               \
65       speed_operand_dst (s, wp, size);                  \
66       speed_cache_fill (s);                             \
67                                                         \
68       speed_starttime ();                               \
69       i = s->reps;                                      \
70       do                                                \
71         mean_fun (w, x, MPFR_RNDN);                     \
72       while (--i != 0);                                 \
73       t = speed_endtime ();                             \
74                                                         \
75       MPFR_TMP_FREE (marker);                           \
76       return t;                                         \
77     }                                                   \
78   while (0)
79 
80 /* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */
81 #define SPEED_MPFR_FUNC2(mean_fun)                      \
82   do                                                    \
83     {                                                   \
84       unsigned  i;                                      \
85       mpfr_limb_ptr vp, wp;                             \
86       double    t;                                      \
87       mpfr_t    v, w, x;                                \
88       mp_size_t size;                                   \
89       MPFR_TMP_DECL (marker);                           \
90                                                         \
91       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
92       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
93       MPFR_TMP_MARK (marker);                           \
94                                                         \
95       size = (s->size-1)/GMP_NUMB_BITS+1;               \
96       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
97       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
98       MPFR_SET_EXP (x, 0);                              \
99                                                         \
100       MPFR_TMP_INIT (vp, v, s->size, size);             \
101       MPFR_TMP_INIT (wp, w, s->size, size);             \
102                                                         \
103       speed_operand_src (s, s->xp, size);               \
104       speed_operand_dst (s, vp, size);                  \
105       speed_operand_dst (s, wp, size);                  \
106       speed_cache_fill (s);                             \
107                                                         \
108       speed_starttime ();                               \
109       i = s->reps;                                      \
110       do                                                \
111         mean_fun (v, w, x, MPFR_RNDN);                  \
112       while (--i != 0);                                 \
113       t = speed_endtime ();                             \
114                                                         \
115       MPFR_TMP_FREE (marker);                           \
116       return t;                                         \
117     }                                                   \
118   while (0)
119 
120 /* template for a function like mpfr_mul */
121 #define SPEED_MPFR_OP(mean_fun)                         \
122   do                                                    \
123     {                                                   \
124       unsigned  i;                                      \
125       mpfr_limb_ptr wp;                                 \
126       double    t;                                      \
127       mpfr_t    w, x, y;                                \
128       mp_size_t size;                                   \
129       MPFR_TMP_DECL (marker);                           \
130                                                         \
131       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
132       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
133       MPFR_TMP_MARK (marker);                           \
134                                                         \
135       size = (s->size-1)/GMP_NUMB_BITS+1;               \
136       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
137       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
138       MPFR_SET_EXP (x, 0);                              \
139       s->yp[size-1] |= MPFR_LIMB_HIGHBIT;               \
140       MPFR_TMP_INIT1 (s->yp, y, s->size);               \
141       MPFR_SET_EXP (y, 0);                              \
142                                                         \
143       MPFR_TMP_INIT (wp, w, s->size, size);             \
144                                                         \
145       speed_operand_src (s, s->xp, size);               \
146       speed_operand_src (s, s->yp, size);               \
147       speed_operand_dst (s, wp, size);                  \
148       speed_cache_fill (s);                             \
149                                                         \
150       speed_starttime ();                               \
151       i = s->reps;                                      \
152       do                                                \
153         mean_fun (w, x, y, MPFR_RNDN);                  \
154       while (--i != 0);                                 \
155       t = speed_endtime ();                             \
156                                                         \
157       MPFR_TMP_FREE (marker);                           \
158       return t;                                         \
159     }                                                   \
160   while (0)
161 
162 /* special template for mpfr_mul(a,b,b) */
163 #define SPEED_MPFR_SQR(mean_fun)                        \
164   do                                                    \
165     {                                                   \
166       unsigned  i;                                      \
167       mpfr_limb_ptr wp;                                 \
168       double    t;                                      \
169       mpfr_t    w, x;                                   \
170       mp_size_t size;                                   \
171       MPFR_TMP_DECL (marker);                           \
172                                                         \
173       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
174       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
175       MPFR_TMP_MARK (marker);                           \
176                                                         \
177       size = (s->size-1)/GMP_NUMB_BITS+1;               \
178       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
179       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
180       MPFR_SET_EXP (x, 0);                              \
181                                                         \
182       MPFR_TMP_INIT (wp, w, s->size, size);             \
183                                                         \
184       speed_operand_src (s, s->xp, size);               \
185       speed_operand_dst (s, wp, size);                  \
186       speed_cache_fill (s);                             \
187                                                         \
188       speed_starttime ();                               \
189       i = s->reps;                                      \
190       do                                                \
191         mean_fun (w, x, x, MPFR_RNDN);                  \
192       while (--i != 0);                                 \
193       t = speed_endtime ();                             \
194                                                         \
195       MPFR_TMP_FREE (marker);                           \
196       return t;                                         \
197     }                                                   \
198   while (0)
199 
200 /* s->size: precision of both input and output
201    s->xp  : Mantissa of first input
202    s->r   : exponent
203    s->align_xp : sign (1 means positive, 2 means negative)
204 */
205 #define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun)         \
206   do                                                    \
207     {                                                   \
208       unsigned  i;                                      \
209       mpfr_limb_ptr wp;                                 \
210       double    t;                                      \
211       mpfr_t    w, x;                                   \
212       mp_size_t size;                                   \
213       MPFR_TMP_DECL (marker);                           \
214                                                         \
215       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
216       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
217       MPFR_TMP_MARK (marker);                           \
218                                                         \
219       size = (s->size-1)/GMP_NUMB_BITS+1;               \
220       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
221       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
222       MPFR_SET_EXP (x, s->r);                           \
223       if (s->align_xp == 2) MPFR_SET_NEG (x);           \
224                                                         \
225       MPFR_TMP_INIT (wp, w, s->size, size);             \
226                                                         \
227       speed_operand_src (s, s->xp, size);               \
228       speed_operand_dst (s, wp, size);                  \
229       speed_cache_fill (s);                             \
230                                                         \
231       speed_starttime ();                               \
232       i = s->reps;                                      \
233       do                                                \
234         mean_fun (w, x, MPFR_RNDN);                     \
235       while (--i != 0);                                 \
236       t = speed_endtime ();                             \
237                                                         \
238       MPFR_TMP_FREE (marker);                           \
239       return t;                                         \
240     }                                                   \
241   while (0)
242 
243 /* First we include all the functions we want to tune inside this program.
244    We can't use the GNU MPFR library since the thresholds are fixed macros. */
245 
246 /* Setup mpfr_exp_2 */
247 mpfr_prec_t mpfr_exp_2_threshold;
248 #undef  MPFR_EXP_2_THRESHOLD
249 #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
250 #include "exp_2.c"
251 static double
speed_mpfr_exp_2(struct speed_params * s)252 speed_mpfr_exp_2 (struct speed_params *s)
253 {
254   SPEED_MPFR_FUNC (mpfr_exp_2);
255 }
256 
257 /* Setup mpfr_exp */
258 mpfr_prec_t mpfr_exp_threshold;
259 #undef  MPFR_EXP_THRESHOLD
260 #define MPFR_EXP_THRESHOLD mpfr_exp_threshold
261 #include "exp.c"
262 static double
speed_mpfr_exp(struct speed_params * s)263 speed_mpfr_exp (struct speed_params *s)
264 {
265   SPEED_MPFR_FUNC (mpfr_exp);
266 }
267 
268 /* Setup mpfr_sin_cos */
269 mpfr_prec_t mpfr_sincos_threshold;
270 #undef MPFR_SINCOS_THRESHOLD
271 #define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold
272 #include "sin_cos.c"
273 #include "cos.c"
274 static double
speed_mpfr_sincos(struct speed_params * s)275 speed_mpfr_sincos (struct speed_params *s)
276 {
277   SPEED_MPFR_FUNC2 (mpfr_sin_cos);
278 }
279 
280 /* Setup mpfr_mul, mpfr_sqr and mpfr_div */
281 /* Since mpfr_mul() deals with both mul and sqr, and contains an assert that
282    the thresholds are >= 1, we initialize both values to 1 to avoid a failed
283    assertion (let's recall that static assertions are replaced by normal
284    dynamic assertions here). */
285 mpfr_prec_t mpfr_mul_threshold = 1;
286 mpfr_prec_t mpfr_sqr_threshold = 1;
287 mpfr_prec_t mpfr_div_threshold;
288 #undef  MPFR_MUL_THRESHOLD
289 #define MPFR_MUL_THRESHOLD mpfr_mul_threshold
290 #undef  MPFR_SQR_THRESHOLD
291 #define MPFR_SQR_THRESHOLD mpfr_sqr_threshold
292 #undef  MPFR_DIV_THRESHOLD
293 #define MPFR_DIV_THRESHOLD mpfr_div_threshold
294 #include "mul.c"
295 #include "div.c"
296 static double
speed_mpfr_mul(struct speed_params * s)297 speed_mpfr_mul (struct speed_params *s)
298 {
299   SPEED_MPFR_OP (mpfr_mul);
300 }
301 static double
speed_mpfr_sqr(struct speed_params * s)302 speed_mpfr_sqr (struct speed_params *s)
303 {
304   SPEED_MPFR_SQR (mpfr_mul);
305 }
306 static double
speed_mpfr_div(struct speed_params * s)307 speed_mpfr_div (struct speed_params *s)
308 {
309   SPEED_MPFR_OP (mpfr_div);
310 }
311 
312 /************************************************
313  * Common functions (inspired by GMP function)  *
314  ************************************************/
315 static int
analyze_data(double * dat,int ndat)316 analyze_data (double *dat, int ndat)
317 {
318   double  x, min_x;
319   int     j, min_j;
320 
321   x = 0.0;
322   for (j = 0; j < ndat; j++)
323     if (dat[j] > 0.0)
324       x += dat[j];
325 
326   min_x = x;
327   min_j = 0;
328 
329   for (j = 0; j < ndat; x -= dat[j], j++)
330     {
331       if (x < min_x)
332         {
333           min_x = x;
334           min_j = j;
335         }
336     }
337   return min_j;
338 }
339 
340 static double
mpfr_speed_measure(speed_function_t fun,struct speed_params * s,char * m)341 mpfr_speed_measure (speed_function_t fun, struct speed_params *s, char *m)
342 {
343   double t = -1.0;
344   int i;
345   int number_of_iterations = 30;
346   for (i = 1; i <= number_of_iterations && t == -1.0; i++)
347     {
348       t = speed_measure (fun, s);
349       if ( (t == -1.0) && (i+1 <= number_of_iterations) )
350         printf("speed_measure failed for size %lu. Trying again... (%d/%d)\n",
351                s->size, i+1, number_of_iterations);
352     }
353   if (t == -1.0)
354     {
355       fprintf (stderr, "Failed to measure %s!\n", m);
356       fprintf (stderr, "If CPU frequency scaling is enabled, please disable it:\n");
357       fprintf (stderr, "   under Linux: cpufreq-selector -g performance\n");
358       fprintf (stderr, "On a multi-core processor, you might also try to load all the cores\n");
359       abort ();
360     }
361   return t;
362 }
363 
364 #define THRESHOLD_WINDOW 16
365 #define THRESHOLD_FINAL_WINDOW 128
366 static double
domeasure(mpfr_prec_t * threshold,double (* func)(struct speed_params *),mpfr_prec_t p)367 domeasure (mpfr_prec_t *threshold,
368            double (*func) (struct speed_params *),
369            mpfr_prec_t p)
370 {
371   struct speed_params s;
372   mp_size_t size;
373   double t1, t2, d;
374 
375   s.align_xp = s.align_yp = s.align_wp = 64;
376   s.size = p;
377   size = (p - 1)/GMP_NUMB_BITS+1;
378   s.xp = malloc (2*size*sizeof (mp_limb_t));
379   if (s.xp == NULL)
380     {
381       fprintf (stderr, "Can't allocate memory.\n");
382       abort ();
383     }
384   mpn_random (s.xp, size);
385   s.yp = s.xp + size;
386   mpn_random (s.yp, size);
387   *threshold = MPFR_PREC_MAX;
388   t1 = mpfr_speed_measure (func, &s, "function 1");
389   *threshold = 1;
390   t2 = mpfr_speed_measure (func, &s, "function 2");
391   free (s.xp);
392   /* t1 is the time of the first algo (used for low prec) */
393   if (t2 >= t1)
394     d = (t2 - t1) / t2;
395   else
396     d = (t2 - t1) / t1;
397   /* d > 0 if we have to use algo 1.
398      d < 0 if we have to use algo 2 */
399   return d;
400 }
401 
402 /* Performs measures when both the precision and the point of evaluation
403    shall vary. s.yp is ignored and not initialized.
404    It assumes that func depends on three thresholds with a boundary of the
405    form threshold1*x + threshold2*p = some scaling factor, if x<0,
406    and  threshold3*x + threshold2*p = some scaling factor, if x>=0.
407 */
408 static double
domeasure2(long int * threshold1,long int * threshold2,long int * threshold3,double (* func)(struct speed_params *),mpfr_prec_t p,mpfr_t x)409 domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3,
410             double (*func) (struct speed_params *),
411             mpfr_prec_t p,
412             mpfr_t x)
413 {
414   struct speed_params s;
415   mp_size_t size;
416   double t1, t2, d;
417   mpfr_t xtmp;
418 
419   if (MPFR_IS_SINGULAR (x))
420     {
421       mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n");
422       abort ();
423     }
424   if (MPFR_IS_NEG (x))
425     s.align_xp = 2;
426   else
427     s.align_xp = 1;
428 
429   s.align_yp = s.align_wp = 64;
430   s.size = p;
431   size = (p - 1)/GMP_NUMB_BITS+1;
432 
433   mpfr_init2 (xtmp, p);
434   mpn_random (xtmp->_mpfr_d, size);
435   xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT;
436   MPFR_SET_EXP (xtmp, -53);
437   mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN);
438   mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb)       */
439                                       /* where perturb ~ 2^(-53) is */
440                                       /* randomly chosen.           */
441   s.xp = xtmp->_mpfr_d;
442   s.r = MPFR_GET_EXP (xtmp);
443 
444   *threshold1 = 0;
445   *threshold2 = 0;
446   *threshold3 = 0;
447   t1 = mpfr_speed_measure (func, &s, "function 1");
448 
449   if (MPFR_IS_NEG (x))
450     *threshold1 = INT_MIN;
451   else
452     *threshold3 = INT_MAX;
453   *threshold2 = INT_MAX;
454   t2 = mpfr_speed_measure (func, &s, "function 2");
455 
456   /* t1 is the time of the first algo (used for low prec) */
457   if (t2 >= t1)
458     d = (t2 - t1) / t2;
459   else
460     d = (t2 - t1) / t1;
461   /* d > 0 if we have to use algo 1.
462      d < 0 if we have to use algo 2 */
463   mpfr_clear (xtmp);
464   return d;
465 }
466 
467 /* Tune a function with a simple THRESHOLD
468    The function doesn't depend on another threshold.
469    It assumes that it uses algo1 if p < THRESHOLD
470    and algo2 otherwise.
471    if algo2 is better for low prec, and algo1 better for high prec,
472    the behaviour of this function is undefined. */
473 static void
tune_simple_func(mpfr_prec_t * threshold,double (* func)(struct speed_params *),mpfr_prec_t pstart)474 tune_simple_func (mpfr_prec_t *threshold,
475                   double (*func) (struct speed_params *),
476                   mpfr_prec_t pstart)
477 {
478   double measure[THRESHOLD_FINAL_WINDOW+1];
479   double d = -1.0;
480   mpfr_prec_t pstep;
481   int i, numpos, numneg, try;
482   mpfr_prec_t pmin, pmax, p;
483 
484   /* first look for a lower bound within 10% */
485   pmin = p = pstart;
486   for (i = 0; i < 10 && d < 0.0; i++)
487     d = domeasure (threshold, func, pmin);
488   if (d < 0.0)
489     {
490       if (verbose)
491         printf ("Oops: even for precision %lu, algo 2 seems to be faster!\n",
492                 (unsigned long) pmin);
493       *threshold = pmin;
494       return;
495     }
496   if (d >= 1.00)
497     for (;;)
498       {
499         d = domeasure (threshold, func, pmin);
500         if (d < 1.00)
501           break;
502         p = pmin;
503         pmin += pmin/2;
504       }
505   pmin = p;
506   for (;;)
507     {
508       d = domeasure (threshold, func, pmin);
509       if (d < 0.10)
510         break;
511       pmin += GMP_NUMB_BITS;
512     }
513 
514   /* then look for an upper bound within 20% */
515   pmax = pmin * 2;
516   for (;;)
517     {
518       d = domeasure (threshold, func, pmax);
519       if (d < -0.20)
520         break;
521       pmax += pmin / 2; /* don't increase too rapidly */
522     }
523 
524   /* The threshold is between pmin and pmax. Affine them */
525   try = 0;
526   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
527     {
528       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
529       if (verbose)
530         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
531       p = (pmin + pmax) / 2;
532       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
533         {
534           measure[i] = domeasure (threshold, func,
535                                   p+(i-THRESHOLD_WINDOW/2)*pstep);
536           if (measure[i] > 0)
537             numpos ++;
538           else if (measure[i] < 0)
539             numneg ++;
540         }
541       if (numpos > numneg)
542         /* We use more often algo 1 than algo 2 */
543         pmin = p - THRESHOLD_WINDOW/2*pstep;
544       else if (numpos < numneg)
545         pmax = p + THRESHOLD_WINDOW/2*pstep;
546       else
547         /* numpos == numneg ... */
548         if (++ try > 2)
549           {
550             *threshold = p;
551             if (verbose)
552               printf ("Quick find: %lu\n", *threshold);
553             return ;
554           }
555     }
556 
557   /* Final tune... */
558   if (verbose)
559     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
560   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
561     measure[i] = domeasure (threshold, func, pmin+i);
562   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
563   *threshold = pmin + i;
564   if (verbose)
565     printf ("%lu\n", *threshold);
566   return;
567 }
568 
569 /* Tune a function which behavior depends on both p and x,
570    in a given direction.
571    It assumes that for (x,p) close to zero, algo1 is used
572    and algo2 is used when (x,p) is far from zero.
573    If algo2 is better for low prec, and algo1 better for high prec,
574    the behavior of this function is undefined.
575    This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp)
576    until it finds a point on the boundary. It returns ell.
577  */
578 static void
tune_simple_func_in_some_direction(long int * threshold1,long int * threshold2,long int * threshold3,double (* func)(struct speed_params *),mpfr_prec_t pstart,int dirx,int dirp,mpfr_t xres,mpfr_prec_t * pres)579 tune_simple_func_in_some_direction (long int *threshold1,
580                                     long int *threshold2,
581                                     long int *threshold3,
582                                     double (*func) (struct speed_params *),
583                                     mpfr_prec_t pstart,
584                                     int dirx, int dirp,
585                                     mpfr_t xres, mpfr_prec_t *pres)
586 {
587   double measure[THRESHOLD_FINAL_WINDOW+1];
588   double d;
589   mpfr_prec_t pstep;
590   int i, numpos, numneg, try;
591   mpfr_prec_t pmin, pmax, p;
592   mpfr_t xmin, xmax, x;
593   mpfr_t ratio;
594 
595   mpfr_init2 (ratio, MPFR_SMALL_PRECISION);
596   mpfr_set_si (ratio, dirx, MPFR_RNDN);
597   mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN);
598 
599   mpfr_init2 (xmin, MPFR_SMALL_PRECISION);
600   mpfr_init2 (xmax, MPFR_SMALL_PRECISION);
601   mpfr_init2 (x, MPFR_SMALL_PRECISION);
602 
603   /* first look for a lower bound within 10% */
604   pmin = p = pstart;
605   mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
606   mpfr_set (x, xmin, MPFR_RNDN);
607 
608   d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
609   if (d < 0.0)
610     {
611       if (verbose)
612         printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
613                 (unsigned long) pmin);
614       *pres = MPFR_PREC_MIN;
615       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
616       mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
617       return;
618     }
619   if (d >= 1.00)
620     for (;;)
621       {
622         d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
623         if (d < 1.00)
624           break;
625         p = pmin;
626         mpfr_set (x, xmin, MPFR_RNDN);
627         pmin += pmin/2;
628         mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
629       }
630   pmin = p;
631   mpfr_set (xmin, x, MPFR_RNDN);
632   for (;;)
633     {
634       d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
635       if (d < 0.10)
636         break;
637       pmin += GMP_NUMB_BITS;
638       mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
639     }
640 
641   /* then look for an upper bound within 20% */
642   pmax = pmin * 2;
643   mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
644   for (;;)
645     {
646       d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax);
647       if (d < -0.20)
648         break;
649       pmax += pmin / 2; /* don't increase too rapidly */
650       mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
651     }
652 
653   /* The threshold is between pmin and pmax. Affine them */
654   try = 0;
655   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
656     {
657       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
658       if (verbose)
659         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
660       p = (pmin + pmax) / 2;
661       mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN);
662       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
663         {
664           *pres = p+(i-THRESHOLD_WINDOW/2)*pstep;
665           mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
666           measure[i] = domeasure2 (threshold1, threshold2, threshold3,
667                                    func, *pres, xres);
668           if (measure[i] > 0)
669             numpos ++;
670           else if (measure[i] < 0)
671             numneg ++;
672         }
673       if (numpos > numneg)
674         {
675           /* We use more often algo 1 than algo 2 */
676           pmin = p - THRESHOLD_WINDOW/2*pstep;
677           mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
678         }
679       else if (numpos < numneg)
680         {
681           pmax = p + THRESHOLD_WINDOW/2*pstep;
682           mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
683         }
684       else
685         /* numpos == numneg ... */
686         if (++ try > 2)
687           {
688             *pres = p;
689             mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
690             if (verbose)
691               printf ("Quick find: %lu\n", *pres);
692             mpfr_clear (ratio);
693             mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
694             return ;
695           }
696     }
697 
698   /* Final tune... */
699   if (verbose)
700     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
701   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
702     {
703       *pres = pmin+i;
704       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
705       measure[i] = domeasure2 (threshold1, threshold2, threshold3,
706                                func, *pres, xres);
707     }
708   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
709   *pres = pmin + i;
710   mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
711   if (verbose)
712     printf ("%lu\n", *pres);
713   mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
714   return;
715 }
716 
717 /************************************
718  * Tune Mulders' mulhigh function   *
719  ************************************/
720 #define TOLERANCE 1.00
721 #define MULDERS_TABLE_SIZE 1024
722 #ifndef MPFR_MULHIGH_SIZE
723 # define MPFR_MULHIGH_SIZE MULDERS_TABLE_SIZE
724 #endif
725 #ifndef MPFR_SQRHIGH_SIZE
726 # define MPFR_SQRHIGH_SIZE MULDERS_TABLE_SIZE
727 #endif
728 #ifndef MPFR_DIVHIGH_SIZE
729 # define MPFR_DIVHIGH_SIZE MULDERS_TABLE_SIZE
730 #endif
731 #define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
732 #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
733 #define MPFR_DIVHIGH_TAB_SIZE MPFR_DIVHIGH_SIZE
734 #include "mulders.c"
735 
736 static double
speed_mpfr_mulhigh(struct speed_params * s)737 speed_mpfr_mulhigh (struct speed_params *s)
738 {
739   SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
740 }
741 
742 static double
speed_mpfr_sqrhigh(struct speed_params * s)743 speed_mpfr_sqrhigh (struct speed_params *s)
744 {
745   SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
746 }
747 
748 static double
speed_mpfr_divhigh(struct speed_params * s)749 speed_mpfr_divhigh (struct speed_params *s)
750 {
751   SPEED_ROUTINE_MPN_DC_DIVREM_CALL (mpfr_divhigh_n (q, a, d, s->size));
752 }
753 
754 #define MAX_STEPS 513 /* maximum number of values of k tried for a given n */
755 
756 /* Tune mpfr_mulhigh_n for size n */
757 static mp_size_t
tune_mul_mulders_upto(mp_size_t n)758 tune_mul_mulders_upto (mp_size_t n)
759 {
760   struct speed_params s;
761   mp_size_t k, kbest, step;
762   double t, tbest;
763   MPFR_TMP_DECL (marker);
764 
765   if (n == 0)
766     return -1;
767 
768   MPFR_TMP_MARK (marker);
769   s.align_xp = s.align_yp = s.align_wp = 64;
770   s.size = n;
771   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
772   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
773   mpn_random (s.xp, n);
774   mpn_random (s.yp, n);
775 
776   /* Check k == -1, mpn_mul_basecase */
777   mulhigh_ktab[n] = -1;
778   kbest = -1;
779   tbest = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
780 
781   /* Check k == 0, mpn_mulhigh_n_basecase */
782   mulhigh_ktab[n] = 0;
783   t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
784   if (t * TOLERANCE < tbest)
785     kbest = 0, tbest = t;
786 
787   /* Check Mulders with cutoff point k */
788   step = 1 + n / (2 * MAX_STEPS);
789   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
790   for (k = (n + 4) / 2 ; k < n ; k += step)
791     {
792       mulhigh_ktab[n] = k;
793       t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
794       if (t * TOLERANCE < tbest)
795         kbest = k, tbest = t;
796     }
797 
798   mulhigh_ktab[n] = kbest;
799 
800   MPFR_TMP_FREE (marker);
801   return kbest;
802 }
803 
804 /* Tune mpfr_sqrhigh_n for size n */
805 static mp_size_t
tune_sqr_mulders_upto(mp_size_t n)806 tune_sqr_mulders_upto (mp_size_t n)
807 {
808   struct speed_params s;
809   mp_size_t k, kbest, step;
810   double t, tbest;
811   MPFR_TMP_DECL (marker);
812 
813   if (n == 0)
814     return -1;
815 
816   MPFR_TMP_MARK (marker);
817   s.align_xp = s.align_wp = 64;
818   s.size = n;
819   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
820   mpn_random (s.xp, n);
821 
822   /* Check k == -1, mpn_sqr_basecase */
823   sqrhigh_ktab[n] = -1;
824   kbest = -1;
825   tbest = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
826 
827   /* Check k == 0, mpfr_mulhigh_n_basecase */
828   sqrhigh_ktab[n] = 0;
829   t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
830   if (t * TOLERANCE < tbest)
831     kbest = 0, tbest = t;
832 
833   /* Check Mulders */
834   step = 1 + n / (2 * MAX_STEPS);
835   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
836   for (k = (n + 4) / 2 ; k < n ; k += step)
837     {
838       sqrhigh_ktab[n] = k;
839       t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
840       if (t * TOLERANCE < tbest)
841         kbest = k, tbest = t;
842     }
843 
844   sqrhigh_ktab[n] = kbest;
845 
846   MPFR_TMP_FREE (marker);
847   return kbest;
848 }
849 
850 /* Tune mpfr_divhigh_n for size n.
851    Ensure divhigh_ktab[n] < n for n > 0. */
852 static mp_size_t
tune_div_mulders_upto(mp_size_t n)853 tune_div_mulders_upto (mp_size_t n)
854 {
855   struct speed_params s;
856   mp_size_t k, kbest, step;
857   double t, tbest;
858   MPFR_TMP_DECL (marker);
859 
860   /* we require n > 2 in mpfr_divhigh */
861   if (n <= 2)
862     {
863       divhigh_ktab[n] = 0;
864       return 0;
865     }
866 
867   MPFR_TMP_MARK (marker);
868   s.align_xp = s.align_yp = s.align_wp = s.align_wp2 = 64;
869   s.size = n;
870   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
871   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
872   mpn_random (s.xp, n);
873   mpn_random (s.yp, n);
874 
875   /* Check k == 0, i.e., mpfr_divhigh_n_basecase */
876   kbest = 0;
877   tbest = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
878 
879   /* Check Mulders */
880   step = 1 + n / (2 * MAX_STEPS);
881   /* we should have (n+3)/2 <= k < n-1, which translates into
882      (n+4)/2 <= k < n-1 in C */
883   for (k = (n + 4) / 2 ; k < n - 1; k += step)
884     {
885       divhigh_ktab[n] = k;
886       t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
887       if (t * TOLERANCE < tbest)
888         kbest = k, tbest = t;
889     }
890 
891   MPFR_ASSERTN(kbest < n);
892   divhigh_ktab[n] = kbest;
893 
894   MPFR_TMP_FREE (marker);
895 
896   return kbest;
897 }
898 
899 static void
tune_mul_mulders(FILE * f)900 tune_mul_mulders (FILE *f)
901 {
902   mp_size_t k;
903 
904   if (verbose)
905     printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
906   fprintf (f, "#define MPFR_MULHIGH_TAB  \\\n ");
907   for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
908     {
909       fprintf (f, "%d", (int) tune_mul_mulders_upto (k));
910       if (k != MPFR_MULHIGH_TAB_SIZE-1)
911         fputc (',', f);
912       if ((k+1) % 16 == 0)
913         fprintf (f, " \\\n ");
914       if (verbose)
915         putchar ('.');
916     }
917   fprintf (f, " \n");
918   if (verbose)
919     putchar ('\n');
920 }
921 
922 static void
tune_sqr_mulders(FILE * f)923 tune_sqr_mulders (FILE *f)
924 {
925   mp_size_t k;
926 
927   if (verbose)
928     printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
929   fprintf (f, "#define MPFR_SQRHIGH_TAB  \\\n ");
930   for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
931     {
932       fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
933       if (k != MPFR_SQRHIGH_TAB_SIZE-1)
934         fputc (',', f);
935       if ((k+1) % 16 == 0)
936         fprintf (f, " \\\n ");
937       if (verbose)
938         putchar ('.');
939     }
940   fprintf (f, " \n");
941   if (verbose)
942     putchar ('\n');
943 }
944 
945 static void
tune_div_mulders(FILE * f)946 tune_div_mulders (FILE *f)
947 {
948   mp_size_t k;
949 
950   if (verbose)
951     printf ("Tuning mpfr_divhigh_n[%d]", (int) MPFR_DIVHIGH_TAB_SIZE);
952   fprintf (f, "#define MPFR_DIVHIGH_TAB  \\\n ");
953   for (k = 0 ; k < MPFR_DIVHIGH_TAB_SIZE ; k++)
954     {
955       fprintf (f, "%d", (int) tune_div_mulders_upto (k));
956       if (k != MPFR_DIVHIGH_TAB_SIZE - 1)
957         fputc (',', f);
958       if ((k+1) % 16 == 0)
959         fprintf (f, " /*%zu-%zu*/ \\\n ", (size_t) k - 15, (size_t) k);
960       if (verbose)
961         putchar ('.');
962     }
963   fprintf (f, " \n");
964   if (verbose)
965     putchar ('\n');
966 }
967 
968 /*******************************************************
969  *            Tuning functions for mpfr_ai             *
970  *******************************************************/
971 
972 long int mpfr_ai_threshold1;
973 long int mpfr_ai_threshold2;
974 long int mpfr_ai_threshold3;
975 #undef  MPFR_AI_THRESHOLD1
976 #define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1
977 #undef  MPFR_AI_THRESHOLD2
978 #define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2
979 #undef  MPFR_AI_THRESHOLD3
980 #define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3
981 
982 #include "ai.c"
983 
984 static double
speed_mpfr_ai(struct speed_params * s)985 speed_mpfr_ai (struct speed_params *s)
986 {
987   SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai);
988 }
989 
990 
991 /*******************************************************
992  *            Tune all the threshold of MPFR           *
993  * Warning: tune the function in their dependent order!*
994  *******************************************************/
995 static void
all(const char * filename)996 all (const char *filename)
997 {
998   FILE *f;
999   time_t  start_time, end_time;
1000   struct tm  *tp;
1001   mpfr_t x1, x2, x3, tmp1, tmp2;
1002   mpfr_prec_t p1, p2, p3;
1003 
1004   f = fopen (filename, "w");
1005   if (f == NULL)
1006     {
1007       fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
1008       abort ();
1009     }
1010 
1011   speed_time_init ();
1012   if (verbose)
1013     {
1014       printf ("Using: %s\n", speed_time_string);
1015       printf ("speed_precision %d", speed_precision);
1016       if (speed_unittime == 1.0)
1017         printf (", speed_unittime 1 cycle");
1018       else
1019         printf (", speed_unittime %.2e secs", speed_unittime);
1020       if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
1021         printf (", CPU freq unknown\n");
1022       else
1023         printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
1024     }
1025 
1026   time (&start_time);
1027   tp = localtime (&start_time);
1028   fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
1029           tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
1030 
1031 #ifdef __ICC
1032   fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
1033 #elif defined(__GNUC__)
1034 #ifdef __GNUC_PATCHLEVEL__
1035   fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
1036            __GNUC_PATCHLEVEL__);
1037 #else
1038   fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
1039 #endif
1040 #elif defined (__SUNPRO_C)
1041   fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
1042 #elif defined (__sgi) && defined (_COMPILER_VERSION)
1043   fprintf (f, "MIPSpro C %d.%d.%d */\n",
1044            _COMPILER_VERSION / 100,
1045            _COMPILER_VERSION / 10 % 10,
1046            _COMPILER_VERSION % 10);
1047 #elif defined (__DECC) && defined (__DECC_VER)
1048   fprintf (f, "DEC C %d */\n", __DECC_VER);
1049 #else
1050   fprintf (f, "system compiler */\n");
1051 #endif
1052   fprintf (f, "\n\n");
1053   fprintf (f, "#ifndef MPFR_TUNE_CASE\n");
1054   fprintf (f, "#define MPFR_TUNE_CASE \"src/mparam.h\"\n");
1055   fprintf (f, "#endif\n\n");
1056 
1057   /* Tune mulhigh */
1058   tune_mul_mulders (f);
1059 
1060   /* Tune sqrhigh */
1061   tune_sqr_mulders (f);
1062 
1063   /* Tune divhigh */
1064   tune_div_mulders (f);
1065   fflush (f);
1066 
1067   /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
1068   if (verbose)
1069     printf ("Tuning mpfr_mul...\n");
1070   tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
1071                     2*GMP_NUMB_BITS+1);
1072   fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
1073            (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
1074 
1075   /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */
1076   if (verbose)
1077     printf ("Tuning mpfr_sqr...\n");
1078   tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr,
1079                     2*GMP_NUMB_BITS+1);
1080   fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n",
1081            (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1);
1082 
1083   /* Tune mpfr_div (threshold is in limbs, but it doesn't matter too much) */
1084   if (verbose)
1085     printf ("Tuning mpfr_div...\n");
1086   tune_simple_func (&mpfr_div_threshold, speed_mpfr_div,
1087                     2*GMP_NUMB_BITS+1);
1088   fprintf (f, "#define MPFR_DIV_THRESHOLD %lu /* limbs */\n",
1089            (unsigned long) (mpfr_div_threshold - 1) / GMP_NUMB_BITS + 1);
1090 
1091   /* Tune mpfr_exp_2 */
1092   if (verbose)
1093     printf ("Tuning mpfr_exp_2...\n");
1094   tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, GMP_NUMB_BITS);
1095   fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
1096            (unsigned long) mpfr_exp_2_threshold);
1097 
1098   /* Tune mpfr_exp */
1099   if (verbose)
1100     printf ("Tuning mpfr_exp...\n");
1101   tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
1102                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
1103   fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
1104            (unsigned long) mpfr_exp_threshold);
1105 
1106   /* Tune mpfr_sin_cos */
1107   if (verbose)
1108     printf ("Tuning mpfr_sin_cos...\n");
1109   tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos,
1110                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
1111   fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n",
1112            (unsigned long) mpfr_sincos_threshold);
1113 
1114   /* Tune mpfr_ai */
1115   if (verbose)
1116     printf ("Tuning mpfr_ai...\n");
1117   mpfr_init2 (x1, MPFR_SMALL_PRECISION);
1118   mpfr_init2 (x2, MPFR_SMALL_PRECISION);
1119   mpfr_init2 (x3, MPFR_SMALL_PRECISION);
1120   mpfr_init2 (tmp1, MPFR_SMALL_PRECISION);
1121   mpfr_init2 (tmp2, MPFR_SMALL_PRECISION);
1122 
1123   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1124                                       &mpfr_ai_threshold3, speed_mpfr_ai,
1125                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
1126                                       -60, 200, x1, &p1);
1127   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1128                                       &mpfr_ai_threshold3, speed_mpfr_ai,
1129                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
1130                                       -20, 500, x2, &p2);
1131   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1132                                       &mpfr_ai_threshold3, speed_mpfr_ai,
1133                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
1134                                       40, 200, x3, &p3);
1135 
1136   mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN);
1137   mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN);
1138   mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN);
1139   mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN);
1140 
1141   mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN);
1142   mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN);
1143   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
1144   mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN);
1145 
1146   mpfr_sub (tmp2, x2, x1, MPFR_RNDN);
1147   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
1148   mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN);
1149 
1150   mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN);
1151   mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN);
1152   mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN);
1153   mpfr_div (tmp1, tmp1, x3, MPFR_RNDN);
1154   mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN);
1155 
1156   fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld /* threshold for negative input of mpfr_ai */\n", mpfr_ai_threshold1);
1157   fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2);
1158   fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3);
1159 
1160   mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3);
1161   mpfr_clear (tmp1); mpfr_clear (tmp2);
1162 
1163   /* End of tuning */
1164   time (&end_time);
1165   fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
1166            (long) (end_time - start_time));
1167   if (verbose)
1168     printf ("Complete (took %ld seconds).\n", (long) (end_time - start_time));
1169 
1170   fclose (f);
1171 }
1172 
1173 
1174 /* Main function */
main(int argc,char * argv[])1175 int main (int argc, char *argv[])
1176 {
1177   /* Unbuffered so if output is redirected to a file it isn't lost if the
1178      program is killed part way through.  */
1179   setbuf (stdout, NULL);
1180   setbuf (stderr, NULL);
1181 
1182   verbose = argc > 1;
1183 
1184   if (verbose)
1185     printf ("Tuning MPFR (Coffee time?)...\n");
1186 
1187   all ("mparam.h");
1188 
1189   return 0;
1190 }
1191