xref: /netbsd-src/external/lgpl3/mpfr/dist/tune/tuneup.c (revision cef8759bd76c1b621f8eab8faa6f208faabc2e15)
1 /* Tune various threshold of MPFR
2 
3 Copyright 2005-2018 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include <stdlib.h>
24 #include <time.h>
25 
26 #define MPFR_NEED_LONGLONG_H
27 #include "mpfr-impl.h"
28 
29 #undef _PROTO
30 #define _PROTO __GMP_PROTO
31 #include "speed.h"
32 
33 /* Undefine static assertion system */
34 #undef MPFR_DECL_STATIC_ASSERT
35 #undef MPFR_STAT_STATIC_ASSERT
36 #define MPFR_DECL_STATIC_ASSERT(a) MPFR_ASSERTN(a)
37 #define MPFR_STAT_STATIC_ASSERT(a) MPFR_ASSERTN(a)
38 
39 int verbose;
40 
41 /* template for an unary function */
42 /* s->size: precision of both input and output
43    s->xp  : Mantissa of first input
44    s->yp  : mantissa of second input                    */
45 #define SPEED_MPFR_FUNC(mean_fun)                       \
46   do                                                    \
47     {                                                   \
48       unsigned  i;                                      \
49       mpfr_limb_ptr wp;                                 \
50       double    t;                                      \
51       mpfr_t    w, x;                                   \
52       mp_size_t size;                                   \
53       MPFR_TMP_DECL (marker);                           \
54                                                         \
55       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
56       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
57       MPFR_TMP_MARK (marker);                           \
58                                                         \
59       size = (s->size-1)/GMP_NUMB_BITS+1;               \
60       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
61       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
62       MPFR_SET_EXP (x, 0);                              \
63                                                         \
64       MPFR_TMP_INIT (wp, w, s->size, size);             \
65                                                         \
66       speed_operand_src (s, s->xp, size);               \
67       speed_operand_dst (s, wp, size);                  \
68       speed_cache_fill (s);                             \
69                                                         \
70       speed_starttime ();                               \
71       i = s->reps;                                      \
72       do                                                \
73         mean_fun (w, x, MPFR_RNDN);                     \
74       while (--i != 0);                                 \
75       t = speed_endtime ();                             \
76                                                         \
77       MPFR_TMP_FREE (marker);                           \
78       return t;                                         \
79     }                                                   \
80   while (0)
81 
82 /* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */
83 #define SPEED_MPFR_FUNC2(mean_fun)                      \
84   do                                                    \
85     {                                                   \
86       unsigned  i;                                      \
87       mpfr_limb_ptr vp, wp;                             \
88       double    t;                                      \
89       mpfr_t    v, w, x;                                \
90       mp_size_t size;                                   \
91       MPFR_TMP_DECL (marker);                           \
92                                                         \
93       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
94       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
95       MPFR_TMP_MARK (marker);                           \
96                                                         \
97       size = (s->size-1)/GMP_NUMB_BITS+1;               \
98       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
99       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
100       MPFR_SET_EXP (x, 0);                              \
101                                                         \
102       MPFR_TMP_INIT (vp, v, s->size, size);             \
103       MPFR_TMP_INIT (wp, w, s->size, size);             \
104                                                         \
105       speed_operand_src (s, s->xp, size);               \
106       speed_operand_dst (s, vp, size);                  \
107       speed_operand_dst (s, wp, size);                  \
108       speed_cache_fill (s);                             \
109                                                         \
110       speed_starttime ();                               \
111       i = s->reps;                                      \
112       do                                                \
113         mean_fun (v, w, x, MPFR_RNDN);                  \
114       while (--i != 0);                                 \
115       t = speed_endtime ();                             \
116                                                         \
117       MPFR_TMP_FREE (marker);                           \
118       return t;                                         \
119     }                                                   \
120   while (0)
121 
122 /* template for a function like mpfr_mul */
123 #define SPEED_MPFR_OP(mean_fun)                         \
124   do                                                    \
125     {                                                   \
126       unsigned  i;                                      \
127       mpfr_limb_ptr wp;                                 \
128       double    t;                                      \
129       mpfr_t    w, x, y;                                \
130       mp_size_t size;                                   \
131       MPFR_TMP_DECL (marker);                           \
132                                                         \
133       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
134       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
135       MPFR_TMP_MARK (marker);                           \
136                                                         \
137       size = (s->size-1)/GMP_NUMB_BITS+1;               \
138       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
139       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
140       MPFR_SET_EXP (x, 0);                              \
141       s->yp[size-1] |= MPFR_LIMB_HIGHBIT;               \
142       MPFR_TMP_INIT1 (s->yp, y, s->size);               \
143       MPFR_SET_EXP (y, 0);                              \
144                                                         \
145       MPFR_TMP_INIT (wp, w, s->size, size);             \
146                                                         \
147       speed_operand_src (s, s->xp, size);               \
148       speed_operand_src (s, s->yp, size);               \
149       speed_operand_dst (s, wp, size);                  \
150       speed_cache_fill (s);                             \
151                                                         \
152       speed_starttime ();                               \
153       i = s->reps;                                      \
154       do                                                \
155         mean_fun (w, x, y, MPFR_RNDN);                  \
156       while (--i != 0);                                 \
157       t = speed_endtime ();                             \
158                                                         \
159       MPFR_TMP_FREE (marker);                           \
160       return t;                                         \
161     }                                                   \
162   while (0)
163 
164 /* special template for mpfr_mul(a,b,b) */
165 #define SPEED_MPFR_SQR(mean_fun)                        \
166   do                                                    \
167     {                                                   \
168       unsigned  i;                                      \
169       mpfr_limb_ptr wp;                                 \
170       double    t;                                      \
171       mpfr_t    w, x;                                   \
172       mp_size_t size;                                   \
173       MPFR_TMP_DECL (marker);                           \
174                                                         \
175       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
176       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
177       MPFR_TMP_MARK (marker);                           \
178                                                         \
179       size = (s->size-1)/GMP_NUMB_BITS+1;               \
180       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
181       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
182       MPFR_SET_EXP (x, 0);                              \
183                                                         \
184       MPFR_TMP_INIT (wp, w, s->size, size);             \
185                                                         \
186       speed_operand_src (s, s->xp, size);               \
187       speed_operand_dst (s, wp, size);                  \
188       speed_cache_fill (s);                             \
189                                                         \
190       speed_starttime ();                               \
191       i = s->reps;                                      \
192       do                                                \
193         mean_fun (w, x, x, MPFR_RNDN);                  \
194       while (--i != 0);                                 \
195       t = speed_endtime ();                             \
196                                                         \
197       MPFR_TMP_FREE (marker);                           \
198       return t;                                         \
199     }                                                   \
200   while (0)
201 
202 /* s->size: precision of both input and output
203    s->xp  : Mantissa of first input
204    s->r   : exponent
205    s->align_xp : sign (1 means positive, 2 means negative)
206 */
207 #define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun)         \
208   do                                                    \
209     {                                                   \
210       unsigned  i;                                      \
211       mpfr_limb_ptr wp;                                 \
212       double    t;                                      \
213       mpfr_t    w, x;                                   \
214       mp_size_t size;                                   \
215       MPFR_TMP_DECL (marker);                           \
216                                                         \
217       SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
218       SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
219       MPFR_TMP_MARK (marker);                           \
220                                                         \
221       size = (s->size-1)/GMP_NUMB_BITS+1;               \
222       s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
223       MPFR_TMP_INIT1 (s->xp, x, s->size);               \
224       MPFR_SET_EXP (x, s->r);                           \
225       if (s->align_xp == 2) MPFR_SET_NEG (x);           \
226                                                         \
227       MPFR_TMP_INIT (wp, w, s->size, size);             \
228                                                         \
229       speed_operand_src (s, s->xp, size);               \
230       speed_operand_dst (s, wp, size);                  \
231       speed_cache_fill (s);                             \
232                                                         \
233       speed_starttime ();                               \
234       i = s->reps;                                      \
235       do                                                \
236         mean_fun (w, x, MPFR_RNDN);                     \
237       while (--i != 0);                                 \
238       t = speed_endtime ();                             \
239                                                         \
240       MPFR_TMP_FREE (marker);                           \
241       return t;                                         \
242     }                                                   \
243   while (0)
244 
245 /* First we include all the functions we want to tune inside this program.
246    We can't use the GNU MPFR library since the thresholds are fixed macros. */
247 
248 /* Setup mpfr_exp_2 */
249 mpfr_prec_t mpfr_exp_2_threshold;
250 #undef  MPFR_EXP_2_THRESHOLD
251 #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
252 #include "exp_2.c"
253 static double
254 speed_mpfr_exp_2 (struct speed_params *s)
255 {
256   SPEED_MPFR_FUNC (mpfr_exp_2);
257 }
258 
259 /* Setup mpfr_exp */
260 mpfr_prec_t mpfr_exp_threshold;
261 #undef  MPFR_EXP_THRESHOLD
262 #define MPFR_EXP_THRESHOLD mpfr_exp_threshold
263 #include "exp.c"
264 static double
265 speed_mpfr_exp (struct speed_params *s)
266 {
267   SPEED_MPFR_FUNC (mpfr_exp);
268 }
269 
270 /* Setup mpfr_sin_cos */
271 mpfr_prec_t mpfr_sincos_threshold;
272 #undef MPFR_SINCOS_THRESHOLD
273 #define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold
274 #include "sin_cos.c"
275 #include "cos.c"
276 static double
277 speed_mpfr_sincos (struct speed_params *s)
278 {
279   SPEED_MPFR_FUNC2 (mpfr_sin_cos);
280 }
281 
282 /* Setup mpfr_mul, mpfr_sqr and mpfr_div */
283 /* Since mpfr_mul() deals with both mul and sqr, and contains an assert that
284    the thresholds are >= 1, we initialize both values to 1 to avoid a failed
285    assertion (let's recall that static assertions are replaced by normal
286    dynamic assertions here). */
287 mpfr_prec_t mpfr_mul_threshold = 1;
288 mpfr_prec_t mpfr_sqr_threshold = 1;
289 mpfr_prec_t mpfr_div_threshold;
290 #undef  MPFR_MUL_THRESHOLD
291 #define MPFR_MUL_THRESHOLD mpfr_mul_threshold
292 #undef  MPFR_SQR_THRESHOLD
293 #define MPFR_SQR_THRESHOLD mpfr_sqr_threshold
294 #undef  MPFR_DIV_THRESHOLD
295 #define MPFR_DIV_THRESHOLD mpfr_div_threshold
296 #include "mul.c"
297 #include "div.c"
298 static double
299 speed_mpfr_mul (struct speed_params *s)
300 {
301   SPEED_MPFR_OP (mpfr_mul);
302 }
303 static double
304 speed_mpfr_sqr (struct speed_params *s)
305 {
306   SPEED_MPFR_SQR (mpfr_mul);
307 }
308 static double
309 speed_mpfr_div (struct speed_params *s)
310 {
311   SPEED_MPFR_OP (mpfr_div);
312 }
313 
314 /************************************************
315  * Common functions (inspired by GMP function)  *
316  ************************************************/
317 static int
318 analyze_data (double *dat, int ndat)
319 {
320   double  x, min_x;
321   int     j, min_j;
322 
323   x = 0.0;
324   for (j = 0; j < ndat; j++)
325     if (dat[j] > 0.0)
326       x += dat[j];
327 
328   min_x = x;
329   min_j = 0;
330 
331   for (j = 0; j < ndat; x -= dat[j], j++)
332     {
333       if (x < min_x)
334         {
335           min_x = x;
336           min_j = j;
337         }
338     }
339   return min_j;
340 }
341 
342 static double
343 mpfr_speed_measure (speed_function_t fun, struct speed_params *s, char *m)
344 {
345   double t = -1.0;
346   int i;
347   int number_of_iterations = 30;
348   for (i = 1; i <= number_of_iterations && t == -1.0; i++)
349     {
350       t = speed_measure (fun, s);
351       if ( (t == -1.0) && (i+1 <= number_of_iterations) )
352         printf("speed_measure failed for size %lu. Trying again... (%d/%d)\n",
353                s->size, i+1, number_of_iterations);
354     }
355   if (t == -1.0)
356     {
357       fprintf (stderr, "Failed to measure %s!\n", m);
358       fprintf (stderr, "If CPU frequency scaling is enabled, please disable it:\n");
359       fprintf (stderr, "   under Linux: cpufreq-selector -g performance\n");
360       fprintf (stderr, "On a multi-core processor, you might also try to load all the cores\n");
361       abort ();
362     }
363   return t;
364 }
365 
366 #define THRESHOLD_WINDOW 16
367 #define THRESHOLD_FINAL_WINDOW 128
368 static double
369 domeasure (mpfr_prec_t *threshold,
370            double (*func) (struct speed_params *),
371            mpfr_prec_t p)
372 {
373   struct speed_params s;
374   mp_size_t size;
375   double t1, t2, d;
376 
377   s.align_xp = s.align_yp = s.align_wp = 64;
378   s.size = p;
379   size = (p - 1)/GMP_NUMB_BITS+1;
380   s.xp = malloc (2*size*sizeof (mp_limb_t));
381   if (s.xp == NULL)
382     {
383       fprintf (stderr, "Can't allocate memory.\n");
384       abort ();
385     }
386   mpn_random (s.xp, size);
387   s.yp = s.xp + size;
388   mpn_random (s.yp, size);
389   *threshold = MPFR_PREC_MAX;
390   t1 = mpfr_speed_measure (func, &s, "function 1");
391   *threshold = 1;
392   t2 = mpfr_speed_measure (func, &s, "function 2");
393   free (s.xp);
394   /* t1 is the time of the first algo (used for low prec) */
395   if (t2 >= t1)
396     d = (t2 - t1) / t2;
397   else
398     d = (t2 - t1) / t1;
399   /* d > 0 if we have to use algo 1.
400      d < 0 if we have to use algo 2 */
401   return d;
402 }
403 
404 /* Performs measures when both the precision and the point of evaluation
405    shall vary. s.yp is ignored and not initialized.
406    It assumes that func depends on three thresholds with a boundary of the
407    form threshold1*x + threshold2*p = some scaling factor, if x<0,
408    and  threshold3*x + threshold2*p = some scaling factor, if x>=0.
409 */
410 static double
411 domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3,
412             double (*func) (struct speed_params *),
413             mpfr_prec_t p,
414             mpfr_t x)
415 {
416   struct speed_params s;
417   mp_size_t size;
418   double t1, t2, d;
419   mpfr_t xtmp;
420 
421   if (MPFR_IS_SINGULAR (x))
422     {
423       mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n");
424       abort ();
425     }
426   if (MPFR_IS_NEG (x))
427     s.align_xp = 2;
428   else
429     s.align_xp = 1;
430 
431   s.align_yp = s.align_wp = 64;
432   s.size = p;
433   size = (p - 1)/GMP_NUMB_BITS+1;
434 
435   mpfr_init2 (xtmp, p);
436   mpn_random (xtmp->_mpfr_d, size);
437   xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT;
438   MPFR_SET_EXP (xtmp, -53);
439   mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN);
440   mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb)       */
441                                       /* where perturb ~ 2^(-53) is */
442                                       /* randomly chosen.           */
443   s.xp = xtmp->_mpfr_d;
444   s.r = MPFR_GET_EXP (xtmp);
445 
446   *threshold1 = 0;
447   *threshold2 = 0;
448   *threshold3 = 0;
449   t1 = mpfr_speed_measure (func, &s, "function 1");
450 
451   if (MPFR_IS_NEG (x))
452     *threshold1 = INT_MIN;
453   else
454     *threshold3 = INT_MAX;
455   *threshold2 = INT_MAX;
456   t2 = mpfr_speed_measure (func, &s, "function 2");
457 
458   /* t1 is the time of the first algo (used for low prec) */
459   if (t2 >= t1)
460     d = (t2 - t1) / t2;
461   else
462     d = (t2 - t1) / t1;
463   /* d > 0 if we have to use algo 1.
464      d < 0 if we have to use algo 2 */
465   mpfr_clear (xtmp);
466   return d;
467 }
468 
469 /* Tune a function with a simple THRESHOLD
470    The function doesn't depend on another threshold.
471    It assumes that it uses algo1 if p < THRESHOLD
472    and algo2 otherwise.
473    if algo2 is better for low prec, and algo1 better for high prec,
474    the behaviour of this function is undefined. */
475 static void
476 tune_simple_func (mpfr_prec_t *threshold,
477                   double (*func) (struct speed_params *),
478                   mpfr_prec_t pstart)
479 {
480   double measure[THRESHOLD_FINAL_WINDOW+1];
481   double d;
482   mpfr_prec_t pstep;
483   int i, numpos, numneg, try;
484   mpfr_prec_t pmin, pmax, p;
485 
486   /* first look for a lower bound within 10% */
487   pmin = p = pstart;
488   d = domeasure (threshold, func, pmin);
489   if (d < 0.0)
490     {
491       if (verbose)
492         printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
493                 (unsigned long) pmin);
494       *threshold = MPFR_PREC_MIN;
495       return;
496     }
497   if (d >= 1.00)
498     for (;;)
499       {
500         d = domeasure (threshold, func, pmin);
501         if (d < 1.00)
502           break;
503         p = pmin;
504         pmin += pmin/2;
505       }
506   pmin = p;
507   for (;;)
508     {
509       d = domeasure (threshold, func, pmin);
510       if (d < 0.10)
511         break;
512       pmin += GMP_NUMB_BITS;
513     }
514 
515   /* then look for an upper bound within 20% */
516   pmax = pmin * 2;
517   for (;;)
518     {
519       d = domeasure (threshold, func, pmax);
520       if (d < -0.20)
521         break;
522       pmax += pmin / 2; /* don't increase too rapidly */
523     }
524 
525   /* The threshold is between pmin and pmax. Affine them */
526   try = 0;
527   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
528     {
529       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
530       if (verbose)
531         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
532       p = (pmin + pmax) / 2;
533       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
534         {
535           measure[i] = domeasure (threshold, func,
536                                   p+(i-THRESHOLD_WINDOW/2)*pstep);
537           if (measure[i] > 0)
538             numpos ++;
539           else if (measure[i] < 0)
540             numneg ++;
541         }
542       if (numpos > numneg)
543         /* We use more often algo 1 than algo 2 */
544         pmin = p - THRESHOLD_WINDOW/2*pstep;
545       else if (numpos < numneg)
546         pmax = p + THRESHOLD_WINDOW/2*pstep;
547       else
548         /* numpos == numneg ... */
549         if (++ try > 2)
550           {
551             *threshold = p;
552             if (verbose)
553               printf ("Quick find: %lu\n", *threshold);
554             return ;
555           }
556     }
557 
558   /* Final tune... */
559   if (verbose)
560     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
561   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
562     measure[i] = domeasure (threshold, func, pmin+i);
563   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
564   *threshold = pmin + i;
565   if (verbose)
566     printf ("%lu\n", *threshold);
567   return;
568 }
569 
570 /* Tune a function which behavior depends on both p and x,
571    in a given direction.
572    It assumes that for (x,p) close to zero, algo1 is used
573    and algo2 is used when (x,p) is far from zero.
574    If algo2 is better for low prec, and algo1 better for high prec,
575    the behavior of this function is undefined.
576    This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp)
577    until it finds a point on the boundary. It returns ell.
578  */
579 static void
580 tune_simple_func_in_some_direction (long int *threshold1,
581                                     long int *threshold2,
582                                     long int *threshold3,
583                                     double (*func) (struct speed_params *),
584                                     mpfr_prec_t pstart,
585                                     int dirx, int dirp,
586                                     mpfr_t xres, mpfr_prec_t *pres)
587 {
588   double measure[THRESHOLD_FINAL_WINDOW+1];
589   double d;
590   mpfr_prec_t pstep;
591   int i, numpos, numneg, try;
592   mpfr_prec_t pmin, pmax, p;
593   mpfr_t xmin, xmax, x;
594   mpfr_t ratio;
595 
596   mpfr_init2 (ratio, MPFR_SMALL_PRECISION);
597   mpfr_set_si (ratio, dirx, MPFR_RNDN);
598   mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN);
599 
600   mpfr_init2 (xmin, MPFR_SMALL_PRECISION);
601   mpfr_init2 (xmax, MPFR_SMALL_PRECISION);
602   mpfr_init2 (x, MPFR_SMALL_PRECISION);
603 
604   /* first look for a lower bound within 10% */
605   pmin = p = pstart;
606   mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
607   mpfr_set (x, xmin, MPFR_RNDN);
608 
609   d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
610   if (d < 0.0)
611     {
612       if (verbose)
613         printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
614                 (unsigned long) pmin);
615       *pres = MPFR_PREC_MIN;
616       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
617       mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
618       return;
619     }
620   if (d >= 1.00)
621     for (;;)
622       {
623         d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
624         if (d < 1.00)
625           break;
626         p = pmin;
627         mpfr_set (x, xmin, MPFR_RNDN);
628         pmin += pmin/2;
629         mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
630       }
631   pmin = p;
632   mpfr_set (xmin, x, MPFR_RNDN);
633   for (;;)
634     {
635       d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
636       if (d < 0.10)
637         break;
638       pmin += GMP_NUMB_BITS;
639       mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
640     }
641 
642   /* then look for an upper bound within 20% */
643   pmax = pmin * 2;
644   mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
645   for (;;)
646     {
647       d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax);
648       if (d < -0.20)
649         break;
650       pmax += pmin / 2; /* don't increase too rapidly */
651       mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
652     }
653 
654   /* The threshold is between pmin and pmax. Affine them */
655   try = 0;
656   while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
657     {
658       pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
659       if (verbose)
660         printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
661       p = (pmin + pmax) / 2;
662       mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN);
663       for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
664         {
665           *pres = p+(i-THRESHOLD_WINDOW/2)*pstep;
666           mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
667           measure[i] = domeasure2 (threshold1, threshold2, threshold3,
668                                    func, *pres, xres);
669           if (measure[i] > 0)
670             numpos ++;
671           else if (measure[i] < 0)
672             numneg ++;
673         }
674       if (numpos > numneg)
675         {
676           /* We use more often algo 1 than algo 2 */
677           pmin = p - THRESHOLD_WINDOW/2*pstep;
678           mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
679         }
680       else if (numpos < numneg)
681         {
682           pmax = p + THRESHOLD_WINDOW/2*pstep;
683           mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
684         }
685       else
686         /* numpos == numneg ... */
687         if (++ try > 2)
688           {
689             *pres = p;
690             mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
691             if (verbose)
692               printf ("Quick find: %lu\n", *pres);
693             mpfr_clear (ratio);
694             mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
695             return ;
696           }
697     }
698 
699   /* Final tune... */
700   if (verbose)
701     printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
702   for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
703     {
704       *pres = pmin+i;
705       mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
706       measure[i] = domeasure2 (threshold1, threshold2, threshold3,
707                                func, *pres, xres);
708     }
709   i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
710   *pres = pmin + i;
711   mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
712   if (verbose)
713     printf ("%lu\n", *pres);
714   mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
715   return;
716 }
717 
718 /************************************
719  * Tune Mulders' mulhigh function   *
720  ************************************/
721 #define TOLERANCE 1.00
722 #define MULDERS_TABLE_SIZE 1024
723 #ifndef MPFR_MULHIGH_SIZE
724 # define MPFR_MULHIGH_SIZE MULDERS_TABLE_SIZE
725 #endif
726 #ifndef MPFR_SQRHIGH_SIZE
727 # define MPFR_SQRHIGH_SIZE MULDERS_TABLE_SIZE
728 #endif
729 #ifndef MPFR_DIVHIGH_SIZE
730 # define MPFR_DIVHIGH_SIZE MULDERS_TABLE_SIZE
731 #endif
732 #define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
733 #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
734 #define MPFR_DIVHIGH_TAB_SIZE MPFR_DIVHIGH_SIZE
735 #include "mulders.c"
736 
737 static double
738 speed_mpfr_mulhigh (struct speed_params *s)
739 {
740   SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
741 }
742 
743 static double
744 speed_mpfr_sqrhigh (struct speed_params *s)
745 {
746   SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
747 }
748 
749 static double
750 speed_mpfr_divhigh (struct speed_params *s)
751 {
752   SPEED_ROUTINE_MPN_DC_DIVREM_CALL (mpfr_divhigh_n (q, a, d, s->size));
753 }
754 
755 #define MAX_STEPS 513 /* maximum number of values of k tried for a given n */
756 
757 /* Tune mpfr_mulhigh_n for size n */
758 static mp_size_t
759 tune_mul_mulders_upto (mp_size_t n)
760 {
761   struct speed_params s;
762   mp_size_t k, kbest, step;
763   double t, tbest;
764   MPFR_TMP_DECL (marker);
765 
766   if (n == 0)
767     return -1;
768 
769   MPFR_TMP_MARK (marker);
770   s.align_xp = s.align_yp = s.align_wp = 64;
771   s.size = n;
772   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
773   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
774   mpn_random (s.xp, n);
775   mpn_random (s.yp, n);
776 
777   /* Check k == -1, mpn_mul_basecase */
778   mulhigh_ktab[n] = -1;
779   kbest = -1;
780   tbest = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
781 
782   /* Check k == 0, mpn_mulhigh_n_basecase */
783   mulhigh_ktab[n] = 0;
784   t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
785   if (t * TOLERANCE < tbest)
786     kbest = 0, tbest = t;
787 
788   /* Check Mulders with cutoff point k */
789   step = 1 + n / (2 * MAX_STEPS);
790   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
791   for (k = (n + 4) / 2 ; k < n ; k += step)
792     {
793       mulhigh_ktab[n] = k;
794       t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
795       if (t * TOLERANCE < tbest)
796         kbest = k, tbest = t;
797     }
798 
799   mulhigh_ktab[n] = kbest;
800 
801   MPFR_TMP_FREE (marker);
802   return kbest;
803 }
804 
805 /* Tune mpfr_sqrhigh_n for size n */
806 static mp_size_t
807 tune_sqr_mulders_upto (mp_size_t n)
808 {
809   struct speed_params s;
810   mp_size_t k, kbest, step;
811   double t, tbest;
812   MPFR_TMP_DECL (marker);
813 
814   if (n == 0)
815     return -1;
816 
817   MPFR_TMP_MARK (marker);
818   s.align_xp = s.align_wp = 64;
819   s.size = n;
820   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
821   mpn_random (s.xp, n);
822 
823   /* Check k == -1, mpn_sqr_basecase */
824   sqrhigh_ktab[n] = -1;
825   kbest = -1;
826   tbest = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
827 
828   /* Check k == 0, mpfr_mulhigh_n_basecase */
829   sqrhigh_ktab[n] = 0;
830   t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
831   if (t * TOLERANCE < tbest)
832     kbest = 0, tbest = t;
833 
834   /* Check Mulders */
835   step = 1 + n / (2 * MAX_STEPS);
836   /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
837   for (k = (n + 4) / 2 ; k < n ; k += step)
838     {
839       sqrhigh_ktab[n] = k;
840       t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
841       if (t * TOLERANCE < tbest)
842         kbest = k, tbest = t;
843     }
844 
845   sqrhigh_ktab[n] = kbest;
846 
847   MPFR_TMP_FREE (marker);
848   return kbest;
849 }
850 
851 /* Tune mpfr_divhigh_n for size n */
852 static mp_size_t
853 tune_div_mulders_upto (mp_size_t n)
854 {
855   struct speed_params s;
856   mp_size_t k, kbest, step;
857   double t, tbest;
858   MPFR_TMP_DECL (marker);
859 
860   if (n == 0)
861     return 0;
862 
863   MPFR_TMP_MARK (marker);
864   s.align_xp = s.align_yp = s.align_wp = s.align_wp2 = 64;
865   s.size = n;
866   s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
867   s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
868   mpn_random (s.xp, n);
869   mpn_random (s.yp, n);
870 
871   /* Check k == n, i.e., mpn_divrem */
872   divhigh_ktab[n] = n;
873   kbest = n;
874   tbest = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
875 
876   /* Check k == 0, i.e., mpfr_divhigh_n_basecase */
877 #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
878   if (n > 2) /* mpn_sbpi1_divappr_q requires dn > 2 */
879 #endif
880     {
881       divhigh_ktab[n] = 0;
882       t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
883       if (t * TOLERANCE < tbest)
884         kbest = 0, tbest = t;
885     }
886 
887   /* Check Mulders */
888   step = 1 + n / (2 * MAX_STEPS);
889   /* we should have (n+3)/2 <= k < n, which translates into
890      (n+4)/2 <= k < n in C */
891   for (k = (n + 4) / 2 ; k < n ; k += step)
892     {
893       divhigh_ktab[n] = k;
894       t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
895       if (t * TOLERANCE < tbest)
896         kbest = k, tbest = t;
897     }
898 
899   divhigh_ktab[n] = kbest;
900 
901   MPFR_TMP_FREE (marker);
902 
903   return kbest;
904 }
905 
906 static void
907 tune_mul_mulders (FILE *f)
908 {
909   mp_size_t k;
910 
911   if (verbose)
912     printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
913   fprintf (f, "#define MPFR_MULHIGH_TAB  \\\n ");
914   for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
915     {
916       fprintf (f, "%d", (int) tune_mul_mulders_upto (k));
917       if (k != MPFR_MULHIGH_TAB_SIZE-1)
918         fputc (',', f);
919       if ((k+1) % 16 == 0)
920         fprintf (f, " \\\n ");
921       if (verbose)
922         putchar ('.');
923     }
924   fprintf (f, " \n");
925   if (verbose)
926     putchar ('\n');
927 }
928 
929 static void
930 tune_sqr_mulders (FILE *f)
931 {
932   mp_size_t k;
933 
934   if (verbose)
935     printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
936   fprintf (f, "#define MPFR_SQRHIGH_TAB  \\\n ");
937   for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
938     {
939       fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
940       if (k != MPFR_SQRHIGH_TAB_SIZE-1)
941         fputc (',', f);
942       if ((k+1) % 16 == 0)
943         fprintf (f, " \\\n ");
944       if (verbose)
945         putchar ('.');
946     }
947   fprintf (f, " \n");
948   if (verbose)
949     putchar ('\n');
950 }
951 
952 static void
953 tune_div_mulders (FILE *f)
954 {
955   mp_size_t k;
956 
957   if (verbose)
958     printf ("Tuning mpfr_divhigh_n[%d]", (int) MPFR_DIVHIGH_TAB_SIZE);
959   fprintf (f, "#define MPFR_DIVHIGH_TAB  \\\n ");
960   for (k = 0 ; k < MPFR_DIVHIGH_TAB_SIZE ; k++)
961     {
962       fprintf (f, "%d", (int) tune_div_mulders_upto (k));
963       if (k != MPFR_DIVHIGH_TAB_SIZE - 1)
964         fputc (',', f);
965       if ((k+1) % 16 == 0)
966         fprintf (f, " /*%zu-%zu*/ \\\n ", (size_t) k - 15, (size_t) k);
967       if (verbose)
968         putchar ('.');
969     }
970   fprintf (f, " \n");
971   if (verbose)
972     putchar ('\n');
973 }
974 
975 /*******************************************************
976  *            Tuning functions for mpfr_ai             *
977  *******************************************************/
978 
979 long int mpfr_ai_threshold1;
980 long int mpfr_ai_threshold2;
981 long int mpfr_ai_threshold3;
982 #undef  MPFR_AI_THRESHOLD1
983 #define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1
984 #undef  MPFR_AI_THRESHOLD2
985 #define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2
986 #undef  MPFR_AI_THRESHOLD3
987 #define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3
988 
989 #include "ai.c"
990 
991 static double
992 speed_mpfr_ai (struct speed_params *s)
993 {
994   SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai);
995 }
996 
997 
998 /*******************************************************
999  *            Tune all the threshold of MPFR           *
1000  * Warning: tune the function in their dependent order!*
1001  *******************************************************/
1002 static void
1003 all (const char *filename)
1004 {
1005   FILE *f;
1006   time_t  start_time, end_time;
1007   struct tm  *tp;
1008   mpfr_t x1, x2, x3, tmp1, tmp2;
1009   mpfr_prec_t p1, p2, p3;
1010 
1011   f = fopen (filename, "w");
1012   if (f == NULL)
1013     {
1014       fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
1015       abort ();
1016     }
1017 
1018   speed_time_init ();
1019   if (verbose)
1020     {
1021       printf ("Using: %s\n", speed_time_string);
1022       printf ("speed_precision %d", speed_precision);
1023       if (speed_unittime == 1.0)
1024         printf (", speed_unittime 1 cycle");
1025       else
1026         printf (", speed_unittime %.2e secs", speed_unittime);
1027       if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
1028         printf (", CPU freq unknown\n");
1029       else
1030         printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
1031     }
1032 
1033   time (&start_time);
1034   tp = localtime (&start_time);
1035   fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
1036           tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
1037 
1038 #ifdef __ICC
1039   fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
1040 #elif defined(__GNUC__)
1041 #ifdef __GNUC_PATCHLEVEL__
1042   fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
1043            __GNUC_PATCHLEVEL__);
1044 #else
1045   fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
1046 #endif
1047 #elif defined (__SUNPRO_C)
1048   fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
1049 #elif defined (__sgi) && defined (_COMPILER_VERSION)
1050   fprintf (f, "MIPSpro C %d.%d.%d */\n",
1051            _COMPILER_VERSION / 100,
1052            _COMPILER_VERSION / 10 % 10,
1053            _COMPILER_VERSION % 10);
1054 #elif defined (__DECC) && defined (__DECC_VER)
1055   fprintf (f, "DEC C %d */\n", __DECC_VER);
1056 #else
1057   fprintf (f, "system compiler */\n");
1058 #endif
1059   fprintf (f, "\n\n");
1060   fprintf (f, "#ifndef MPFR_TUNE_CASE\n");
1061   fprintf (f, "#define MPFR_TUNE_CASE \"src/mparam.h\"\n");
1062   fprintf (f, "#endif\n\n");
1063 
1064   /* Tune mulhigh */
1065   tune_mul_mulders (f);
1066 
1067   /* Tune sqrhigh */
1068   tune_sqr_mulders (f);
1069 
1070   /* Tune divhigh */
1071   tune_div_mulders (f);
1072   fflush (f);
1073 
1074   /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
1075   if (verbose)
1076     printf ("Tuning mpfr_mul...\n");
1077   tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
1078                     2*GMP_NUMB_BITS+1);
1079   fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
1080            (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
1081 
1082   /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */
1083   if (verbose)
1084     printf ("Tuning mpfr_sqr...\n");
1085   tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr,
1086                     2*GMP_NUMB_BITS+1);
1087   fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n",
1088            (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1);
1089 
1090   /* Tune mpfr_div (threshold is in limbs, but it doesn't matter too much) */
1091   if (verbose)
1092     printf ("Tuning mpfr_div...\n");
1093   tune_simple_func (&mpfr_div_threshold, speed_mpfr_div,
1094                     2*GMP_NUMB_BITS+1);
1095   fprintf (f, "#define MPFR_DIV_THRESHOLD %lu /* limbs */\n",
1096            (unsigned long) (mpfr_div_threshold - 1) / GMP_NUMB_BITS + 1);
1097 
1098   /* Tune mpfr_exp_2 */
1099   if (verbose)
1100     printf ("Tuning mpfr_exp_2...\n");
1101   tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, GMP_NUMB_BITS);
1102   fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
1103            (unsigned long) mpfr_exp_2_threshold);
1104 
1105   /* Tune mpfr_exp */
1106   if (verbose)
1107     printf ("Tuning mpfr_exp...\n");
1108   tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
1109                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
1110   fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
1111            (unsigned long) mpfr_exp_threshold);
1112 
1113   /* Tune mpfr_sin_cos */
1114   if (verbose)
1115     printf ("Tuning mpfr_sin_cos...\n");
1116   tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos,
1117                     MPFR_PREC_MIN+3*GMP_NUMB_BITS);
1118   fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n",
1119            (unsigned long) mpfr_sincos_threshold);
1120 
1121   /* Tune mpfr_ai */
1122   if (verbose)
1123     printf ("Tuning mpfr_ai...\n");
1124   mpfr_init2 (x1, MPFR_SMALL_PRECISION);
1125   mpfr_init2 (x2, MPFR_SMALL_PRECISION);
1126   mpfr_init2 (x3, MPFR_SMALL_PRECISION);
1127   mpfr_init2 (tmp1, MPFR_SMALL_PRECISION);
1128   mpfr_init2 (tmp2, MPFR_SMALL_PRECISION);
1129 
1130   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1131                                       &mpfr_ai_threshold3, speed_mpfr_ai,
1132                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
1133                                       -60, 200, x1, &p1);
1134   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1135                                       &mpfr_ai_threshold3, speed_mpfr_ai,
1136                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
1137                                       -20, 500, x2, &p2);
1138   tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
1139                                       &mpfr_ai_threshold3, speed_mpfr_ai,
1140                                       MPFR_PREC_MIN+GMP_NUMB_BITS,
1141                                       40, 200, x3, &p3);
1142 
1143   mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN);
1144   mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN);
1145   mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN);
1146   mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN);
1147 
1148   mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN);
1149   mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN);
1150   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
1151   mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN);
1152 
1153   mpfr_sub (tmp2, x2, x1, MPFR_RNDN);
1154   mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
1155   mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN);
1156 
1157   mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN);
1158   mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN);
1159   mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN);
1160   mpfr_div (tmp1, tmp1, x3, MPFR_RNDN);
1161   mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN);
1162 
1163   fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld /* threshold for negative input of mpfr_ai */\n", mpfr_ai_threshold1);
1164   fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2);
1165   fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3);
1166 
1167   mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3);
1168   mpfr_clear (tmp1); mpfr_clear (tmp2);
1169 
1170   /* End of tuning */
1171   time (&end_time);
1172   fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
1173            (long) (end_time - start_time));
1174   if (verbose)
1175     printf ("Complete (took %ld seconds).\n", (long) (end_time - start_time));
1176 
1177   fclose (f);
1178 }
1179 
1180 
1181 /* Main function */
1182 int main (int argc, char *argv[])
1183 {
1184   /* Unbuffered so if output is redirected to a file it isn't lost if the
1185      program is killed part way through.  */
1186   setbuf (stdout, NULL);
1187   setbuf (stderr, NULL);
1188 
1189   verbose = argc > 1;
1190 
1191   if (verbose)
1192     printf ("Tuning MPFR (Coffee time?)...\n");
1193 
1194   all ("mparam.h");
1195 
1196   return 0;
1197 }
1198