xref: /netbsd-src/external/lgpl3/mpfr/dist/tests/terandom_chisq.c (revision 8e33eff89e26cf71871ead62f0d5063e1313c33a)
1 /* Chi-squared test for mpfr_erandom
2 
3 Copyright 2011-2023 Free Software Foundation, Inc.
4 Contributed by Charles Karney <charles@karney.com>, SRI International.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include "mpfr-test.h"
24 
25 /* Return Phi(x) = 1 - exp(-x), the cumulative probability function for the
26  * exponential distribution.  We only take differences of this function so the
27  * offset doesn't matter; here Phi(0) = 0. */
28 static void
29 exponential_cumulative (mpfr_ptr z, mpfr_ptr x, mpfr_rnd_t rnd)
30 {
31   mpfr_neg (z, x, rnd);
32   mpfr_expm1 (z, z, rnd);
33   mpfr_neg (z, z, rnd);
34 }
35 
36 /* Given nu and chisqp, compute probability that chisq > chisqp.  This uses,
37  * A&S 26.4.16,
38  *
39  * Q(nu,chisqp) =
40  *     erfc( (3/2)*sqrt(nu) * ( cbrt(chisqp/nu) - 1 + 2/(9*nu) ) ) / 2
41  *
42  * which is valid for nu > 30.  This is the basis for the formula in Knuth,
43  * TAOCP, Vol 2, 3.3.1, Table 1.  It more accurate than the similar formula,
44  * DLMF 8.11.10. */
45 static void
46 chisq_prob (mpfr_ptr q, long nu, mpfr_ptr chisqp)
47 {
48   mpfr_t t;
49   mpfr_rnd_t rnd;
50 
51   rnd = MPFR_RNDN;  /* This uses an approx formula.  Might as well use RNDN. */
52   mpfr_init2 (t, mpfr_get_prec (q));
53 
54   mpfr_div_si (q, chisqp, nu, rnd); /* chisqp/nu */
55   mpfr_cbrt (q, q, rnd);            /* (chisqp/nu)^(1/3) */
56   mpfr_sub_ui (q, q, 1, rnd);       /* (chisqp/nu)^(1/3) - 1 */
57   mpfr_set_ui (t, 2, rnd);
58   mpfr_div_si (t, t, 9*nu, rnd); /* 2/(9*nu) */
59   mpfr_add (q, q, t, rnd);       /* (chisqp/nu)^(1/3) - 1 + 2/(9*nu) */
60   mpfr_sqrt_ui (t, nu, rnd);     /* sqrt(nu) */
61   mpfr_mul_d (t, t, 1.5, rnd);   /* (3/2)*sqrt(nu) */
62   mpfr_mul (q, q, t, rnd);       /* arg to erfc */
63   mpfr_erfc (q, q, rnd);         /* erfc(...) */
64   mpfr_div_ui (q, q, 2, rnd);    /* erfc(...)/2 */
65 
66   mpfr_clear (t);
67 }
68 
69 /* The continuous chi-squared test on with a set of bins of equal width.
70  *
71  * A single precision is picked for sampling and the chi-squared calculation.
72  * This should picked high enough so that binning in test doesn't need to be
73  * accurately aligned with possible values of the deviates.  Also we need the
74  * precision big enough that chi-squared calculation itself is reliable.
75  *
76  * There's no particular benefit is testing with at very higher precisions;
77  * because of the way terandom samples, this just adds additional barely
78  * significant random bits to the deviates.  So this chi-squared test with
79  * continuous equal width bins isn't a good tool for finding problems here.
80  *
81  * The testing of low precision exponential deviates is done by
82  * test_erandom_chisq_disc. */
83 static double
84 test_erandom_chisq_cont (long num, mpfr_prec_t prec, int nu,
85                          double xmin, double xmax, int verbose)
86 {
87   mpfr_t x, a, b, dx, z, pa, pb, ps, t;
88   long *counts;
89   int i, inexact;
90   long k;
91   mpfr_rnd_t rnd, rndd;
92   double Q, chisq;
93 
94   rnd = MPFR_RNDN;              /* For chi-squared calculation */
95   rndd = MPFR_RNDD;             /* For sampling and figuring the bins */
96   mpfr_inits2 (prec, x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);
97 
98   counts = (long *) tests_allocate ((nu + 1) * sizeof (long));
99   for (i = 0; i <= nu; i++)
100     counts[i] = 0;
101 
102   /* a and b are bounds of nu equally spaced bins.  Set dx = (b-a)/nu */
103   mpfr_set_d (a, xmin, rnd);
104   mpfr_set_d (b, xmax, rnd);
105 
106   mpfr_sub (dx, b, a, rnd);
107   mpfr_div_si (dx, dx, nu, rnd);
108 
109   for (k = 0; k < num; ++k)
110     {
111       inexact = mpfr_erandom (x, RANDS, rndd);
112       if (inexact == 0)
113         {
114           /* one call in the loop pretended to return an exact number! */
115           printf ("Error: mpfr_erandom() returns a zero ternary value.\n");
116           exit (1);
117         }
118       if (mpfr_signbit (x))
119         {
120           printf ("Error: mpfr_erandom() returns a negative deviate.\n");
121           exit (1);
122         }
123       mpfr_sub (x, x, a, rndd);
124       mpfr_div (x, x, dx, rndd);
125       i = mpfr_get_si (x, rndd);
126       ++counts[i >= 0 && i < nu ? i : nu];
127     }
128 
129   mpfr_set (x, a, rnd);
130   exponential_cumulative (pa, x, rnd);
131   mpfr_add_ui (ps, pa, 1, rnd);
132   mpfr_set_zero (t, 1);
133   for (i = 0; i <= nu; ++i)
134     {
135       if (i < nu)
136         {
137           mpfr_add (x, x, dx, rnd);
138           exponential_cumulative (pb, x, rnd);
139           mpfr_sub (pa, pb, pa, rnd); /* prob for this bin */
140         }
141       else
142         mpfr_sub (pa, ps, pa, rnd); /* prob for last bin, i = nu */
143 
144       /* Compute z = counts[i] - num * p; t += z * z / (num * p) */
145       mpfr_mul_ui (pa, pa, num, rnd);
146       mpfr_ui_sub (z, counts[i], pa, rnd);
147       mpfr_sqr (z, z, rnd);
148       mpfr_div (z, z, pa, rnd);
149       mpfr_add (t, t, z, rnd);
150       mpfr_swap (pa, pb);       /* i.e., pa = pb */
151     }
152 
153   chisq = mpfr_get_d (t, rnd);
154   chisq_prob (t, nu, t);
155   Q = mpfr_get_d (t, rnd);
156   if (verbose)
157     {
158       printf ("num = %ld, equal bins in [%.2f, %.2f], nu = %d: chisq = %.2f\n",
159               num, xmin, xmax, nu, chisq);
160       if (Q < 0.05)
161         printf ("    WARNING: probability (less than 5%%) = %.2e\n", Q);
162     }
163 
164   tests_free (counts, (nu + 1) * sizeof (long));
165   mpfr_clears (x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);
166   return Q;
167 }
168 
169 /* Return a sequential number for a positive low-precision x.  x is altered by
170  * this function.  low precision means prec = 2, 3, or 4.  High values of
171  * precision will result in integer overflow. */
172 static long
173 sequential (mpfr_ptr x)
174 {
175   long expt, prec;
176 
177   prec = mpfr_get_prec (x);
178   expt =  mpfr_get_exp (x);
179   mpfr_mul_2si (x, x, prec - expt, MPFR_RNDN);
180 
181   return expt * (1 << (prec - 1)) + mpfr_get_si (x, MPFR_RNDN);
182 }
183 
184 /* The chi-squared test on low precision exponential deviates.  wprec is the
185  * working precision for the chi-squared calculation.  prec is the precision
186  * for the sampling; choose this in [2,5].  The bins consist of all the
187  * possible deviate values in the range [xmin, xmax] coupled with the value of
188  * inexact.  Thus with prec = 2, the bins are
189  *   ...
190  *   (7/16, 1/2)  x = 1/2, inexact = +1
191  *   (1/2 , 5/8)  x = 1/2, inexact = -1
192  *   (5/8 , 3/4)  x = 3/4, inexact = +1
193  *   (3/4 , 7/8)  x = 3/4, inexact = -1
194  *   (7/8 , 1  )  x = 1  , inexact = +1
195  *   (1   , 5/4)  x = 1  , inexact = -1
196  *   (5/4 , 3/2)  x = 3/2, inexact = +1
197  *   (3/2 , 7/4)  x = 3/2, inexact = -1
198  *   ...
199  * In addition, two bins are allocated for [0,xmin) and (xmax,inf).
200  *
201  * The sampling is with MPFR_RNDN.  This is the rounding mode which elicits the
202  * most information.  trandom_deviate includes checks on the consistency of the
203  * results extracted from a random_deviate with other rounding modes.  */
204 static double
205 test_erandom_chisq_disc (long num, mpfr_prec_t wprec, int prec,
206                          double xmin, double xmax, int verbose)
207 {
208   mpfr_t x, v, pa, pb, z, t;
209   mpfr_rnd_t rnd;
210   int i, inexact, nu;
211   long *counts;
212   long k, seqmin, seqmax, seq;
213   double Q, chisq;
214 
215   rnd = MPFR_RNDN;
216   mpfr_init2 (x, prec);
217   mpfr_init2 (v, prec+1);
218   mpfr_inits2 (wprec, pa, pb, z, t, (mpfr_ptr) 0);
219 
220   mpfr_set_d (x, xmin, rnd);
221   xmin = mpfr_get_d (x, rnd);
222   mpfr_set (v, x, rnd);
223   seqmin = sequential (x);
224   mpfr_set_d (x, xmax, rnd);
225   xmax = mpfr_get_d (x, rnd);
226   seqmax = sequential (x);
227 
228   /* Two bins for each sequential number (for inexact = +/- 1), plus 1 for u <
229    * umin and 1 for u > umax, minus 1 for degrees of freedom */
230   nu = 2 * (seqmax - seqmin + 1) + 2 - 1;
231   counts = (long *) tests_allocate ((nu + 1) * sizeof (long));
232   for (i = 0; i <= nu; i++)
233     counts[i] = 0;
234 
235   for (k = 0; k < num; ++k)
236     {
237       inexact = mpfr_erandom (x, RANDS, rnd);
238       if (mpfr_signbit (x))
239         {
240           printf ("Error: mpfr_erandom() returns a negative deviate.\n");
241           exit (1);
242         }
243       /* Don't call sequential with small args to avoid undefined behavior with
244        * zero and possibility of overflow. */
245       seq = mpfr_greaterequal_p (x, v) ? sequential (x) : seqmin - 1;
246       ++counts[seq < seqmin ? 0 :
247                seq <= seqmax ? 2 * (seq - seqmin) + 1 + (inexact > 0 ? 0 : 1) :
248                nu];
249     }
250 
251   mpfr_set_zero (v, 1);
252   exponential_cumulative (pa, v, rnd);
253   /* Cycle through all the bin boundaries using mpfr_nextabove at precision
254    * prec + 1 starting at mpfr_nextbelow (xmin) */
255   mpfr_set_d (x, xmin, rnd);
256   mpfr_set (v, x, rnd);
257   mpfr_nextbelow (v);
258   mpfr_nextbelow (v);
259   mpfr_set_zero (t, 1);
260   for (i = 0; i <= nu; ++i)
261     {
262       if (i < nu)
263         mpfr_nextabove (v);
264       else
265         mpfr_set_inf (v, 1);
266       exponential_cumulative (pb, v, rnd);
267       mpfr_sub (pa, pb, pa, rnd);
268 
269       /* Compute z = counts[i] - num * p; t += z * z / (num * p). */
270       mpfr_mul_ui (pa, pa, num, rnd);
271       mpfr_ui_sub (z, counts[i], pa, rnd);
272       mpfr_sqr (z, z, rnd);
273       mpfr_div (z, z, pa, rnd);
274       mpfr_add (t, t, z, rnd);
275       mpfr_swap (pa, pb);       /* i.e., pa = pb */
276     }
277 
278   chisq = mpfr_get_d (t, rnd);
279   chisq_prob (t, nu, t);
280   Q = mpfr_get_d (t, rnd);
281   if (verbose)
282     {
283       printf ("num = %ld, discrete (prec = %d) bins in [%.6f, %.2f], "
284               "nu = %d: chisq = %.2f\n", num, prec, xmin, xmax, nu, chisq);
285       if (Q < 0.05)
286         printf ("    WARNING: probability (less than 5%%) = %.2e\n", Q);
287     }
288 
289   tests_free (counts, (nu + 1) * sizeof (long));
290   mpfr_clears (x, v, pa, pb, z, t, (mpfr_ptr) 0);
291   return Q;
292 }
293 
294 static void
295 run_chisq (double (*f)(long, mpfr_prec_t, int, double, double, int),
296            long num, mpfr_prec_t prec, int bin,
297            double xmin, double xmax, int verbose)
298 {
299   double Q, Qcum, Qbad, Qthresh;
300   int i;
301 
302   Qcum = 1;
303   Qbad = 1.e-9;
304   Qthresh = 0.01;
305   for (i = 0; i < 3; ++i)
306     {
307       Q = (*f)(num, prec, bin, xmin, xmax, verbose);
308       Qcum *= Q;
309       if (Q > Qthresh)
310         return;
311       else if (Q < Qbad)
312         {
313           printf ("Error: mpfr_erandom chi-squared failure "
314                   "(prob = %.2e < %.2e)\n", Q, Qbad);
315           exit (1);
316         }
317       num *= 10;
318       Qthresh /= 10;
319     }
320   if (Qcum < Qbad)              /* Presumably this is true */
321     {
322       printf ("Error: mpfr_erandom combined chi-squared failure "
323               "(prob = %.2e)\n", Qcum);
324       exit (1);
325     }
326 }
327 
328 int
329 main (int argc, char *argv[])
330 {
331   long nbtests;
332   int verbose;
333 
334   tests_start_mpfr ();
335 
336   verbose = 0;
337   nbtests = 100000;
338   if (argc > 1)
339     {
340       long a = atol (argv[1]);
341       verbose = 1;
342       if (a != 0)
343         nbtests = a;
344     }
345 
346   run_chisq (test_erandom_chisq_cont, nbtests, 64, 60, 0, 7, verbose);
347   run_chisq (test_erandom_chisq_disc, nbtests, 64, 2, 0.002, 6, verbose);
348   run_chisq (test_erandom_chisq_disc, nbtests, 64, 3, 0.02, 7, verbose);
349   run_chisq (test_erandom_chisq_disc, nbtests, 64, 4, 0.04, 8, verbose);
350 
351   tests_end_mpfr ();
352   return 0;
353 }
354