1 /* Chi-squared test for mpfr_erandom 2 3 Copyright 2011-2023 Free Software Foundation, Inc. 4 Contributed by Charles Karney <charles@karney.com>, SRI International. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #include "mpfr-test.h" 24 25 /* Return Phi(x) = 1 - exp(-x), the cumulative probability function for the 26 * exponential distribution. We only take differences of this function so the 27 * offset doesn't matter; here Phi(0) = 0. */ 28 static void 29 exponential_cumulative (mpfr_ptr z, mpfr_ptr x, mpfr_rnd_t rnd) 30 { 31 mpfr_neg (z, x, rnd); 32 mpfr_expm1 (z, z, rnd); 33 mpfr_neg (z, z, rnd); 34 } 35 36 /* Given nu and chisqp, compute probability that chisq > chisqp. This uses, 37 * A&S 26.4.16, 38 * 39 * Q(nu,chisqp) = 40 * erfc( (3/2)*sqrt(nu) * ( cbrt(chisqp/nu) - 1 + 2/(9*nu) ) ) / 2 41 * 42 * which is valid for nu > 30. This is the basis for the formula in Knuth, 43 * TAOCP, Vol 2, 3.3.1, Table 1. It more accurate than the similar formula, 44 * DLMF 8.11.10. */ 45 static void 46 chisq_prob (mpfr_ptr q, long nu, mpfr_ptr chisqp) 47 { 48 mpfr_t t; 49 mpfr_rnd_t rnd; 50 51 rnd = MPFR_RNDN; /* This uses an approx formula. Might as well use RNDN. */ 52 mpfr_init2 (t, mpfr_get_prec (q)); 53 54 mpfr_div_si (q, chisqp, nu, rnd); /* chisqp/nu */ 55 mpfr_cbrt (q, q, rnd); /* (chisqp/nu)^(1/3) */ 56 mpfr_sub_ui (q, q, 1, rnd); /* (chisqp/nu)^(1/3) - 1 */ 57 mpfr_set_ui (t, 2, rnd); 58 mpfr_div_si (t, t, 9*nu, rnd); /* 2/(9*nu) */ 59 mpfr_add (q, q, t, rnd); /* (chisqp/nu)^(1/3) - 1 + 2/(9*nu) */ 60 mpfr_sqrt_ui (t, nu, rnd); /* sqrt(nu) */ 61 mpfr_mul_d (t, t, 1.5, rnd); /* (3/2)*sqrt(nu) */ 62 mpfr_mul (q, q, t, rnd); /* arg to erfc */ 63 mpfr_erfc (q, q, rnd); /* erfc(...) */ 64 mpfr_div_ui (q, q, 2, rnd); /* erfc(...)/2 */ 65 66 mpfr_clear (t); 67 } 68 69 /* The continuous chi-squared test on with a set of bins of equal width. 70 * 71 * A single precision is picked for sampling and the chi-squared calculation. 72 * This should picked high enough so that binning in test doesn't need to be 73 * accurately aligned with possible values of the deviates. Also we need the 74 * precision big enough that chi-squared calculation itself is reliable. 75 * 76 * There's no particular benefit is testing with at very higher precisions; 77 * because of the way terandom samples, this just adds additional barely 78 * significant random bits to the deviates. So this chi-squared test with 79 * continuous equal width bins isn't a good tool for finding problems here. 80 * 81 * The testing of low precision exponential deviates is done by 82 * test_erandom_chisq_disc. */ 83 static double 84 test_erandom_chisq_cont (long num, mpfr_prec_t prec, int nu, 85 double xmin, double xmax, int verbose) 86 { 87 mpfr_t x, a, b, dx, z, pa, pb, ps, t; 88 long *counts; 89 int i, inexact; 90 long k; 91 mpfr_rnd_t rnd, rndd; 92 double Q, chisq; 93 94 rnd = MPFR_RNDN; /* For chi-squared calculation */ 95 rndd = MPFR_RNDD; /* For sampling and figuring the bins */ 96 mpfr_inits2 (prec, x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0); 97 98 counts = (long *) tests_allocate ((nu + 1) * sizeof (long)); 99 for (i = 0; i <= nu; i++) 100 counts[i] = 0; 101 102 /* a and b are bounds of nu equally spaced bins. Set dx = (b-a)/nu */ 103 mpfr_set_d (a, xmin, rnd); 104 mpfr_set_d (b, xmax, rnd); 105 106 mpfr_sub (dx, b, a, rnd); 107 mpfr_div_si (dx, dx, nu, rnd); 108 109 for (k = 0; k < num; ++k) 110 { 111 inexact = mpfr_erandom (x, RANDS, rndd); 112 if (inexact == 0) 113 { 114 /* one call in the loop pretended to return an exact number! */ 115 printf ("Error: mpfr_erandom() returns a zero ternary value.\n"); 116 exit (1); 117 } 118 if (mpfr_signbit (x)) 119 { 120 printf ("Error: mpfr_erandom() returns a negative deviate.\n"); 121 exit (1); 122 } 123 mpfr_sub (x, x, a, rndd); 124 mpfr_div (x, x, dx, rndd); 125 i = mpfr_get_si (x, rndd); 126 ++counts[i >= 0 && i < nu ? i : nu]; 127 } 128 129 mpfr_set (x, a, rnd); 130 exponential_cumulative (pa, x, rnd); 131 mpfr_add_ui (ps, pa, 1, rnd); 132 mpfr_set_zero (t, 1); 133 for (i = 0; i <= nu; ++i) 134 { 135 if (i < nu) 136 { 137 mpfr_add (x, x, dx, rnd); 138 exponential_cumulative (pb, x, rnd); 139 mpfr_sub (pa, pb, pa, rnd); /* prob for this bin */ 140 } 141 else 142 mpfr_sub (pa, ps, pa, rnd); /* prob for last bin, i = nu */ 143 144 /* Compute z = counts[i] - num * p; t += z * z / (num * p) */ 145 mpfr_mul_ui (pa, pa, num, rnd); 146 mpfr_ui_sub (z, counts[i], pa, rnd); 147 mpfr_sqr (z, z, rnd); 148 mpfr_div (z, z, pa, rnd); 149 mpfr_add (t, t, z, rnd); 150 mpfr_swap (pa, pb); /* i.e., pa = pb */ 151 } 152 153 chisq = mpfr_get_d (t, rnd); 154 chisq_prob (t, nu, t); 155 Q = mpfr_get_d (t, rnd); 156 if (verbose) 157 { 158 printf ("num = %ld, equal bins in [%.2f, %.2f], nu = %d: chisq = %.2f\n", 159 num, xmin, xmax, nu, chisq); 160 if (Q < 0.05) 161 printf (" WARNING: probability (less than 5%%) = %.2e\n", Q); 162 } 163 164 tests_free (counts, (nu + 1) * sizeof (long)); 165 mpfr_clears (x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0); 166 return Q; 167 } 168 169 /* Return a sequential number for a positive low-precision x. x is altered by 170 * this function. low precision means prec = 2, 3, or 4. High values of 171 * precision will result in integer overflow. */ 172 static long 173 sequential (mpfr_ptr x) 174 { 175 long expt, prec; 176 177 prec = mpfr_get_prec (x); 178 expt = mpfr_get_exp (x); 179 mpfr_mul_2si (x, x, prec - expt, MPFR_RNDN); 180 181 return expt * (1 << (prec - 1)) + mpfr_get_si (x, MPFR_RNDN); 182 } 183 184 /* The chi-squared test on low precision exponential deviates. wprec is the 185 * working precision for the chi-squared calculation. prec is the precision 186 * for the sampling; choose this in [2,5]. The bins consist of all the 187 * possible deviate values in the range [xmin, xmax] coupled with the value of 188 * inexact. Thus with prec = 2, the bins are 189 * ... 190 * (7/16, 1/2) x = 1/2, inexact = +1 191 * (1/2 , 5/8) x = 1/2, inexact = -1 192 * (5/8 , 3/4) x = 3/4, inexact = +1 193 * (3/4 , 7/8) x = 3/4, inexact = -1 194 * (7/8 , 1 ) x = 1 , inexact = +1 195 * (1 , 5/4) x = 1 , inexact = -1 196 * (5/4 , 3/2) x = 3/2, inexact = +1 197 * (3/2 , 7/4) x = 3/2, inexact = -1 198 * ... 199 * In addition, two bins are allocated for [0,xmin) and (xmax,inf). 200 * 201 * The sampling is with MPFR_RNDN. This is the rounding mode which elicits the 202 * most information. trandom_deviate includes checks on the consistency of the 203 * results extracted from a random_deviate with other rounding modes. */ 204 static double 205 test_erandom_chisq_disc (long num, mpfr_prec_t wprec, int prec, 206 double xmin, double xmax, int verbose) 207 { 208 mpfr_t x, v, pa, pb, z, t; 209 mpfr_rnd_t rnd; 210 int i, inexact, nu; 211 long *counts; 212 long k, seqmin, seqmax, seq; 213 double Q, chisq; 214 215 rnd = MPFR_RNDN; 216 mpfr_init2 (x, prec); 217 mpfr_init2 (v, prec+1); 218 mpfr_inits2 (wprec, pa, pb, z, t, (mpfr_ptr) 0); 219 220 mpfr_set_d (x, xmin, rnd); 221 xmin = mpfr_get_d (x, rnd); 222 mpfr_set (v, x, rnd); 223 seqmin = sequential (x); 224 mpfr_set_d (x, xmax, rnd); 225 xmax = mpfr_get_d (x, rnd); 226 seqmax = sequential (x); 227 228 /* Two bins for each sequential number (for inexact = +/- 1), plus 1 for u < 229 * umin and 1 for u > umax, minus 1 for degrees of freedom */ 230 nu = 2 * (seqmax - seqmin + 1) + 2 - 1; 231 counts = (long *) tests_allocate ((nu + 1) * sizeof (long)); 232 for (i = 0; i <= nu; i++) 233 counts[i] = 0; 234 235 for (k = 0; k < num; ++k) 236 { 237 inexact = mpfr_erandom (x, RANDS, rnd); 238 if (mpfr_signbit (x)) 239 { 240 printf ("Error: mpfr_erandom() returns a negative deviate.\n"); 241 exit (1); 242 } 243 /* Don't call sequential with small args to avoid undefined behavior with 244 * zero and possibility of overflow. */ 245 seq = mpfr_greaterequal_p (x, v) ? sequential (x) : seqmin - 1; 246 ++counts[seq < seqmin ? 0 : 247 seq <= seqmax ? 2 * (seq - seqmin) + 1 + (inexact > 0 ? 0 : 1) : 248 nu]; 249 } 250 251 mpfr_set_zero (v, 1); 252 exponential_cumulative (pa, v, rnd); 253 /* Cycle through all the bin boundaries using mpfr_nextabove at precision 254 * prec + 1 starting at mpfr_nextbelow (xmin) */ 255 mpfr_set_d (x, xmin, rnd); 256 mpfr_set (v, x, rnd); 257 mpfr_nextbelow (v); 258 mpfr_nextbelow (v); 259 mpfr_set_zero (t, 1); 260 for (i = 0; i <= nu; ++i) 261 { 262 if (i < nu) 263 mpfr_nextabove (v); 264 else 265 mpfr_set_inf (v, 1); 266 exponential_cumulative (pb, v, rnd); 267 mpfr_sub (pa, pb, pa, rnd); 268 269 /* Compute z = counts[i] - num * p; t += z * z / (num * p). */ 270 mpfr_mul_ui (pa, pa, num, rnd); 271 mpfr_ui_sub (z, counts[i], pa, rnd); 272 mpfr_sqr (z, z, rnd); 273 mpfr_div (z, z, pa, rnd); 274 mpfr_add (t, t, z, rnd); 275 mpfr_swap (pa, pb); /* i.e., pa = pb */ 276 } 277 278 chisq = mpfr_get_d (t, rnd); 279 chisq_prob (t, nu, t); 280 Q = mpfr_get_d (t, rnd); 281 if (verbose) 282 { 283 printf ("num = %ld, discrete (prec = %d) bins in [%.6f, %.2f], " 284 "nu = %d: chisq = %.2f\n", num, prec, xmin, xmax, nu, chisq); 285 if (Q < 0.05) 286 printf (" WARNING: probability (less than 5%%) = %.2e\n", Q); 287 } 288 289 tests_free (counts, (nu + 1) * sizeof (long)); 290 mpfr_clears (x, v, pa, pb, z, t, (mpfr_ptr) 0); 291 return Q; 292 } 293 294 static void 295 run_chisq (double (*f)(long, mpfr_prec_t, int, double, double, int), 296 long num, mpfr_prec_t prec, int bin, 297 double xmin, double xmax, int verbose) 298 { 299 double Q, Qcum, Qbad, Qthresh; 300 int i; 301 302 Qcum = 1; 303 Qbad = 1.e-9; 304 Qthresh = 0.01; 305 for (i = 0; i < 3; ++i) 306 { 307 Q = (*f)(num, prec, bin, xmin, xmax, verbose); 308 Qcum *= Q; 309 if (Q > Qthresh) 310 return; 311 else if (Q < Qbad) 312 { 313 printf ("Error: mpfr_erandom chi-squared failure " 314 "(prob = %.2e < %.2e)\n", Q, Qbad); 315 exit (1); 316 } 317 num *= 10; 318 Qthresh /= 10; 319 } 320 if (Qcum < Qbad) /* Presumably this is true */ 321 { 322 printf ("Error: mpfr_erandom combined chi-squared failure " 323 "(prob = %.2e)\n", Qcum); 324 exit (1); 325 } 326 } 327 328 int 329 main (int argc, char *argv[]) 330 { 331 long nbtests; 332 int verbose; 333 334 tests_start_mpfr (); 335 336 verbose = 0; 337 nbtests = 100000; 338 if (argc > 1) 339 { 340 long a = atol (argv[1]); 341 verbose = 1; 342 if (a != 0) 343 nbtests = a; 344 } 345 346 run_chisq (test_erandom_chisq_cont, nbtests, 64, 60, 0, 7, verbose); 347 run_chisq (test_erandom_chisq_disc, nbtests, 64, 2, 0.002, 6, verbose); 348 run_chisq (test_erandom_chisq_disc, nbtests, 64, 3, 0.02, 7, verbose); 349 run_chisq (test_erandom_chisq_disc, nbtests, 64, 4, 0.04, 8, verbose); 350 351 tests_end_mpfr (); 352 return 0; 353 } 354