1 /* Chi-squared test for mpfr_nrandom 2 3 Copyright 2011-2020 Free Software Foundation, Inc. 4 Contributed by Charles Karney <charles@karney.com>, SRI International. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #include "mpfr-test.h" 24 25 /* Return Phi(x) = erf(x / sqrt(2)) / 2, the cumulative probability function 26 * for the normal distribution. We only take differences of this function so 27 * the offset doesn't matter; here Phi(0) = 0. */ 28 static void 29 normal_cumulative (mpfr_t z, mpfr_t x, mpfr_rnd_t rnd) 30 { 31 mpfr_sqrt_ui (z, 2, rnd); 32 mpfr_div (z, x, z, rnd); 33 mpfr_erf (z, z, rnd); 34 mpfr_div_ui (z, z, 2, rnd); 35 } 36 37 /* Given nu and chisqp, compute probability that chisq > chisqp. This uses, 38 * A&S 26.4.16, 39 * 40 * Q(nu,chisqp) = 41 * erfc( (3/2)*sqrt(nu) * ( cbrt(chisqp/nu) - 1 + 2/(9*nu) ) ) / 2 42 * 43 * which is valid for nu > 30. This is the basis for the formula in Knuth, 44 * TAOCP, Vol 2, 3.3.1, Table 1. It more accurate than the similar formula, 45 * DLMF 8.11.10. */ 46 static void 47 chisq_prob (mpfr_t q, long nu, mpfr_t chisqp) 48 { 49 mpfr_t t; 50 mpfr_rnd_t rnd; 51 52 rnd = MPFR_RNDN; /* This uses an approx formula. Might as well use RNDN. */ 53 mpfr_init2 (t, mpfr_get_prec (q)); 54 55 mpfr_div_si (q, chisqp, nu, rnd); /* chisqp/nu */ 56 mpfr_cbrt (q, q, rnd); /* (chisqp/nu)^(1/3) */ 57 mpfr_sub_ui (q, q, 1, rnd); /* (chisqp/nu)^(1/3) - 1 */ 58 mpfr_set_ui (t, 2, rnd); 59 mpfr_div_si (t, t, 9*nu, rnd); /* 2/(9*nu) */ 60 mpfr_add (q, q, t, rnd); /* (chisqp/nu)^(1/3) - 1 + 2/(9*nu) */ 61 mpfr_sqrt_ui (t, nu, rnd); /* sqrt(nu) */ 62 mpfr_mul_d (t, t, 1.5, rnd); /* (3/2)*sqrt(nu) */ 63 mpfr_mul (q, q, t, rnd); /* arg to erfc */ 64 mpfr_erfc (q, q, rnd); /* erfc(...) */ 65 mpfr_div_ui (q, q, 2, rnd); /* erfc(...)/2 */ 66 67 mpfr_clear (t); 68 } 69 70 /* The continuous chi-squared test on with a set of bins of equal width. 71 * 72 * A single precision is picked for sampling and the chi-squared calculation. 73 * This should picked high enough so that binning in test doesn't need to be 74 * accurately aligned with possible values of the deviates. Also we need the 75 * precision big enough that chi-squared calculation itself is reliable. 76 * 77 * There's no particular benefit is testing with at very higher precisions; 78 * because of the way tnrandom samples, this just adds additional barely 79 * significant random bits to the deviates. So this chi-squared test with 80 * continuous equal width bins isn't a good tool for finding problems here. 81 * 82 * The testing of low precision normal deviates is done by 83 * test_nrandom_chisq_disc. */ 84 static double 85 test_nrandom_chisq_cont (long num, mpfr_prec_t prec, int nu, 86 double xmin, double xmax, int verbose) 87 { 88 mpfr_t x, a, b, dx, z, pa, pb, ps, t; 89 long *counts; 90 int i, inexact; 91 long k; 92 mpfr_rnd_t rnd, rndd; 93 double Q, chisq; 94 95 rnd = MPFR_RNDN; /* For chi-squared calculation */ 96 rndd = MPFR_RNDD; /* For sampling and figuring the bins */ 97 mpfr_inits2 (prec, x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0); 98 99 counts = (long *) tests_allocate ((nu + 1) * sizeof (long)); 100 for (i = 0; i <= nu; i++) 101 counts[i] = 0; 102 103 /* a and b are bounds of nu equally spaced bins. Set dx = (b-a)/nu */ 104 mpfr_set_d (a, xmin, rnd); 105 mpfr_set_d (b, xmax, rnd); 106 107 mpfr_sub (dx, b, a, rnd); 108 mpfr_div_si (dx, dx, nu, rnd); 109 110 for (k = 0; k < num; ++k) 111 { 112 inexact = mpfr_nrandom (x, RANDS, rndd); 113 if (inexact == 0) 114 { 115 /* one call in the loop pretended to return an exact number! */ 116 printf ("Error: mpfr_nrandom() returns a zero ternary value.\n"); 117 exit (1); 118 } 119 mpfr_sub (x, x, a, rndd); 120 mpfr_div (x, x, dx, rndd); 121 i = mpfr_get_si (x, rndd); 122 ++counts[i >= 0 && i < nu ? i : nu]; 123 } 124 125 mpfr_set (x, a, rnd); 126 normal_cumulative (pa, x, rnd); 127 mpfr_add_ui (ps, pa, 1, rnd); 128 mpfr_set_zero (t, 1); 129 for (i = 0; i <= nu; ++i) 130 { 131 if (i < nu) 132 { 133 mpfr_add (x, x, dx, rnd); 134 normal_cumulative (pb, x, rnd); 135 mpfr_sub (pa, pb, pa, rnd); /* prob for this bin */ 136 } 137 else 138 mpfr_sub (pa, ps, pa, rnd); /* prob for last bin, i = nu */ 139 140 /* Compute z = counts[i] - num * p; t += z * z / (num * p) */ 141 mpfr_mul_ui (pa, pa, num, rnd); 142 mpfr_ui_sub (z, counts[i], pa, rnd); 143 mpfr_sqr (z, z, rnd); 144 mpfr_div (z, z, pa, rnd); 145 mpfr_add (t, t, z, rnd); 146 mpfr_swap (pa, pb); /* i.e., pa = pb */ 147 } 148 149 chisq = mpfr_get_d (t, rnd); 150 chisq_prob (t, nu, t); 151 Q = mpfr_get_d (t, rnd); 152 if (verbose) 153 { 154 printf ("num = %ld, equal bins in [%.2f, %.2f], nu = %d: chisq = %.2f\n", 155 num, xmin, xmax, nu, chisq); 156 if (Q < 0.05) 157 printf (" WARNING: probability (less than 5%%) = %.2e\n", Q); 158 } 159 160 tests_free (counts, (nu + 1) * sizeof (long)); 161 mpfr_clears (x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0); 162 return Q; 163 } 164 165 /* Return a sequential number for a positive low-precision x. x is altered by 166 * this function. low precision means prec = 2, 3, or 4. High values of 167 * precision will result in integer overflow. */ 168 static long 169 sequential (mpfr_t x) 170 { 171 long expt, prec; 172 173 prec = mpfr_get_prec (x); 174 expt = mpfr_get_exp (x); 175 mpfr_mul_2si (x, x, prec - expt, MPFR_RNDN); 176 177 return expt * (1 << (prec - 1)) + mpfr_get_si (x, MPFR_RNDN); 178 } 179 180 /* The chi-squared test on low precision normal deviates. wprec is the working 181 * precision for the chi-squared calculation. prec is the precision for the 182 * sampling; choose this in [2,5]. The bins consist of all the possible 183 * deviate values in the range [xmin, xmax] coupled with the value of inexact. 184 * Thus with prec = 2, the bins are 185 * ... 186 * (7/16, 1/2) x = 1/2, inexact = +1 187 * (1/2 , 5/8) x = 1/2, inexact = -1 188 * (5/8 , 3/4) x = 3/4, inexact = +1 189 * (3/4 , 7/8) x = 3/4, inexact = -1 190 * (7/8 , 1 ) x = 1 , inexact = +1 191 * (1 , 5/4) x = 1 , inexact = -1 192 * (5/4 , 3/2) x = 3/2, inexact = +1 193 * (3/2 , 7/4) x = 3/2, inexact = -1 194 * ... 195 * In addition, two bins are allocated for [0,xmin) and (xmax,inf). 196 * 197 * This test is applied to the absolute values of the deviates. The sign is 198 * tested by test_nrandom_chisq_cont. In any case, the way the sign is 199 * assigned in mpfr_nrandom is trivial. In addition, the sampling is with 200 * MPFR_RNDN. This is the rounding mode which elicits the most information. 201 * trandom_deviate includes checks on the consistency of the results extracted 202 * from a random_deviate with other rounding modes. */ 203 static double 204 test_nrandom_chisq_disc (long num, mpfr_prec_t wprec, int prec, 205 double xmin, double xmax, int verbose) 206 { 207 mpfr_t x, v, pa, pb, z, t; 208 mpfr_rnd_t rnd; 209 int i, inexact, nu; 210 long *counts; 211 long k, seqmin, seqmax, seq; 212 double Q, chisq; 213 214 rnd = MPFR_RNDN; 215 mpfr_init2 (x, prec); 216 mpfr_init2 (v, prec+1); 217 mpfr_inits2 (wprec, pa, pb, z, t, (mpfr_ptr) 0); 218 219 mpfr_set_d (x, xmin, rnd); 220 xmin = mpfr_get_d (x, rnd); 221 mpfr_set (v, x, rnd); 222 seqmin = sequential (x); 223 mpfr_set_d (x, xmax, rnd); 224 xmax = mpfr_get_d (x, rnd); 225 seqmax = sequential (x); 226 227 /* Two bins for each sequential number (for inexact = +/- 1), plus 1 for u < 228 * umin and 1 for u > umax, minus 1 for degrees of freedom */ 229 nu = 2 * (seqmax - seqmin + 1) + 2 - 1; 230 counts = (long *) tests_allocate ((nu + 1) * sizeof (long)); 231 for (i = 0; i <= nu; i++) 232 counts[i] = 0; 233 234 for (k = 0; k < num; ++k) 235 { 236 inexact = mpfr_nrandom (x, RANDS, rnd); 237 if (mpfr_signbit (x)) 238 { 239 inexact = -inexact; 240 mpfr_setsign (x, x, 0, rnd); 241 } 242 /* Don't call sequential with small args to avoid undefined behavior with 243 * zero and possibility of overflow. */ 244 seq = mpfr_greaterequal_p (x, v) ? sequential (x) : seqmin - 1; 245 ++counts[seq < seqmin ? 0 : 246 seq <= seqmax ? 2 * (seq - seqmin) + 1 + (inexact > 0 ? 0 : 1) : 247 nu]; 248 } 249 250 mpfr_set_zero (v, 1); 251 normal_cumulative (pa, v, rnd); 252 /* Cycle through all the bin boundaries using mpfr_nextabove at precision 253 * prec + 1 starting at mpfr_nextbelow (xmin) */ 254 mpfr_set_d (x, xmin, rnd); 255 mpfr_set (v, x, rnd); 256 mpfr_nextbelow (v); 257 mpfr_nextbelow (v); 258 mpfr_set_zero (t, 1); 259 for (i = 0; i <= nu; ++i) 260 { 261 if (i < nu) 262 mpfr_nextabove (v); 263 else 264 mpfr_set_inf (v, 1); 265 normal_cumulative (pb, v, rnd); 266 mpfr_sub (pa, pb, pa, rnd); 267 268 /* Compute z = counts[i] - num * p; t += z * z / (num * p). 2*num to 269 * account for the fact the p needs to be doubled since we are 270 * considering only the absolute value of the deviates. */ 271 mpfr_mul_ui (pa, pa, 2*num, rnd); 272 mpfr_ui_sub (z, counts[i], pa, rnd); 273 mpfr_sqr (z, z, rnd); 274 mpfr_div (z, z, pa, rnd); 275 mpfr_add (t, t, z, rnd); 276 mpfr_swap (pa, pb); /* i.e., pa = pb */ 277 } 278 279 chisq = mpfr_get_d (t, rnd); 280 chisq_prob (t, nu, t); 281 Q = mpfr_get_d (t, rnd); 282 if (verbose) 283 { 284 printf ("num = %ld, discrete (prec = %d) bins in [%.6f, %.2f], " 285 "nu = %d: chisq = %.2f\n", num, prec, xmin, xmax, nu, chisq); 286 if (Q < 0.05) 287 printf (" WARNING: probability (less than 5%%) = %.2e\n", Q); 288 } 289 290 tests_free (counts, (nu + 1) * sizeof (long)); 291 mpfr_clears (x, v, pa, pb, z, t, (mpfr_ptr) 0); 292 return Q; 293 } 294 295 static void 296 run_chisq (double (*f)(long, mpfr_prec_t, int, double, double, int), 297 long num, mpfr_prec_t prec, int bin, 298 double xmin, double xmax, int verbose) 299 { 300 double Q, Qcum, Qbad, Qthresh; 301 int i; 302 303 Qcum = 1; 304 Qbad = 1.e-9; 305 Qthresh = 0.01; 306 for (i = 0; i < 3; ++i) 307 { 308 Q = (*f)(num, prec, bin, xmin, xmax, verbose); 309 Qcum *= Q; 310 if (Q > Qthresh) 311 return; 312 else if (Q < Qbad) 313 { 314 printf ("Error: mpfr_nrandom chi-squared failure " 315 "(prob = %.2e)\n", Q); 316 exit (1); 317 } 318 num *= 10; 319 Qthresh /= 10; 320 } 321 if (Qcum < Qbad) /* Presumably this is true */ 322 { 323 printf ("Error: mpfr_nrandom combined chi-squared failure " 324 "(prob = %.2e)\n", Qcum); 325 exit (1); 326 } 327 } 328 329 int 330 main (int argc, char *argv[]) 331 { 332 long nbtests; 333 int verbose; 334 335 tests_start_mpfr (); 336 337 verbose = 0; 338 nbtests = 100000; 339 if (argc > 1) 340 { 341 long a = atol (argv[1]); 342 verbose = 1; 343 if (a != 0) 344 nbtests = a; 345 } 346 347 run_chisq (test_nrandom_chisq_cont, nbtests, 64, 60, -4, 4, verbose); 348 run_chisq (test_nrandom_chisq_disc, nbtests, 64, 2, 0.0005, 3, verbose); 349 run_chisq (test_nrandom_chisq_disc, nbtests, 64, 3, 0.002, 4, verbose); 350 run_chisq (test_nrandom_chisq_disc, nbtests, 64, 4, 0.004, 4, verbose); 351 352 tests_end_mpfr (); 353 return 0; 354 } 355