151c586b8Smrg /* stat.c -- statistical tests of random number sequences. */
251c586b8Smrg
351c586b8Smrg /*
451c586b8Smrg Copyright 1999, 2000 Free Software Foundation, Inc.
551c586b8Smrg
6dab47db4Smrg This file is part of the GNU MP Library test suite.
751c586b8Smrg
8dab47db4Smrg The GNU MP Library test suite is free software; you can redistribute it
9dab47db4Smrg and/or modify it under the terms of the GNU General Public License as
10dab47db4Smrg published by the Free Software Foundation; either version 3 of the License,
11dab47db4Smrg or (at your option) any later version.
1251c586b8Smrg
13dab47db4Smrg The GNU MP Library test suite is distributed in the hope that it will be
14dab47db4Smrg useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15dab47db4Smrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
16dab47db4Smrg Public License for more details.
1751c586b8Smrg
18dab47db4Smrg You should have received a copy of the GNU General Public License along with
19*ce543368Smrg the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */
2051c586b8Smrg
2151c586b8Smrg /* Examples:
2251c586b8Smrg
2351c586b8Smrg $ gen 1000 | stat
2451c586b8Smrg Test 1000 real numbers.
2551c586b8Smrg
2651c586b8Smrg $ gen 30000 | stat -2 1000
2751c586b8Smrg Test 1000 real numbers 30 times and then test the 30 results in a
2851c586b8Smrg ``second level''.
2951c586b8Smrg
3051c586b8Smrg $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
3151c586b8Smrg Test 1000 integers 0 <= X <= 2^32-1.
3251c586b8Smrg
3351c586b8Smrg $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
3451c586b8Smrg Test 1000 integers 0 <= X <= 2^34-1.
3551c586b8Smrg
3651c586b8Smrg */
3751c586b8Smrg
3851c586b8Smrg #include <stdio.h>
3951c586b8Smrg #include <stdlib.h>
4051c586b8Smrg #include <unistd.h>
4151c586b8Smrg #include <math.h>
4251c586b8Smrg #include "gmpstat.h"
4351c586b8Smrg
4451c586b8Smrg #if !HAVE_DECL_OPTARG
4551c586b8Smrg extern char *optarg;
4651c586b8Smrg extern int optind, opterr;
4751c586b8Smrg #endif
4851c586b8Smrg
4951c586b8Smrg #define FVECSIZ (100000L)
5051c586b8Smrg
5151c586b8Smrg int g_debug = 0;
5251c586b8Smrg
5351c586b8Smrg static void
print_ks_results(mpf_t f_p,mpf_t f_p_prob,mpf_t f_m,mpf_t f_m_prob,FILE * fp)5451c586b8Smrg print_ks_results (mpf_t f_p, mpf_t f_p_prob,
5551c586b8Smrg mpf_t f_m, mpf_t f_m_prob,
5651c586b8Smrg FILE *fp)
5751c586b8Smrg {
5851c586b8Smrg double p, pp, m, mp;
5951c586b8Smrg
6051c586b8Smrg p = mpf_get_d (f_p);
6151c586b8Smrg m = mpf_get_d (f_m);
6251c586b8Smrg pp = mpf_get_d (f_p_prob);
6351c586b8Smrg mp = mpf_get_d (f_m_prob);
6451c586b8Smrg
6551c586b8Smrg fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0);
6651c586b8Smrg fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0);
6751c586b8Smrg }
6851c586b8Smrg
6951c586b8Smrg static void
print_x2_table(unsigned int v,FILE * fp)7051c586b8Smrg print_x2_table (unsigned int v, FILE *fp)
7151c586b8Smrg {
7251c586b8Smrg double t[7];
7351c586b8Smrg int f;
7451c586b8Smrg
7551c586b8Smrg
7651c586b8Smrg fprintf (fp, "Chi-square table for v=%u\n", v);
7751c586b8Smrg fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
7851c586b8Smrg x2_table (t, v);
7951c586b8Smrg for (f = 0; f < 7; f++)
8051c586b8Smrg fprintf (fp, "%.2f\t", t[f]);
8151c586b8Smrg fputs ("\n", fp);
8251c586b8Smrg }
8351c586b8Smrg
8451c586b8Smrg
8551c586b8Smrg
8651c586b8Smrg /* Pks () -- Distribution function for KS results with a big n (like 1000
8751c586b8Smrg or so): F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */
8851c586b8Smrg /* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2) */
8951c586b8Smrg
9051c586b8Smrg static void
Pks(mpf_t p,mpf_t x)9151c586b8Smrg Pks (mpf_t p, mpf_t x)
9251c586b8Smrg {
9351c586b8Smrg double dt; /* temp double */
9451c586b8Smrg
9551c586b8Smrg mpf_set (p, x);
9651c586b8Smrg mpf_mul (p, p, p); /* p = x^2 */
9751c586b8Smrg mpf_mul_ui (p, p, 2); /* p = 2*x^2 */
9851c586b8Smrg mpf_neg (p, p); /* p = -2*x^2 */
9951c586b8Smrg /* No pow() in gmp. Use doubles. */
10051c586b8Smrg /* FIXME: Use exp()? */
10151c586b8Smrg dt = pow (M_E, mpf_get_d (p));
10251c586b8Smrg mpf_set_d (p, dt);
10351c586b8Smrg mpf_ui_sub (p, 1, p);
10451c586b8Smrg }
10551c586b8Smrg
10651c586b8Smrg /* f_freq() -- frequency test on real numbers 0<=f<1*/
10751c586b8Smrg static void
f_freq(const unsigned l1runs,const unsigned l2runs,mpf_t fvec[],const unsigned long n)10851c586b8Smrg f_freq (const unsigned l1runs, const unsigned l2runs,
10951c586b8Smrg mpf_t fvec[], const unsigned long n)
11051c586b8Smrg {
11151c586b8Smrg unsigned f;
11251c586b8Smrg mpf_t f_p, f_p_prob;
11351c586b8Smrg mpf_t f_m, f_m_prob;
11451c586b8Smrg mpf_t *l1res; /* level 1 result array */
11551c586b8Smrg
11651c586b8Smrg mpf_init (f_p); mpf_init (f_m);
11751c586b8Smrg mpf_init (f_p_prob); mpf_init (f_m_prob);
11851c586b8Smrg
11951c586b8Smrg
12051c586b8Smrg /* Allocate space for 1st level results. */
12151c586b8Smrg l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
12251c586b8Smrg if (NULL == l1res)
12351c586b8Smrg {
12451c586b8Smrg fprintf (stderr, "stat: malloc failure\n");
12551c586b8Smrg exit (1);
12651c586b8Smrg }
12751c586b8Smrg
12851c586b8Smrg printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
12951c586b8Smrg printf ("\tKp\t\tKm\n");
13051c586b8Smrg
13151c586b8Smrg for (f = 0; f < l2runs; f++)
13251c586b8Smrg {
13351c586b8Smrg /* f_printvec (fvec, n); */
13451c586b8Smrg mpf_freqt (f_p, f_m, fvec + f * n, n);
13551c586b8Smrg
13651c586b8Smrg /* what's the probability of getting these results? */
13751c586b8Smrg ks_table (f_p_prob, f_p, n);
13851c586b8Smrg ks_table (f_m_prob, f_m, n);
13951c586b8Smrg
14051c586b8Smrg if (l1runs == 0)
14151c586b8Smrg {
14251c586b8Smrg /*printf ("%u:\t", f + 1);*/
14351c586b8Smrg print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
14451c586b8Smrg }
14551c586b8Smrg else
14651c586b8Smrg {
14751c586b8Smrg /* save result */
14851c586b8Smrg mpf_init_set (l1res[f], f_p);
14951c586b8Smrg mpf_init_set (l1res[f + l2runs], f_m);
15051c586b8Smrg }
15151c586b8Smrg }
15251c586b8Smrg
15351c586b8Smrg /* Now, apply the KS test on the results from the 1st level rounds
15451c586b8Smrg with the distribution
15551c586b8Smrg F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51] */
15651c586b8Smrg
15751c586b8Smrg if (l1runs != 0)
15851c586b8Smrg {
15951c586b8Smrg /*printf ("-------------------------------------\n");*/
16051c586b8Smrg
16151c586b8Smrg /* The Kp's. */
16251c586b8Smrg ks (f_p, f_m, l1res, Pks, l2runs);
16351c586b8Smrg ks_table (f_p_prob, f_p, l2runs);
16451c586b8Smrg ks_table (f_m_prob, f_m, l2runs);
16551c586b8Smrg printf ("Kp:\t");
16651c586b8Smrg print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
16751c586b8Smrg
16851c586b8Smrg /* The Km's. */
16951c586b8Smrg ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
17051c586b8Smrg ks_table (f_p_prob, f_p, l2runs);
17151c586b8Smrg ks_table (f_m_prob, f_m, l2runs);
17251c586b8Smrg printf ("Km:\t");
17351c586b8Smrg print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
17451c586b8Smrg }
17551c586b8Smrg
17651c586b8Smrg mpf_clear (f_p); mpf_clear (f_m);
17751c586b8Smrg mpf_clear (f_p_prob); mpf_clear (f_m_prob);
17851c586b8Smrg free (l1res);
17951c586b8Smrg }
18051c586b8Smrg
18151c586b8Smrg /* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers
18251c586b8Smrg 0<=z<=MAX */
18351c586b8Smrg static void
z_freq(const unsigned l1runs,const unsigned l2runs,mpz_t zvec[],const unsigned long n,unsigned int max)18451c586b8Smrg z_freq (const unsigned l1runs,
18551c586b8Smrg const unsigned l2runs,
18651c586b8Smrg mpz_t zvec[],
18751c586b8Smrg const unsigned long n,
18851c586b8Smrg unsigned int max)
18951c586b8Smrg {
19051c586b8Smrg mpf_t V; /* result */
19151c586b8Smrg double d_V; /* result as a double */
19251c586b8Smrg
19351c586b8Smrg mpf_init (V);
19451c586b8Smrg
19551c586b8Smrg
19651c586b8Smrg printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
19751c586b8Smrg print_x2_table (max, stdout);
19851c586b8Smrg
19951c586b8Smrg mpz_freqt (V, zvec, max, n);
20051c586b8Smrg
20151c586b8Smrg d_V = mpf_get_d (V);
20251c586b8Smrg printf ("V = %.2f (n = %lu)\n", d_V, n);
20351c586b8Smrg
20451c586b8Smrg mpf_clear (V);
20551c586b8Smrg }
20651c586b8Smrg
20751c586b8Smrg unsigned int stat_debug = 0;
20851c586b8Smrg
20951c586b8Smrg int
main(argc,argv)21051c586b8Smrg main (argc, argv)
21151c586b8Smrg int argc;
21251c586b8Smrg char *argv[];
21351c586b8Smrg {
21451c586b8Smrg const char usage[] =
21551c586b8Smrg "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
21651c586b8Smrg " file filename\n" \
21751c586b8Smrg " -2 runs perform 2-level test with RUNS runs on 1st level\n" \
21851c586b8Smrg " -d increase debugging level\n" \
21951c586b8Smrg " -i max input is integers 0 <= Z <= MAX\n" \
22051c586b8Smrg " -r max input is real numbers 0 <= R < 1 and use MAX as\n" \
22151c586b8Smrg " maximum value when converting real numbers to integers\n" \
22251c586b8Smrg "";
22351c586b8Smrg
22451c586b8Smrg mpf_t fvec[FVECSIZ];
22551c586b8Smrg mpz_t zvec[FVECSIZ];
22651c586b8Smrg unsigned long int f, n, vecentries;
22751c586b8Smrg char *filen;
22851c586b8Smrg FILE *fp;
22951c586b8Smrg int c;
23051c586b8Smrg int omitoutput = 0;
23151c586b8Smrg int realinput = -1; /* 1: input is real numbers 0<=R<1;
23251c586b8Smrg 0: input is integers 0 <= Z <= MAX. */
23351c586b8Smrg long l1runs = 0, /* 1st level runs */
23451c586b8Smrg l2runs = 1; /* 2nd level runs */
23551c586b8Smrg mpf_t f_temp;
23651c586b8Smrg mpz_t z_imax; /* max value when converting between
23751c586b8Smrg real number and integer. */
23851c586b8Smrg mpf_t f_imax_plus1; /* f_imax + 1 stored in an mpf_t for
23951c586b8Smrg convenience */
24051c586b8Smrg mpf_t f_imax_minus1; /* f_imax - 1 stored in an mpf_t for
24151c586b8Smrg convenience */
24251c586b8Smrg
24351c586b8Smrg
24451c586b8Smrg mpf_init (f_temp);
24551c586b8Smrg mpz_init_set_ui (z_imax, 0x7fffffff);
24651c586b8Smrg mpf_init (f_imax_plus1);
24751c586b8Smrg mpf_init (f_imax_minus1);
24851c586b8Smrg
24951c586b8Smrg while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
25051c586b8Smrg switch (c)
25151c586b8Smrg {
25251c586b8Smrg case '2':
25351c586b8Smrg l1runs = atol (optarg);
25451c586b8Smrg l2runs = -1; /* set later on */
25551c586b8Smrg break;
25651c586b8Smrg case 'd': /* increase debug level */
25751c586b8Smrg stat_debug++;
25851c586b8Smrg break;
25951c586b8Smrg case 'i':
26051c586b8Smrg if (1 == realinput)
26151c586b8Smrg {
26251c586b8Smrg fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
26351c586b8Smrg exit (1);
26451c586b8Smrg }
26551c586b8Smrg if (mpz_set_str (z_imax, optarg, 0))
26651c586b8Smrg {
26751c586b8Smrg fprintf (stderr, "stat: bad max value %s\n", optarg);
26851c586b8Smrg exit (1);
26951c586b8Smrg }
27051c586b8Smrg realinput = 0;
27151c586b8Smrg break;
27251c586b8Smrg case 'r':
27351c586b8Smrg if (0 == realinput)
27451c586b8Smrg {
27551c586b8Smrg fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
27651c586b8Smrg exit (1);
27751c586b8Smrg }
27851c586b8Smrg if (mpz_set_str (z_imax, optarg, 0))
27951c586b8Smrg {
28051c586b8Smrg fprintf (stderr, "stat: bad max value %s\n", optarg);
28151c586b8Smrg exit (1);
28251c586b8Smrg }
28351c586b8Smrg realinput = 1;
28451c586b8Smrg break;
28551c586b8Smrg case 'o':
28651c586b8Smrg omitoutput = atoi (optarg);
28751c586b8Smrg break;
28851c586b8Smrg case '?':
28951c586b8Smrg default:
29051c586b8Smrg fputs (usage, stderr);
29151c586b8Smrg exit (1);
29251c586b8Smrg }
29351c586b8Smrg argc -= optind;
29451c586b8Smrg argv += optind;
29551c586b8Smrg
29651c586b8Smrg if (argc < 1)
29751c586b8Smrg fp = stdin;
29851c586b8Smrg else
29951c586b8Smrg filen = argv[0];
30051c586b8Smrg
30151c586b8Smrg if (fp != stdin)
30251c586b8Smrg if (NULL == (fp = fopen (filen, "r")))
30351c586b8Smrg {
30451c586b8Smrg perror (filen);
30551c586b8Smrg exit (1);
30651c586b8Smrg }
30751c586b8Smrg
30851c586b8Smrg if (-1 == realinput)
30951c586b8Smrg realinput = 1; /* default is real numbers */
31051c586b8Smrg
31151c586b8Smrg /* read file and fill appropriate vec */
31251c586b8Smrg if (1 == realinput) /* real input */
31351c586b8Smrg {
31451c586b8Smrg for (f = 0; f < FVECSIZ ; f++)
31551c586b8Smrg {
31651c586b8Smrg mpf_init (fvec[f]);
31751c586b8Smrg if (!mpf_inp_str (fvec[f], fp, 10))
31851c586b8Smrg break;
31951c586b8Smrg }
32051c586b8Smrg }
32151c586b8Smrg else /* integer input */
32251c586b8Smrg {
32351c586b8Smrg for (f = 0; f < FVECSIZ ; f++)
32451c586b8Smrg {
32551c586b8Smrg mpz_init (zvec[f]);
32651c586b8Smrg if (!mpz_inp_str (zvec[f], fp, 10))
32751c586b8Smrg break;
32851c586b8Smrg }
32951c586b8Smrg }
33051c586b8Smrg vecentries = n = f; /* number of entries read */
33151c586b8Smrg fclose (fp);
33251c586b8Smrg
33351c586b8Smrg if (FVECSIZ == f)
33451c586b8Smrg fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
33551c586b8Smrg "of only %ld entries. sorry.\n", FVECSIZ);
33651c586b8Smrg
33751c586b8Smrg printf ("Got %lu numbers.\n", n);
33851c586b8Smrg
33951c586b8Smrg /* convert and fill the other vec */
34051c586b8Smrg /* since fvec[] contains 0<=f<1 and we want ivec[] to contain
34151c586b8Smrg 0<=z<=imax and we are truncating all fractions when
34251c586b8Smrg converting float to int, we have to add 1 to imax.*/
34351c586b8Smrg mpf_set_z (f_imax_plus1, z_imax);
34451c586b8Smrg mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
34551c586b8Smrg if (1 == realinput) /* fill zvec[] */
34651c586b8Smrg {
34751c586b8Smrg for (f = 0; f < n; f++)
34851c586b8Smrg {
34951c586b8Smrg mpf_mul (f_temp, fvec[f], f_imax_plus1);
35051c586b8Smrg mpz_init (zvec[f]);
35151c586b8Smrg mpz_set_f (zvec[f], f_temp); /* truncating fraction */
35251c586b8Smrg if (stat_debug > 1)
35351c586b8Smrg {
35451c586b8Smrg mpz_out_str (stderr, 10, zvec[f]);
35551c586b8Smrg fputs ("\n", stderr);
35651c586b8Smrg }
35751c586b8Smrg }
35851c586b8Smrg }
35951c586b8Smrg else /* integer input; fill fvec[] */
36051c586b8Smrg {
36151c586b8Smrg /* mpf_set_z (f_imax_minus1, z_imax);
36251c586b8Smrg mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
36351c586b8Smrg for (f = 0; f < n; f++)
36451c586b8Smrg {
36551c586b8Smrg mpf_init (fvec[f]);
36651c586b8Smrg mpf_set_z (fvec[f], zvec[f]);
36751c586b8Smrg mpf_div (fvec[f], fvec[f], f_imax_plus1);
36851c586b8Smrg if (stat_debug > 1)
36951c586b8Smrg {
37051c586b8Smrg mpf_out_str (stderr, 10, 0, fvec[f]);
37151c586b8Smrg fputs ("\n", stderr);
37251c586b8Smrg }
37351c586b8Smrg }
37451c586b8Smrg }
37551c586b8Smrg
37651c586b8Smrg /* 2 levels? */
37751c586b8Smrg if (1 != l2runs)
37851c586b8Smrg {
37951c586b8Smrg l2runs = n / l1runs;
38051c586b8Smrg printf ("Doing %ld second level rounds "\
38151c586b8Smrg "with %ld entries in each round", l2runs, l1runs);
38251c586b8Smrg if (n % l1runs)
38351c586b8Smrg printf (" (discarding %ld entr%s)", n % l1runs,
38451c586b8Smrg n % l1runs == 1 ? "y" : "ies");
38551c586b8Smrg puts (".");
38651c586b8Smrg n = l1runs;
38751c586b8Smrg }
38851c586b8Smrg
38951c586b8Smrg #ifndef DONT_FFREQ
39051c586b8Smrg f_freq (l1runs, l2runs, fvec, n);
39151c586b8Smrg #endif
39251c586b8Smrg #ifdef DO_ZFREQ
39351c586b8Smrg z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
39451c586b8Smrg #endif
39551c586b8Smrg
39651c586b8Smrg mpf_clear (f_temp); mpz_clear (z_imax);
39751c586b8Smrg mpf_clear (f_imax_plus1);
39851c586b8Smrg mpf_clear (f_imax_minus1);
39951c586b8Smrg for (f = 0; f < vecentries; f++)
40051c586b8Smrg {
40151c586b8Smrg mpf_clear (fvec[f]);
40251c586b8Smrg mpz_clear (zvec[f]);
40351c586b8Smrg }
40451c586b8Smrg
40551c586b8Smrg return 0;
40651c586b8Smrg }
407