xref: /netbsd-src/external/lgpl3/gmp/dist/tests/rand/stat.c (revision 796c32c94f6e154afc9de0f63da35c91bb739b45)
1 /* stat.c -- statistical tests of random number sequences. */
2 
3 /*
4 Copyright 1999, 2000 Free Software Foundation, Inc.
5 
6 This file is part of the GNU MP Library test suite.
7 
8 The GNU MP Library test suite is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 3 of the License,
11 or (at your option) any later version.
12 
13 The GNU MP Library test suite is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
16 Public License for more details.
17 
18 You should have received a copy of the GNU General Public License along with
19 the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
20 
21 /* Examples:
22 
23   $ gen 1000 | stat
24 Test 1000 real numbers.
25 
26   $ gen 30000 | stat -2 1000
27 Test 1000 real numbers 30 times and then test the 30 results in a
28 ``second level''.
29 
30   $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
31 Test 1000 integers 0 <= X <= 2^32-1.
32 
33   $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
34 Test 1000 integers 0 <= X <= 2^34-1.
35 
36 */
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <math.h>
42 #include "gmp.h"
43 #include "gmpstat.h"
44 
45 #if !HAVE_DECL_OPTARG
46 extern char *optarg;
47 extern int optind, opterr;
48 #endif
49 
50 #define FVECSIZ (100000L)
51 
52 int g_debug = 0;
53 
54 static void
55 print_ks_results (mpf_t f_p, mpf_t f_p_prob,
56 		  mpf_t f_m, mpf_t f_m_prob,
57 		  FILE *fp)
58 {
59   double p, pp, m, mp;
60 
61   p = mpf_get_d (f_p);
62   m = mpf_get_d (f_m);
63   pp = mpf_get_d (f_p_prob);
64   mp = mpf_get_d (f_m_prob);
65 
66   fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0);
67   fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0);
68 }
69 
70 static void
71 print_x2_table (unsigned int v, FILE *fp)
72 {
73   double t[7];
74   int f;
75 
76 
77   fprintf (fp, "Chi-square table for v=%u\n", v);
78   fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
79   x2_table (t, v);
80   for (f = 0; f < 7; f++)
81     fprintf (fp, "%.2f\t", t[f]);
82   fputs ("\n", fp);
83 }
84 
85 
86 
87 /* Pks () -- Distribution function for KS results with a big n (like 1000
88    or so):  F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */
89 /* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2)  */
90 
91 static void
92 Pks (mpf_t p, mpf_t x)
93 {
94   double dt;			/* temp double */
95 
96   mpf_set (p, x);
97   mpf_mul (p, p, p);		/* p = x^2 */
98   mpf_mul_ui (p, p, 2);		/* p = 2*x^2 */
99   mpf_neg (p, p);		/* p = -2*x^2 */
100   /* No pow() in gmp.  Use doubles. */
101   /* FIXME: Use exp()? */
102   dt = pow (M_E, mpf_get_d (p));
103   mpf_set_d (p, dt);
104   mpf_ui_sub (p, 1, p);
105 }
106 
107 /* f_freq() -- frequency test on real numbers 0<=f<1*/
108 static void
109 f_freq (const unsigned l1runs, const unsigned l2runs,
110 	mpf_t fvec[], const unsigned long n)
111 {
112   unsigned f;
113   mpf_t f_p, f_p_prob;
114   mpf_t f_m, f_m_prob;
115   mpf_t *l1res;			/* level 1 result array */
116 
117   mpf_init (f_p);  mpf_init (f_m);
118   mpf_init (f_p_prob);  mpf_init (f_m_prob);
119 
120 
121   /* Allocate space for 1st level results. */
122   l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
123   if (NULL == l1res)
124     {
125       fprintf (stderr, "stat: malloc failure\n");
126       exit (1);
127     }
128 
129   printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
130   printf ("\tKp\t\tKm\n");
131 
132   for (f = 0; f < l2runs; f++)
133     {
134       /*  f_printvec (fvec, n); */
135       mpf_freqt (f_p, f_m, fvec + f * n, n);
136 
137       /* what's the probability of getting these results? */
138       ks_table (f_p_prob, f_p, n);
139       ks_table (f_m_prob, f_m, n);
140 
141       if (l1runs == 0)
142 	{
143 	  /*printf ("%u:\t", f + 1);*/
144 	  print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
145 	}
146       else
147 	{
148 	  /* save result */
149 	  mpf_init_set (l1res[f], f_p);
150 	  mpf_init_set (l1res[f + l2runs], f_m);
151 	}
152     }
153 
154   /* Now, apply the KS test on the results from the 1st level rounds
155      with the distribution
156      F(x) = 1 - pow(e, -2*x^2)	[Knuth, vol 2, p.51] */
157 
158   if (l1runs != 0)
159     {
160       /*printf ("-------------------------------------\n");*/
161 
162       /* The Kp's. */
163       ks (f_p, f_m, l1res, Pks, l2runs);
164       ks_table (f_p_prob, f_p, l2runs);
165       ks_table (f_m_prob, f_m, l2runs);
166       printf ("Kp:\t");
167       print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
168 
169       /* The Km's. */
170       ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
171       ks_table (f_p_prob, f_p, l2runs);
172       ks_table (f_m_prob, f_m, l2runs);
173       printf ("Km:\t");
174       print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
175     }
176 
177   mpf_clear (f_p);  mpf_clear (f_m);
178   mpf_clear (f_p_prob);  mpf_clear (f_m_prob);
179   free (l1res);
180 }
181 
182 /* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers
183    0<=z<=MAX */
184 static void
185 z_freq (const unsigned l1runs,
186 	const unsigned l2runs,
187 	mpz_t zvec[],
188 	const unsigned long n,
189 	unsigned int max)
190 {
191   mpf_t V;			/* result */
192   double d_V;			/* result as a double */
193 
194   mpf_init (V);
195 
196 
197   printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
198   print_x2_table (max, stdout);
199 
200   mpz_freqt (V, zvec, max, n);
201 
202   d_V = mpf_get_d (V);
203   printf ("V = %.2f (n = %lu)\n", d_V, n);
204 
205   mpf_clear (V);
206 }
207 
208 unsigned int stat_debug = 0;
209 
210 int
211 main (argc, argv)
212      int argc;
213      char *argv[];
214 {
215   const char usage[] =
216     "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
217     "       file     filename\n" \
218     "       -2 runs  perform 2-level test with RUNS runs on 1st level\n" \
219     "       -d       increase debugging level\n" \
220     "       -i max   input is integers 0 <= Z <= MAX\n" \
221     "       -r max   input is real numbers 0 <= R < 1 and use MAX as\n" \
222     "                maximum value when converting real numbers to integers\n" \
223     "";
224 
225   mpf_t fvec[FVECSIZ];
226   mpz_t zvec[FVECSIZ];
227   unsigned long int f, n, vecentries;
228   char *filen;
229   FILE *fp;
230   int c;
231   int omitoutput = 0;
232   int realinput = -1;		/* 1: input is real numbers 0<=R<1;
233 				   0: input is integers 0 <= Z <= MAX. */
234   long l1runs = 0,		/* 1st level runs */
235     l2runs = 1;			/* 2nd level runs */
236   mpf_t f_temp;
237   mpz_t z_imax;			/* max value when converting between
238 				   real number and integer. */
239   mpf_t f_imax_plus1;		/* f_imax + 1 stored in an mpf_t for
240 				   convenience */
241   mpf_t f_imax_minus1;		/* f_imax - 1 stored in an mpf_t for
242 				   convenience */
243 
244 
245   mpf_init (f_temp);
246   mpz_init_set_ui (z_imax, 0x7fffffff);
247   mpf_init (f_imax_plus1);
248   mpf_init (f_imax_minus1);
249 
250   while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
251     switch (c)
252       {
253       case '2':
254 	l1runs = atol (optarg);
255 	l2runs = -1;		/* set later on */
256 	break;
257       case 'd':			/* increase debug level */
258 	stat_debug++;
259 	break;
260       case 'i':
261 	if (1 == realinput)
262 	  {
263 	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
264 	    exit (1);
265 	  }
266 	if (mpz_set_str (z_imax, optarg, 0))
267 	  {
268 	    fprintf (stderr, "stat: bad max value %s\n", optarg);
269 	    exit (1);
270 	  }
271 	realinput = 0;
272 	break;
273       case 'r':
274 	if (0 == realinput)
275 	  {
276 	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
277 	    exit (1);
278 	  }
279 	if (mpz_set_str (z_imax, optarg, 0))
280 	  {
281 	    fprintf (stderr, "stat: bad max value %s\n", optarg);
282 	    exit (1);
283 	  }
284 	realinput = 1;
285 	break;
286       case 'o':
287 	omitoutput = atoi (optarg);
288 	break;
289       case '?':
290       default:
291 	fputs (usage, stderr);
292 	exit (1);
293       }
294   argc -= optind;
295   argv += optind;
296 
297   if (argc < 1)
298     fp = stdin;
299   else
300     filen = argv[0];
301 
302   if (fp != stdin)
303     if (NULL == (fp = fopen (filen, "r")))
304       {
305 	perror (filen);
306 	exit (1);
307       }
308 
309   if (-1 == realinput)
310     realinput = 1;		/* default is real numbers */
311 
312   /* read file and fill appropriate vec */
313   if (1 == realinput)		/* real input */
314     {
315       for (f = 0; f < FVECSIZ ; f++)
316 	{
317 	  mpf_init (fvec[f]);
318 	  if (!mpf_inp_str (fvec[f], fp, 10))
319 	    break;
320 	}
321     }
322   else				/* integer input */
323     {
324       for (f = 0; f < FVECSIZ ; f++)
325 	{
326 	  mpz_init (zvec[f]);
327 	  if (!mpz_inp_str (zvec[f], fp, 10))
328 	    break;
329 	}
330     }
331   vecentries = n = f;		/* number of entries read */
332   fclose (fp);
333 
334   if (FVECSIZ == f)
335     fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
336 	     "of only %ld entries.  sorry.\n", FVECSIZ);
337 
338   printf ("Got %lu numbers.\n", n);
339 
340   /* convert and fill the other vec */
341   /* since fvec[] contains 0<=f<1 and we want ivec[] to contain
342      0<=z<=imax and we are truncating all fractions when
343      converting float to int, we have to add 1 to imax.*/
344   mpf_set_z (f_imax_plus1, z_imax);
345   mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
346   if (1 == realinput)		/* fill zvec[] */
347     {
348       for (f = 0; f < n; f++)
349 	{
350 	  mpf_mul (f_temp, fvec[f], f_imax_plus1);
351 	  mpz_init (zvec[f]);
352 	  mpz_set_f (zvec[f], f_temp); /* truncating fraction */
353 	  if (stat_debug > 1)
354 	    {
355 	      mpz_out_str (stderr, 10, zvec[f]);
356 	      fputs ("\n", stderr);
357 	    }
358 	}
359     }
360   else				/* integer input; fill fvec[] */
361     {
362       /*    mpf_set_z (f_imax_minus1, z_imax);
363 	    mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
364       for (f = 0; f < n; f++)
365 	{
366 	  mpf_init (fvec[f]);
367 	  mpf_set_z (fvec[f], zvec[f]);
368 	  mpf_div (fvec[f], fvec[f], f_imax_plus1);
369 	  if (stat_debug > 1)
370 	    {
371 	      mpf_out_str (stderr, 10, 0, fvec[f]);
372 	      fputs ("\n", stderr);
373 	    }
374 	}
375     }
376 
377   /* 2 levels? */
378   if (1 != l2runs)
379     {
380       l2runs = n / l1runs;
381       printf ("Doing %ld second level rounds "\
382 	      "with %ld entries in each round", l2runs, l1runs);
383       if (n % l1runs)
384 	printf (" (discarding %ld entr%s)", n % l1runs,
385 		n % l1runs == 1 ? "y" : "ies");
386       puts (".");
387       n = l1runs;
388     }
389 
390 #ifndef DONT_FFREQ
391   f_freq (l1runs, l2runs, fvec, n);
392 #endif
393 #ifdef DO_ZFREQ
394   z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
395 #endif
396 
397   mpf_clear (f_temp); mpz_clear (z_imax);
398   mpf_clear (f_imax_plus1);
399   mpf_clear (f_imax_minus1);
400   for (f = 0; f < vecentries; f++)
401     {
402       mpf_clear (fvec[f]);
403       mpz_clear (zvec[f]);
404     }
405 
406   return 0;
407 }
408