xref: /netbsd-src/external/lgpl3/gmp/dist/tests/rand/stat.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* stat.c -- statistical tests of random number sequences. */
2 
3 /*
4 Copyright 1999, 2000 Free Software Foundation, Inc.
5 
6 This file is part of the GNU MP Library test suite.
7 
8 The GNU MP Library test suite is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 3 of the License,
11 or (at your option) any later version.
12 
13 The GNU MP Library test suite is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
16 Public License for more details.
17 
18 You should have received a copy of the GNU General Public License along with
19 the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
20 
21 /* Examples:
22 
23   $ gen 1000 | stat
24 Test 1000 real numbers.
25 
26   $ gen 30000 | stat -2 1000
27 Test 1000 real numbers 30 times and then test the 30 results in a
28 ``second level''.
29 
30   $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
31 Test 1000 integers 0 <= X <= 2^32-1.
32 
33   $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
34 Test 1000 integers 0 <= X <= 2^34-1.
35 
36 */
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <math.h>
42 #include "gmpstat.h"
43 
44 #if !HAVE_DECL_OPTARG
45 extern char *optarg;
46 extern int optind, opterr;
47 #endif
48 
49 #define FVECSIZ (100000L)
50 
51 int g_debug = 0;
52 
53 static void
print_ks_results(mpf_t f_p,mpf_t f_p_prob,mpf_t f_m,mpf_t f_m_prob,FILE * fp)54 print_ks_results (mpf_t f_p, mpf_t f_p_prob,
55 		  mpf_t f_m, mpf_t f_m_prob,
56 		  FILE *fp)
57 {
58   double p, pp, m, mp;
59 
60   p = mpf_get_d (f_p);
61   m = mpf_get_d (f_m);
62   pp = mpf_get_d (f_p_prob);
63   mp = mpf_get_d (f_m_prob);
64 
65   fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0);
66   fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0);
67 }
68 
69 static void
print_x2_table(unsigned int v,FILE * fp)70 print_x2_table (unsigned int v, FILE *fp)
71 {
72   double t[7];
73   int f;
74 
75 
76   fprintf (fp, "Chi-square table for v=%u\n", v);
77   fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
78   x2_table (t, v);
79   for (f = 0; f < 7; f++)
80     fprintf (fp, "%.2f\t", t[f]);
81   fputs ("\n", fp);
82 }
83 
84 
85 
86 /* Pks () -- Distribution function for KS results with a big n (like 1000
87    or so):  F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */
88 /* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2)  */
89 
90 static void
Pks(mpf_t p,mpf_t x)91 Pks (mpf_t p, mpf_t x)
92 {
93   double dt;			/* temp double */
94 
95   mpf_set (p, x);
96   mpf_mul (p, p, p);		/* p = x^2 */
97   mpf_mul_ui (p, p, 2);		/* p = 2*x^2 */
98   mpf_neg (p, p);		/* p = -2*x^2 */
99   /* No pow() in gmp.  Use doubles. */
100   /* FIXME: Use exp()? */
101   dt = pow (M_E, mpf_get_d (p));
102   mpf_set_d (p, dt);
103   mpf_ui_sub (p, 1, p);
104 }
105 
106 /* f_freq() -- frequency test on real numbers 0<=f<1*/
107 static void
f_freq(const unsigned l1runs,const unsigned l2runs,mpf_t fvec[],const unsigned long n)108 f_freq (const unsigned l1runs, const unsigned l2runs,
109 	mpf_t fvec[], const unsigned long n)
110 {
111   unsigned f;
112   mpf_t f_p, f_p_prob;
113   mpf_t f_m, f_m_prob;
114   mpf_t *l1res;			/* level 1 result array */
115 
116   mpf_init (f_p);  mpf_init (f_m);
117   mpf_init (f_p_prob);  mpf_init (f_m_prob);
118 
119 
120   /* Allocate space for 1st level results. */
121   l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
122   if (NULL == l1res)
123     {
124       fprintf (stderr, "stat: malloc failure\n");
125       exit (1);
126     }
127 
128   printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
129   printf ("\tKp\t\tKm\n");
130 
131   for (f = 0; f < l2runs; f++)
132     {
133       /*  f_printvec (fvec, n); */
134       mpf_freqt (f_p, f_m, fvec + f * n, n);
135 
136       /* what's the probability of getting these results? */
137       ks_table (f_p_prob, f_p, n);
138       ks_table (f_m_prob, f_m, n);
139 
140       if (l1runs == 0)
141 	{
142 	  /*printf ("%u:\t", f + 1);*/
143 	  print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
144 	}
145       else
146 	{
147 	  /* save result */
148 	  mpf_init_set (l1res[f], f_p);
149 	  mpf_init_set (l1res[f + l2runs], f_m);
150 	}
151     }
152 
153   /* Now, apply the KS test on the results from the 1st level rounds
154      with the distribution
155      F(x) = 1 - pow(e, -2*x^2)	[Knuth, vol 2, p.51] */
156 
157   if (l1runs != 0)
158     {
159       /*printf ("-------------------------------------\n");*/
160 
161       /* The Kp's. */
162       ks (f_p, f_m, l1res, Pks, l2runs);
163       ks_table (f_p_prob, f_p, l2runs);
164       ks_table (f_m_prob, f_m, l2runs);
165       printf ("Kp:\t");
166       print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
167 
168       /* The Km's. */
169       ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
170       ks_table (f_p_prob, f_p, l2runs);
171       ks_table (f_m_prob, f_m, l2runs);
172       printf ("Km:\t");
173       print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
174     }
175 
176   mpf_clear (f_p);  mpf_clear (f_m);
177   mpf_clear (f_p_prob);  mpf_clear (f_m_prob);
178   free (l1res);
179 }
180 
181 /* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers
182    0<=z<=MAX */
183 static void
z_freq(const unsigned l1runs,const unsigned l2runs,mpz_t zvec[],const unsigned long n,unsigned int max)184 z_freq (const unsigned l1runs,
185 	const unsigned l2runs,
186 	mpz_t zvec[],
187 	const unsigned long n,
188 	unsigned int max)
189 {
190   mpf_t V;			/* result */
191   double d_V;			/* result as a double */
192 
193   mpf_init (V);
194 
195 
196   printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
197   print_x2_table (max, stdout);
198 
199   mpz_freqt (V, zvec, max, n);
200 
201   d_V = mpf_get_d (V);
202   printf ("V = %.2f (n = %lu)\n", d_V, n);
203 
204   mpf_clear (V);
205 }
206 
207 unsigned int stat_debug = 0;
208 
209 int
main(argc,argv)210 main (argc, argv)
211      int argc;
212      char *argv[];
213 {
214   const char usage[] =
215     "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
216     "       file     filename\n" \
217     "       -2 runs  perform 2-level test with RUNS runs on 1st level\n" \
218     "       -d       increase debugging level\n" \
219     "       -i max   input is integers 0 <= Z <= MAX\n" \
220     "       -r max   input is real numbers 0 <= R < 1 and use MAX as\n" \
221     "                maximum value when converting real numbers to integers\n" \
222     "";
223 
224   mpf_t fvec[FVECSIZ];
225   mpz_t zvec[FVECSIZ];
226   unsigned long int f, n, vecentries;
227   char *filen;
228   FILE *fp;
229   int c;
230   int omitoutput = 0;
231   int realinput = -1;		/* 1: input is real numbers 0<=R<1;
232 				   0: input is integers 0 <= Z <= MAX. */
233   long l1runs = 0,		/* 1st level runs */
234     l2runs = 1;			/* 2nd level runs */
235   mpf_t f_temp;
236   mpz_t z_imax;			/* max value when converting between
237 				   real number and integer. */
238   mpf_t f_imax_plus1;		/* f_imax + 1 stored in an mpf_t for
239 				   convenience */
240   mpf_t f_imax_minus1;		/* f_imax - 1 stored in an mpf_t for
241 				   convenience */
242 
243 
244   mpf_init (f_temp);
245   mpz_init_set_ui (z_imax, 0x7fffffff);
246   mpf_init (f_imax_plus1);
247   mpf_init (f_imax_minus1);
248 
249   while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
250     switch (c)
251       {
252       case '2':
253 	l1runs = atol (optarg);
254 	l2runs = -1;		/* set later on */
255 	break;
256       case 'd':			/* increase debug level */
257 	stat_debug++;
258 	break;
259       case 'i':
260 	if (1 == realinput)
261 	  {
262 	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
263 	    exit (1);
264 	  }
265 	if (mpz_set_str (z_imax, optarg, 0))
266 	  {
267 	    fprintf (stderr, "stat: bad max value %s\n", optarg);
268 	    exit (1);
269 	  }
270 	realinput = 0;
271 	break;
272       case 'r':
273 	if (0 == realinput)
274 	  {
275 	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
276 	    exit (1);
277 	  }
278 	if (mpz_set_str (z_imax, optarg, 0))
279 	  {
280 	    fprintf (stderr, "stat: bad max value %s\n", optarg);
281 	    exit (1);
282 	  }
283 	realinput = 1;
284 	break;
285       case 'o':
286 	omitoutput = atoi (optarg);
287 	break;
288       case '?':
289       default:
290 	fputs (usage, stderr);
291 	exit (1);
292       }
293   argc -= optind;
294   argv += optind;
295 
296   if (argc < 1)
297     fp = stdin;
298   else
299     filen = argv[0];
300 
301   if (fp != stdin)
302     if (NULL == (fp = fopen (filen, "r")))
303       {
304 	perror (filen);
305 	exit (1);
306       }
307 
308   if (-1 == realinput)
309     realinput = 1;		/* default is real numbers */
310 
311   /* read file and fill appropriate vec */
312   if (1 == realinput)		/* real input */
313     {
314       for (f = 0; f < FVECSIZ ; f++)
315 	{
316 	  mpf_init (fvec[f]);
317 	  if (!mpf_inp_str (fvec[f], fp, 10))
318 	    break;
319 	}
320     }
321   else				/* integer input */
322     {
323       for (f = 0; f < FVECSIZ ; f++)
324 	{
325 	  mpz_init (zvec[f]);
326 	  if (!mpz_inp_str (zvec[f], fp, 10))
327 	    break;
328 	}
329     }
330   vecentries = n = f;		/* number of entries read */
331   fclose (fp);
332 
333   if (FVECSIZ == f)
334     fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
335 	     "of only %ld entries.  sorry.\n", FVECSIZ);
336 
337   printf ("Got %lu numbers.\n", n);
338 
339   /* convert and fill the other vec */
340   /* since fvec[] contains 0<=f<1 and we want ivec[] to contain
341      0<=z<=imax and we are truncating all fractions when
342      converting float to int, we have to add 1 to imax.*/
343   mpf_set_z (f_imax_plus1, z_imax);
344   mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
345   if (1 == realinput)		/* fill zvec[] */
346     {
347       for (f = 0; f < n; f++)
348 	{
349 	  mpf_mul (f_temp, fvec[f], f_imax_plus1);
350 	  mpz_init (zvec[f]);
351 	  mpz_set_f (zvec[f], f_temp); /* truncating fraction */
352 	  if (stat_debug > 1)
353 	    {
354 	      mpz_out_str (stderr, 10, zvec[f]);
355 	      fputs ("\n", stderr);
356 	    }
357 	}
358     }
359   else				/* integer input; fill fvec[] */
360     {
361       /*    mpf_set_z (f_imax_minus1, z_imax);
362 	    mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
363       for (f = 0; f < n; f++)
364 	{
365 	  mpf_init (fvec[f]);
366 	  mpf_set_z (fvec[f], zvec[f]);
367 	  mpf_div (fvec[f], fvec[f], f_imax_plus1);
368 	  if (stat_debug > 1)
369 	    {
370 	      mpf_out_str (stderr, 10, 0, fvec[f]);
371 	      fputs ("\n", stderr);
372 	    }
373 	}
374     }
375 
376   /* 2 levels? */
377   if (1 != l2runs)
378     {
379       l2runs = n / l1runs;
380       printf ("Doing %ld second level rounds "\
381 	      "with %ld entries in each round", l2runs, l1runs);
382       if (n % l1runs)
383 	printf (" (discarding %ld entr%s)", n % l1runs,
384 		n % l1runs == 1 ? "y" : "ies");
385       puts (".");
386       n = l1runs;
387     }
388 
389 #ifndef DONT_FFREQ
390   f_freq (l1runs, l2runs, fvec, n);
391 #endif
392 #ifdef DO_ZFREQ
393   z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
394 #endif
395 
396   mpf_clear (f_temp); mpz_clear (z_imax);
397   mpf_clear (f_imax_plus1);
398   mpf_clear (f_imax_minus1);
399   for (f = 0; f < vecentries; f++)
400     {
401       mpf_clear (fvec[f]);
402       mpz_clear (zvec[f]);
403     }
404 
405   return 0;
406 }
407