1 /* stat.c -- statistical tests of random number sequences. */
2
3 /*
4 Copyright 1999, 2000 Free Software Foundation, Inc.
5
6 This file is part of the GNU MP Library test suite.
7
8 The GNU MP Library test suite is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 3 of the License,
11 or (at your option) any later version.
12
13 The GNU MP Library test suite is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
16 Public License for more details.
17
18 You should have received a copy of the GNU General Public License along with
19 the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */
20
21 /* Examples:
22
23 $ gen 1000 | stat
24 Test 1000 real numbers.
25
26 $ gen 30000 | stat -2 1000
27 Test 1000 real numbers 30 times and then test the 30 results in a
28 ``second level''.
29
30 $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
31 Test 1000 integers 0 <= X <= 2^32-1.
32
33 $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
34 Test 1000 integers 0 <= X <= 2^34-1.
35
36 */
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <math.h>
42 #include "gmpstat.h"
43
44 #if !HAVE_DECL_OPTARG
45 extern char *optarg;
46 extern int optind, opterr;
47 #endif
48
49 #define FVECSIZ (100000L)
50
51 int g_debug = 0;
52
53 static void
print_ks_results(mpf_t f_p,mpf_t f_p_prob,mpf_t f_m,mpf_t f_m_prob,FILE * fp)54 print_ks_results (mpf_t f_p, mpf_t f_p_prob,
55 mpf_t f_m, mpf_t f_m_prob,
56 FILE *fp)
57 {
58 double p, pp, m, mp;
59
60 p = mpf_get_d (f_p);
61 m = mpf_get_d (f_m);
62 pp = mpf_get_d (f_p_prob);
63 mp = mpf_get_d (f_m_prob);
64
65 fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0);
66 fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0);
67 }
68
69 static void
print_x2_table(unsigned int v,FILE * fp)70 print_x2_table (unsigned int v, FILE *fp)
71 {
72 double t[7];
73 int f;
74
75
76 fprintf (fp, "Chi-square table for v=%u\n", v);
77 fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
78 x2_table (t, v);
79 for (f = 0; f < 7; f++)
80 fprintf (fp, "%.2f\t", t[f]);
81 fputs ("\n", fp);
82 }
83
84
85
86 /* Pks () -- Distribution function for KS results with a big n (like 1000
87 or so): F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */
88 /* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2) */
89
90 static void
Pks(mpf_t p,mpf_t x)91 Pks (mpf_t p, mpf_t x)
92 {
93 double dt; /* temp double */
94
95 mpf_set (p, x);
96 mpf_mul (p, p, p); /* p = x^2 */
97 mpf_mul_ui (p, p, 2); /* p = 2*x^2 */
98 mpf_neg (p, p); /* p = -2*x^2 */
99 /* No pow() in gmp. Use doubles. */
100 /* FIXME: Use exp()? */
101 dt = pow (M_E, mpf_get_d (p));
102 mpf_set_d (p, dt);
103 mpf_ui_sub (p, 1, p);
104 }
105
106 /* f_freq() -- frequency test on real numbers 0<=f<1*/
107 static void
f_freq(const unsigned l1runs,const unsigned l2runs,mpf_t fvec[],const unsigned long n)108 f_freq (const unsigned l1runs, const unsigned l2runs,
109 mpf_t fvec[], const unsigned long n)
110 {
111 unsigned f;
112 mpf_t f_p, f_p_prob;
113 mpf_t f_m, f_m_prob;
114 mpf_t *l1res; /* level 1 result array */
115
116 mpf_init (f_p); mpf_init (f_m);
117 mpf_init (f_p_prob); mpf_init (f_m_prob);
118
119
120 /* Allocate space for 1st level results. */
121 l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
122 if (NULL == l1res)
123 {
124 fprintf (stderr, "stat: malloc failure\n");
125 exit (1);
126 }
127
128 printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
129 printf ("\tKp\t\tKm\n");
130
131 for (f = 0; f < l2runs; f++)
132 {
133 /* f_printvec (fvec, n); */
134 mpf_freqt (f_p, f_m, fvec + f * n, n);
135
136 /* what's the probability of getting these results? */
137 ks_table (f_p_prob, f_p, n);
138 ks_table (f_m_prob, f_m, n);
139
140 if (l1runs == 0)
141 {
142 /*printf ("%u:\t", f + 1);*/
143 print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
144 }
145 else
146 {
147 /* save result */
148 mpf_init_set (l1res[f], f_p);
149 mpf_init_set (l1res[f + l2runs], f_m);
150 }
151 }
152
153 /* Now, apply the KS test on the results from the 1st level rounds
154 with the distribution
155 F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51] */
156
157 if (l1runs != 0)
158 {
159 /*printf ("-------------------------------------\n");*/
160
161 /* The Kp's. */
162 ks (f_p, f_m, l1res, Pks, l2runs);
163 ks_table (f_p_prob, f_p, l2runs);
164 ks_table (f_m_prob, f_m, l2runs);
165 printf ("Kp:\t");
166 print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
167
168 /* The Km's. */
169 ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
170 ks_table (f_p_prob, f_p, l2runs);
171 ks_table (f_m_prob, f_m, l2runs);
172 printf ("Km:\t");
173 print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
174 }
175
176 mpf_clear (f_p); mpf_clear (f_m);
177 mpf_clear (f_p_prob); mpf_clear (f_m_prob);
178 free (l1res);
179 }
180
181 /* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers
182 0<=z<=MAX */
183 static void
z_freq(const unsigned l1runs,const unsigned l2runs,mpz_t zvec[],const unsigned long n,unsigned int max)184 z_freq (const unsigned l1runs,
185 const unsigned l2runs,
186 mpz_t zvec[],
187 const unsigned long n,
188 unsigned int max)
189 {
190 mpf_t V; /* result */
191 double d_V; /* result as a double */
192
193 mpf_init (V);
194
195
196 printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
197 print_x2_table (max, stdout);
198
199 mpz_freqt (V, zvec, max, n);
200
201 d_V = mpf_get_d (V);
202 printf ("V = %.2f (n = %lu)\n", d_V, n);
203
204 mpf_clear (V);
205 }
206
207 unsigned int stat_debug = 0;
208
209 int
main(argc,argv)210 main (argc, argv)
211 int argc;
212 char *argv[];
213 {
214 const char usage[] =
215 "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
216 " file filename\n" \
217 " -2 runs perform 2-level test with RUNS runs on 1st level\n" \
218 " -d increase debugging level\n" \
219 " -i max input is integers 0 <= Z <= MAX\n" \
220 " -r max input is real numbers 0 <= R < 1 and use MAX as\n" \
221 " maximum value when converting real numbers to integers\n" \
222 "";
223
224 mpf_t fvec[FVECSIZ];
225 mpz_t zvec[FVECSIZ];
226 unsigned long int f, n, vecentries;
227 char *filen;
228 FILE *fp;
229 int c;
230 int omitoutput = 0;
231 int realinput = -1; /* 1: input is real numbers 0<=R<1;
232 0: input is integers 0 <= Z <= MAX. */
233 long l1runs = 0, /* 1st level runs */
234 l2runs = 1; /* 2nd level runs */
235 mpf_t f_temp;
236 mpz_t z_imax; /* max value when converting between
237 real number and integer. */
238 mpf_t f_imax_plus1; /* f_imax + 1 stored in an mpf_t for
239 convenience */
240 mpf_t f_imax_minus1; /* f_imax - 1 stored in an mpf_t for
241 convenience */
242
243
244 mpf_init (f_temp);
245 mpz_init_set_ui (z_imax, 0x7fffffff);
246 mpf_init (f_imax_plus1);
247 mpf_init (f_imax_minus1);
248
249 while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
250 switch (c)
251 {
252 case '2':
253 l1runs = atol (optarg);
254 l2runs = -1; /* set later on */
255 break;
256 case 'd': /* increase debug level */
257 stat_debug++;
258 break;
259 case 'i':
260 if (1 == realinput)
261 {
262 fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
263 exit (1);
264 }
265 if (mpz_set_str (z_imax, optarg, 0))
266 {
267 fprintf (stderr, "stat: bad max value %s\n", optarg);
268 exit (1);
269 }
270 realinput = 0;
271 break;
272 case 'r':
273 if (0 == realinput)
274 {
275 fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
276 exit (1);
277 }
278 if (mpz_set_str (z_imax, optarg, 0))
279 {
280 fprintf (stderr, "stat: bad max value %s\n", optarg);
281 exit (1);
282 }
283 realinput = 1;
284 break;
285 case 'o':
286 omitoutput = atoi (optarg);
287 break;
288 case '?':
289 default:
290 fputs (usage, stderr);
291 exit (1);
292 }
293 argc -= optind;
294 argv += optind;
295
296 if (argc < 1)
297 fp = stdin;
298 else
299 filen = argv[0];
300
301 if (fp != stdin)
302 if (NULL == (fp = fopen (filen, "r")))
303 {
304 perror (filen);
305 exit (1);
306 }
307
308 if (-1 == realinput)
309 realinput = 1; /* default is real numbers */
310
311 /* read file and fill appropriate vec */
312 if (1 == realinput) /* real input */
313 {
314 for (f = 0; f < FVECSIZ ; f++)
315 {
316 mpf_init (fvec[f]);
317 if (!mpf_inp_str (fvec[f], fp, 10))
318 break;
319 }
320 }
321 else /* integer input */
322 {
323 for (f = 0; f < FVECSIZ ; f++)
324 {
325 mpz_init (zvec[f]);
326 if (!mpz_inp_str (zvec[f], fp, 10))
327 break;
328 }
329 }
330 vecentries = n = f; /* number of entries read */
331 fclose (fp);
332
333 if (FVECSIZ == f)
334 fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
335 "of only %ld entries. sorry.\n", FVECSIZ);
336
337 printf ("Got %lu numbers.\n", n);
338
339 /* convert and fill the other vec */
340 /* since fvec[] contains 0<=f<1 and we want ivec[] to contain
341 0<=z<=imax and we are truncating all fractions when
342 converting float to int, we have to add 1 to imax.*/
343 mpf_set_z (f_imax_plus1, z_imax);
344 mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
345 if (1 == realinput) /* fill zvec[] */
346 {
347 for (f = 0; f < n; f++)
348 {
349 mpf_mul (f_temp, fvec[f], f_imax_plus1);
350 mpz_init (zvec[f]);
351 mpz_set_f (zvec[f], f_temp); /* truncating fraction */
352 if (stat_debug > 1)
353 {
354 mpz_out_str (stderr, 10, zvec[f]);
355 fputs ("\n", stderr);
356 }
357 }
358 }
359 else /* integer input; fill fvec[] */
360 {
361 /* mpf_set_z (f_imax_minus1, z_imax);
362 mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
363 for (f = 0; f < n; f++)
364 {
365 mpf_init (fvec[f]);
366 mpf_set_z (fvec[f], zvec[f]);
367 mpf_div (fvec[f], fvec[f], f_imax_plus1);
368 if (stat_debug > 1)
369 {
370 mpf_out_str (stderr, 10, 0, fvec[f]);
371 fputs ("\n", stderr);
372 }
373 }
374 }
375
376 /* 2 levels? */
377 if (1 != l2runs)
378 {
379 l2runs = n / l1runs;
380 printf ("Doing %ld second level rounds "\
381 "with %ld entries in each round", l2runs, l1runs);
382 if (n % l1runs)
383 printf (" (discarding %ld entr%s)", n % l1runs,
384 n % l1runs == 1 ? "y" : "ies");
385 puts (".");
386 n = l1runs;
387 }
388
389 #ifndef DONT_FFREQ
390 f_freq (l1runs, l2runs, fvec, n);
391 #endif
392 #ifdef DO_ZFREQ
393 z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
394 #endif
395
396 mpf_clear (f_temp); mpz_clear (z_imax);
397 mpf_clear (f_imax_plus1);
398 mpf_clear (f_imax_minus1);
399 for (f = 0; f < vecentries; f++)
400 {
401 mpf_clear (fvec[f]);
402 mpz_clear (zvec[f]);
403 }
404
405 return 0;
406 }
407