/*
 * Build:  cc randread.c -o ~/bin/randread -O2 -lm
 *
 * Usage:  randread device [bufsize:512 [range%:90 [nprocs:32]]]
 *
 * Requires a TSC (timing uses rdtsc() and the hw.tsc_frequency sysctl).
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/wait.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Per-worker statistics slot.  main() maps an array of these MAP_SHARED so
 * each forked worker can publish its numbers and the parent can read them.
 *
 * The fields add up to 64 bytes (7 x int64_t + 2 x int); the unused members
 * look like padding so each slot fills one cache line -- NOTE(review):
 * confirm that is the intent before removing them.
 */
typedef struct pdata {
	int64_t	counter;	/* reads completed since the last reset */
	int64_t	lotime;		/* smallest per-read TSC delta seen */
	int64_t	hitime;		/* largest per-read TSC delta seen */
	int64_t	tsc_total1;	/* sum of per-read TSC deltas */
	int64_t	tsc_total2;	/* sum of squared per-read TSC deltas */
	int64_t	unused00;
	int64_t	unused01;
	int	unused02;
	int	reset;		/* set by parent; worker zeroes stats and clears it */
} pdata_t;

3675a0ef5dSMatthew Dillon int
main(int ac,char ** av)3775a0ef5dSMatthew Dillon main(int ac, char **av)
3875a0ef5dSMatthew Dillon {
3975a0ef5dSMatthew Dillon char *buf;
4075a0ef5dSMatthew Dillon size_t bytes = 512;
4175a0ef5dSMatthew Dillon off_t limit;
4275a0ef5dSMatthew Dillon int fd;
4375a0ef5dSMatthew Dillon int i;
44*9022bcc8SMatthew Dillon int loops;
4575a0ef5dSMatthew Dillon int nprocs = 32;
4675a0ef5dSMatthew Dillon double range = 90.0;
47*9022bcc8SMatthew Dillon volatile pdata_t *pdata;
48*9022bcc8SMatthew Dillon int64_t tsc1;
49*9022bcc8SMatthew Dillon int64_t tsc2;
50*9022bcc8SMatthew Dillon int64_t delta;
51*9022bcc8SMatthew Dillon int64_t tscfreq = 0;
52*9022bcc8SMatthew Dillon int64_t lotime;
53*9022bcc8SMatthew Dillon int64_t hitime;
54*9022bcc8SMatthew Dillon size_t tscfreq_size = sizeof(tscfreq);
55*9022bcc8SMatthew Dillon
56*9022bcc8SMatthew Dillon sysctlbyname("hw.tsc_frequency", &tscfreq, &tscfreq_size, NULL, 0);
57*9022bcc8SMatthew Dillon assert(tscfreq != 0);
5875a0ef5dSMatthew Dillon
5975a0ef5dSMatthew Dillon if (ac < 2 || ac > 5) {
6075a0ef5dSMatthew Dillon fprintf(stderr, "%s <device> [bufsize:512 [range%:90 [nprocs:32]]]\n",
6175a0ef5dSMatthew Dillon av[0]);
6275a0ef5dSMatthew Dillon exit (1);
6375a0ef5dSMatthew Dillon }
6475a0ef5dSMatthew Dillon
6575a0ef5dSMatthew Dillon if (ac >= 3) {
6675a0ef5dSMatthew Dillon bytes = (size_t)strtoul(av[2], NULL, 0);
6775a0ef5dSMatthew Dillon if (bytes < 512 || (bytes ^ (bytes - 1)) != ((bytes << 1) - 1)) {
6875a0ef5dSMatthew Dillon fprintf(stderr, "bytes must be a power of 2 >= 512\n");
6975a0ef5dSMatthew Dillon exit (1);
7075a0ef5dSMatthew Dillon }
7175a0ef5dSMatthew Dillon }
7275a0ef5dSMatthew Dillon buf = malloc(bytes);
7375a0ef5dSMatthew Dillon
7475a0ef5dSMatthew Dillon if (ac >= 4) {
7575a0ef5dSMatthew Dillon range = strtod(av[3], NULL);
7675a0ef5dSMatthew Dillon }
7775a0ef5dSMatthew Dillon
7875a0ef5dSMatthew Dillon if (ac >= 5) {
7975a0ef5dSMatthew Dillon nprocs = strtol(av[4], NULL, 0);
8075a0ef5dSMatthew Dillon if (nprocs < 0 || nprocs > 512) {
8175a0ef5dSMatthew Dillon fprintf(stderr, "absurd nprocs (%d)\n", nprocs);
8275a0ef5dSMatthew Dillon exit(1);
8375a0ef5dSMatthew Dillon }
8475a0ef5dSMatthew Dillon }
8575a0ef5dSMatthew Dillon
8675a0ef5dSMatthew Dillon fd = open(av[1], O_RDONLY);
8775a0ef5dSMatthew Dillon if (fd < 0) {
8875a0ef5dSMatthew Dillon fprintf(stderr, "open %s: %s\n", av[1], strerror(errno));
8975a0ef5dSMatthew Dillon exit (1);
9075a0ef5dSMatthew Dillon }
9175a0ef5dSMatthew Dillon
9275a0ef5dSMatthew Dillon lseek(fd, 0L, 2);
9375a0ef5dSMatthew Dillon limit = lseek(fd, 0L, 1);
9475a0ef5dSMatthew Dillon limit = (off_t)((double)limit * range / 100.0);
9575a0ef5dSMatthew Dillon limit &= ~(off_t)(bytes - 1);
9675a0ef5dSMatthew Dillon printf("device %s bufsize %zd limit %4.3fGB nprocs %d\n",
9775a0ef5dSMatthew Dillon av[1], bytes, (double)limit / (1024.0*1024.0*1024.0), nprocs);
9875a0ef5dSMatthew Dillon
99*9022bcc8SMatthew Dillon pdata = mmap(NULL, nprocs * sizeof(*pdata), PROT_READ|PROT_WRITE,
10075a0ef5dSMatthew Dillon MAP_SHARED|MAP_ANON, -1, 0);
10175a0ef5dSMatthew Dillon
10275a0ef5dSMatthew Dillon for (i = 0; i < nprocs; ++i) {
10375a0ef5dSMatthew Dillon if (fork() == 0) {
10479580b6bSMatthew Dillon close(fd);
10579580b6bSMatthew Dillon fd = open(av[1], O_RDONLY);
10675a0ef5dSMatthew Dillon srandomdev();
107*9022bcc8SMatthew Dillon pdata += i;
108*9022bcc8SMatthew Dillon
109*9022bcc8SMatthew Dillon tsc2 = rdtsc();
110*9022bcc8SMatthew Dillon pdata->lotime = 0x7FFFFFFFFFFFFFFFLL;
111*9022bcc8SMatthew Dillon
11275a0ef5dSMatthew Dillon for (;;) {
113f2428747SMatthew Dillon long pos;
114f2428747SMatthew Dillon
115*9022bcc8SMatthew Dillon if (pdata->reset) {
116*9022bcc8SMatthew Dillon pdata->counter = 0;
117*9022bcc8SMatthew Dillon pdata->tsc_total1 = 0;
118*9022bcc8SMatthew Dillon pdata->tsc_total2 = 0;
119*9022bcc8SMatthew Dillon pdata->lotime = 0x7FFFFFFFFFFFFFFFLL;
120*9022bcc8SMatthew Dillon pdata->hitime = 0;
121*9022bcc8SMatthew Dillon pdata->reset = 0;
122*9022bcc8SMatthew Dillon }
123*9022bcc8SMatthew Dillon
124f2428747SMatthew Dillon pos = random() ^ ((long)random() << 31);
125f2428747SMatthew Dillon pos &= 0x7FFFFFFFFFFFFFFFLLU;
126f2428747SMatthew Dillon pos = (pos % limit) & ~(off_t)(bytes - 1);
12775a0ef5dSMatthew Dillon lseek(fd, pos, 0);
12875a0ef5dSMatthew Dillon read(fd, buf, bytes);
129*9022bcc8SMatthew Dillon tsc1 = tsc2;
130*9022bcc8SMatthew Dillon tsc2 = rdtsc();
131*9022bcc8SMatthew Dillon delta = tsc2 - tsc1;
132*9022bcc8SMatthew Dillon ++pdata->counter;
133*9022bcc8SMatthew Dillon pdata->tsc_total1 += delta;
134*9022bcc8SMatthew Dillon pdata->tsc_total2 += delta * delta;
135*9022bcc8SMatthew Dillon if (pdata->lotime > delta)
136*9022bcc8SMatthew Dillon pdata->lotime = delta;
137*9022bcc8SMatthew Dillon if (pdata->hitime < delta)
138*9022bcc8SMatthew Dillon pdata->hitime = delta;
13975a0ef5dSMatthew Dillon }
14075a0ef5dSMatthew Dillon }
14175a0ef5dSMatthew Dillon }
142*9022bcc8SMatthew Dillon
143*9022bcc8SMatthew Dillon tsc2 = rdtsc();
144*9022bcc8SMatthew Dillon loops = 0;
14575a0ef5dSMatthew Dillon
14675a0ef5dSMatthew Dillon for (;;) {
147*9022bcc8SMatthew Dillon int64_t count;
148*9022bcc8SMatthew Dillon int64_t total1;
149*9022bcc8SMatthew Dillon int64_t total2;
150*9022bcc8SMatthew Dillon double v;
151*9022bcc8SMatthew Dillon double lo;
152*9022bcc8SMatthew Dillon double hi;
153*9022bcc8SMatthew Dillon double s1;
154*9022bcc8SMatthew Dillon double s2;
155*9022bcc8SMatthew Dillon double stddev;
156*9022bcc8SMatthew Dillon
15775a0ef5dSMatthew Dillon sleep(1);
158*9022bcc8SMatthew Dillon lotime = pdata[0].lotime;
159*9022bcc8SMatthew Dillon hitime = pdata[0].hitime;
160*9022bcc8SMatthew Dillon total1 = 0;
161*9022bcc8SMatthew Dillon total2 = 0;
162*9022bcc8SMatthew Dillon count = 0;
163*9022bcc8SMatthew Dillon
164*9022bcc8SMatthew Dillon for (i = 0; i < nprocs; ++i) {
165*9022bcc8SMatthew Dillon count += pdata[i].counter;
166*9022bcc8SMatthew Dillon total1 += pdata[i].tsc_total1;
167*9022bcc8SMatthew Dillon total2 += pdata[i].tsc_total2;
168*9022bcc8SMatthew Dillon if (lotime > pdata[i].lotime)
169*9022bcc8SMatthew Dillon lotime = pdata[i].lotime;
170*9022bcc8SMatthew Dillon if (hitime < pdata[i].hitime)
171*9022bcc8SMatthew Dillon hitime = pdata[i].hitime;
172*9022bcc8SMatthew Dillon pdata[i].reset = 1;
173*9022bcc8SMatthew Dillon }
174*9022bcc8SMatthew Dillon tsc1 = tsc2;
175*9022bcc8SMatthew Dillon tsc2 = rdtsc();
176*9022bcc8SMatthew Dillon delta = tsc2 - tsc1;
177*9022bcc8SMatthew Dillon v = count * ((double)delta / (double)tscfreq);
178*9022bcc8SMatthew Dillon lo = (double)lotime / (double)tscfreq;
179*9022bcc8SMatthew Dillon hi = (double)hitime / (double)tscfreq;
180*9022bcc8SMatthew Dillon
181*9022bcc8SMatthew Dillon s1 = ((double)total2 - (double)total1 * (double)total1 / (double)count) / ((double)count - 1);
182*9022bcc8SMatthew Dillon if (s1 < 0.0)
183*9022bcc8SMatthew Dillon stddev = -sqrt(-s1);
184*9022bcc8SMatthew Dillon else
185*9022bcc8SMatthew Dillon stddev = sqrt(s1);
186*9022bcc8SMatthew Dillon stddev = stddev / (double)tscfreq; /* normalize to 1 second units */
187*9022bcc8SMatthew Dillon
188*9022bcc8SMatthew Dillon if (loops) {
189*9022bcc8SMatthew Dillon printf("%6.0f/s avg=%6.2fuS bw=%-6.2fMB/s "
190*9022bcc8SMatthew Dillon "lo=%-3.2fuS, hi=%-3.2fuS stddev=%3.2fuS\n",
191*9022bcc8SMatthew Dillon v,
192*9022bcc8SMatthew Dillon 1e6 * nprocs / v,
193*9022bcc8SMatthew Dillon (double)count * bytes / 1e6 / ((double)delta / (double)tscfreq),
194*9022bcc8SMatthew Dillon lo * 1e6,
195*9022bcc8SMatthew Dillon hi * 1e6,
196*9022bcc8SMatthew Dillon stddev * 1e6);
197*9022bcc8SMatthew Dillon }
198*9022bcc8SMatthew Dillon ++loops;
19975a0ef5dSMatthew Dillon }
20075a0ef5dSMatthew Dillon return 0;
20175a0ef5dSMatthew Dillon }
202