1 #include <assert.h>
2 #include <errno.h>
3 #include <math.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <unistd.h>
8 #include <minix/type.h>
9
10 #include "tdist.h"
11
12 /* user-configurable settings */
13 #define DEBUG 0
14
15 #define PROC_NAME_WIDTH 10
16
17 #define SYMBOL_NAME_WIDTH 24
18
19 /* types */
20 #define SYMBOL_HASHTAB_SIZE 1024
21
22 #define SYMBOL_NAME_SIZE 52
23
24 struct symbol_count {
25 unsigned long sum;
26 unsigned long long sum2;
27 unsigned long min;
28 unsigned long max;
29 };
30
31 enum symbol_class {
32 sc_total,
33 sc_idle,
34 sc_system,
35 sc_user,
36 sc_process,
37 sc_symbol
38 };
39
40 struct symbol_info {
41 struct symbol_info *next;
42 struct symbol_info *hashtab_next;
43 char binary[PROC_NAME_LEN];
44 char name[SYMBOL_NAME_SIZE];
45 struct symbol_count count[2];
46 long diff;
47 enum symbol_class class;
48 };
49
50 /* global variables */
51 static unsigned n1, n2;
52 static struct symbol_info *symbols;
53 static struct symbol_info *symbol_hashtab[SYMBOL_HASHTAB_SIZE];
54
55 /* prototypes */
56 static double compute_sig(double avg1, double var1, double avg2, double var2);
57 static void compute_stats(const struct symbol_count *count, unsigned n,
58 double *avg, double *var);
59 static void load_file(const char *path, int count_index);
60 static void *malloc_checked(size_t size);
61 static void print_report(void);
62 static void print_report_line(const struct symbol_info *symbol);
63 static int read_line(FILE *file, const char *path, int line, char *binary,
64 char *name, unsigned long *samples);
65 static enum symbol_class symbol_classify(const char *binary, const char *name);
66 static unsigned string_hash(const char *s, size_t size);
67 static struct symbol_info *symbol_find_or_add(const char *binary,
68 const char *name);
69 static unsigned symbol_hash(const char *binary, const char *name);
70 static int symbol_qsort_compare(const void *p1, const void *p2);
71 static void symbol_tally(const char *binary, const char *name,
72 unsigned long samples, int count_index);
73 static unsigned symbols_count(void);
74 static void usage(const char *argv0);
75
76 #define MALLOC_CHECKED(type, count) \
77 ((type *) malloc_checked(sizeof(type) * (count)))
78
79 #if DEBUG
80 #define dprintf(...) do { \
81 fprintf(stderr, "debug(%s:%d): ", __FUNCTION__, __LINE__); \
82 fprintf(stderr, __VA_ARGS__); \
83 } while(0)
84 #else
85 #define dprintf(...)
86 #endif
87
main(int argc,char ** argv)88 int main(int argc, char **argv) {
89 int i;
90
91 #ifdef DEBUG
92 /* disable buffering so the output mixes correctly */
93 setvbuf(stdout, NULL, _IONBF, 0);
94 setvbuf(stderr, NULL, _IONBF, 0);
95 #endif
96
97 if (argc < 3) usage(argv[0]);
98
99 /* load left-hand files */
100 for (i = 1; i < argc; i++) {
101 if (strcmp(argv[i], "-r") == 0) {
102 i++;
103 break;
104 }
105 if (argc == 3 && i == 2) break;
106 load_file(argv[i], 0);
107 n1++;
108 }
109
110 /* load right-hand files */
111 for (; i < argc; i++) {
112 load_file(argv[i], 1);
113 n2++;
114 }
115
116 if (n1 < 1 || n2 < 1) usage(argv[0]);
117
118 /* report analysis results */
119 print_report();
120 return 0;
121 }
122
compute_sig(double avg1,double var1,double avg2,double var2)123 static double compute_sig(double avg1, double var1, double avg2, double var2) {
124 double df, t, var;
125
126 /* prevent division by zero with lack of variance */
127 var = var1 / n1 + var2 / n2;
128 if (var <= 0 || n1 <= 1 || n2 <= 1) return -1;
129
130 /* do we have enough degrees of freedom? */
131 df = var * var / (
132 var1 * var1 / (n1 * n1 * (n1 - 1)) +
133 var2 * var2 / (n2 * n2 * (n2 - 1)));
134 if (df < 1) return -1;
135
136 /* perform t-test */
137 t = (avg1 - avg2) / sqrt(var);
138 return student_t_p_2tail(t, df);
139 }
140
compute_stats(const struct symbol_count * count,unsigned n,double * avg,double * var)141 static void compute_stats(const struct symbol_count *count, unsigned n,
142 double *avg, double *var) {
143 double sum;
144
145 assert(count);
146 assert(avg);
147 assert(var);
148
149 sum = count->sum;
150 if (n < 1) {
151 *avg = 0;
152 } else {
153 *avg = sum / n;
154 }
155
156 if (n < 2) {
157 *var = 0;
158 } else {
159 *var = (count->sum2 - sum * sum / n) / (n - 1);
160 }
161 }
162
load_file(const char * path,int count_index)163 static void load_file(const char *path, int count_index) {
164 char binary[PROC_NAME_LEN];
165 FILE *file;
166 int line;
167 char name[SYMBOL_NAME_SIZE];
168 unsigned long samples;
169
170 assert(path);
171 assert(count_index == 0 || count_index == 1);
172
173 file = fopen(path, "r");
174 if (!file) {
175 fprintf(stderr, "error: cannot open \"%s\": %s\n",
176 path, strerror(errno));
177 exit(1);
178 }
179
180 line = 1;
181 while (read_line(file, path, line++, binary, name, &samples)) {
182 symbol_tally(binary, name, samples, count_index);
183 }
184
185 fclose(file);
186 }
187
malloc_checked(size_t size)188 static void *malloc_checked(size_t size) {
189 void *p;
190 if (!size) return NULL;
191 p = malloc(size);
192 if (!p) {
193 fprintf(stderr, "error: malloc cannot allocate %lu bytes: %s\n",
194 (unsigned long) size, strerror(errno));
195 exit(-1);
196 }
197 return p;
198 }
199
print_report(void)200 static void print_report(void) {
201 unsigned i, index, symbol_count;
202 struct symbol_info *symbol, **symbol_list;
203
204 /* list the symbols in an array for sorting */
205 symbol_count = symbols_count();
206 symbol_list = MALLOC_CHECKED(struct symbol_info *, symbol_count);
207 index = 0;
208 for (symbol = symbols; symbol; symbol = symbol->next) {
209 symbol_list[index++] = symbol;
210
211 /* sort by difference in average, multiply both sides by
212 * n1 * n2 to avoid division
213 */
214 symbol->diff = (long) (symbol->count[1].sum * n1) -
215 (long) (symbol->count[0].sum * n2);
216 }
217 assert(index == symbol_count);
218
219 /* sort symbols */
220 qsort(symbol_list, symbol_count, sizeof(struct symbol_info *),
221 symbol_qsort_compare);
222
223 printf("%-*s %-*s ------avg------ ----stdev---- diff sig\n",
224 PROC_NAME_WIDTH, "binary", SYMBOL_NAME_WIDTH, "symbol");
225 printf("%-*s left right left right\n",
226 PROC_NAME_WIDTH + SYMBOL_NAME_WIDTH + 1, "");
227 printf("\n");
228 for (i = 0; i < symbol_count; i++) {
229 if (i > 0 && symbol_list[i]->class >= sc_process &&
230 symbol_list[i]->class != symbol_list[i - 1]->class) {
231 printf("\n");
232 }
233 print_report_line(symbol_list[i]);
234 }
235 printf("\n");
236 printf("significance levels (two-tailed):\n");
237 printf(" * p < 0.05\n");
238 printf(" ** p < 0.01\n");
239 printf(" *** p < 0.001\n");
240 free(symbol_list);
241 }
242
print_report_line(const struct symbol_info * symbol)243 static void print_report_line(const struct symbol_info *symbol) {
244 double avg1, avg2, p, var1, var2;
245
246 /* compute statistics; t is Welch's t, which is a t-test that allows
247 * for unpaired samples with unequal variance; df is the degrees of
248 * freedom as given by the Welch-Satterthwaite equation
249 */
250 compute_stats(&symbol->count[0], n1, &avg1, &var1);
251 compute_stats(&symbol->count[1], n2, &avg2, &var2);
252 p = compute_sig(avg1, var1, avg2, var2);
253
254 /* list applicable values */
255 assert(PROC_NAME_WIDTH <= PROC_NAME_LEN);
256 assert(SYMBOL_NAME_WIDTH <= SYMBOL_NAME_SIZE);
257 printf("%-*.*s %-*.*s",
258 PROC_NAME_WIDTH, PROC_NAME_WIDTH, symbol->binary,
259 SYMBOL_NAME_WIDTH, SYMBOL_NAME_WIDTH, symbol->name);
260 if (symbol->count[0].sum > 0) {
261 printf("%8.0f", avg1);
262 } else {
263 printf(" ");
264 }
265 if (symbol->count[1].sum > 0) {
266 printf("%8.0f", avg2);
267 } else {
268 printf(" ");
269 }
270 if (symbol->count[0].sum > 0 && n1 >= 2) {
271 printf("%7.0f", sqrt(var1));
272 } else {
273 printf(" ");
274 }
275 if (symbol->count[1].sum > 0 && n2 >= 2) {
276 printf("%7.0f", sqrt(var2));
277 } else {
278 printf(" ");
279 }
280 printf("%8.0f ", avg2 - avg1);
281 if (p >= 0) {
282 if (p <= 0.05) printf("*");
283 if (p <= 0.01) printf("*");
284 if (p <= 0.001) printf("*");
285 }
286 printf("\n");
287 }
288
read_line(FILE * file,const char * path,int line,char * binary,char * name,unsigned long * samples)289 static int read_line(FILE *file, const char *path, int line, char *binary,
290 char *name, unsigned long *samples) {
291 int c, index;
292
293 assert(file);
294 assert(binary);
295 assert(name);
296 assert(samples);
297
298 c = fgetc(file);
299 if (c == EOF) return 0;
300
301 /* read binary name, truncating if necessary */
302 index = 0;
303 while (c != '\t' && c != '\n') {
304 if (index < PROC_NAME_LEN) binary[index++] = c;
305 c = fgetc(file);
306 }
307 if (index < PROC_NAME_LEN) binary[index] = 0;
308
309 /* read tab */
310 if (c != '\t') {
311 fprintf(stderr, "error: garbage %d after binary name "
312 "(\"%s\", line %d)\n", c, path, line);
313 exit(1);
314 }
315 c = fgetc(file);
316
317 /* read symbol name, truncating if necessary */
318 index = 0;
319 while (c != '\t' && c != '\n') {
320 if (index < SYMBOL_NAME_SIZE) name[index++] = c;
321 c = fgetc(file);
322 }
323 if (index < SYMBOL_NAME_SIZE) name[index] = 0;
324
325 /* read tab */
326 if (c != '\t') {
327 fprintf(stderr, "error: garbage %d after symbol name "
328 "(\"%s\", line %d)\n", c, path, line);
329 exit(1);
330 }
331 c = fgetc(file);
332
333 /* read number of samples */
334 *samples = 0;
335 while (c >= '0' && c <= '9') {
336 *samples = *samples * 10 + (c - '0');
337 c = fgetc(file);
338 }
339
340 /* read newline */
341 if (c != '\n') {
342 fprintf(stderr, "error: garbage %d after sample count "
343 "(\"%s\", line %d)\n", c, path, line);
344 exit(1);
345 }
346 return 1;
347 }
348
string_hash(const char * s,size_t size)349 static unsigned string_hash(const char *s, size_t size) {
350 unsigned result = 0;
351
352 assert(s);
353
354 while (*s && size-- > 0) {
355 result = result * 31 + *(s++);
356 }
357 return result;
358 }
359
symbol_classify(const char * binary,const char * name)360 static enum symbol_class symbol_classify(const char *binary, const char *name) {
361 if (strncmp(binary, "(total)", PROC_NAME_LEN) == 0) return sc_total;
362 if (strncmp(binary, "(idle)", PROC_NAME_LEN) == 0) return sc_idle;
363 if (strncmp(binary, "(system)", PROC_NAME_LEN) == 0) return sc_system;
364 if (strncmp(binary, "(user)", PROC_NAME_LEN) == 0) return sc_user;
365 if (strncmp(name, "(total)", SYMBOL_NAME_SIZE) == 0) return sc_process;
366 return sc_symbol;
367 }
368
symbol_find_or_add(const char * binary,const char * name)369 static struct symbol_info *symbol_find_or_add(const char *binary,
370 const char *name) {
371 struct symbol_info **ptr, *symbol;
372
373 assert(binary);
374 assert(name);
375
376 /* look up symbol in hash table */
377 ptr = &symbol_hashtab[symbol_hash(binary, name) % SYMBOL_HASHTAB_SIZE];
378 while ((symbol = *ptr)) {
379 if (strncmp(symbol->binary, binary, PROC_NAME_LEN) == 0 &&
380 strncmp(symbol->name, name, SYMBOL_NAME_SIZE) == 0) {
381 return symbol;
382 }
383 ptr = &symbol->hashtab_next;
384 }
385
386 /* unknown symbol, add it */
387 *ptr = symbol = MALLOC_CHECKED(struct symbol_info, 1);
388 memset(symbol, 0, sizeof(struct symbol_info));
389 strncpy(symbol->binary, binary, PROC_NAME_LEN);
390 strncpy(symbol->name, name, SYMBOL_NAME_SIZE);
391 symbol->count[0].min = ~0UL;
392 symbol->count[1].min = ~0UL;
393 symbol->class = symbol_classify(binary, name);
394
395 /* also add to linked list */
396 symbol->next = symbols;
397 symbols = symbol;
398 return symbol;
399 }
400
symbol_hash(const char * binary,const char * name)401 static unsigned symbol_hash(const char *binary, const char *name) {
402 return string_hash(binary, PROC_NAME_LEN) +
403 string_hash(name, SYMBOL_NAME_SIZE);
404 }
405
symbol_qsort_compare(const void * p1,const void * p2)406 static int symbol_qsort_compare(const void *p1, const void *p2) {
407 int r;
408 const struct symbol_info *s1, *s2;
409
410 assert(p1);
411 assert(p2);
412 s1 = *(const struct symbol_info **) p1;
413 s2 = *(const struct symbol_info **) p2;
414 assert(s1);
415 assert(s2);
416
417 /* totals come first */
418 if (s1->class < s2->class) return -1;
419 if (s1->class > s2->class) return 1;
420
421 /* sort by difference in average */
422 if (s1->diff < s2->diff) return -1;
423 if (s1->diff > s2->diff) return 1;
424
425 /* otherwise, by name */
426 r = strncmp(s1->binary, s2->binary, PROC_NAME_LEN);
427 if (r) return r;
428
429 return strncmp(s1->name, s2->name, SYMBOL_NAME_SIZE);
430 }
431
symbol_tally(const char * binary,const char * name,unsigned long samples,int count_index)432 static void symbol_tally(const char *binary, const char *name,
433 unsigned long samples, int count_index) {
434 struct symbol_count *count;
435 struct symbol_info *symbol;
436
437 /* look up or add symbol */
438 symbol = symbol_find_or_add(binary, name);
439
440 /* update count */
441 count = &symbol->count[count_index];
442 count->sum += samples;
443 count->sum2 += (unsigned long long) samples * samples;
444 if (count->min > samples) count->min = samples;
445 if (count->max < samples) count->max = samples;
446 }
447
symbols_count(void)448 static unsigned symbols_count(void) {
449 int count = 0;
450 const struct symbol_info *symbol;
451
452 for (symbol = symbols; symbol; symbol = symbol->next) {
453 count++;
454 }
455 return count;
456 }
457
usage(const char * argv0)458 static void usage(const char *argv0) {
459 printf("usage:\n");
460 printf(" %s leftfile rightfile\n", argv0);
461 printf(" %s leftfile... -r rightfile...\n", argv0);
462 printf("\n");
463 printf("sprofdiff compares the sprofile information from multiple\n");
464 printf("output files of sprofalyze -d.\n");
465 exit(1);
466 }
467