1 /* $OpenBSD: pctr.c,v 1.6 2001/07/18 17:17:39 pvalchev Exp $ */ 2 3 /* 4 * Pentium performance counter control program for OpenBSD. 5 * Copyright 1996 David Mazieres <dm@lcs.mit.edu>. 6 * 7 * Modification and redistribution in source and binary forms is 8 * permitted provided that due credit is given to the author and the 9 * OpenBSD project by leaving this copyright notice intact. 10 */ 11 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <string.h> 15 #include <unistd.h> 16 #include <sys/param.h> 17 #include <sys/types.h> 18 #include <sys/stat.h> 19 #include <sys/sysctl.h> 20 #include <sys/ioctl.h> 21 #include <err.h> 22 #include <fcntl.h> 23 #include <machine/cpu.h> 24 #include <machine/pctr.h> 25 #include <machine/specialreg.h> 26 27 #define CFL_MESI 0x1 /* Unit mask accepts MESI encoding */ 28 #define CFL_SA 0x2 /* Unit mask accepts Self/Any bit */ 29 #define CFL_C0 0x4 /* Counter 0 only */ 30 #define CFL_C1 0x8 /* Counter 1 only */ 31 32 /* Kernel cpuid values. */ 33 int cpu_id, cpu_feature; 34 char cpu_vendor[16]; 35 36 int pctr_isintel; 37 38 #define usetsc (cpu_feature & CPUID_TSC) 39 #define usep5ctr (pctr_isintel && (((cpu_id >> 8) & 15) == 5) && \ 40 (((cpu_id >> 4) & 15) > 0)) 41 #define usep6ctr (pctr_isintel && ((cpu_id >> 8) & 15) == 6) 42 #define cpufamily ((cpu_id >> 8) & 15) 43 44 extern char *__progname; 45 46 struct ctrfn { 47 u_int fn; 48 int flags; 49 char *name; 50 char *desc; 51 }; 52 53 struct ctrfn p5fn[] = { 54 {0x00, 0, "Data read", NULL}, 55 {0x01, 0, "Data write", NULL}, 56 {0x02, 0, "Data TLB miss", NULL}, 57 {0x03, 0, "Data read miss", NULL}, 58 {0x04, 0, "Data write miss", NULL}, 59 {0x05, 0, "Write (hit) to M or E state lines", NULL}, 60 {0x06, 0, "Data cache lines written back", NULL}, 61 {0x07, 0, "Data cache snoops", NULL}, 62 {0x08, 0, "Data cache snoop hits", NULL}, 63 {0x09, 0, "Memory accesses in both pipes", NULL}, 64 {0x0a, 0, "Bank conflicts", NULL}, 65 {0x0b, 0, "Misaligned data memory references", NULL}, 66 {0x0c, 0, "Code read", NULL}, 67 {0x0d, 0, "Code TLB miss", NULL}, 68 {0x0e, 0, "Code cache miss", NULL}, 69 {0x0f, 0, "Any segment register load", NULL}, 70 {0x12, 0, "Branches", NULL}, 71 {0x13, 0, "BTB hits", NULL}, 72 {0x14, 0, "Taken branch or BTB hit", NULL}, 73 {0x15, 0, "Pipeline flushes", NULL}, 74 {0x16, 0, "Instructions executed", NULL}, 75 {0x17, 0, "Instructions executed in the V-pipe", NULL}, 76 {0x18, 0, "Bus utilization (clocks)", NULL}, 77 {0x19, 0, "Pipeline stalled by write backup", NULL}, 78 {0x1a, 0, "Pipeline stalled by data memory read", NULL}, 79 {0x1b, 0, "Pipeline stalled by write to E or M line", NULL}, 80 {0x1c, 0, "Locked bus cycle", NULL}, 81 {0x1d, 0, "I/O read or write cycle", NULL}, 82 {0x1e, 0, "Noncacheable memory references", NULL}, 83 {0x1f, 0, "AGI (Address Generation Interlock)", NULL}, 84 {0x22, 0, "Floating-point operations", NULL}, 85 {0x23, 0, "Breakpoint 0 match", NULL}, 86 {0x24, 0, "Breakpoint 1 match", NULL}, 87 {0x25, 0, "Breakpoint 2 match", NULL}, 88 {0x26, 0, "Breakpoint 3 match", NULL}, 89 {0x27, 0, "Hardware interupts", NULL}, 90 {0x28, 0, "Data read or data write", NULL}, 91 {0x29, 0, "Data read miss or data write miss", NULL}, 92 {0x0, 0, NULL, NULL}, 93 }; 94 95 struct ctrfn p6fn[] = { 96 {0x03, 0, "LD_BLOCKS", 97 "Number of store buffer blocks."}, 98 {0x04, 0, "SB_DRAINS", 99 "Number of store buffer drain cycles."}, 100 {0x05, 0, "MISALIGN_MEM_REF", 101 "Number of misaligned data memory references."}, 102 {0x06, 0, "SEGMENT_REG_LOADS", 103 "Number of segment register loads."}, 104 {0x10, CFL_C0, "FP_COMP_OPS_EXE", 105 "Number of computational floating-point operations executed."}, 106 {0x11, CFL_C1, "FP_ASSIST", 107 "Number of floating-point exception cases handled by microcode."}, 108 {0x12, CFL_C1, "MUL", 109 "Number of multiplies."}, 110 {0x13, CFL_C1, "DIV", 111 "Number of divides."}, 112 {0x14, CFL_C0, "CYCLES_DIV_BUSY", 113 "Number of cycles during which the divider is busy."}, 114 {0x21, 0, "L2_ADS", 115 "Number of L2 address strobes."}, 116 {0x22, 0, "L2_DBUS_BUSY", 117 "Number of cycles durring which the data bus was busy."}, 118 {0x23, 0, "L2_DBUS_BUSY_RD", 119 "Number of cycles during which the data bus was busy transferring " 120 "data from L2 to the processor."}, 121 {0x24, 0, "L2_LINES_IN", 122 "Number of lines allocated in the L2."}, 123 {0x25, 0, "L2_M_LINES_INM", 124 "Number of modified lines allocated in the L2."}, 125 {0x26, 0, "L2_LINES_OUT", 126 "Number of lines removed from the L2 for any reason."}, 127 {0x27, 0, "L2_M_LINES_OUTM", 128 "Number of modified lines removed from the L2 for any reason."}, 129 {0x28, CFL_MESI, "L2_IFETCH", 130 "Number of L2 instruction fetches."}, 131 {0x29, CFL_MESI, "L2_LD", 132 "Number of L2 data loads."}, 133 {0x2a, CFL_MESI, "L2_ST", 134 "Number of L2 data stores."}, 135 {0x2e, CFL_MESI, "L2_RQSTS", 136 "Number of L2 requests."}, 137 {0x43, 0, "DATA_MEM_REFS", 138 "All memory references, both cacheable and non-cacheable."}, 139 {0x45, 0, "DCU_LINES_IN", 140 "Total lines allocated in the DCU."}, 141 {0x46, 0, "DCU_M_LINES_IN", 142 "Number of M state lines allocated in the DCU."}, 143 {0x47, 0, "DCU_M_LINES_OUT", 144 "Number of M state lines evicted from the DCU. " 145 "This includes evictions via snoop HITM, intervention or replacement"}, 146 {0x48, 0, "DCU_MISS_OUTSTANDING", 147 "Weighted number of cycles while a DCU miss is outstanding."}, 148 {0x60, 0, "BUS_REQ_OUTSTANDING", 149 "Number of bus requests outstanding."}, 150 {0x61, 0, "BUS_BNR_DRV", 151 "Number of bus clock cycles during which the processor is " 152 "driving the BNR pin."}, 153 {0x62, CFL_SA, "BUS_DRDY_CLOCKS", 154 "Number of clocks during which DRDY is asserted."}, 155 {0x63, CFL_SA, "BUS_LOCK_CLOCKS", 156 "Number of clocks during which LOCK is asserted."}, 157 {0x64, 0, "BUS_DATA_RCV", 158 "Number of bus clock cycles during which the processor is " 159 "receiving data."}, 160 {0x65, CFL_SA, "BUS_TRAN_BRD", 161 "Number of burst read transactions."}, 162 {0x66, CFL_SA, "BUS_TRAN_RFO", 163 "Number of read for ownership transactions."}, 164 {0x67, CFL_SA, "BUS_TRANS_WB", 165 "Number of write back transactions."}, 166 {0x68, CFL_SA, "BUS_TRAN_IFETCH", 167 "Number of instruction fetch transactions."}, 168 {0x69, CFL_SA, "BUS_TRAN_INVAL", 169 "Number of invalidate transactions."}, 170 {0x6a, CFL_SA, "BUS_TRAN_PWR", 171 "Number of partial write transactions."}, 172 {0x6b, CFL_SA, "BUS_TRANS_P", 173 "Number of partial transactions."}, 174 {0x6c, CFL_SA, "BUS_TRANS_IO", 175 "Number of I/O transactions."}, 176 {0x6d, CFL_SA, "BUS_TRAN_DEF", 177 "Number of deferred transactions."}, 178 {0x6e, CFL_SA, "BUS_TRAN_BURST", 179 "Number of burst transactions."}, 180 {0x6f, CFL_SA, "BUS_TRAN_MEM", 181 "Number of memory transactions."}, 182 {0x70, CFL_SA, "BUS_TRAN_ANY", 183 "Number of all transactions."}, 184 {0x79, 0, "CPU_CLK_UNHALTED", 185 "Number of cycles during which the processor is not halted."}, 186 {0x7a, 0, "BUS_HIT_DRV", 187 "Number of bus clock cycles during which the processor is " 188 "driving the HIT pin."}, 189 {0x7b, 0, "BUS_HITM_DRV", 190 "Number of bus clock cycles during which the processor is " 191 "driving the HITM pin."}, 192 {0x7e, 0, "BUS_SNOOP_STALL", 193 "Number of clock cycles during which the bus is snoop stalled."}, 194 {0x80, 0, "IFU_IFETCH", 195 "Number of instruction fetches, both cacheable and non-cacheable."}, 196 {0x81, 0, "IFU_IFETCH_MISS", 197 "Number of instruction fetch misses."}, 198 {0x85, 0, "ITLB_MISS", 199 "Number of ITLB misses."}, 200 {0x86, 0, "IFU_MEM_STALL", 201 "Number of cycles that the instruction fetch pipe stage is stalled, " 202 "including cache mises, ITLB misses, ITLB faults, " 203 "and victim cache evictions"}, 204 {0x87, 0, "ILD_STALL", 205 "Number of cycles that the instruction length decoder is stalled"}, 206 {0xa2, 0, "RESOURCE_STALLS", 207 "Number of cycles during which there are resource-related stalls."}, 208 {0xc0, 0, "INST_RETIRED", 209 "Number of instructions retired."}, 210 {0xc1, CFL_C0, "FLOPS", 211 "Number of computational floating-point operations retired."}, 212 {0xc2, 0, "UOPS_RETIRED", 213 "Number of UOPs retired."}, 214 {0xc4, 0, "BR_INST_RETIRED", 215 "Number of branch instructions retired."}, 216 {0xc5, 0, "BR_MISS_PRED_RETIRED", 217 "Number of mispredicted branches retired."}, 218 {0xc6, 0, "CYCLES_INT_MASKED", 219 "Number of processor cycles for which interrupts are disabled."}, 220 {0xc7, 0, "CYCLES_INT_PENDING_AND_MASKED", 221 "Number of processor cycles for which interrupts are disabled " 222 "and interrupts are pending."}, 223 {0xc8, 0, "HW_INT_RX", 224 "Number of hardware interrupts received."}, 225 {0xc9, 0, "BR_TAKEN_RETIRED", 226 "Number of taken branches retired."}, 227 {0xca, 0, "BR_MISS_PRED_TAKEN_RET", 228 "Number of taken mispredictioned branches retired."}, 229 {0xd0, 0, "INST_DECODER", 230 "Number of instructions decoded."}, 231 {0xd2, 0, "PARTIAL_RAT_STALLS", 232 "Number of cycles or events for partial stalls."}, 233 {0xe0, 0, "BR_INST_DECODED", 234 "Number of branch instructions decoded."}, 235 {0xe2, 0, "BTB_MISSES", 236 "Number of branches that miss the BTB."}, 237 {0xe4, 0, "BR_BOGUS", 238 "Number of bogus branches."}, 239 {0xe6, 0, "BACLEARS", 240 "Number of times BACLEAR is asserted."}, 241 {0x0, 0, NULL, NULL}, 242 }; 243 244 static void 245 printdesc (char *desc) 246 { 247 char *p; 248 249 for (;;) { 250 while (*desc == ' ') 251 desc++; 252 if (strlen (desc) < 70) { 253 if (*desc) 254 printf (" %s\n", desc); 255 return; 256 } 257 p = desc + 72; 258 while (*--p != ' ') 259 ; 260 while (*--p == ' ') 261 ; 262 p++; 263 printf (" %.*s\n", p - desc, desc); 264 desc = p; 265 } 266 267 } 268 269 /* Print all possible counter functions */ 270 static void 271 list (int fam) 272 { 273 struct ctrfn *cfnp; 274 275 if (fam == 5) 276 cfnp = p5fn; 277 else if (fam == 6) 278 cfnp = p6fn; 279 else { 280 fprintf (stderr, "Unknown CPU family %d\n", fam); 281 exit (1); 282 } 283 printf ("Hardware counter functions for the %s:\n\n", 284 fam == 5 ? "Pentium" : "Pentium Pro"); 285 for (; cfnp->name; cfnp++) { 286 printf ("%02x %s", cfnp->fn, cfnp->name); 287 if (cfnp->flags & CFL_MESI) 288 printf ("/mesi"); 289 else if (cfnp->flags & CFL_SA) 290 printf ("/a"); 291 if (cfnp->flags & CFL_C0) 292 printf (" (ctr0 only)"); 293 if (cfnp->flags & CFL_C1) 294 printf (" (ctr1 only)"); 295 printf ("\n"); 296 if (cfnp->desc) 297 printdesc (cfnp->desc); 298 } 299 } 300 301 struct ctrfn * 302 fn2cfnp (u_int family, u_int sel) 303 { 304 struct ctrfn *cfnp; 305 306 if (family == 6) { 307 cfnp = p6fn; 308 sel &= 0xff; 309 } 310 else { 311 cfnp = p5fn; 312 sel &= 0x3f; 313 } 314 for (; cfnp->name; cfnp++) 315 if (cfnp->fn == sel) 316 return (cfnp); 317 return (NULL); 318 } 319 320 static char * 321 fn2str (int family, u_int sel) 322 { 323 static char buf[128]; 324 char um[9] = ""; 325 char cm[6] = ""; 326 struct ctrfn *cfnp; 327 u_int fn; 328 329 if (family == 5) { 330 fn = sel & 0x3f; 331 cfnp = fn2cfnp (family, fn); 332 sprintf (buf, "%c%c%c %02x %s", 333 sel & P5CTR_C ? 'c' : '-', 334 sel & P5CTR_U ? 'u' : '-', 335 sel & P5CTR_K ? 'k' : '-', 336 fn, cfnp ? cfnp->name : "unknown function"); 337 } 338 else if (family == 6) { 339 fn = sel & 0xff; 340 cfnp = fn2cfnp (family, fn); 341 if (cfnp && cfnp->flags & CFL_MESI) 342 sprintf (um, "/%c%c%c%c", 343 sel & P6CTR_UM_M ? 'm' : '-', 344 sel & P6CTR_UM_E ? 'e' : '-', 345 sel & P6CTR_UM_S ? 's' : '-', 346 sel & P6CTR_UM_I ? 'i' : '-'); 347 else if (cfnp && cfnp->flags & CFL_SA) 348 sprintf (um, "/%c", sel & P6CTR_UM_A ? 'a' : '-'); 349 if (sel >> 24) 350 sprintf (cm, "+%d", sel >> 24); 351 sprintf (buf, "%c%c%c%c %02x%s%s%*s %s", 352 sel & P6CTR_I ? 'i' : '-', 353 sel & P6CTR_E ? 'e' : '-', 354 sel & P6CTR_K ? 'k' : '-', 355 sel & P6CTR_U ? 'u' : '-', 356 fn, cm, um, 7 - (strlen (cm) + strlen (um)), "", 357 cfnp ? cfnp->name : "unknown function"); 358 } 359 else 360 return (NULL); 361 return (buf); 362 } 363 364 /* Print status of counters */ 365 static void 366 readst (void) 367 { 368 int fd, i; 369 struct pctrst st; 370 371 fd = open (_PATH_PCTR, O_RDONLY); 372 if (fd < 0) { 373 perror (_PATH_PCTR); 374 exit (1); 375 } 376 if (ioctl (fd, PCIOCRD, &st) < 0) { 377 perror ("PCIOCRD"); 378 exit (1); 379 } 380 close (fd); 381 382 if (usep5ctr || usep6ctr) { 383 for (i = 0; i < PCTR_NUM; i++) 384 printf (" ctr%d = %16qd [%s]\n", i, st.pctr_hwc[i], 385 fn2str (cpufamily, st.pctr_fn[i])); 386 } 387 printf (" tsc = %16qd\n idl = %16qd\n", st.pctr_tsc, st.pctr_idl); 388 } 389 390 static void 391 setctr (int ctr, u_int val) 392 { 393 int fd; 394 395 fd = open (_PATH_PCTR, O_WRONLY); 396 if (fd < 0) { 397 perror (_PATH_PCTR); 398 exit (1); 399 } 400 if (ioctl (fd, PCIOCS0 + ctr, &val) < 0) { 401 perror ("PCIOCSn"); 402 exit (1); 403 } 404 close (fd); 405 } 406 407 static void 408 usage (void) 409 { 410 fprintf (stderr, 411 "usage:\n" 412 " %s\n" 413 " Read the counters.\n" 414 " %s -l [5|6]\n" 415 " List all possible counter functions for P5/P6.\n", 416 __progname, __progname); 417 if (usep5ctr) 418 fprintf (stderr, 419 " %s -s {0|1} [-[c][u][k]] function\n" 420 " Configure counter.\n" 421 " 0/1 - counter to configure\n" 422 " c - count cycles not events\n" 423 " u - count events in user mode (ring 3)\n" 424 " k - count events in kernel mode (rings 0-2)\n", 425 __progname); 426 else if (usep6ctr) 427 fprintf (stderr, 428 " %s -s {0|1} [-[i][e][k][u]] " 429 "function[+cm][/{[m][e][s][i]|[a]}]\n" 430 " Configure counter.\n" 431 " 0/1 - counter number to configure\n" 432 " i - invert cm\n" 433 " e - edge detect\n" 434 " k - count events in kernel mode (rings 0-2)\n" 435 " u - count events in user mode (ring 3)\n" 436 " cm - # events/cycle required to bump ctr\n" 437 " mesi - Modified/Exclusive/Shared/Invalid in cache\n" 438 " s/a - self generated/all events\n", __progname); 439 exit (1); 440 } 441 442 443 int 444 main (int argc, char **argv) 445 { 446 u_int ctr; 447 char *cp; 448 u_int fn, fl = 0; 449 char **ap; 450 int ac; 451 struct ctrfn *cfnp; 452 int mib[2]; 453 size_t len; 454 455 /* Get the kernel cpuid return values. */ 456 mib[0] = CTL_MACHDEP; 457 mib[1] = CPU_CPUVENDOR; 458 if (sysctl(mib, 2, NULL, &len, NULL, 0) == -1) 459 err(1, "sysctl CPU_CPUVENDOR"); 460 if (len > sizeof(cpu_vendor)) /* Shouldn't ever happen. */ 461 err(1, "sysctl CPU_CPUVENDOR too big"); 462 if (sysctl(mib, 2, cpu_vendor, &len, NULL, 0) == -1) 463 err(1, "sysctl CPU_CPUVENDOR"); 464 465 mib[1] = CPU_CPUID; 466 len = sizeof(cpu_id); 467 if (sysctl(mib, 2, &cpu_id, &len, NULL, 0) == -1) 468 err(1, "sysctl CPU_CPUID"); 469 470 mib[1] = CPU_CPUFEATURE; 471 len = sizeof(cpu_feature); 472 if (sysctl(mib, 2, &cpu_feature, &len, NULL, 0) == -1) 473 err(1, "sysctl CPU_CPUFEATURE"); 474 475 pctr_isintel = (strcmp(cpu_vendor, "GenuineIntel") == 0); 476 477 if (argc <= 1) 478 readst (); 479 else if (argc == 2 && !strcmp (argv[1], "-l")) 480 list (cpufamily); 481 else if (argc == 3 && !strcmp (argv[1], "-l")) 482 list (atoi (argv[2])); 483 else if (!strcmp (argv[1], "-s") && argc >= 4) { 484 ctr = atoi (argv[2]); 485 if (ctr >= PCTR_NUM) 486 usage (); 487 ap = &argv[3]; 488 ac = argc - 3; 489 490 if (usep6ctr) 491 fl |= P6CTR_EN; 492 if (**ap == '-') { 493 cp = *ap; 494 if (usep6ctr) 495 while (*++cp) 496 switch (*cp) { 497 case 'i': 498 fl |= P6CTR_I; 499 break; 500 case 'e': 501 fl |= P6CTR_E; 502 break; 503 case 'k': 504 fl |= P6CTR_K; 505 break; 506 case 'u': 507 fl |= P6CTR_U; 508 break; 509 default: 510 usage (); 511 } 512 else if(usep5ctr) 513 while (*++cp) 514 switch (*cp) { 515 case 'c': 516 fl |= P5CTR_C; 517 break; 518 case 'k': 519 fl |= P5CTR_K; 520 break; 521 case 'u': 522 fl |= P5CTR_U; 523 break; 524 default: 525 usage (); 526 } 527 ap++; 528 ac--; 529 } 530 else { 531 if (usep6ctr) 532 fl |= P6CTR_U|P6CTR_K; 533 else if (usep5ctr) 534 fl |= P5CTR_U|P5CTR_K; 535 } 536 537 if (!ac) 538 usage (); 539 540 fn = strtoul (*ap, NULL, 16); 541 if ((usep6ctr && (fn & ~0xff)) || (!usep6ctr && (fn & ~0x3f))) 542 usage (); 543 fl |= fn; 544 if (usep6ctr && (cp = strchr (*ap, '+'))) { 545 cp++; 546 fn = strtol (cp, NULL, 0); 547 if (fn & ~0xff) 548 usage (); 549 fl |= (fn << 24); 550 } 551 cfnp = fn2cfnp (6, fl); 552 if (usep6ctr && cfnp && (cp = strchr (*ap, '/'))) { 553 if (cfnp->flags & CFL_MESI) 554 while (*++cp) 555 switch (*cp) { 556 case 'm': 557 fl |= P6CTR_UM_M; 558 break; 559 case 'e': 560 fl |= P6CTR_UM_E; 561 break; 562 case 's': 563 fl |= P6CTR_UM_S; 564 break; 565 case 'i': 566 fl |= P6CTR_UM_I; 567 break; 568 default: 569 usage (); 570 } 571 else if (cfnp->flags & CFL_SA) 572 while (*++cp) 573 switch (*cp) { 574 case 'a': 575 fl |= P6CTR_UM_A; 576 break; 577 default: 578 usage (); 579 } 580 else 581 usage (); 582 } 583 else if (cfnp && (cfnp->flags & CFL_MESI)) 584 fl |= P6CTR_UM_MESI; 585 ap++; 586 ac--; 587 588 if (ac) 589 usage (); 590 591 if (usep6ctr && ! (fl & 0xff)) 592 fl = 0; 593 setctr (ctr, fl); 594 } 595 else 596 usage (); 597 598 return 0; 599 } 600