xref: /plan9-contrib/sys/src/9k/386/pmcio.c (revision 49d76b4b10eb2f614c42c7db16ef9eb8cc20de58)
1 /*
2  *  Performance counters non portable part
3  */
4 
5 #include	"u.h"
6 #include	"../port/lib.h"
7 #include	"mem.h"
8 #include	"dat.h"
9 #include	"fns.h"
10 #include	"../port/error.h"
11 
12 #include	"amd64.h"
13 #include	"pmc.h"
14 
15 typedef struct PmcCfg PmcCfg;
16 typedef struct PmcCore PmcCore;
17 
/* CPU vendor codes, kept in cfg.vendor */
enum {
	PeUnk,		/* vendor not recognized */
	PeAmd,
	PeIntel,	/* see Vol 3B of the Intel 64 Architecture Software Developer's Manual */
};
27 
/* Processor family codes, kept in cfg.family; the zero value means unknown */
enum {
	_PeUnk,

	/* Intel families with non-architectural events */
	PeIntelSandy,
	PeIntelNehalem,
	PeIntelWestmere,

	/* AMD: see the BKDG for AMD Family 10h Processors, sections 2.16 and 3.14 */
	PeK10,
};
41 
enum {
	/* number of Pe/Pct (event select/counter) register pairs on K10 */
	PeNregAmd	= 4,
};
45 
/* MSR addresses; counter n lives at base+n for both register kinds */
enum {
	/* Intel */
	PerfCtrbaseIntel= 0x000000c1,		/* Performance Counters */
	PerfEvtbaseIntel= 0x00000186,		/* Performance Event Select */
	PerfGlobalCtr	= 0x0000038f,		/* Performance Event Global Ctrl */

	/* AMD */
	PerfEvtbaseAmd	= 0xc0010000,		/* Performance Event Select */
	PerfCtrbaseAmd	= 0xc0010004,		/* Performance Counters */
};
54 
/*
 * Bit fields of a HW performance counter event select register,
 * listed from the least significant bit upwards.
 */
enum {
	PeEvMskL	= 0x00000000000000ffull,/* Event Mask L */
	PeUnMsk		= 0x000000000000ff00ull,/* Unit Mask */
	PeUsr		= 0x0000000000010000ull,/* User mode */
	PeOS		= 0x0000000000020000ull,/* OS mode */
	PeEdg		= 0x0000000000040000ull,/* Edge detect */
	PePnCtl		= 0x0000000000080000ull,/* Pin control */
	PeInEna		= 0x0000000000100000ull,/* Interrupt enable */
	PeCtEna		= 0x0000000000400000ull,/* Counter enable */
	PeInMsk		= 0x0000000000800000ull,/* Invert mask */
	PeCtMsk		= 0x00000000ff000000ull,/* Counter mask */
	PeEvMskH	= 0x0000000f00000000ull,/* Event mask H */
	PeGo		= 0x0000010000000000ull,/* Guest only */
	PeHo		= 0x0000020000000000ull,/* Host only */

	PeEvMsksh	= 32ull,		/* Event mask shift: bit position of PeEvMskH */
};
73 
74 struct PmcCfg {
75 	int nregs;
76 	u32int ctrbase;
77 	u32int evtbase;
78 	int vendor;
79 	int family;
80 	PmcCtlCtrId *pmcidsarch;
81 	PmcCtlCtrId *pmcids;
82 };
83 
84 extern int pmcdebug;
85 
86 static PmcCfg cfg;
87 static PmcCore pmccore[MACHMAX];
88 
89 static void pmcmachupdate(void);
90 
91 int
pmcnregs(void)92 pmcnregs(void)
93 {
94 	u32int info[4];
95 	int nregs;
96 
97 	if(cfg.nregs != 0)
98 		return cfg.nregs;	/* don't call cpuid more than necessary */
99 	switch(cfg.vendor){
100 	case PeAmd:
101 		nregs = PeNregAmd;
102 		break;
103 	case PeIntel:
104 		cpuid(0xa, 0, info);
105 		nregs = (info[0]>>8)&0xff;
106 		break;
107 	default:
108 		nregs = 0;
109 	}
110 	if(nregs > PmcMaxCtrs)
111 		nregs = PmcMaxCtrs;
112 	return nregs;
113 }
114 
115 static u64int
pmcmsk(void)116 pmcmsk(void)
117 {
118 	u32int info[4];
119 	u64int msk;
120 
121 	msk = 0;
122 	switch(cfg.vendor){
123 	case PeAmd:
124 		msk = ~0ULL;
125 		break;
126 	case PeIntel:
127 		cpuid(0xa, 0, info);
128 		msk = (1<<((info[0]>>16)&0xff)) - 1;
129 		break;
130 	}
131 	return msk;
132 }
133 
134 PmcCtlCtrId pmcidsk10[] = {
135 	{"locked instr", "0x024 0x1"},
136 	{"locked cycles nonspecul", "0x024 0x4"},	/* in  cycles */
137 	{"SMI intr", "0x02b 0x0"},
138 	{"DC access", "0x040 0x0"},
139 	{"DC miss", "0x041 0x0"},
140 	{"DC refills", "0x042 0x1f"},
141 	{"DC evicted", "0x042 0x3f"},
142 	{"L1 DTLB miss", "0x045 0x7"},				/* DTLB L2 hits */
143 	{"L2 DTLB miss", "0x046 0x7"},
144 	{"L1 DTLB hit", "0x04d 0x3"},
145 	{"global TLB flush", "0x054 0x0"},
146 	{"L2 hit", "0x07d 0x3f"},
147 	{"L2 miss", "0x07e 0xf"},
148 	{"IC miss", "0x081 0x0"},
149 	{"IC refill from L2", "0x082 0x0"},
150 	{"IC refill from system", "0x083 0x0"},
151 	{"L1 ITLB miss", "0x084 0x0"},					/* L2 ITLB hits */
152 	{"L2 ITLB miss", "0x085 0x3"},
153 	{"DRAM access", "0x0e0 0x3f"},
154 	//{"L3 miss core 0", "0x4e1 0x13"},
155 	//{"L3 miss core 1", "0x4e1 0x23"},
156 	//{"L3 miss core 2", "0x4e1 0x43"},
157 	//{"L3 miss core 3", "0x4e1 0x83"},
158 	{"L3 miss", "0x4e1 0xf3"},						/* all cores in the socket */
159 	{"", ""},
160 };
161 
162 /*18.2.3 Intel Software Deveveloper's Manual */
163 PmcCtlCtrId pmcidsintel[] = {
164 	{"unhalted cycles", "0x3c 0x0"},
165 	{"instr", "0xc0 0x0"},
166 	{"Llast misses", "0x2e 0x41"},
167 	{"branch instr", "0xc4 0x0"},
168 	{"branch misses", "0xc5 0x0 "},
169 	{"", ""},
170 };
171 
172 /* Table 19.7 Intel Software Deveveloper's Manual */
173 PmcCtlCtrId pmcidsandy[] = {
174 	{"DTLB walk cycles", "0x49 0x4"},				/* all levels */
175 	{"DTLB miss", "0x8 0x2"},
176 	{"DTLB hit", "0x8 0x4"},
177 	{"L2 hit", "0x24 0x4"},
178 	{"L2 miss", "0x24 0x8"},
179 	{"IL2 hit", "0x24 0x10"},
180 	{"IL2 miss", "0x24 0x20"},
181 	{"ITLB miss", "0x85 0x2"},
182 	{"ITLB walk cycles", "0x85 0x4"},
183 	{"ITLB flush", "0xae 0x1"},
184 	{"mem loads", "0xd0 0xf1"},					/* counts μops */
185 	{"mem stores", "0xd0 0xf2"},
186 	{"mem ops", "0xd0 0xf3"},
187 	{"", ""},
188 };
189 
/*
 * Pick model and family out of a processor signature word.
 * MODEL: bits 4-7, with bits 16-19 placed above them as the high nibble.
 * FAMILY: bits 8-11, with bits 20-27 placed above them (or'ed in, not added).
 */
#define X86MODEL(x)	((((x)>>4) & 0x0F) | ((((x)>>16) & 0x0F)<<4))
#define X86FAMILY(x)	((((x)>>8) & 0x0F) | ((((x)>>20) & 0xFF)<<4))
192 
193 static int
pmcintelfamily(void)194 pmcintelfamily(void)
195 {
196 	u32int info, fam, mod;
197 
198 	info = m->cpuinfo[1][0];
199 
200 	fam = X86FAMILY(info);
201 	mod = X86MODEL(info);
202 	if(fam != 0x6)
203 		return PeUnk;
204 	switch(mod){
205 	case 0x2a:
206 		return PeIntelSandy;
207 	case 0x1a:
208 	case 0x1e:
209 	case 0x1f:
210 		return PeIntelNehalem;
211 	case 0x25:
212 	case 0x2c:
213 		return PeIntelWestmere;
214 	}
215 	return PeUnk;
216 }
217 
218 void
pmcinitctl(PmcCtl * p)219 pmcinitctl(PmcCtl *p)
220 {
221 	memset(p, 0xff, sizeof(PmcCtl));
222 	p->enab = PmcCtlNullval;
223 	p->user = PmcCtlNullval;
224 	p->os = PmcCtlNullval;
225 	p->nodesc = 1;
226 }
227 
228 void
pmcconfigure(void)229 pmcconfigure(void)
230 {
231 	Mach *mach;
232 	int i, j, isrecog;
233 
234 	isrecog = 0;
235 
236 	if(memcmp(&m->cpuinfo[0][1], "AuthcAMDenti", 12) == 0){
237 		isrecog++;
238 		cfg.ctrbase = PerfCtrbaseAmd;
239 		cfg.evtbase = PerfEvtbaseAmd;
240 		cfg.vendor = PeAmd;
241 		cfg.family = PeUnk;
242 		cfg.pmcidsarch = pmcidsk10;
243 	}else if(memcmp(&m->cpuinfo[0][1], "GenuntelineI", 12) == 0){
244 		isrecog++;
245 		cfg.ctrbase = PerfCtrbaseIntel;
246 		cfg.evtbase = PerfEvtbaseIntel;
247 		cfg.vendor = PeIntel;
248 		cfg.family = pmcintelfamily();
249 		cfg.pmcidsarch = pmcidsintel;
250 		switch(cfg.family){
251 		case PeIntelSandy:
252 			cfg.pmcids = pmcidsandy;
253 			break;
254 		case PeIntelNehalem:
255 		case PeIntelWestmere:
256 			break;
257 		}
258 	}else
259 		cfg.vendor = PeUnk;
260 
261 	cfg.nregs = pmcnregs();
262 	if(isrecog)
263 		pmcupdate = pmcmachupdate;
264 
265 	for(i = 0; i < MACHMAX; i++) {
266 		if((mach = sys->machptr[i]) != nil && mach->online != 0){
267 			for(j = 0; j < cfg.nregs; j++)
268 				pmcinitctl(&pmccore[i].ctr[j]);
269 		}
270 	}
271 }
272 
273 static void
pmcenab(void)274 pmcenab(void)
275 {
276 	switch(cfg.vendor){
277 	case PeAmd:
278 		return;
279 	case PeIntel:
280 		wrmsr(PerfGlobalCtr, pmcmsk());
281 		break;
282 	}
283 }
284 
285 /* so they can be read from user space */
286 static int
pmcuserenab(int enable)287 pmcuserenab(int enable)
288 {
289 	u64int cr4;
290 
291 	cr4 = cr4get();
292 	if (enable){
293 		cr4 |= Pce;
294 	} else
295 		cr4 &=  ~Pce;
296 	cr4put(cr4);
297 	return cr4&Pce;
298 }
299 
300 int
pmctrans(PmcCtl * p)301 pmctrans(PmcCtl *p)
302 {
303 	PmcCtlCtrId *pi;
304 	int n;
305 
306 	n = 0;
307 	if(cfg.pmcidsarch != nil)
308 		for (pi = &cfg.pmcidsarch[0]; pi->portdesc[0] != '\0'; pi++){
309 			if (strncmp(p->descstr, pi->portdesc, strlen(pi->portdesc)) == 0){
310 				strncpy(p->descstr, pi->archdesc, strlen(pi->archdesc) + 1);
311 				n = 1;
312 				break;
313 			}
314 		}
315 	/* this ones supersede the other ones */
316 	if(cfg.pmcids != nil)
317 		for (pi = &cfg.pmcids[0]; pi->portdesc[0] != '\0'; pi++){
318 			if (strncmp(p->descstr, pi->portdesc, strlen(pi->portdesc)) == 0){
319 				strncpy(p->descstr, pi->archdesc, strlen(pi->archdesc) + 1);
320 				n = 1;
321 				break;
322 			}
323 		}
324 	if(pmcdebug != 0)
325 		print("really setting %s\n", p->descstr);
326 	return n;
327 }
328 
/*
 * Helpers for the event select register.
 * An event number is 12 bits: the low 8 go in PeEvMskL and bits 8-11
 * go in PeEvMskH, PeEvMsksh-8 bit positions higher.  The unit mask
 * occupies PeUnMsk, 8 bits up.  The Set macros or the field into v;
 * they do not clear what is already there.
 */
/* PeHo|PeGo are not included */
#define PeAll	(PeOS|PeUsr)

#define SetEvMsk(v, e)	((v)|(((e)&PeEvMskL)|(((e)<<(PeEvMsksh-8))&PeEvMskH)))
#define GetEvMsk(e)	(((e)&PeEvMskL)|(((e)&PeEvMskH)>>(PeEvMsksh-8)))

#define SetUMsk(v, u)	((v)|(((u)<<8ull)&PeUnMsk))
#define GetUMsk(u)	(((u)&PeUnMsk)>>8ull)
336 
337 static int
getctl(PmcCtl * p,u32int regno)338 getctl(PmcCtl *p, u32int regno)
339 {
340 	u64int r, e, u;
341 
342 	r = rdmsr(regno + cfg.evtbase);
343 	p->enab = (r&PeCtEna) != 0;
344 	p->user = (r&PeUsr) != 0;
345 	p->os = (r&PeOS) != 0;
346 	e = GetEvMsk(r);
347 	u = GetUMsk(r);
348 	/* TODO inverse translation */
349 	snprint(p->descstr, KNAMELEN, "%#ullx %#ullx", e, u);
350 	p->nodesc = 0;
351 	return 0;
352 }
353 
354 static int
pmcanyenab(void)355 pmcanyenab(void)
356 {
357 	int i;
358 	PmcCtl p;
359 
360 	for (i = 0; i < cfg.nregs; i++) {
361 		if (getctl(&p, i) < 0)
362 			return -1;
363 		if (p.enab)
364 			return 1;
365 	}
366 
367 	return 0;
368 }
369 
370 
371 static int
setctl(PmcCtl * p,int regno)372 setctl(PmcCtl *p, int regno)
373 {
374 	u64int v, e, u;
375 	char *toks[2];
376 	char str[KNAMELEN];
377 
378 	v = rdmsr(regno + cfg.evtbase);
379 	v &= PeEvMskH|PeEvMskL|PeCtEna|PeOS|PeUsr|PeUnMsk;
380 	if (p->enab != PmcCtlNullval)
381 		if (p->enab)
382 			v |= PeCtEna;
383 		else
384 			v &= ~PeCtEna;
385 
386 	if (p->user != PmcCtlNullval)
387 		if (p->user)
388 			v |= PeUsr;
389 		else
390 			v &= ~PeUsr;
391 
392 	if (p->os != PmcCtlNullval)
393 		if (p->os)
394 			v |= PeOS;
395 		else
396 			v &= ~PeOS;
397 
398 	if (pmctrans(p) < 0)
399 		return -1;
400 
401 	if (p->nodesc == 0) {
402 		memmove(str, p->descstr, KNAMELEN);
403 		if (tokenize(str, toks, 2) != 2)
404 			return -1;
405 		e = atoi(toks[0]);
406 		u = atoi(toks[1]);
407 		v &= ~(PeEvMskL|PeEvMskH|PeUnMsk);
408 		v |= SetEvMsk(v, e);
409 		v |= SetUMsk(v, u);
410 	}
411 	wrmsr(regno+ cfg.evtbase, v);
412 	pmcuserenab(pmcanyenab());
413 	if (pmcdebug) {
414 		v = rdmsr(regno+ cfg.evtbase);
415 		print("conf pmc[%#ux]: %#llux\n", regno, v);
416 	}
417 	return 0;
418 }
419 
420 int
pmcdescstr(char * str,int nstr)421 pmcdescstr(char *str, int nstr)
422 {
423 	PmcCtlCtrId *pi;
424 	int ns;
425 
426 	ns = 0;
427 
428 	if(pmcdebug != 0)
429 		print("vendor %x family %x nregs %d pmcnregs %d\n", cfg.vendor, cfg.family, cfg.nregs, pmcnregs());
430 	if(cfg.pmcidsarch == nil && cfg.pmcids == nil){
431 		*str = 0;
432 		return ns;
433 	}
434 
435 	if(cfg.pmcidsarch != nil)
436 		for (pi = &cfg.pmcidsarch[0]; pi->portdesc[0] != '\0'; pi++)
437 			ns += snprint(str + ns, nstr - ns, "%s\n",pi->portdesc);
438 	if(cfg.pmcids != nil)
439 		for (pi = &cfg.pmcids[0]; pi->portdesc[0] != '\0'; pi++)
440 			ns += snprint(str + ns, nstr - ns, "%s\n",pi->portdesc);
441 	return ns;
442 }
443 
444 static u64int
getctr(u32int regno)445 getctr(u32int regno)
446 {
447 	return rdmsr(regno + cfg.ctrbase);
448 }
449 
450 static int
setctr(u64int v,u32int regno)451 setctr(u64int v, u32int regno)
452 {
453 	wrmsr(regno + cfg.ctrbase, v);
454 	return 0;
455 }
456 
457 u64int
pmcgetctr(u32int coreno,u32int regno)458 pmcgetctr(u32int coreno, u32int regno)
459 {
460 	PmcCtr *p;
461 	u64int ctr;
462 
463 	if (regno >= cfg.nregs)
464 		error("invalid reg");
465 	p = &pmccore[coreno].ctr[regno];
466 
467 	ilock(&pmccore[coreno]);
468 	if(coreno == m->machno)
469 		ctr = getctr(regno);
470 	else
471 		ctr = p->ctr;
472 	iunlock(&pmccore[coreno]);
473 
474 	return ctr;
475 }
476 
477 int
pmcsetctr(u32int coreno,u64int v,u32int regno)478 pmcsetctr(u32int coreno, u64int v, u32int regno)
479 {
480 	PmcCtr *p;
481 	int n;
482 
483 	if (regno >= cfg.nregs)
484 		error("invalid reg");
485 	p = &pmccore[coreno].ctr[regno];
486 
487 	ilock(&pmccore[coreno]);
488 	if(coreno == m->machno)
489 		n = setctr(v, regno);
490 	else{
491 		p->ctr = v;
492 		p->ctrset |= PmcSet;
493 		p->stale = 1;
494 		n = 0;
495 	}
496 	iunlock(&pmccore[coreno]);
497 
498 	return n;
499 }
500 
501 static void
ctl2ctl(PmcCtl * dctl,PmcCtl * sctl)502 ctl2ctl(PmcCtl *dctl, PmcCtl *sctl)
503 {
504 	if(sctl->enab != PmcCtlNullval)
505 		dctl->enab = sctl->enab;
506 	if(sctl->user != PmcCtlNullval)
507 		dctl->user = sctl->user;
508 	if(sctl->os != PmcCtlNullval)
509 		dctl->os = sctl->os;
510 	if(sctl->nodesc == 0) {
511 		memmove(dctl->descstr, sctl->descstr, KNAMELEN);
512 		dctl->nodesc = 0;
513 	}
514 }
515 
516 int
pmcsetctl(u32int coreno,PmcCtl * pctl,u32int regno)517 pmcsetctl(u32int coreno, PmcCtl *pctl, u32int regno)
518 {
519 	PmcCtr *p;
520 	int n;
521 
522 	if (regno >= cfg.nregs)
523 		error("invalid reg");
524 	p = &pmccore[coreno].ctr[regno];
525 
526 	ilock(&pmccore[coreno]);
527 	if(coreno == m->machno)
528 		n = setctl(pctl, regno);
529 	else{
530 		ctl2ctl(&p->PmcCtl, pctl);
531 		p->ctlset |= PmcSet;
532 		p->stale = 1;
533 		n = 0;
534 	}
535 	iunlock(&pmccore[coreno]);
536 
537 	return n;
538 }
539 
540 int
pmcgetctl(u32int coreno,PmcCtl * pctl,u32int regno)541 pmcgetctl(u32int coreno, PmcCtl *pctl, u32int regno)
542 {
543 	PmcCtr *p;
544 	int n;
545 
546 	if (regno >= cfg.nregs)
547 		error("invalid reg");
548 	p = &pmccore[coreno].ctr[regno];
549 
550 	ilock(&pmccore[coreno]);
551 	if(coreno == m->machno)
552 		n = getctl(pctl, regno);
553 	else{
554 		memmove(pctl, &p->PmcCtl, sizeof(PmcCtl));
555 		n = 0;
556 	}
557 	iunlock(&pmccore[coreno]);
558 
559 	return n;
560 }
561 
562 static void
pmcmachupdate(void)563 pmcmachupdate(void)
564 {
565 	PmcCtr *p;
566 	int coreno, i, maxct;
567 
568 	if((maxct = cfg.nregs) <= 0)
569 		return;
570 	coreno = m->machno;
571 
572 	ilock(&pmccore[coreno]);
573 	for (i = 0; i < maxct; i++) {
574 		p = &pmccore[coreno].ctr[i];
575 		if(p->ctrset & PmcSet)
576 			setctr(p->ctr, i);
577 		if(p->ctlset & PmcSet)
578 			setctl(p, i);
579 		p->ctr = getctr(i);
580 		getctl(p, i);
581 		p->ctrset = PmcIgn;
582 		p->ctlset = PmcIgn;
583 		p->stale = 0;
584 	}
585 	iunlock(&pmccore[coreno]);
586 }
587