xref: /openbsd-src/sys/arch/amd64/amd64/est.c (revision 4f0f137048839449c419480aeb26d92922b0120b)
1 /*	$OpenBSD: est.c,v 1.42 2021/08/12 15:16:23 tb Exp $ */
2 /*
3  * Copyright (c) 2003 Michael Eriksson.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 
30 /*
31  * This is a driver for Intel's Enhanced SpeedStep, as implemented in
32  * Pentium M processors.
33  *
34  * Reference documentation:
35  *
36  * - IA-32 Intel Architecture Software Developer's Manual, Volume 3:
37  *   System Programming Guide.
38  *   Section 13.14, Enhanced Intel SpeedStep technology.
39  *   Table B-2, MSRs in Pentium M Processors.
40  *   http://www.intel.com/design/pentium4/manuals/245472.htm
41  *
42  * - Intel Pentium M Processor Datasheet.
43  *   Table 5, Voltage and Current Specifications.
44  *   http://www.intel.com/design/mobile/datashts/252612.htm
45  *
46  * - Intel Pentium M Processor on 90 nm Process with 2-MB L2 Cache Datasheet
47  *   Table 3-4, Voltage and Current Specifications.
48  *   http://www.intel.com/design/mobile/datashts/302189.htm
49  *
50  * - Linux cpufreq patches, speedstep-centrino.c.
51  *   Encoding of MSR_PERF_CTL and MSR_PERF_STATUS.
52  *   http://www.codemonkey.org.uk/projects/cpufreq/cpufreq-2.4.22-pre6-1.gz
53  */
54 
55 
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/sysctl.h>
59 #include <sys/malloc.h>
60 
61 #include <machine/cpu.h>
62 #include <machine/cpufunc.h>
63 #include <machine/specialreg.h>
64 #include <machine/bus.h>
65 
66 #include "acpicpu.h"
67 
68 #if NACPICPU > 0
69 #include <dev/acpi/acpidev.h>
70 #endif
71 
72 /* Possible bus speeds (multiplied by 100 for rounding) */
73 #define BUS100 10000
74 #define BUS133 13333
75 #define BUS166 16667
76 #define BUS200 20000
77 #define BUS266 26667
78 #define BUS333 33333
79 
/*
 * Convert a 16-bit performance state id (the format est_init() extracts
 * from MSR_PERF_STATUS) into a frequency in MHz.  Bits 15:8 of the id are
 * used as the clock ratio and `bus' is a bus speed scaled by 100 (one of
 * the BUSxxx values), so 50 is added before dividing by 100 to round to
 * the nearest MHz.
 */
#define MSR2MHZ(msr, bus) \
	(((bus) * (((int)(msr) >> 8) & 0xff) + 50) / 100)
82 
/*
 * One operating point of the processor.
 */
struct est_op {
	uint16_t ctrl;	/* value est_setperf() puts in the low 16 bits of
			 * MSR_PERF_CTL to select this point */
	uint16_t mhz;	/* frequency of this point in MHz */
	uint16_t pct;	/* est_setperf() picks the first point whose pct
			 * is not above the requested level */
};
88 
/*
 * A list of operating points.  est_init() treats table[0] as the
 * highest frequency and table[n - 1] as the lowest.
 */
struct fqlist {
	int vendor : 5;		/* est_init() stores -1 here for the table
				 * it generates itself */
	unsigned bus_clk : 1;
	unsigned n : 5;		/* number of valid entries in table; a 5-bit
				 * field, so it cannot exceed 31 */
	struct est_op *table;	/* the operating points */
};
95 
96 
97 static struct fqlist *est_fqlist;
98 
99 extern int setperf_prio;
100 extern int perflevel;
101 
102 int bus_clock;
103 
104 void p4_get_bus_clock(struct cpu_info *);
105 void p3_get_bus_clock(struct cpu_info *);
106 
107 void
p4_get_bus_clock(struct cpu_info * ci)108 p4_get_bus_clock(struct cpu_info *ci)
109 {
110 	u_int64_t msr;
111 	int model, bus;
112 
113 	model = (ci->ci_signature >> 4) & 15;
114 	msr = rdmsr(MSR_EBC_FREQUENCY_ID);
115 	if (model < 2) {
116 		bus = (msr >> 21) & 0x7;
117 		switch (bus) {
118 		case 0:
119 			bus_clock = BUS100;
120 			break;
121 		case 1:
122 			bus_clock = BUS133;
123 			break;
124 		default:
125 			printf("%s: unknown Pentium 4 (model %d) "
126 			    "EBC_FREQUENCY_ID value %d\n",
127 			    ci->ci_dev->dv_xname, model, bus);
128 			break;
129 		}
130 	} else {
131 		bus = (msr >> 16) & 0x7;
132 		switch (bus) {
133 		case 0:
134 			bus_clock = (model == 2) ? BUS100 : BUS266;
135 			break;
136 		case 1:
137 			bus_clock = BUS133;
138 			break;
139 		case 2:
140 			bus_clock = BUS200;
141 			break;
142 		case 3:
143 			bus_clock = BUS166;
144 			break;
145 		default:
146 			printf("%s: unknown Pentium 4 (model %d) "
147 			    "EBC_FREQUENCY_ID value %d\n",
148 			    ci->ci_dev->dv_xname, model, bus);
149 			break;
150 		}
151 	}
152 }
153 
154 void
p3_get_bus_clock(struct cpu_info * ci)155 p3_get_bus_clock(struct cpu_info *ci)
156 {
157 	u_int64_t msr;
158 	int bus;
159 
160 	switch (ci->ci_model) {
161 	case 0xe: /* Core Duo/Solo */
162 	case 0xf: /* Core Xeon */
163 	case 0x16: /* 65nm Celeron */
164 	case 0x17: /* Core 2 Extreme/45nm Xeon */
165 	case 0x1d: /* Xeon MP 7400 */
166 		msr = rdmsr(MSR_FSB_FREQ);
167 		bus = (msr >> 0) & 0x7;
168 		switch (bus) {
169 		case 5:
170 			bus_clock = BUS100;
171 			break;
172 		case 1:
173 			bus_clock = BUS133;
174 			break;
175 		case 3:
176 			bus_clock = BUS166;
177 			break;
178 		case 2:
179 			bus_clock = BUS200;
180 			break;
181 		case 0:
182 			bus_clock = BUS266;
183 			break;
184 		case 4:
185 			bus_clock = BUS333;
186 			break;
187 		default:
188 			printf("%s: unknown Core FSB_FREQ value %d",
189 			    ci->ci_dev->dv_xname, bus);
190 			goto print_msr;
191 		}
192 		break;
193 	case 0x1c: /* Atom */
194 	case 0x26: /* Atom Z6xx */
195 	case 0x36: /* Atom [DN]2xxx */
196 		msr = rdmsr(MSR_FSB_FREQ);
197 		bus = (msr >> 0) & 0x7;
198 		switch (bus) {
199 		case 5:
200 			bus_clock = BUS100;
201 			break;
202 		case 1:
203 			bus_clock = BUS133;
204 			break;
205 		case 3:
206 			bus_clock = BUS166;
207 			break;
208 		case 2:
209 			bus_clock = BUS200;
210 			break;
211 		default:
212 			printf("%s: unknown Atom FSB_FREQ value %d",
213 			    ci->ci_dev->dv_xname, bus);
214 			goto print_msr;
215 		}
216 		break;
217 	default:
218 		/* no FSB on modern Intel processors */
219 		break;
220 	}
221 	return;
222 print_msr:
223 	/*
224 	 * Show the EBL_CR_POWERON MSR, so we'll at least have
225 	 * some extra information, such as clock ratio, etc.
226 	 */
227 	printf(" (0x%llx)\n", rdmsr(MSR_EBL_CR_POWERON));
228 }
229 
230 #if NACPICPU > 0
231 struct fqlist * est_acpi_init(void);
232 void est_acpi_pss_changed(struct acpicpu_pss *, int);
233 
234 struct fqlist *
est_acpi_init(void)235 est_acpi_init(void)
236 {
237 	struct acpicpu_pss *pss;
238 	struct fqlist *acpilist;
239 	int nstates, i;
240 	int high, low;
241 
242 	if ((nstates = acpicpu_fetch_pss(&pss)) == 0)
243 		goto nolist;
244 
245 	high = pss[0].pss_core_freq;
246 	low = pss[nstates - 1].pss_core_freq;
247 	if (high - low <= 0)
248 		goto nolist;
249 
250 	if ((acpilist = malloc(sizeof(struct fqlist), M_DEVBUF, M_NOWAIT))
251 	    == NULL)
252 		goto nolist;
253 
254 	if ((acpilist->table = mallocarray(nstates, sizeof(struct est_op),
255 	    M_DEVBUF, M_NOWAIT)) == NULL)
256 		goto notable;
257 
258 	acpilist->n = nstates;
259 
260 	for (i = 0; i < nstates; i++) {
261 		acpilist->table[i].mhz = pss[i].pss_core_freq;
262 		acpilist->table[i].ctrl = pss[i].pss_ctrl;
263 		acpilist->table[i].pct =
264 		    (pss[i].pss_core_freq - low) * 100 / (high - low);
265 	}
266 
267 	acpicpu_set_notify(est_acpi_pss_changed);
268 
269 	return acpilist;
270 
271 notable:
272 	free(acpilist, M_DEVBUF, sizeof(struct fqlist));
273 	acpilist = NULL;
274 nolist:
275 	return NULL;
276 }
277 
278 void
est_acpi_pss_changed(struct acpicpu_pss * pss,int npss)279 est_acpi_pss_changed(struct acpicpu_pss *pss, int npss)
280 {
281 	struct fqlist *acpilist;
282 	int needtran = 1, i;
283 	int high, low;
284 	u_int64_t msr;
285 	u_int16_t cur;
286 
287 	msr = rdmsr(MSR_PERF_STATUS);
288 	cur = msr & 0xffff;
289 
290 	high = pss[0].pss_core_freq;
291 	low = pss[npss - 1].pss_core_freq;
292 	if (high - low <= 0) {
293 		printf("est_acpi_pss_changed: new est state has no "
294 		    "speed step\n");
295 		return;
296 	}
297 
298 	if ((acpilist = malloc(sizeof(struct fqlist), M_DEVBUF, M_NOWAIT))
299 	    == NULL) {
300 		printf("est_acpi_pss_changed: cannot allocate memory for new "
301 		    "est state\n");
302 		return;
303 	}
304 
305 	if ((acpilist->table = mallocarray(npss, sizeof(struct est_op),
306 	    M_DEVBUF, M_NOWAIT)) == NULL) {
307 		printf("est_acpi_pss_changed: cannot allocate memory for new "
308 		    "operating points\n");
309 		free(acpilist, M_DEVBUF, sizeof(struct fqlist));
310 		return;
311 	}
312 
313 	for (i = 0; i < npss; i++) {
314 		acpilist->table[i].mhz = pss[i].pss_core_freq;
315 		acpilist->table[i].ctrl = pss[i].pss_ctrl;
316 		acpilist->table[i].pct =
317 		    (pss[i].pss_core_freq - low) * 100 / (high - low);
318 		if (pss[i].pss_ctrl == cur)
319 			needtran = 0;
320 	}
321 
322 	free(est_fqlist->table, M_DEVBUF, npss * sizeof(struct est_op));
323 	free(est_fqlist, M_DEVBUF, sizeof(struct fqlist));
324 	est_fqlist = acpilist;
325 
326 	if (needtran) {
327 		est_setperf(perflevel);
328 	}
329 }
330 #endif
331 
332 void
est_init(struct cpu_info * ci)333 est_init(struct cpu_info *ci)
334 {
335 	const char *cpu_device = ci->ci_dev->dv_xname;
336 	int vendor = -1;
337 	int i, low, high;
338 	u_int64_t msr;
339 	u_int16_t idhi, idlo, cur;
340 	u_int8_t crhi, crlo, crcur;
341 	struct fqlist *fake_fqlist;
342 	struct est_op *fake_table;
343 
344 	if (setperf_prio > 3)
345 		return;
346 
347 #if NACPICPU > 0
348 	est_fqlist = est_acpi_init();
349 #endif
350 
351 	/* bus_clock is only used if we can't get values from ACPI */
352 	if (est_fqlist == NULL) {
353 		if (ci->ci_family == 0xf)
354 			p4_get_bus_clock(ci);
355 		else if (ci->ci_family == 6)
356 			p3_get_bus_clock(ci);
357 	}
358 
359 	/*
360 	 * Interpreting the values of PERF_STATUS is not valid
361 	 * on recent processors so don't do it on anything unknown
362 	 */
363 	if (est_fqlist == NULL && bus_clock != 0) {
364 		msr = rdmsr(MSR_PERF_STATUS);
365 		idhi = (msr >> 32) & 0xffff;
366 		idlo = (msr >> 48) & 0xffff;
367 		cur = msr & 0xffff;
368 		crhi = (idhi  >> 8) & 0xff;
369 		crlo = (idlo  >> 8) & 0xff;
370 		crcur = (cur >> 8) & 0xff;
371 
372 		if (crhi == 0 || crcur == 0 || crlo > crhi ||
373 		    crcur < crlo || crcur > crhi) {
374 			/*
375 			 * Do complain about other weirdness, because we first
376 			 * want to know about it, before we decide what to do
377 			 * with it.
378 			 */
379 			printf("%s: EST: strange msr value 0x%016llx\n",
380 			    cpu_device, msr);
381 			return;
382 		}
383 		if (crlo == 0 || crhi == crlo) {
384 			/*
385 			 * Don't complain about these cases, and silently
386 			 * disable EST: - A lowest clock ratio of 0, which
387 			 * seems to happen on all Pentium 4's that report EST.
388 			 * - An equal highest and lowest clock ratio, which
389 			 * happens on at least the Core 2 Duo X6800, maybe on
390 			 * newer models too.
391 			 */
392 			return;
393 		}
394 
395 		printf("%s: unknown Enhanced SpeedStep CPU, msr 0x%016llx\n",
396 		    cpu_device, msr);
397 		/*
398 		 * Generate a fake table with the power states we know.
399 		 */
400 
401 		if ((fake_fqlist = malloc(sizeof(struct fqlist), M_DEVBUF,
402 		    M_NOWAIT)) == NULL) {
403 			printf("%s: EST: cannot allocate memory for fake "
404 			    "list\n", cpu_device);
405 			return;
406 		}
407 
408 
409 		if ((fake_table = mallocarray(3, sizeof(struct est_op),
410 		    M_DEVBUF, M_NOWAIT)) == NULL) {
411 			free(fake_fqlist, M_DEVBUF, sizeof(struct fqlist));
412 			printf("%s: EST: cannot allocate memory for fake "
413 			    "table\n", cpu_device);
414 			return;
415 		}
416 		fake_table[0].ctrl = idhi;
417 		fake_table[0].mhz = MSR2MHZ(idhi, bus_clock);
418 		if (cur == idhi || cur == idlo) {
419 			printf("%s: using only highest and lowest power "
420 			       "states\n", cpu_device);
421 
422 			fake_table[0].pct = 51;
423 
424 			fake_table[1].ctrl = idlo;
425 			fake_table[1].mhz = MSR2MHZ(idlo, bus_clock);
426 			fake_table[1].pct = 0;
427 			fake_fqlist->n = 2;
428 		} else {
429 			printf("%s: using only highest, current and lowest "
430 			    "power states\n", cpu_device);
431 
432 			fake_table[0].pct = 67;
433 
434 			fake_table[1].ctrl = cur;
435 			fake_table[1].mhz = MSR2MHZ(cur, bus_clock);
436 			fake_table[1].pct = 34;
437 
438 			fake_table[2].ctrl = idlo;
439 			fake_table[2].mhz = MSR2MHZ(idlo, bus_clock);
440 			fake_table[2].pct = 0;
441 			fake_fqlist->n = 3;
442 		}
443 
444 		fake_fqlist->vendor = vendor;
445 		fake_fqlist->table = fake_table;
446 		est_fqlist = fake_fqlist;
447 	}
448 
449 	if (est_fqlist == NULL)
450 		return;
451 
452 	if (est_fqlist->n < 2)
453 		goto nospeedstep;
454 
455 	low = est_fqlist->table[est_fqlist->n - 1].mhz;
456 	high = est_fqlist->table[0].mhz;
457 	if (low == high)
458 		goto nospeedstep;
459 
460 	perflevel = (cpuspeed - low) * 100 / (high - low);
461 
462 	printf("%s: Enhanced SpeedStep %d MHz", cpu_device, cpuspeed);
463 
464 	/*
465 	 * OK, tell the user the available frequencies.
466 	 */
467 	printf(": speeds: ");
468 	for (i = 0; i < est_fqlist->n; i++)
469 		printf("%d%s", est_fqlist->table[i].mhz, i < est_fqlist->n - 1
470 		    ?  ", " : " MHz\n");
471 
472 	cpu_setperf = est_setperf;
473 	setperf_prio = 3;
474 
475 	return;
476 
477 nospeedstep:
478 	free(est_fqlist->table, M_DEVBUF, 0);
479 	free(est_fqlist, M_DEVBUF, sizeof(*est_fqlist));
480 }
481 
482 void
est_setperf(int level)483 est_setperf(int level)
484 {
485 	int i;
486 	uint64_t msr;
487 
488 	if (est_fqlist == NULL)
489 		return;
490 
491 	for (i = 0; i < est_fqlist->n; i++) {
492 		if (level >= est_fqlist->table[i].pct)
493 			break;
494 	}
495 
496 	msr = rdmsr(MSR_PERF_CTL);
497 	msr &= ~0xffffULL;
498 	msr |= est_fqlist->table[i].ctrl;
499 
500 	wrmsr(MSR_PERF_CTL, msr);
501 	cpuspeed = est_fqlist->table[i].mhz;
502 }
503