1 /* $OpenBSD: est.c,v 1.42 2021/08/12 15:16:23 tb Exp $ */
2 /*
3 * Copyright (c) 2003 Michael Eriksson.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29
30 /*
31 * This is a driver for Intel's Enhanced SpeedStep, as implemented in
32 * Pentium M processors.
33 *
34 * Reference documentation:
35 *
36 * - IA-32 Intel Architecture Software Developer's Manual, Volume 3:
37 * System Programming Guide.
38 * Section 13.14, Enhanced Intel SpeedStep technology.
39 * Table B-2, MSRs in Pentium M Processors.
40 * http://www.intel.com/design/pentium4/manuals/245472.htm
41 *
42 * - Intel Pentium M Processor Datasheet.
43 * Table 5, Voltage and Current Specifications.
44 * http://www.intel.com/design/mobile/datashts/252612.htm
45 *
46 * - Intel Pentium M Processor on 90 nm Process with 2-MB L2 Cache Datasheet
47 * Table 3-4, Voltage and Current Specifications.
48 * http://www.intel.com/design/mobile/datashts/302189.htm
49 *
50 * - Linux cpufreq patches, speedstep-centrino.c.
51 * Encoding of MSR_PERF_CTL and MSR_PERF_STATUS.
52 * http://www.codemonkey.org.uk/projects/cpufreq/cpufreq-2.4.22-pre6-1.gz
53 */
54
55
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/sysctl.h>
59 #include <sys/malloc.h>
60
61 #include <machine/cpu.h>
62 #include <machine/cpufunc.h>
63 #include <machine/specialreg.h>
64 #include <machine/bus.h>
65
66 #include "acpicpu.h"
67
68 #if NACPICPU > 0
69 #include <dev/acpi/acpidev.h>
70 #endif
71
/*
 * Bus clocks, expressed as MHz * 100 so that the fractional clocks
 * (133.33, 166.67, ...) survive until MSR2MHZ() does its rounding.
 */
#define BUS100	10000
#define BUS133	13333
#define BUS166	16667
#define BUS200	20000
#define BUS266	26667
#define BUS333	33333

/*
 * Turn a 16-bit performance state id into MHz: bits 15:8 of `msr' are
 * multiplied by `bus' (one of the BUSxxx values) and the product is
 * rounded to the nearest MHz.
 */
#define MSR2MHZ(msr, bus)					\
	(((((int)(msr) & 0xff00) >> 8) * (bus) + 50) / 100)
82
/*
 * One operating point the processor can be switched to.
 */
struct est_op {
	uint16_t ctrl;	/* value written to the low 16 bits of MSR_PERF_CTL */
	uint16_t mhz;	/* core frequency in MHz; becomes cpuspeed when selected */
	uint16_t pct;	/* smallest setperf level that selects this point */
};
88
/*
 * A list of operating points.  The code in this file keeps `table'
 * ordered from the fastest entry (index 0) to the slowest (index n - 1).
 */
struct fqlist {
	int vendor: 5;		/* est_init() stores -1 here for its made-up list */
	unsigned bus_clk : 1;	/* not referenced in this part of the file */
	unsigned n : 5;		/* number of valid entries in table (5 bits: <= 31) */
	struct est_op *table;	/* the operating points themselves */
};
95
96
/* Operating points used by est_setperf(); starts out NULL. */
static struct fqlist *est_fqlist;

/* Defined elsewhere; est_init() reads and updates both. */
extern int setperf_prio;
extern int perflevel;

/*
 * Bus clock as one of the BUSxxx values.  Stays 0 when neither helper
 * below recognizes the processor.
 */
int bus_clock;

void p4_get_bus_clock(struct cpu_info *);
void p3_get_bus_clock(struct cpu_info *);
106
107 void
p4_get_bus_clock(struct cpu_info * ci)108 p4_get_bus_clock(struct cpu_info *ci)
109 {
110 u_int64_t msr;
111 int model, bus;
112
113 model = (ci->ci_signature >> 4) & 15;
114 msr = rdmsr(MSR_EBC_FREQUENCY_ID);
115 if (model < 2) {
116 bus = (msr >> 21) & 0x7;
117 switch (bus) {
118 case 0:
119 bus_clock = BUS100;
120 break;
121 case 1:
122 bus_clock = BUS133;
123 break;
124 default:
125 printf("%s: unknown Pentium 4 (model %d) "
126 "EBC_FREQUENCY_ID value %d\n",
127 ci->ci_dev->dv_xname, model, bus);
128 break;
129 }
130 } else {
131 bus = (msr >> 16) & 0x7;
132 switch (bus) {
133 case 0:
134 bus_clock = (model == 2) ? BUS100 : BUS266;
135 break;
136 case 1:
137 bus_clock = BUS133;
138 break;
139 case 2:
140 bus_clock = BUS200;
141 break;
142 case 3:
143 bus_clock = BUS166;
144 break;
145 default:
146 printf("%s: unknown Pentium 4 (model %d) "
147 "EBC_FREQUENCY_ID value %d\n",
148 ci->ci_dev->dv_xname, model, bus);
149 break;
150 }
151 }
152 }
153
154 void
p3_get_bus_clock(struct cpu_info * ci)155 p3_get_bus_clock(struct cpu_info *ci)
156 {
157 u_int64_t msr;
158 int bus;
159
160 switch (ci->ci_model) {
161 case 0xe: /* Core Duo/Solo */
162 case 0xf: /* Core Xeon */
163 case 0x16: /* 65nm Celeron */
164 case 0x17: /* Core 2 Extreme/45nm Xeon */
165 case 0x1d: /* Xeon MP 7400 */
166 msr = rdmsr(MSR_FSB_FREQ);
167 bus = (msr >> 0) & 0x7;
168 switch (bus) {
169 case 5:
170 bus_clock = BUS100;
171 break;
172 case 1:
173 bus_clock = BUS133;
174 break;
175 case 3:
176 bus_clock = BUS166;
177 break;
178 case 2:
179 bus_clock = BUS200;
180 break;
181 case 0:
182 bus_clock = BUS266;
183 break;
184 case 4:
185 bus_clock = BUS333;
186 break;
187 default:
188 printf("%s: unknown Core FSB_FREQ value %d",
189 ci->ci_dev->dv_xname, bus);
190 goto print_msr;
191 }
192 break;
193 case 0x1c: /* Atom */
194 case 0x26: /* Atom Z6xx */
195 case 0x36: /* Atom [DN]2xxx */
196 msr = rdmsr(MSR_FSB_FREQ);
197 bus = (msr >> 0) & 0x7;
198 switch (bus) {
199 case 5:
200 bus_clock = BUS100;
201 break;
202 case 1:
203 bus_clock = BUS133;
204 break;
205 case 3:
206 bus_clock = BUS166;
207 break;
208 case 2:
209 bus_clock = BUS200;
210 break;
211 default:
212 printf("%s: unknown Atom FSB_FREQ value %d",
213 ci->ci_dev->dv_xname, bus);
214 goto print_msr;
215 }
216 break;
217 default:
218 /* no FSB on modern Intel processors */
219 break;
220 }
221 return;
222 print_msr:
223 /*
224 * Show the EBL_CR_POWERON MSR, so we'll at least have
225 * some extra information, such as clock ratio, etc.
226 */
227 printf(" (0x%llx)\n", rdmsr(MSR_EBL_CR_POWERON));
228 }
229
230 #if NACPICPU > 0
/* Builds a list from the states acpicpu_fetch_pss() reports. */
struct fqlist * est_acpi_init(void);
/* Callback given to acpicpu_set_notify(); installs a rebuilt list. */
void est_acpi_pss_changed(struct acpicpu_pss *, int);
233
234 struct fqlist *
est_acpi_init(void)235 est_acpi_init(void)
236 {
237 struct acpicpu_pss *pss;
238 struct fqlist *acpilist;
239 int nstates, i;
240 int high, low;
241
242 if ((nstates = acpicpu_fetch_pss(&pss)) == 0)
243 goto nolist;
244
245 high = pss[0].pss_core_freq;
246 low = pss[nstates - 1].pss_core_freq;
247 if (high - low <= 0)
248 goto nolist;
249
250 if ((acpilist = malloc(sizeof(struct fqlist), M_DEVBUF, M_NOWAIT))
251 == NULL)
252 goto nolist;
253
254 if ((acpilist->table = mallocarray(nstates, sizeof(struct est_op),
255 M_DEVBUF, M_NOWAIT)) == NULL)
256 goto notable;
257
258 acpilist->n = nstates;
259
260 for (i = 0; i < nstates; i++) {
261 acpilist->table[i].mhz = pss[i].pss_core_freq;
262 acpilist->table[i].ctrl = pss[i].pss_ctrl;
263 acpilist->table[i].pct =
264 (pss[i].pss_core_freq - low) * 100 / (high - low);
265 }
266
267 acpicpu_set_notify(est_acpi_pss_changed);
268
269 return acpilist;
270
271 notable:
272 free(acpilist, M_DEVBUF, sizeof(struct fqlist));
273 acpilist = NULL;
274 nolist:
275 return NULL;
276 }
277
278 void
est_acpi_pss_changed(struct acpicpu_pss * pss,int npss)279 est_acpi_pss_changed(struct acpicpu_pss *pss, int npss)
280 {
281 struct fqlist *acpilist;
282 int needtran = 1, i;
283 int high, low;
284 u_int64_t msr;
285 u_int16_t cur;
286
287 msr = rdmsr(MSR_PERF_STATUS);
288 cur = msr & 0xffff;
289
290 high = pss[0].pss_core_freq;
291 low = pss[npss - 1].pss_core_freq;
292 if (high - low <= 0) {
293 printf("est_acpi_pss_changed: new est state has no "
294 "speed step\n");
295 return;
296 }
297
298 if ((acpilist = malloc(sizeof(struct fqlist), M_DEVBUF, M_NOWAIT))
299 == NULL) {
300 printf("est_acpi_pss_changed: cannot allocate memory for new "
301 "est state\n");
302 return;
303 }
304
305 if ((acpilist->table = mallocarray(npss, sizeof(struct est_op),
306 M_DEVBUF, M_NOWAIT)) == NULL) {
307 printf("est_acpi_pss_changed: cannot allocate memory for new "
308 "operating points\n");
309 free(acpilist, M_DEVBUF, sizeof(struct fqlist));
310 return;
311 }
312
313 for (i = 0; i < npss; i++) {
314 acpilist->table[i].mhz = pss[i].pss_core_freq;
315 acpilist->table[i].ctrl = pss[i].pss_ctrl;
316 acpilist->table[i].pct =
317 (pss[i].pss_core_freq - low) * 100 / (high - low);
318 if (pss[i].pss_ctrl == cur)
319 needtran = 0;
320 }
321
322 free(est_fqlist->table, M_DEVBUF, npss * sizeof(struct est_op));
323 free(est_fqlist, M_DEVBUF, sizeof(struct fqlist));
324 est_fqlist = acpilist;
325
326 if (needtran) {
327 est_setperf(perflevel);
328 }
329 }
330 #endif
331
332 void
est_init(struct cpu_info * ci)333 est_init(struct cpu_info *ci)
334 {
335 const char *cpu_device = ci->ci_dev->dv_xname;
336 int vendor = -1;
337 int i, low, high;
338 u_int64_t msr;
339 u_int16_t idhi, idlo, cur;
340 u_int8_t crhi, crlo, crcur;
341 struct fqlist *fake_fqlist;
342 struct est_op *fake_table;
343
344 if (setperf_prio > 3)
345 return;
346
347 #if NACPICPU > 0
348 est_fqlist = est_acpi_init();
349 #endif
350
351 /* bus_clock is only used if we can't get values from ACPI */
352 if (est_fqlist == NULL) {
353 if (ci->ci_family == 0xf)
354 p4_get_bus_clock(ci);
355 else if (ci->ci_family == 6)
356 p3_get_bus_clock(ci);
357 }
358
359 /*
360 * Interpreting the values of PERF_STATUS is not valid
361 * on recent processors so don't do it on anything unknown
362 */
363 if (est_fqlist == NULL && bus_clock != 0) {
364 msr = rdmsr(MSR_PERF_STATUS);
365 idhi = (msr >> 32) & 0xffff;
366 idlo = (msr >> 48) & 0xffff;
367 cur = msr & 0xffff;
368 crhi = (idhi >> 8) & 0xff;
369 crlo = (idlo >> 8) & 0xff;
370 crcur = (cur >> 8) & 0xff;
371
372 if (crhi == 0 || crcur == 0 || crlo > crhi ||
373 crcur < crlo || crcur > crhi) {
374 /*
375 * Do complain about other weirdness, because we first
376 * want to know about it, before we decide what to do
377 * with it.
378 */
379 printf("%s: EST: strange msr value 0x%016llx\n",
380 cpu_device, msr);
381 return;
382 }
383 if (crlo == 0 || crhi == crlo) {
384 /*
385 * Don't complain about these cases, and silently
386 * disable EST: - A lowest clock ratio of 0, which
387 * seems to happen on all Pentium 4's that report EST.
388 * - An equal highest and lowest clock ratio, which
389 * happens on at least the Core 2 Duo X6800, maybe on
390 * newer models too.
391 */
392 return;
393 }
394
395 printf("%s: unknown Enhanced SpeedStep CPU, msr 0x%016llx\n",
396 cpu_device, msr);
397 /*
398 * Generate a fake table with the power states we know.
399 */
400
401 if ((fake_fqlist = malloc(sizeof(struct fqlist), M_DEVBUF,
402 M_NOWAIT)) == NULL) {
403 printf("%s: EST: cannot allocate memory for fake "
404 "list\n", cpu_device);
405 return;
406 }
407
408
409 if ((fake_table = mallocarray(3, sizeof(struct est_op),
410 M_DEVBUF, M_NOWAIT)) == NULL) {
411 free(fake_fqlist, M_DEVBUF, sizeof(struct fqlist));
412 printf("%s: EST: cannot allocate memory for fake "
413 "table\n", cpu_device);
414 return;
415 }
416 fake_table[0].ctrl = idhi;
417 fake_table[0].mhz = MSR2MHZ(idhi, bus_clock);
418 if (cur == idhi || cur == idlo) {
419 printf("%s: using only highest and lowest power "
420 "states\n", cpu_device);
421
422 fake_table[0].pct = 51;
423
424 fake_table[1].ctrl = idlo;
425 fake_table[1].mhz = MSR2MHZ(idlo, bus_clock);
426 fake_table[1].pct = 0;
427 fake_fqlist->n = 2;
428 } else {
429 printf("%s: using only highest, current and lowest "
430 "power states\n", cpu_device);
431
432 fake_table[0].pct = 67;
433
434 fake_table[1].ctrl = cur;
435 fake_table[1].mhz = MSR2MHZ(cur, bus_clock);
436 fake_table[1].pct = 34;
437
438 fake_table[2].ctrl = idlo;
439 fake_table[2].mhz = MSR2MHZ(idlo, bus_clock);
440 fake_table[2].pct = 0;
441 fake_fqlist->n = 3;
442 }
443
444 fake_fqlist->vendor = vendor;
445 fake_fqlist->table = fake_table;
446 est_fqlist = fake_fqlist;
447 }
448
449 if (est_fqlist == NULL)
450 return;
451
452 if (est_fqlist->n < 2)
453 goto nospeedstep;
454
455 low = est_fqlist->table[est_fqlist->n - 1].mhz;
456 high = est_fqlist->table[0].mhz;
457 if (low == high)
458 goto nospeedstep;
459
460 perflevel = (cpuspeed - low) * 100 / (high - low);
461
462 printf("%s: Enhanced SpeedStep %d MHz", cpu_device, cpuspeed);
463
464 /*
465 * OK, tell the user the available frequencies.
466 */
467 printf(": speeds: ");
468 for (i = 0; i < est_fqlist->n; i++)
469 printf("%d%s", est_fqlist->table[i].mhz, i < est_fqlist->n - 1
470 ? ", " : " MHz\n");
471
472 cpu_setperf = est_setperf;
473 setperf_prio = 3;
474
475 return;
476
477 nospeedstep:
478 free(est_fqlist->table, M_DEVBUF, 0);
479 free(est_fqlist, M_DEVBUF, sizeof(*est_fqlist));
480 }
481
482 void
est_setperf(int level)483 est_setperf(int level)
484 {
485 int i;
486 uint64_t msr;
487
488 if (est_fqlist == NULL)
489 return;
490
491 for (i = 0; i < est_fqlist->n; i++) {
492 if (level >= est_fqlist->table[i].pct)
493 break;
494 }
495
496 msr = rdmsr(MSR_PERF_CTL);
497 msr &= ~0xffffULL;
498 msr |= est_fqlist->table[i].ctrl;
499
500 wrmsr(MSR_PERF_CTL, msr);
501 cpuspeed = est_fqlist->table[i].mhz;
502 }
503