xref: /netbsd-src/sys/arch/x86/x86/coretemp.c (revision 7001505d69c9f1e37493e96709ea7a38716eec30)
1 /* $NetBSD: coretemp.c,v 1.42 2024/07/15 01:57:23 gutteridge Exp $ */
2 
3 /*-
4  * Copyright (c) 2011 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jukka Ruohonen.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*-
34  * Copyright (c) 2007 Juan Romero Pardines.
35  * Copyright (c) 2007 Rui Paulo <rpaulo@FreeBSD.org>
36  * All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
48  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
49  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
50  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
51  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
52  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
53  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
55  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
56  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
57  * POSSIBILITY OF SUCH DAMAGE.
58  *
59  * $FreeBSD: src/sys/dev/coretemp/coretemp.c,v 1.4 2007/10/15 20:00:21 netchild Exp $
60  *
61  */
62 
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: coretemp.c,v 1.42 2024/07/15 01:57:23 gutteridge Exp $");
65 
66 #include <sys/param.h>
67 #include <sys/device.h>
68 #include <sys/cpu.h>
69 #include <sys/module.h>
70 #include <sys/xcall.h>
71 
72 #include <dev/sysmon/sysmonvar.h>
73 
74 #include <machine/cpuvar.h>
75 #include <machine/cpufunc.h>
76 #include <machine/cputypes.h>
77 #include <machine/specialreg.h>
78 
/*
 * Bits and fields of the value read from MSR_THERM_STATUS.
 * This file uses CRIT_STA, READOUT, RESOLUTION and VALID.
 */
#define MSR_THERM_STATUS_STA		__BIT(0)
#define MSR_THERM_STATUS_LOG		__BIT(1)
#define MSR_THERM_STATUS_PROCHOT_EVT	__BIT(2)
#define MSR_THERM_STATUS_PROCHOT_LOG	__BIT(3)
#define MSR_THERM_STATUS_CRIT_STA	__BIT(4)
#define MSR_THERM_STATUS_CRIT_LOG	__BIT(5)
#define MSR_THERM_STATUS_TRIP1_STA	__BIT(6)
#define MSR_THERM_STATUS_TRIP1_LOG	__BIT(7)
#define MSR_THERM_STATUS_TRIP2_STA	__BIT(8)
#define MSR_THERM_STATUS_TRIP2_LOG	__BIT(9)
#define MSR_THERM_STATUS_READOUT	__BITS(16, 22)
#define MSR_THERM_STATUS_RESOLUTION	__BITS(27, 30)
#define MSR_THERM_STATUS_VALID		__BIT(31)

/*
 * Thermal interrupt control bits and trip-point fields.
 * None of these are referenced elsewhere in this file.
 */
#define MSR_THERM_INTR_HITEMP		__BIT(0)
#define MSR_THERM_INTR_LOTEMPT		__BIT(1)
#define MSR_THERM_INTR_PROCHOT		__BIT(2)
#define MSR_THERM_INTR_FORCPR		__BIT(3)
#define MSR_THERM_INTR_OVERHEAT		__BIT(4)
#define MSR_THERM_INTR_TRIP1_VAL	__BITS(8, 14)
#define MSR_THERM_INTR_TRIP1		__BIT(15)
#define MSR_THERM_INTR_TRIP2_VAL	__BITS(16, 22)
#define MSR_THERM_INTR_TRIP2		__BIT(23)

/* Field of MSR_TEMPERATURE_TARGET from which Tj(max) is extracted. */
#define MSR_TEMP_TARGET_READOUT		__BITS(16, 23)

/*
 * Tj(max) handling: the value used when nothing better is known, and
 * the bounds outside of which a value read from the MSR is reported
 * as suspicious.
 */
#define TJMAX_DEFAULT		100
#define TJMAX_LIMIT_LOW		60
#define TJMAX_LIMIT_HIGH	120
108 
109 static int	coretemp_match(device_t, cfdata_t, void *);
110 static void	coretemp_attach(device_t, device_t, void *);
111 static int	coretemp_detach(device_t, int);
112 static int	coretemp_quirks(struct cpu_info *);
113 static int	coretemp_tjmax(device_t);
114 static void	coretemp_refresh(struct sysmon_envsys *, envsys_data_t *);
115 static void	coretemp_refresh_xcall(void *, void *);
116 
117 struct coretemp_softc {
118 	device_t		 sc_dev;
119 	struct cpu_info		*sc_ci;
120 	struct sysmon_envsys	*sc_sme;
121 	envsys_data_t		 sc_sensor;
122 	int			 sc_tjmax;
123 };
124 
125 CFATTACH_DECL_NEW(coretemp, sizeof(struct coretemp_softc),
126     coretemp_match, coretemp_attach, coretemp_detach, NULL);
127 
128 static int
coretemp_match(device_t parent,cfdata_t cf,void * aux)129 coretemp_match(device_t parent, cfdata_t cf, void *aux)
130 {
131 	struct cpufeature_attach_args *cfaa = aux;
132 	struct cpu_info *ci = cfaa->ci;
133 	uint32_t regs[4];
134 
135 	if (strcmp(cfaa->name, "temperature") != 0)
136 		return 0;
137 
138 	if (cpu_vendor != CPUVENDOR_INTEL || cpuid_level < 0x06)
139 		return 0;
140 
141 	/*
142 	 * Only attach on the first SMT ID.
143 	 */
144 	if (ci->ci_smt_id != 0)
145 		return 0;
146 
147 	/*
148 	 * CPUID 0x06 returns 1 if the processor
149 	 * has on-die thermal sensors. EBX[0:3]
150 	 * contains the number of sensors.
151 	 */
152 	x86_cpuid(0x06, regs);
153 
154 	if ((regs[0] & CPUID_DSPM_DTS) == 0)
155 		return 0;
156 
157 	return coretemp_quirks(ci);
158 }
159 
160 static void
coretemp_attach(device_t parent,device_t self,void * aux)161 coretemp_attach(device_t parent, device_t self, void *aux)
162 {
163 	struct coretemp_softc *sc = device_private(self);
164 	struct cpufeature_attach_args *cfaa = aux;
165 	struct cpu_info *ci = cfaa->ci;
166 	uint64_t msr;
167 
168 	sc->sc_ci = ci;
169 	sc->sc_dev = self;
170 
171 	msr = rdmsr(MSR_THERM_STATUS);
172 	msr = __SHIFTOUT(msr, MSR_THERM_STATUS_RESOLUTION);
173 
174 	aprint_naive("\n");
175 	aprint_normal(": thermal sensor, %u C resolution", (uint32_t)msr);
176 
177 	sc->sc_sensor.units = ENVSYS_STEMP;
178 	sc->sc_sensor.state = ENVSYS_SINVALID;
179 	sc->sc_sensor.flags = ENVSYS_FMONCRITICAL | ENVSYS_FHAS_ENTROPY;
180 
181 	(void)pmf_device_register(self, NULL, NULL);
182 	(void)snprintf(sc->sc_sensor.desc, sizeof(sc->sc_sensor.desc),
183 	    "%s temperature", device_xname(ci->ci_dev));
184 
185 	sc->sc_sme = sysmon_envsys_create();
186 
187 	if (sysmon_envsys_sensor_attach(sc->sc_sme, &sc->sc_sensor) != 0)
188 		goto fail;
189 
190 	sc->sc_sme->sme_cookie = sc;
191 	sc->sc_sme->sme_name = device_xname(self);
192 	sc->sc_sme->sme_refresh = coretemp_refresh;
193 
194 	if (sysmon_envsys_register(sc->sc_sme) != 0)
195 		goto fail;
196 
197 	if (coretemp_tjmax(self) == 0) {
198 		aprint_verbose(", Tjmax=%d", sc->sc_tjmax);
199 		aprint_normal("\n");
200 	}
201 	return;
202 
203 fail:
204 	sysmon_envsys_destroy(sc->sc_sme);
205 	sc->sc_sme = NULL;
206 	aprint_normal("\n");
207 }
208 
209 static int
coretemp_detach(device_t self,int flags)210 coretemp_detach(device_t self, int flags)
211 {
212 	struct coretemp_softc *sc = device_private(self);
213 
214 	if (sc->sc_sme != NULL)
215 		sysmon_envsys_unregister(sc->sc_sme);
216 
217 	pmf_device_deregister(self);
218 
219 	return 0;
220 }
221 
222 static int
coretemp_quirks(struct cpu_info * ci)223 coretemp_quirks(struct cpu_info *ci)
224 {
225 	uint32_t model, stepping;
226 	uint64_t msr;
227 
228 	model = CPUID_TO_MODEL(ci->ci_signature);
229 	stepping = CPUID_TO_STEPPING(ci->ci_signature);
230 
231 	/*
232 	 * Check if the MSR contains thermal
233 	 * reading valid bit, this avoid false
234 	 * positives on systems that fake up
235 	 * a compatible CPU that doesn't have
236 	 * access to these MSRs; such as VMWare.
237 	 */
238 	msr = rdmsr(MSR_THERM_STATUS);
239 
240 	if ((msr & MSR_THERM_STATUS_VALID) == 0)
241 		return 0;
242 
243 	/*
244 	 * Check for errata AE18, "Processor Digital
245 	 * Thermal Sensor (DTS) Readout Stops Updating
246 	 * upon Returning from C3/C4 State".
247 	 *
248 	 * Adapted from the Linux coretemp driver.
249 	 */
250 	if (model == 0x0E && stepping < 0x0C) {
251 
252 		msr = rdmsr(MSR_BIOS_SIGN);
253 		msr = msr >> 32;
254 
255 		if (msr < 0x39)
256 			return 0;
257 	}
258 
259 	return 1;
260 }
261 
262 static int
coretemp_tjmax(device_t self)263 coretemp_tjmax(device_t self)
264 {
265 	struct coretemp_softc *sc = device_private(self);
266 	struct cpu_info *ci = sc->sc_ci;
267 	uint64_t msr;
268 	uint32_t model, stepping;
269 	int tjmax;
270 
271 	model = CPUID_TO_MODEL(ci->ci_signature);
272 	stepping = CPUID_TO_STEPPING(ci->ci_signature);
273 
274 	/* Set the initial value. */
275 	sc->sc_tjmax = TJMAX_DEFAULT;
276 
277 	if ((model == 0x0f && stepping >= 2) || (model == 0x0e)) {
278 		/*
279 		 * Check MSR_IA32_PLATFORM_ID(0x17) bit 28. It's not documented
280 		 * in the datasheet, but the following page describes the
281 		 * detail:
282 		 *   https://web.archive.org/web/20110608131711/http://software.intel.com/
283 		 *     en-us/articles/mobile-intel-core2-processor-detection-table/
284 		 *   Was: http://softwarecommunity.intel.com/Wiki/Mobility/
285 		 *     720.htm
286 		 */
287 		if (rdmsr_safe(MSR_IA32_PLATFORM_ID, &msr) != 0)
288 			goto notee;
289 		if ((msr & __BIT(28)) == 0)
290 			goto notee;
291 
292 		if (rdmsr_safe(MSR_IA32_EXT_CONFIG, &msr) == EFAULT) {
293 			aprint_normal("\n");
294 			aprint_error_dev(sc->sc_dev,
295 			    "Failed to read MSR_IA32_EXT_CONFIG MSR. "
296 			    "Using default (%d)\n", sc->sc_tjmax);
297 			return 1;
298 		}
299 
300 		if ((msr & __BIT(30)) != 0)
301 			sc->sc_tjmax = 85;
302 	} else if (model == 0x17 && stepping == 0x06) {
303 		/* The mobile Penryn family. */
304 		sc->sc_tjmax = 105;
305 	} else if (model == 0x1c) {
306 		if (stepping == 0x0a) {
307 			/* 45nm Atom D400, N400 and D500 series */
308 			sc->sc_tjmax = 100;
309 		} else
310 			sc->sc_tjmax = 90;
311 	} else {
312 notee:
313 		/*
314 		 * Attempt to get Tj(max) from IA32_TEMPERATURE_TARGET.
315 		 * It is not fully known which CPU models have the MSR.
316 		 */
317 		if (rdmsr_safe(MSR_TEMPERATURE_TARGET, &msr) == EFAULT) {
318 			aprint_normal("\n");
319 			aprint_error_dev(sc->sc_dev,
320 			    "Failed to read TEMPERATURE_TARGET MSR. "
321 			    "Using default (%d)\n", sc->sc_tjmax);
322 			return 1;
323 		}
324 
325 		tjmax = __SHIFTOUT(msr, MSR_TEMP_TARGET_READOUT);
326 		if (tjmax < TJMAX_LIMIT_LOW) {
327 			aprint_normal("\n");
328 			aprint_error_dev(sc->sc_dev,
329 			    "WARNING: Tjmax(%d) retrieved was below expected range, "
330 				"using default (%d).\n", tjmax, sc->sc_tjmax);
331 			return 1;
332 		}
333 
334 		if (tjmax > TJMAX_LIMIT_HIGH) {
335 			aprint_normal("\n");
336 			aprint_error_dev(sc->sc_dev,
337 			    "WARNING: Tjmax(%d) might exceed the limit.\n",
338 			    tjmax);
339 			sc->sc_tjmax = tjmax;
340 			return 1;
341 		}
342 		sc->sc_tjmax = tjmax;
343 	}
344 
345 	return 0;
346 }
347 
348 static void
coretemp_refresh(struct sysmon_envsys * sme,envsys_data_t * edata)349 coretemp_refresh(struct sysmon_envsys *sme, envsys_data_t *edata)
350 {
351 	struct coretemp_softc *sc = sme->sme_cookie;
352 	uint64_t xc;
353 
354 	xc = xc_unicast(0, coretemp_refresh_xcall, sc, edata, sc->sc_ci);
355 	xc_wait(xc);
356 }
357 
358 static void
coretemp_refresh_xcall(void * arg0,void * arg1)359 coretemp_refresh_xcall(void *arg0, void *arg1)
360 {
361 	struct coretemp_softc *sc = arg0;
362 	envsys_data_t *edata = arg1;
363 	uint64_t msr;
364 
365 	msr = rdmsr(MSR_THERM_STATUS);
366 
367 	if ((msr & MSR_THERM_STATUS_VALID) == 0)
368 		edata->state = ENVSYS_SINVALID;
369 	else {
370 		/*
371 		 * The temperature is computed by
372 		 * subtracting the reading by Tj(max).
373 		 */
374 		edata->value_cur = sc->sc_tjmax;
375 		edata->value_cur -= __SHIFTOUT(msr, MSR_THERM_STATUS_READOUT);
376 
377 		/*
378 		 * Convert to mK.
379 		 */
380 		edata->value_cur *= 1000000;
381 		edata->value_cur += 273150000;
382 		edata->state = ENVSYS_SVALID;
383 	}
384 
385 	if ((msr & MSR_THERM_STATUS_CRIT_STA) != 0)
386 		edata->state = ENVSYS_SCRITICAL;
387 }
388 
389 MODULE(MODULE_CLASS_DRIVER, coretemp, "sysmon_envsys");
390 
391 #ifdef _MODULE
392 #include "ioconf.c"
393 #endif
394 
395 static int
coretemp_modcmd(modcmd_t cmd,void * aux)396 coretemp_modcmd(modcmd_t cmd, void *aux)
397 {
398 	int error = 0;
399 
400 	switch (cmd) {
401 	case MODULE_CMD_INIT:
402 #ifdef _MODULE
403 		error = config_init_component(cfdriver_ioconf_coretemp,
404 		    cfattach_ioconf_coretemp, cfdata_ioconf_coretemp);
405 #endif
406 		return error;
407 	case MODULE_CMD_FINI:
408 #ifdef _MODULE
409 		error = config_fini_component(cfdriver_ioconf_coretemp,
410 		    cfattach_ioconf_coretemp, cfdata_ioconf_coretemp);
411 #endif
412 		return error;
413 	default:
414 		return ENOTTY;
415 	}
416 }
417