xref: /dflybsd-src/sys/dev/misc/dimm/dimm.c (revision 7ee0de79152d1f7788c347f64b52790d187699e0)
1 /*
2  * Copyright (c) 2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sepherosa Ziehau <sepherosa@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/sensors.h>
42 #include <sys/sysctl.h>
43 #include <sys/systm.h>
44 
45 #include <dev/misc/dimm/dimm.h>
46 
/*
 * Default temperature watermarks.  dimm_create() seeds every new DIMM
 * with these values; the sysctl descriptions of the per-DIMM
 * temp_hiwat/temp_lowat knobs give the unit as degrees C.
 */
#define DIMM_TEMP_HIWAT_DEFAULT	85
#define DIMM_TEMP_LOWAT_DEFAULT	75

/* Default ECC error count threshold, assigned by dimm_create(). */
#define DIMM_ECC_THRESH_DEFAULT	5
51 
/*
 * Per-DIMM state.  dimm_create() keeps one instance per
 * (node, channel, slot) location; instances are reference counted
 * and linked on dimm_softc_list.
 */
struct dimm_softc {
	/* Linkage on dimm_softc_list. */
	TAILQ_ENTRY(dimm_softc) dimm_link;
	/* Location of this DIMM; exported read-only through sysctl. */
	int			dimm_node;
	int			dimm_chan;
	int			dimm_slot;
	/* Temperature at or above which dimm_sensor_temp() raises the alarm. */
	int			dimm_temp_hiwat;
	/* Temperature below which dimm_sensor_temp() cancels the alarm. */
	int			dimm_temp_lowat;
	/* Unit number; used to build the "dimm%d" device/sysctl name. */
	int			dimm_id;
	/* Reference count; the softc is freed when it drops to 0. */
	int			dimm_ref;
	/* Last ECC error count passed to dimm_sensor_ecc_set(). */
	int			dimm_ecc_cnt;
	/* ECC error count at or above which the ECC alarm is raised. */
	int			dimm_ecc_thresh;

	/* Sensor device that the DIMM's sensors are attached to. */
	struct ksensordev	dimm_sensdev;
	uint32_t		dimm_sens_taskflags;	/* DIMM_SENS_TF_ */

	/* Per-DIMM sysctl context and node; the node is NULL if adding it failed. */
	struct sysctl_ctx_list	dimm_sysctl_ctx;
	struct sysctl_oid	*dimm_sysctl_tree;
};
TAILQ_HEAD(dimm_softc_list, dimm_softc);
71 
/* Bits kept in dimm_softc.dimm_sens_taskflags. */
#define DIMM_SENS_TF_TEMP_CRIT		0x1	/* temperature alarm raised */
#define DIMM_SENS_TF_ECC_CRIT		0x2	/* ECC alarm raised */

/* Forward declaration; called from dimm_mod_event() on MOD_UNLOAD. */
static void	dimm_mod_unload(void);

/*
 * All DIMMs created so far, in the ascending order of
 * dimm_softc.dimm_id.  dimm_create() and dimm_destroy() modify the
 * list while holding SYSCTL_XLOCK.
 */
static struct dimm_softc_list	dimm_softc_list;

/* Parent sysctl node (dimminfo under _hw) for the per-DIMM nodes. */
static SYSCTL_NODE(_hw, OID_AUTO, dimminfo, CTLFLAG_RD, NULL,
    "DIMM information");
82 
83 struct dimm_softc *
84 dimm_create(int node, int chan, int slot)
85 {
86 	struct dimm_softc *sc, *after = NULL;
87 	int dimm_id = 0;
88 
89 	SYSCTL_XLOCK();
90 
91 	TAILQ_FOREACH(sc, &dimm_softc_list, dimm_link) {
92 		/*
93 		 * Already exists; done.
94 		 */
95 		if (sc->dimm_node == node && sc->dimm_chan == chan &&
96 		    sc->dimm_slot == slot) {
97 			KASSERT(sc->dimm_ref > 0, ("invalid dimm reference %d",
98 			    sc->dimm_ref));
99 			sc->dimm_ref++;
100 			SYSCTL_XUNLOCK();
101 			return sc;
102 		}
103 
104 		/*
105 		 * Find the lowest usable id.
106 		 */
107 		if (sc->dimm_id == dimm_id) {
108 			++dimm_id;
109 			after = sc;
110 		}
111 	}
112 
113 	sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
114 	sc->dimm_node = node;
115 	sc->dimm_chan = chan;
116 	sc->dimm_slot = slot;
117 	sc->dimm_id = dimm_id;
118 	sc->dimm_ref = 1;
119 	sc->dimm_temp_hiwat = DIMM_TEMP_HIWAT_DEFAULT;
120 	sc->dimm_temp_lowat = DIMM_TEMP_LOWAT_DEFAULT;
121 	sc->dimm_ecc_thresh = DIMM_ECC_THRESH_DEFAULT;
122 
123 	ksnprintf(sc->dimm_sensdev.xname, sizeof(sc->dimm_sensdev.xname),
124 	    "dimm%d", sc->dimm_id);
125 
126 	/*
127 	 * Create sysctl tree for the location information.  Use
128 	 * same name as the sensor device.
129 	 */
130 	sysctl_ctx_init(&sc->dimm_sysctl_ctx);
131 	sc->dimm_sysctl_tree = SYSCTL_ADD_NODE(&sc->dimm_sysctl_ctx,
132 	    SYSCTL_STATIC_CHILDREN(_hw_dimminfo), OID_AUTO,
133 	    sc->dimm_sensdev.xname, CTLFLAG_RD, 0, "");
134 	if (sc->dimm_sysctl_tree != NULL) {
135 		SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
136 		    SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
137 		    "node", CTLFLAG_RD, &sc->dimm_node, 0,
138 		    "CPU node of this DIMM");
139 		SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
140 		    SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
141 		    "chan", CTLFLAG_RD, &sc->dimm_chan, 0,
142 		    "channel of this DIMM");
143 		SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
144 		    SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
145 		    "slot", CTLFLAG_RD, &sc->dimm_slot, 0,
146 		    "slot of this DIMM");
147 		SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
148 		    SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
149 		    "temp_hiwat", CTLFLAG_RW, &sc->dimm_temp_hiwat, 0,
150 		    "Raise alarm once DIMM temperature is above this value "
151 		    "(unit: C)");
152 		SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
153 		    SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
154 		    "temp_lowat", CTLFLAG_RW, &sc->dimm_temp_lowat, 0,
155 		    "Cancel alarm once DIMM temperature is below this value "
156 		    "(unit: C)");
157 		SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
158 		    SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
159 		    "ecc_thresh", CTLFLAG_RW, &sc->dimm_ecc_thresh, 0,
160 		    "Raise alarm once number ECC errors go above this value");
161 	}
162 
163 	if (after == NULL) {
164 		KKASSERT(sc->dimm_id == 0);
165 		TAILQ_INSERT_HEAD(&dimm_softc_list, sc, dimm_link);
166 	} else {
167 		TAILQ_INSERT_AFTER(&dimm_softc_list, after, sc, dimm_link);
168 	}
169 
170 	sensordev_install(&sc->dimm_sensdev);
171 
172 	SYSCTL_XUNLOCK();
173 	return sc;
174 }
175 
176 int
177 dimm_destroy(struct dimm_softc *sc)
178 {
179 	SYSCTL_XLOCK();
180 
181 	KASSERT(sc->dimm_ref > 0, ("invalid dimm reference %d", sc->dimm_ref));
182 	sc->dimm_ref--;
183 	if (sc->dimm_ref > 0) {
184 		SYSCTL_XUNLOCK();
185 		return EAGAIN;
186 	}
187 
188 	sensordev_deinstall(&sc->dimm_sensdev);
189 
190 	TAILQ_REMOVE(&dimm_softc_list, sc, dimm_link);
191 	if (sc->dimm_sysctl_tree != NULL)
192 		sysctl_ctx_free(&sc->dimm_sysctl_ctx);
193 	kfree(sc, M_DEVBUF);
194 
195 	SYSCTL_XUNLOCK();
196 	return 0;
197 }
198 
199 void
200 dimm_sensor_attach(struct dimm_softc *sc, struct ksensor *sens)
201 {
202 	sensor_attach(&sc->dimm_sensdev, sens);
203 }
204 
205 void
206 dimm_sensor_detach(struct dimm_softc *sc, struct ksensor *sens)
207 {
208 	sensor_detach(&sc->dimm_sensdev, sens);
209 }
210 
211 void
212 dimm_set_temp_thresh(struct dimm_softc *sc, int hiwat, int lowat)
213 {
214 	sc->dimm_temp_hiwat = hiwat;
215 	sc->dimm_temp_lowat = lowat;
216 }
217 
218 void
219 dimm_set_ecc_thresh(struct dimm_softc *sc, int thresh)
220 {
221 	sc->dimm_ecc_thresh = thresh;
222 }
223 
224 void
225 dimm_sensor_temp(struct dimm_softc *sc, struct ksensor *sens, int temp)
226 {
227 	enum sensor_status status;
228 
229 	if (temp >= sc->dimm_temp_hiwat &&
230 	    (sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT) == 0) {
231 		char temp_str[16], data[64];
232 
233 		ksnprintf(temp_str, sizeof(temp_str), "%d", temp);
234 		ksnprintf(data, sizeof(data), "node=%d channel=%d dimm=%d",
235 		    sc->dimm_node, sc->dimm_chan, sc->dimm_slot);
236 		devctl_notify("memtemp", "Thermal", temp_str, data);
237 
238 		kprintf("dimm%d: node%d channel%d DIMM%d "
239 		    "temperature (%dC) is too high (>= %dC)\n",
240 		    sc->dimm_id, sc->dimm_node, sc->dimm_chan, sc->dimm_slot,
241 		    temp, sc->dimm_temp_hiwat);
242 
243 		sc->dimm_sens_taskflags |= DIMM_SENS_TF_TEMP_CRIT;
244 	} else if ((sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT) &&
245 	     temp < sc->dimm_temp_lowat) {
246 		sc->dimm_sens_taskflags &= ~DIMM_SENS_TF_TEMP_CRIT;
247 	}
248 
249 	if (sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT)
250 		status = SENSOR_S_CRIT;
251 	else
252 		status = SENSOR_S_OK;
253 	sensor_set_temp_degc(sens, temp, status);
254 }
255 
256 void
257 dimm_sensor_ecc_set(struct dimm_softc *sc, struct ksensor *sens,
258     int ecc_cnt, boolean_t crit)
259 {
260 	enum sensor_status status;
261 
262 	sc->dimm_ecc_cnt = ecc_cnt;
263 
264 	if (!crit && sc->dimm_ecc_cnt >= sc->dimm_ecc_thresh)
265 		crit = TRUE;
266 
267 	if (crit && (sc->dimm_sens_taskflags & DIMM_SENS_TF_ECC_CRIT) == 0) {
268 		char ecc_str[16], data[64];
269 
270 		ksnprintf(ecc_str, sizeof(ecc_str), "%d", sc->dimm_ecc_cnt);
271 		ksnprintf(data, sizeof(data), "node=%d channel=%d dimm=%d",
272 		    sc->dimm_node, sc->dimm_chan, sc->dimm_slot);
273 		devctl_notify("ecc", "ECC", ecc_str, data);
274 
275 		kprintf("dimm%d: node%d channel%d DIMM%d "
276 		    "too many ECC errors %d\n",
277 		    sc->dimm_id, sc->dimm_node, sc->dimm_chan, sc->dimm_slot,
278 		    sc->dimm_ecc_cnt);
279 
280 		sc->dimm_sens_taskflags |= DIMM_SENS_TF_ECC_CRIT;
281 	}
282 
283 	if (sc->dimm_sens_taskflags & DIMM_SENS_TF_ECC_CRIT)
284 		status = SENSOR_S_CRIT;
285 	else
286 		status = SENSOR_S_OK;
287 	sensor_set(sens, sc->dimm_ecc_cnt, status);
288 }
289 
290 static void
291 dimm_mod_unload(void)
292 {
293 	struct dimm_softc *sc;
294 
295 	SYSCTL_XLOCK();
296 
297 	while ((sc = TAILQ_FIRST(&dimm_softc_list)) != NULL) {
298 		int error;
299 
300 		error = dimm_destroy(sc);
301 		KASSERT(!error, ("dimm%d is still referenced, ref %d",
302 		    sc->dimm_id, sc->dimm_ref));
303 	}
304 
305 	SYSCTL_XUNLOCK();
306 }
307 
308 static int
309 dimm_mod_event(module_t mod, int type, void *unused)
310 {
311 	switch (type) {
312 	case MOD_LOAD:
313 		TAILQ_INIT(&dimm_softc_list);
314 		return 0;
315 
316 	case MOD_UNLOAD:
317 		dimm_mod_unload();
318 		return 0;
319 
320 	default:
321 		return 0;
322 	}
323 }
324 
/*
 * Module glue: the module is named "dimm" and its events are handled
 * by dimm_mod_event(); the last initializer is left 0.
 */
static moduledata_t dimm_mod = {
	"dimm",
	dimm_mod_event,
	0
};
/* Registered at SI_SUB_PRE_DRIVERS with SI_ORDER_ANY. */
DECLARE_MODULE(dimm, dimm_mod, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY);
MODULE_VERSION(dimm, 1);
332