1 /* 2 * Copyright (c) 2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Sepherosa Ziehau <sepherosa@gmail.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/bus.h> 37 #include <sys/kernel.h> 38 #include <sys/lock.h> 39 #include <sys/malloc.h> 40 #include <sys/module.h> 41 #include <sys/sensors.h> 42 #include <sys/sysctl.h> 43 #include <sys/systm.h> 44 45 #include <dev/misc/dimm/dimm.h> 46 47 #define DIMM_TEMP_HIWAT_DEFAULT 85 48 #define DIMM_TEMP_LOWAT_DEFAULT 75 49 50 #define DIMM_ECC_THRESH_DEFAULT 5 51 52 struct dimm_softc { 53 TAILQ_ENTRY(dimm_softc) dimm_link; 54 int dimm_node; 55 int dimm_chan; 56 int dimm_slot; 57 int dimm_temp_hiwat; 58 int dimm_temp_lowat; 59 int dimm_id; 60 int dimm_ref; 61 int dimm_ecc_cnt; 62 int dimm_ecc_thresh; 63 64 struct ksensordev dimm_sensdev; 65 uint32_t dimm_sens_taskflags; /* DIMM_SENS_TF_ */ 66 67 struct sysctl_ctx_list dimm_sysctl_ctx; 68 struct sysctl_oid *dimm_sysctl_tree; 69 }; 70 TAILQ_HEAD(dimm_softc_list, dimm_softc); 71 72 #define DIMM_SENS_TF_TEMP_CRIT 0x1 73 #define DIMM_SENS_TF_ECC_CRIT 0x2 74 75 static void dimm_mod_unload(void); 76 77 /* In the ascending order of dimm_softc.dimm_id */ 78 static struct dimm_softc_list dimm_softc_list; 79 80 static SYSCTL_NODE(_hw, OID_AUTO, dimminfo, CTLFLAG_RD, NULL, 81 "DIMM information"); 82 83 struct dimm_softc * 84 dimm_create(int node, int chan, int slot) 85 { 86 struct dimm_softc *sc, *after = NULL; 87 int dimm_id = 0; 88 89 SYSCTL_XLOCK(); 90 91 TAILQ_FOREACH(sc, &dimm_softc_list, dimm_link) { 92 /* 93 * Already exists; done. 94 */ 95 if (sc->dimm_node == node && sc->dimm_chan == chan && 96 sc->dimm_slot == slot) { 97 KASSERT(sc->dimm_ref > 0, ("invalid dimm reference %d", 98 sc->dimm_ref)); 99 sc->dimm_ref++; 100 SYSCTL_XUNLOCK(); 101 return sc; 102 } 103 104 /* 105 * Find the lowest usable id. 106 */ 107 if (sc->dimm_id == dimm_id) { 108 ++dimm_id; 109 after = sc; 110 } 111 } 112 113 sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); 114 sc->dimm_node = node; 115 sc->dimm_chan = chan; 116 sc->dimm_slot = slot; 117 sc->dimm_id = dimm_id; 118 sc->dimm_ref = 1; 119 sc->dimm_temp_hiwat = DIMM_TEMP_HIWAT_DEFAULT; 120 sc->dimm_temp_lowat = DIMM_TEMP_LOWAT_DEFAULT; 121 sc->dimm_ecc_thresh = DIMM_ECC_THRESH_DEFAULT; 122 123 ksnprintf(sc->dimm_sensdev.xname, sizeof(sc->dimm_sensdev.xname), 124 "dimm%d", sc->dimm_id); 125 126 /* 127 * Create sysctl tree for the location information. Use 128 * same name as the sensor device. 129 */ 130 sysctl_ctx_init(&sc->dimm_sysctl_ctx); 131 sc->dimm_sysctl_tree = SYSCTL_ADD_NODE(&sc->dimm_sysctl_ctx, 132 SYSCTL_STATIC_CHILDREN(_hw_dimminfo), OID_AUTO, 133 sc->dimm_sensdev.xname, CTLFLAG_RD, 0, ""); 134 if (sc->dimm_sysctl_tree != NULL) { 135 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 136 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 137 "node", CTLFLAG_RD, &sc->dimm_node, 0, 138 "CPU node of this DIMM"); 139 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 140 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 141 "chan", CTLFLAG_RD, &sc->dimm_chan, 0, 142 "channel of this DIMM"); 143 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 144 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 145 "slot", CTLFLAG_RD, &sc->dimm_slot, 0, 146 "slot of this DIMM"); 147 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 148 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 149 "temp_hiwat", CTLFLAG_RW, &sc->dimm_temp_hiwat, 0, 150 "Raise alarm once DIMM temperature is above this value " 151 "(unit: C)"); 152 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 153 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 154 "temp_lowat", CTLFLAG_RW, &sc->dimm_temp_lowat, 0, 155 "Cancel alarm once DIMM temperature is below this value " 156 "(unit: C)"); 157 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 158 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 159 "ecc_thresh", CTLFLAG_RW, &sc->dimm_ecc_thresh, 0, 160 "Raise alarm once number ECC errors go above this value"); 161 } 162 163 if (after == NULL) { 164 KKASSERT(sc->dimm_id == 0); 165 TAILQ_INSERT_HEAD(&dimm_softc_list, sc, dimm_link); 166 } else { 167 TAILQ_INSERT_AFTER(&dimm_softc_list, after, sc, dimm_link); 168 } 169 170 sensordev_install(&sc->dimm_sensdev); 171 172 SYSCTL_XUNLOCK(); 173 return sc; 174 } 175 176 int 177 dimm_destroy(struct dimm_softc *sc) 178 { 179 SYSCTL_XLOCK(); 180 181 KASSERT(sc->dimm_ref > 0, ("invalid dimm reference %d", sc->dimm_ref)); 182 sc->dimm_ref--; 183 if (sc->dimm_ref > 0) { 184 SYSCTL_XUNLOCK(); 185 return EAGAIN; 186 } 187 188 sensordev_deinstall(&sc->dimm_sensdev); 189 190 TAILQ_REMOVE(&dimm_softc_list, sc, dimm_link); 191 if (sc->dimm_sysctl_tree != NULL) 192 sysctl_ctx_free(&sc->dimm_sysctl_ctx); 193 kfree(sc, M_DEVBUF); 194 195 SYSCTL_XUNLOCK(); 196 return 0; 197 } 198 199 void 200 dimm_sensor_attach(struct dimm_softc *sc, struct ksensor *sens) 201 { 202 sensor_attach(&sc->dimm_sensdev, sens); 203 } 204 205 void 206 dimm_sensor_detach(struct dimm_softc *sc, struct ksensor *sens) 207 { 208 sensor_detach(&sc->dimm_sensdev, sens); 209 } 210 211 void 212 dimm_set_temp_thresh(struct dimm_softc *sc, int hiwat, int lowat) 213 { 214 sc->dimm_temp_hiwat = hiwat; 215 sc->dimm_temp_lowat = lowat; 216 } 217 218 void 219 dimm_set_ecc_thresh(struct dimm_softc *sc, int thresh) 220 { 221 sc->dimm_ecc_thresh = thresh; 222 } 223 224 void 225 dimm_sensor_temp(struct dimm_softc *sc, struct ksensor *sens, int temp) 226 { 227 enum sensor_status status; 228 229 if (temp >= sc->dimm_temp_hiwat && 230 (sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT) == 0) { 231 char temp_str[16], data[64]; 232 233 ksnprintf(temp_str, sizeof(temp_str), "%d", temp); 234 ksnprintf(data, sizeof(data), "node=%d channel=%d dimm=%d", 235 sc->dimm_node, sc->dimm_chan, sc->dimm_slot); 236 devctl_notify("memtemp", "Thermal", temp_str, data); 237 238 kprintf("dimm%d: node%d channel%d DIMM%d " 239 "temperature (%dC) is too high (>= %dC)\n", 240 sc->dimm_id, sc->dimm_node, sc->dimm_chan, sc->dimm_slot, 241 temp, sc->dimm_temp_hiwat); 242 243 sc->dimm_sens_taskflags |= DIMM_SENS_TF_TEMP_CRIT; 244 } else if ((sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT) && 245 temp < sc->dimm_temp_lowat) { 246 sc->dimm_sens_taskflags &= ~DIMM_SENS_TF_TEMP_CRIT; 247 } 248 249 if (sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT) 250 status = SENSOR_S_CRIT; 251 else 252 status = SENSOR_S_OK; 253 sensor_set_temp_degc(sens, temp, status); 254 } 255 256 void 257 dimm_sensor_ecc_set(struct dimm_softc *sc, struct ksensor *sens, 258 int ecc_cnt, boolean_t crit) 259 { 260 enum sensor_status status; 261 262 sc->dimm_ecc_cnt = ecc_cnt; 263 264 if (!crit && sc->dimm_ecc_cnt >= sc->dimm_ecc_thresh) 265 crit = TRUE; 266 267 if (crit && (sc->dimm_sens_taskflags & DIMM_SENS_TF_ECC_CRIT) == 0) { 268 char ecc_str[16], data[64]; 269 270 ksnprintf(ecc_str, sizeof(ecc_str), "%d", sc->dimm_ecc_cnt); 271 ksnprintf(data, sizeof(data), "node=%d channel=%d dimm=%d", 272 sc->dimm_node, sc->dimm_chan, sc->dimm_slot); 273 devctl_notify("ecc", "ECC", ecc_str, data); 274 275 kprintf("dimm%d: node%d channel%d DIMM%d " 276 "too many ECC errors %d\n", 277 sc->dimm_id, sc->dimm_node, sc->dimm_chan, sc->dimm_slot, 278 sc->dimm_ecc_cnt); 279 280 sc->dimm_sens_taskflags |= DIMM_SENS_TF_ECC_CRIT; 281 } 282 283 if (sc->dimm_sens_taskflags & DIMM_SENS_TF_ECC_CRIT) 284 status = SENSOR_S_CRIT; 285 else 286 status = SENSOR_S_OK; 287 sensor_set(sens, sc->dimm_ecc_cnt, status); 288 } 289 290 static void 291 dimm_mod_unload(void) 292 { 293 struct dimm_softc *sc; 294 295 SYSCTL_XLOCK(); 296 297 while ((sc = TAILQ_FIRST(&dimm_softc_list)) != NULL) { 298 int error; 299 300 error = dimm_destroy(sc); 301 KASSERT(!error, ("dimm%d is still referenced, ref %d", 302 sc->dimm_id, sc->dimm_ref)); 303 } 304 305 SYSCTL_XUNLOCK(); 306 } 307 308 static int 309 dimm_mod_event(module_t mod, int type, void *unused) 310 { 311 switch (type) { 312 case MOD_LOAD: 313 TAILQ_INIT(&dimm_softc_list); 314 return 0; 315 316 case MOD_UNLOAD: 317 dimm_mod_unload(); 318 return 0; 319 320 default: 321 return 0; 322 } 323 } 324 325 static moduledata_t dimm_mod = { 326 "dimm", 327 dimm_mod_event, 328 0 329 }; 330 DECLARE_MODULE(dimm, dimm_mod, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY); 331 MODULE_VERSION(dimm, 1); 332