1 /* 2 * Copyright (c) 2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Sepherosa Ziehau <sepherosa@gmail.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/bitops.h> 38 #include <sys/bus.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/queue.h> 42 #include <sys/sensors.h> 43 44 #include <bus/pci/pcivar.h> 45 #include <bus/pci/pcireg.h> 46 #include <bus/pci/pcibus.h> 47 #include <bus/pci/pci_cfgreg.h> 48 #include <bus/pci/pcib_private.h> 49 50 #include "pcib_if.h" 51 52 #include <dev/misc/dimm/dimm.h> 53 #include <dev/misc/ecc/e5_imc_reg.h> 54 #include <dev/misc/ecc/e5_imc_var.h> 55 56 struct ecc_e5_dimm { 57 TAILQ_ENTRY(ecc_e5_dimm) dimm_link; 58 struct dimm_softc *dimm_softc; 59 struct ksensor dimm_sensor; 60 }; 61 62 struct ecc_e5_rank { 63 struct ecc_e5_dimm *rank_dimm_sc; 64 int rank_dimm; /* owner dimm */ 65 int rank_dimm_rank; /* rank within the owner dimm */ 66 }; 67 68 struct ecc_e5_softc { 69 device_t ecc_dev; 70 const struct e5_imc_chan *ecc_chan; 71 int ecc_node; 72 int ecc_rank_cnt; 73 struct ecc_e5_rank ecc_rank[PCI_E5_IMC_ERROR_RANK_MAX]; 74 TAILQ_HEAD(, ecc_e5_dimm) ecc_dimm; 75 }; 76 77 #define ecc_printf(sc, fmt, arg...) \ 78 device_printf((sc)->ecc_dev, fmt , ##arg) 79 80 static int ecc_e5_probe(device_t); 81 static int ecc_e5_attach(device_t); 82 static int ecc_e5_detach(device_t); 83 static void ecc_e5_shutdown(device_t); 84 85 static void ecc_e5_sensor_task(void *); 86 87 #define ECC_E5_CHAN(v, imc, c, c_ext) \ 88 { \ 89 .did = PCI_E5V##v##_IMC##imc##_ERROR_CHN##c##_DID_ID, \ 90 .slot = PCISLOT_E5V##v##_IMC##imc##_ERROR_CHN##c, \ 91 .func = PCIFUNC_E5V##v##_IMC##imc##_ERROR_CHN##c, \ 92 .desc = "Intel E5 v" #v " ECC", \ 93 \ 94 E5_IMC_CHAN_FIELDS(v, imc, c, c_ext) \ 95 } 96 97 #define ECC_E5_CHAN_V2(c) ECC_E5_CHAN(2, 0, c, c) 98 #define ECC_E5_CHAN_IMC0_V3(c) ECC_E5_CHAN(3, 0, c, c) 99 #define ECC_E5_CHAN_IMC1_V3(c, c_ext) ECC_E5_CHAN(3, 1, c, c_ext) 100 #define ECC_E5_CHAN_END E5_IMC_CHAN_END 101 102 static const struct e5_imc_chan ecc_e5_chans[] = { 103 ECC_E5_CHAN_V2(0), 104 ECC_E5_CHAN_V2(1), 105 ECC_E5_CHAN_V2(2), 106 ECC_E5_CHAN_V2(3), 107 108 ECC_E5_CHAN_IMC0_V3(0), 109 ECC_E5_CHAN_IMC0_V3(1), 110 ECC_E5_CHAN_IMC0_V3(2), 111 ECC_E5_CHAN_IMC0_V3(3), 112 ECC_E5_CHAN_IMC1_V3(0, 2), /* IMC1 chan0 -> channel2 */ 113 ECC_E5_CHAN_IMC1_V3(1, 3), /* IMC1 chan1 -> channel3 */ 114 115 ECC_E5_CHAN_END 116 }; 117 118 #undef ECC_E5_CHAN_END 119 #undef ECC_E5_CHAN_V2 120 #undef ECC_E5_CHAN 121 122 static device_method_t ecc_e5_methods[] = { 123 /* Device interface */ 124 DEVMETHOD(device_probe, ecc_e5_probe), 125 DEVMETHOD(device_attach, ecc_e5_attach), 126 DEVMETHOD(device_detach, ecc_e5_detach), 127 DEVMETHOD(device_shutdown, ecc_e5_shutdown), 128 DEVMETHOD(device_suspend, bus_generic_suspend), 129 DEVMETHOD(device_resume, bus_generic_resume), 130 DEVMETHOD_END 131 }; 132 133 static driver_t ecc_e5_driver = { 134 "ecc", 135 ecc_e5_methods, 136 sizeof(struct ecc_e5_softc) 137 }; 138 static devclass_t ecc_devclass; 139 DRIVER_MODULE(ecc_e5, pci, ecc_e5_driver, ecc_devclass, NULL, NULL); 140 MODULE_DEPEND(ecc_e5, pci, 1, 1, 1); 141 MODULE_DEPEND(ecc_e5, dimm, 1, 1, 1); 142 143 static int 144 ecc_e5_probe(device_t dev) 145 { 146 const struct e5_imc_chan *c; 147 uint16_t vid, did; 148 int slot, func; 149 150 vid = pci_get_vendor(dev); 151 if (vid != PCI_E5_IMC_VID_ID) 152 return ENXIO; 153 154 did = pci_get_device(dev); 155 slot = pci_get_slot(dev); 156 func = pci_get_function(dev); 157 158 for (c = ecc_e5_chans; c->desc != NULL; ++c) { 159 if (c->did == did && c->slot == slot && c->func == func) { 160 struct ecc_e5_softc *sc = device_get_softc(dev); 161 int node; 162 163 node = e5_imc_node_probe(dev, c); 164 if (node < 0) 165 break; 166 167 device_set_desc(dev, c->desc); 168 169 sc->ecc_chan = c; 170 sc->ecc_node = node; 171 return 0; 172 } 173 } 174 return ENXIO; 175 } 176 177 static int 178 ecc_e5_attach(device_t dev) 179 { 180 struct ecc_e5_softc *sc = device_get_softc(dev); 181 uint32_t mcmtr; 182 int dimm, rank; 183 184 TAILQ_INIT(&sc->ecc_dimm); 185 sc->ecc_dev = dev; 186 187 mcmtr = IMC_CPGC_READ_4(sc->ecc_dev, sc->ecc_chan, 188 PCI_E5_IMC_CPGC_MCMTR); 189 if (bootverbose) { 190 if (sc->ecc_chan->ver == E5_IMC_CHAN_VER3 && 191 (mcmtr & PCI_E5V3_IMC_CPGC_MCMTR_DDR4)) 192 ecc_printf(sc, "DDR4\n"); 193 if (__SHIFTOUT(mcmtr, PCI_E5_IMC_CPGC_MCMTR_IMC_MODE) == 194 PCI_E5_IMC_CPGC_MCMTR_IMC_MODE_DDR3) { 195 ecc_printf(sc, "native %s\n", 196 sc->ecc_chan->ver == E5_IMC_CHAN_VER2 ? 197 "DDR3" : "DDR"); 198 } 199 } 200 201 rank = 0; 202 for (dimm = 0; dimm < PCI_E5_IMC_CHN_DIMM_MAX; ++dimm) { 203 struct ecc_e5_dimm *dimm_sc; 204 struct ksensor *sens; 205 const char *width; 206 uint32_t dimmmtr; 207 int rank_cnt, r; 208 int density; 209 int val; 210 211 dimmmtr = IMC_CTAD_READ_4(sc->ecc_dev, sc->ecc_chan, 212 PCI_E5_IMC_CTAD_DIMMMTR(dimm)); 213 214 if ((dimmmtr & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP) == 0) 215 continue; 216 217 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT); 218 switch (val) { 219 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_SR: 220 rank_cnt = 1; 221 break; 222 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_DR: 223 rank_cnt = 2; 224 break; 225 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_QR: 226 rank_cnt = 4; 227 break; 228 case PCI_E5V3_IMC_CTAD_DIMMMTR_RANK_CNT_8R: 229 if (sc->ecc_chan->ver >= E5_IMC_CHAN_VER3) { 230 rank_cnt = 8; 231 break; 232 } 233 /* FALL THROUGH */ 234 default: 235 ecc_printf(sc, "unknown rank count 0x%x\n", val); 236 return ENXIO; 237 } 238 239 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH); 240 switch (val) { 241 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_4: 242 width = "x4"; 243 break; 244 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_8: 245 width = "x8"; 246 break; 247 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_16: 248 width = "x16"; 249 break; 250 default: 251 ecc_printf(sc, "unknown ddr3 width 0x%x\n", val); 252 return ENXIO; 253 } 254 255 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY); 256 switch (val) { 257 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_2G: 258 density = 2; 259 break; 260 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_4G: 261 density = 4; 262 break; 263 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_8G: 264 density = 8; 265 break; 266 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_1G: 267 if (sc->ecc_chan->ver < E5_IMC_CHAN_VER3) { 268 density = 1; 269 break; 270 } 271 /* FALL THROUGH */ 272 default: 273 ecc_printf(sc, "unknown ddr3 density 0x%x\n", val); 274 return ENXIO; 275 } 276 277 if (bootverbose) { 278 ecc_printf(sc, "DIMM%d %dGB, %d%s, density %dGB\n", 279 dimm, density * rank_cnt * 2, 280 rank_cnt, width, density); 281 } 282 283 dimm_sc = kmalloc(sizeof(*dimm_sc), M_DEVBUF, 284 M_WAITOK | M_ZERO); 285 dimm_sc->dimm_softc = 286 dimm_create(sc->ecc_node, sc->ecc_chan->chan_ext, dimm); 287 288 sens = &dimm_sc->dimm_sensor; 289 ksnprintf(sens->desc, sizeof(sens->desc), 290 "node%d chan%d DIMM%d ecc", 291 sc->ecc_node, sc->ecc_chan->chan_ext, dimm); 292 sens->type = SENSOR_ECC; 293 sensor_set(sens, 0, SENSOR_S_OK); 294 dimm_sensor_attach(dimm_sc->dimm_softc, sens); 295 296 TAILQ_INSERT_TAIL(&sc->ecc_dimm, dimm_sc, dimm_link); 297 298 for (r = 0; r < rank_cnt; ++r) { 299 struct ecc_e5_rank *rk; 300 301 if (rank >= PCI_E5_IMC_ERROR_RANK_MAX) { 302 ecc_printf(sc, "too many ranks\n"); 303 return ENXIO; 304 } 305 rk = &sc->ecc_rank[rank]; 306 307 rk->rank_dimm_sc = dimm_sc; 308 rk->rank_dimm = dimm; 309 rk->rank_dimm_rank = r; 310 311 ++rank; 312 } 313 } 314 sc->ecc_rank_cnt = rank; 315 316 if ((mcmtr & PCI_E5_IMC_CPGC_MCMTR_ECC_EN) == 0) { 317 ecc_printf(sc, "ECC is not enabled\n"); 318 return 0; 319 } 320 321 if (bootverbose) { 322 for (rank = 0; rank < sc->ecc_rank_cnt; ++rank) { 323 const struct ecc_e5_rank *rk = &sc->ecc_rank[rank]; 324 uint32_t thr, mask; 325 int ofs; 326 327 ofs = PCI_E5_IMC_ERROR_COR_ERR_TH(rank / 2); 328 if (rank & 1) 329 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI; 330 else 331 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_LO; 332 333 thr = pci_read_config(sc->ecc_dev, ofs, 4); 334 ecc_printf(sc, "DIMM%d rank%d, " 335 "corrected error threshold %d\n", 336 rk->rank_dimm, rk->rank_dimm_rank, 337 __SHIFTOUT(thr, mask)); 338 } 339 } 340 341 sensor_task_register(sc, ecc_e5_sensor_task, 1); 342 return 0; 343 } 344 345 static void 346 ecc_e5_sensor_task(void *xsc) 347 { 348 struct ecc_e5_softc *sc = xsc; 349 uint32_t err_ranks, val; 350 351 val = pci_read_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 4); 352 353 err_ranks = (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS); 354 while (err_ranks != 0) { 355 int rank; 356 357 rank = ffs(err_ranks) - 1; 358 err_ranks &= ~(1 << rank); 359 360 if (rank < sc->ecc_rank_cnt) { 361 const struct ecc_e5_rank *rk = &sc->ecc_rank[rank]; 362 struct ecc_e5_dimm *dimm_sc = rk->rank_dimm_sc; 363 uint32_t err, mask; 364 int ofs, ecc_cnt; 365 366 ofs = PCI_E5_IMC_ERROR_COR_ERR_CNT(rank / 2); 367 if (rank & 1) 368 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_HI; 369 else 370 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_LO; 371 372 err = pci_read_config(sc->ecc_dev, ofs, 4); 373 ecc_cnt = __SHIFTOUT(err, mask); 374 375 ecc_printf(sc, "node%d channel%d DIMM%d rank%d, " 376 "too many errors %d", 377 sc->ecc_node, sc->ecc_chan->chan_ext, 378 rk->rank_dimm, rk->rank_dimm_rank, ecc_cnt); 379 dimm_sensor_ecc_set(dimm_sc->dimm_softc, 380 &dimm_sc->dimm_sensor, ecc_cnt, TRUE); 381 } 382 } 383 384 if (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS) { 385 pci_write_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 386 val, 4); 387 } 388 } 389 390 static void 391 ecc_e5_stop(device_t dev) 392 { 393 struct ecc_e5_softc *sc = device_get_softc(dev); 394 395 sensor_task_unregister(sc); 396 } 397 398 static int 399 ecc_e5_detach(device_t dev) 400 { 401 struct ecc_e5_softc *sc = device_get_softc(dev); 402 struct ecc_e5_dimm *dimm_sc; 403 404 ecc_e5_stop(dev); 405 406 while ((dimm_sc = TAILQ_FIRST(&sc->ecc_dimm)) != NULL) { 407 TAILQ_REMOVE(&sc->ecc_dimm, dimm_sc, dimm_link); 408 dimm_sensor_detach(dimm_sc->dimm_softc, &dimm_sc->dimm_sensor); 409 dimm_destroy(dimm_sc->dimm_softc); 410 411 kfree(dimm_sc, M_DEVBUF); 412 } 413 return 0; 414 } 415 416 static void 417 ecc_e5_shutdown(device_t dev) 418 { 419 ecc_e5_stop(dev); 420 } 421