/*	$OpenBSD: aplcpu.c,v 1.1 2022/02/20 19:25:57 kettenis Exp $	*/
/*
 * Copyright (c) 2022 Mark Kettenis <kettenis@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#include <machine/bus.h>
#include <machine/fdt.h>

#include <dev/ofw/openfirm.h>
#include <dev/ofw/fdt.h>

/*
 * This driver is based on preliminary device tree bindings and will
 * almost certainly need changes once the official bindings land in
 * mainline Linux.  Support for these preliminary bindings will be
 * dropped as soon as official bindings are available.
 */

#define CLUSTER_PSTATE			0x0020
#define CLUSTER_PSTATE_BUSY		(1U << 31)
#define CLUSTER_PSTATE_SET		(1U << 25)
#define CLUSTER_PSTATE_DESIRED2_MASK	(0xf << 12)
#define CLUSTER_PSTATE_DESIRED2_SHIFT	12
#define CLUSTER_PSTATE_DESIRED1_MASK	(0xf << 0)
#define CLUSTER_PSTATE_DESIRED1_SHIFT	0

struct opp {
	uint64_t opp_hz;
	uint32_t opp_level;
};

struct opp_table {
	LIST_ENTRY(opp_table) ot_list;
	uint32_t ot_phandle;

	struct opp *ot_opp;
	u_int ot_nopp;
	uint64_t ot_opp_hz_min;
	uint64_t ot_opp_hz_max;
};

#define APLCPU_MAX_CLUSTERS	8

struct aplcpu_softc {
	struct device		sc_dev;
	bus_space_tag_t		sc_iot;
	bus_space_handle_t	sc_ioh[APLCPU_MAX_CLUSTERS];
	bus_size_t		sc_ios[APLCPU_MAX_CLUSTERS];

	int			sc_node;
	u_int			sc_nclusters;
	int			sc_perflevel;

	LIST_HEAD(, opp_table)	sc_opp_tables;
	struct opp_table	*sc_opp_table[APLCPU_MAX_CLUSTERS];
	uint64_t		sc_opp_hz_min;
	uint64_t		sc_opp_hz_max;
};

struct aplcpu_softc *aplcpu_sc;

int	aplcpu_match(struct device *, void *, void *);
void	aplcpu_attach(struct device *, struct device *, void *);

const struct cfattach aplcpu_ca = {
	sizeof(struct aplcpu_softc), aplcpu_match, aplcpu_attach
};

struct cfdriver aplcpu_cd = {
	NULL, "aplcpu", DV_DULL
};

void	aplcpu_opp_init(struct aplcpu_softc *, int);
int	aplcpu_clockspeed(int *);
void	aplcpu_setperf(int);

int
aplcpu_match(struct device *parent, void *match, void *aux)
{
	struct fdt_attach_args *faa = aux;

	return OF_is_compatible(faa->fa_node, "apple,cluster-cpufreq");
}

void
aplcpu_attach(struct device *parent, struct device *self, void *aux)
{
	struct aplcpu_softc *sc = (struct aplcpu_softc *)self;
	struct fdt_attach_args *faa = aux;
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;
	int i;

	if (faa->fa_nreg < 1) {
		printf(": no registers\n");
		return;
	}

	if (faa->fa_nreg > APLCPU_MAX_CLUSTERS) {
		printf(": too many registers\n");
		return;
	}

	sc->sc_iot = faa->fa_iot;
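	/*
	 * Map one register window per cluster.  The sc_ios[] sizes
	 * double as "mapped" flags for the error path below: entries
	 * that were never mapped keep their zero size (the softc is
	 * zeroed when it is allocated at attach) and are skipped when
	 * unmapping.
	 */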
	for (i = 0; i < faa->fa_nreg; i++) {
		if (bus_space_map(sc->sc_iot, faa->fa_reg[i].addr,
		    faa->fa_reg[i].size, 0, &sc->sc_ioh[i])) {
			printf(": can't map registers\n");
			goto unmap;
		}
		sc->sc_ios[i] = faa->fa_reg[i].size;
	}

	printf("\n");

	sc->sc_node = faa->fa_node;
	sc->sc_nclusters = faa->fa_nreg;

	sc->sc_opp_hz_min = UINT64_MAX;
	sc->sc_opp_hz_max = 0;

	LIST_INIT(&sc->sc_opp_tables);
	CPU_INFO_FOREACH(cii, ci) {
		aplcpu_opp_init(sc, ci->ci_node);
	}

	aplcpu_sc = sc;
	cpu_cpuspeed = aplcpu_clockspeed;
	cpu_setperf = aplcpu_setperf;
	return;

unmap:
	for (i = 0; i < faa->fa_nreg; i++) {
		if (sc->sc_ios[i] == 0)
			continue;
		bus_space_unmap(sc->sc_iot, sc->sc_ioh[i], sc->sc_ios[i]);
	}
}

void
aplcpu_opp_init(struct aplcpu_softc *sc, int node)
{
	struct opp_table *ot;
	int count, child;
	uint32_t freq_domain[2], phandle;
	uint64_t opp_hz;
	uint32_t opp_level;
	int i, j;

	if (OF_getpropintarray(node, "apple,freq-domain", freq_domain,
	    sizeof(freq_domain)) != sizeof(freq_domain))
		return;
	if (freq_domain[0] != OF_getpropint(sc->sc_node, "phandle", 0))
		return;
	if (freq_domain[1] >= APLCPU_MAX_CLUSTERS)
		return;

	phandle = OF_getpropint(node, "operating-points-v2", 0);
	if (phandle == 0)
		return;

	LIST_FOREACH(ot, &sc->sc_opp_tables, ot_list) {
		if (ot->ot_phandle == phandle) {
			sc->sc_opp_table[freq_domain[1]] = ot;
			return;
		}
	}

	node = OF_getnodebyphandle(phandle);
	if (node == 0)
		return;

	if (!OF_is_compatible(node, "operating-points-v2"))
		return;

	/* Count the usable OPPs, skipping turbo states. */
	count = 0;
	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
		if (OF_getproplen(child, "turbo-mode") == 0)
			continue;
		count++;
	}
	if (count == 0)
		return;

	ot = malloc(sizeof(struct opp_table), M_DEVBUF, M_ZERO | M_WAITOK);
	ot->ot_phandle = phandle;
	ot->ot_opp = mallocarray(count, sizeof(struct opp),
	    M_DEVBUF, M_ZERO | M_WAITOK);
	ot->ot_nopp = count;

	count = 0;
	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
		if (OF_getproplen(child, "turbo-mode") == 0)
			continue;
		opp_hz = OF_getpropint64(child, "opp-hz", 0);
		opp_level = OF_getpropint(child, "opp-level", 0);

		/* Insert into the array, keeping things sorted. */
		for (i = 0; i < count; i++) {
			if (opp_hz < ot->ot_opp[i].opp_hz)
				break;
		}
		for (j = count; j > i; j--)
			ot->ot_opp[j] = ot->ot_opp[j - 1];
		ot->ot_opp[i].opp_hz = opp_hz;
		ot->ot_opp[i].opp_level = opp_level;
		count++;
	}

	ot->ot_opp_hz_min = ot->ot_opp[0].opp_hz;
	ot->ot_opp_hz_max = ot->ot_opp[count - 1].opp_hz;

	LIST_INSERT_HEAD(&sc->sc_opp_tables, ot, ot_list);
	sc->sc_opp_table[freq_domain[1]] = ot;

	/* Keep track of overall min/max frequency. */
	if (sc->sc_opp_hz_min > ot->ot_opp_hz_min)
		sc->sc_opp_hz_min = ot->ot_opp_hz_min;
	if (sc->sc_opp_hz_max < ot->ot_opp_hz_max)
		sc->sc_opp_hz_max = ot->ot_opp_hz_max;
}

int
aplcpu_clockspeed(int *freq)
{
	struct aplcpu_softc *sc = aplcpu_sc;
	struct opp_table *ot;
	uint64_t opp_hz = 0;
	uint32_t opp_level;
	uint64_t pstate;
	int i, j;

	/*
	 * Clusters can run at different frequencies.  We report the
	 * highest frequency among all clusters.
	 */
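	/*
	 * The DESIRED1 field of each cluster's P-state register holds
	 * the currently requested P-state; matching it against the
	 * opp-level values in the cluster's OPP table recovers the
	 * corresponding frequency in Hz.
	 */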
	for (i = 0; i < sc->sc_nclusters; i++) {
		if (sc->sc_opp_table[i] == NULL)
			continue;

		pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[i],
		    CLUSTER_PSTATE);
		opp_level = (pstate & CLUSTER_PSTATE_DESIRED1_MASK);
		opp_level >>= CLUSTER_PSTATE_DESIRED1_SHIFT;

		/* Translate P-state to frequency. */
		ot = sc->sc_opp_table[i];
		for (j = 0; j < ot->ot_nopp; j++) {
			if (ot->ot_opp[j].opp_level == opp_level)
				opp_hz = MAX(opp_hz, ot->ot_opp[j].opp_hz);
		}
	}
	if (opp_hz == 0)
		return EINVAL;

	*freq = opp_hz / 1000000;
	return 0;
}

void
aplcpu_setperf(int level)
{
	struct aplcpu_softc *sc = aplcpu_sc;
	struct opp_table *ot;
	uint64_t min, max;
	uint64_t level_hz;
	uint32_t opp_level;
	uint64_t reg;
	int i, j, timo;

	if (sc->sc_perflevel == level)
		return;

	/*
	 * We let the CPU performance level span the entire range
	 * between the lowest frequency on any of the clusters and the
	 * highest frequency on any of the clusters.  We pick a
	 * frequency within that range based on the performance level
	 * and set all the clusters to the frequency that is closest
	 * to but not above that frequency.  This isn't a particularly
	 * sensible method but it is easy to implement and it is hard
	 * to come up with something more sensible given the
	 * constraints of the hw.setperf sysctl interface.
	 */
	min = sc->sc_opp_hz_min;
	max = sc->sc_opp_hz_max;
	level_hz = min + (level * (max - min)) / 100;

	for (i = 0; i < sc->sc_nclusters; i++) {
		if (sc->sc_opp_table[i] == NULL)
			continue;

		/* Translate performance level to a P-state. */
		opp_level = 0;
		ot = sc->sc_opp_table[i];
		for (j = 0; j < ot->ot_nopp; j++) {
			if (ot->ot_opp[j].opp_hz <= level_hz &&
			    ot->ot_opp[j].opp_level >= opp_level)
				opp_level = ot->ot_opp[j].opp_level;
		}

		/* Wait until the P-state logic isn't busy. */
		for (timo = 100; timo > 0; timo--) {
			reg = bus_space_read_8(sc->sc_iot, sc->sc_ioh[i],
			    CLUSTER_PSTATE);
			if ((reg & CLUSTER_PSTATE_BUSY) == 0)
				break;
			delay(1);
		}
		if (reg & CLUSTER_PSTATE_BUSY)
			continue;

		/* Set the desired P-state. */
		reg &= ~CLUSTER_PSTATE_DESIRED1_MASK;
		reg &= ~CLUSTER_PSTATE_DESIRED2_MASK;
		reg |= (opp_level << CLUSTER_PSTATE_DESIRED1_SHIFT);
		reg |= (opp_level << CLUSTER_PSTATE_DESIRED2_SHIFT);
		reg |= CLUSTER_PSTATE_SET;
		bus_space_write_8(sc->sc_iot, sc->sc_ioh[i],
		    CLUSTER_PSTATE, reg);
	}

	sc->sc_perflevel = level;
}
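/*
 * Worked example of the level-to-frequency mapping above, using
 * made-up numbers rather than values from any real device tree:
 * with sc_opp_hz_min = 600000000 (600 MHz) and sc_opp_hz_max =
 * 3000000000 (3 GHz), hw.setperf level 50 gives
 *
 *	level_hz = 600000000 + (50 * 2400000000) / 100 = 1800000000
 *
 * so every cluster is programmed with the highest OPP whose opp-hz
 * is at most 1.8 GHz.
 */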