xref: /openbsd-src/sys/arch/arm64/dev/aplcpu.c (revision fc405d53b73a2d73393cb97f684863d17b583e38)
1 /*	$OpenBSD: aplcpu.c,v 1.7 2023/05/09 10:13:23 kettenis Exp $	*/
2 /*
3  * Copyright (c) 2022 Mark Kettenis <kettenis@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <sys/param.h>
19 #include <sys/systm.h>
20 #include <sys/device.h>
21 #include <sys/malloc.h>
22 #include <sys/sensors.h>
23 #include <sys/sysctl.h>
24 
25 #include <machine/bus.h>
26 #include <machine/fdt.h>
27 
28 #include <dev/ofw/openfirm.h>
29 #include <dev/ofw/fdt.h>
30 
/*
 * Per-cluster DVFS register offsets and bit fields.
 *
 * Note: the field masks are plain int constants.  aplcpu_setperf()
 * applies ~DVFS_CMD_PS1_MASK to a 64-bit register value, which relies
 * on sign extension to keep the upper 32 bits set; do not add a "U"
 * suffix to the masks without adjusting that code.
 */

/* Command register: used to request a new P-state. */
#define DVFS_CMD			0x0020
#define DVFS_CMD_BUSY			(1U << 31)
#define DVFS_CMD_SET			(1 << 25)
#define DVFS_CMD_PS2_SHIFT		12
#define DVFS_CMD_PS2_MASK		(0x1f << DVFS_CMD_PS2_SHIFT)
#define DVFS_CMD_PS1_SHIFT		0
#define DVFS_CMD_PS1_MASK		(0x1f << DVFS_CMD_PS1_SHIFT)

/* Status register: location of the current P-state differs per SoC. */
#define DVFS_STATUS			0x50
#define DVFS_T8103_STATUS_CUR_PS_SHIFT	4
#define DVFS_T8103_STATUS_CUR_PS_MASK	(0xf << DVFS_T8103_STATUS_CUR_PS_SHIFT)
#define DVFS_T8112_STATUS_CUR_PS_SHIFT	5
#define DVFS_T8112_STATUS_CUR_PS_MASK	(0x1f << DVFS_T8112_STATUS_CUR_PS_SHIFT)
44 
/*
 * A single operating performance point, filled in by aplcpu_opp_init()
 * from one child node of an "operating-points-v2" table.
 */
struct opp {
	uint64_t	opp_hz;		/* value of the "opp-hz" property */
	uint32_t	opp_level;	/* value of the "opp-level" property */
};
49 
50 struct opp_table {
51 	LIST_ENTRY(opp_table) ot_list;
52 	uint32_t ot_phandle;
53 
54 	struct opp *ot_opp;
55 	u_int ot_nopp;
56 	uint64_t ot_opp_hz_min;
57 	uint64_t ot_opp_hz_max;
58 };
59 
60 #define APLCPU_MAX_CLUSTERS	8
61 
62 struct aplcpu_softc {
63 	struct device		sc_dev;
64 	bus_space_tag_t		sc_iot;
65 	bus_space_handle_t	sc_ioh[APLCPU_MAX_CLUSTERS];
66 	bus_size_t		sc_ios[APLCPU_MAX_CLUSTERS];
67 
68 	int			sc_node;
69 	u_int			sc_nclusters;
70 	int			sc_perflevel;
71 
72 	uint32_t		sc_cur_ps_mask;
73 	u_int			sc_cur_ps_shift;
74 
75 	LIST_HEAD(, opp_table)	sc_opp_tables;
76 	struct opp_table	*sc_opp_table[APLCPU_MAX_CLUSTERS];
77 	uint64_t		sc_opp_hz_min;
78 	uint64_t		sc_opp_hz_max;
79 
80 	struct ksensordev	sc_sensordev;
81 	struct ksensor		sc_sensor[APLCPU_MAX_CLUSTERS];
82 };
83 
84 int	aplcpu_match(struct device *, void *, void *);
85 void	aplcpu_attach(struct device *, struct device *, void *);
86 
87 const struct cfattach aplcpu_ca = {
88 	sizeof (struct aplcpu_softc), aplcpu_match, aplcpu_attach
89 };
90 
91 struct cfdriver aplcpu_cd = {
92 	NULL, "aplcpu", DV_DULL
93 };
94 
95 void	aplcpu_opp_init(struct aplcpu_softc *, int);
96 uint32_t aplcpu_opp_level(struct aplcpu_softc *, int);
97 int	aplcpu_clockspeed(int *);
98 void	aplcpu_setperf(int level);
99 void	aplcpu_refresh_sensors(void *);
100 
101 int
102 aplcpu_match(struct device *parent, void *match, void *aux)
103 {
104 	struct fdt_attach_args *faa = aux;
105 
106 	return OF_is_compatible(faa->fa_node, "apple,soc-cpufreq") ||
107 	    OF_is_compatible(faa->fa_node, "apple,cluster-cpufreq");
108 }
109 
110 void
111 aplcpu_attach(struct device *parent, struct device *self, void *aux)
112 {
113 	struct aplcpu_softc *sc = (struct aplcpu_softc *)self;
114 	struct fdt_attach_args *faa = aux;
115 	struct cpu_info *ci;
116 	CPU_INFO_ITERATOR cii;
117 	int i;
118 
119 	if (faa->fa_nreg < 1) {
120 		printf(": no registers\n");
121 		return;
122 	}
123 
124 	if (faa->fa_nreg > APLCPU_MAX_CLUSTERS) {
125 		printf(": too many registers\n");
126 		return;
127 	}
128 
129 	sc->sc_iot = faa->fa_iot;
130 	for (i = 0; i < faa->fa_nreg; i++) {
131 		if (bus_space_map(sc->sc_iot, faa->fa_reg[i].addr,
132 		    faa->fa_reg[i].size, 0, &sc->sc_ioh[i])) {
133 			printf(": can't map registers\n");
134 			goto unmap;
135 		}
136 		sc->sc_ios[i] = faa->fa_reg[i].size;
137 	}
138 
139 	printf("\n");
140 
141 	sc->sc_node = faa->fa_node;
142 	sc->sc_nclusters = faa->fa_nreg;
143 
144 	if (OF_is_compatible(sc->sc_node, "apple,t8103-soc-cpufreq") ||
145 	    OF_is_compatible(sc->sc_node, "apple,t8103-cluster-cpufreq")) {
146 		sc->sc_cur_ps_mask = DVFS_T8103_STATUS_CUR_PS_MASK;
147 		sc->sc_cur_ps_shift = DVFS_T8103_STATUS_CUR_PS_SHIFT;
148 	} else if (OF_is_compatible(sc->sc_node, "apple,t8112-soc-cpufreq") ||
149 	    OF_is_compatible(sc->sc_node, "apple,t8112-cluster-cpufreq")) {
150 		sc->sc_cur_ps_mask = DVFS_T8112_STATUS_CUR_PS_MASK;
151 		sc->sc_cur_ps_shift = DVFS_T8112_STATUS_CUR_PS_SHIFT;
152 	}
153 
154 	sc->sc_opp_hz_min = UINT64_MAX;
155 	sc->sc_opp_hz_max = 0;
156 
157 	LIST_INIT(&sc->sc_opp_tables);
158 	CPU_INFO_FOREACH(cii, ci) {
159 		aplcpu_opp_init(sc, ci->ci_node);
160 	}
161 
162 	for (i = 0; i < sc->sc_nclusters; i++) {
163 		sc->sc_sensor[i].type = SENSOR_FREQ;
164 		sensor_attach(&sc->sc_sensordev, &sc->sc_sensor[i]);
165 	}
166 
167 	aplcpu_refresh_sensors(sc);
168 
169 	strlcpy(sc->sc_sensordev.xname, sc->sc_dev.dv_xname,
170 	    sizeof(sc->sc_sensordev.xname));
171 	sensordev_install(&sc->sc_sensordev);
172 	sensor_task_register(sc, aplcpu_refresh_sensors, 1);
173 
174 	cpu_cpuspeed = aplcpu_clockspeed;
175 	cpu_setperf = aplcpu_setperf;
176 	return;
177 
178 unmap:
179 	for (i = 0; i < faa->fa_nreg; i++) {
180 		if (sc->sc_ios[i] == 0)
181 			continue;
182 		bus_space_unmap(sc->sc_iot, sc->sc_ioh[i], sc->sc_ios[i]);
183 	}
184 }
185 
186 void
187 aplcpu_opp_init(struct aplcpu_softc *sc, int node)
188 {
189 	struct opp_table *ot;
190 	int count, child;
191 	uint32_t freq_domain[2], phandle;
192 	uint32_t opp_hz, opp_level;
193 	int i, j;
194 
195 	freq_domain[0] = OF_getpropint(node, "performance-domains", 0);
196 	freq_domain[1] = 0;
197 	if (freq_domain[0] == 0) {
198 		if (OF_getpropintarray(node, "apple,freq-domain", freq_domain,
199 		    sizeof(freq_domain)) != sizeof(freq_domain))
200 			return;
201 		if (freq_domain[1] > APLCPU_MAX_CLUSTERS)
202 			return;
203 	}
204 	if (freq_domain[0] != OF_getpropint(sc->sc_node, "phandle", 0))
205 		return;
206 
207 	phandle = OF_getpropint(node, "operating-points-v2", 0);
208 	if (phandle == 0)
209 		return;
210 
211 	LIST_FOREACH(ot, &sc->sc_opp_tables, ot_list) {
212 		if (ot->ot_phandle == phandle) {
213 			sc->sc_opp_table[freq_domain[1]] = ot;
214 			return;
215 		}
216 	}
217 
218 	node = OF_getnodebyphandle(phandle);
219 	if (node == 0)
220 		return;
221 
222 	if (!OF_is_compatible(node, "operating-points-v2"))
223 		return;
224 
225 	count = 0;
226 	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
227 		if (OF_getproplen(child, "turbo-mode") == 0)
228 			continue;
229 		count++;
230 	}
231 	if (count == 0)
232 		return;
233 
234 	ot = malloc(sizeof(struct opp_table), M_DEVBUF, M_ZERO | M_WAITOK);
235 	ot->ot_phandle = phandle;
236 	ot->ot_opp = mallocarray(count, sizeof(struct opp),
237 	    M_DEVBUF, M_ZERO | M_WAITOK);
238 	ot->ot_nopp = count;
239 
240 	count = 0;
241 	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
242 		if (OF_getproplen(child, "turbo-mode") == 0)
243 			continue;
244 		opp_hz = OF_getpropint64(child, "opp-hz", 0);
245 		opp_level = OF_getpropint(child, "opp-level", 0);
246 
247 		/* Insert into the array, keeping things sorted. */
248 		for (i = 0; i < count; i++) {
249 			if (opp_hz < ot->ot_opp[i].opp_hz)
250 				break;
251 		}
252 		for (j = count; j > i; j--)
253 			ot->ot_opp[j] = ot->ot_opp[j - 1];
254 		ot->ot_opp[i].opp_hz = opp_hz;
255 		ot->ot_opp[i].opp_level = opp_level;
256 		count++;
257 	}
258 
259 	ot->ot_opp_hz_min = ot->ot_opp[0].opp_hz;
260 	ot->ot_opp_hz_max = ot->ot_opp[count - 1].opp_hz;
261 
262 	LIST_INSERT_HEAD(&sc->sc_opp_tables, ot, ot_list);
263 	sc->sc_opp_table[freq_domain[1]] = ot;
264 
265 	/* Keep track of overall min/max frequency. */
266 	if (sc->sc_opp_hz_min > ot->ot_opp_hz_min)
267 		sc->sc_opp_hz_min = ot->ot_opp_hz_min;
268 	if (sc->sc_opp_hz_max < ot->ot_opp_hz_max)
269 		sc->sc_opp_hz_max = ot->ot_opp_hz_max;
270 }
271 
272 uint32_t
273 aplcpu_opp_level(struct aplcpu_softc *sc, int cluster)
274 {
275 	uint32_t opp_level;
276 	uint64_t pstate;
277 
278 	if (sc->sc_cur_ps_mask) {
279 		pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[cluster],
280 		    DVFS_STATUS);
281 		opp_level = (pstate & sc->sc_cur_ps_mask);
282 		opp_level >>= sc->sc_cur_ps_shift;
283 	} else {
284 		pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[cluster],
285 		    DVFS_CMD);
286 		opp_level = (pstate & DVFS_CMD_PS1_MASK);
287 		opp_level >>= DVFS_CMD_PS1_SHIFT;
288 	}
289 
290 	return opp_level;
291 }
292 
293 int
294 aplcpu_clockspeed(int *freq)
295 {
296 	struct aplcpu_softc *sc;
297 	struct opp_table *ot;
298 	uint32_t opp_hz = 0, opp_level;
299 	int i, j, k;
300 
301 	/*
302 	 * Clusters can run at different frequencies.  We report the
303 	 * highest frequency among all clusters.
304 	 */
305 
306 	for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
307 		sc = aplcpu_cd.cd_devs[i];
308 		if (sc == NULL)
309 			continue;
310 
311 		for (j = 0; j < sc->sc_nclusters; j++) {
312 			if (sc->sc_opp_table[j] == NULL)
313 				continue;
314 
315 			opp_level = aplcpu_opp_level(sc, j);
316 
317 			/* Translate P-state to frequency. */
318 			ot = sc->sc_opp_table[j];
319 			for (k = 0; k < ot->ot_nopp; k++) {
320 				if (ot->ot_opp[k].opp_level != opp_level)
321 					continue;
322 				opp_hz = MAX(opp_hz, ot->ot_opp[k].opp_hz);
323 			}
324 		}
325 	}
326 
327 	if (opp_hz == 0)
328 		return EINVAL;
329 
330 	*freq = opp_hz / 1000000;
331 	return 0;
332 }
333 
334 void
335 aplcpu_setperf(int level)
336 {
337 	struct aplcpu_softc *sc;
338 	struct opp_table *ot;
339 	uint64_t min, max;
340 	uint64_t level_hz;
341 	uint32_t opp_level;
342 	uint64_t reg;
343 	int i, j, k, timo;
344 
345 	/*
346 	 * We let the CPU performance level span the entire range
347 	 * between the lowest frequency on any of the clusters and the
348 	 * highest frequency on any of the clusters.  We pick a
349 	 * frequency within that range based on the performance level
350 	 * and set all the clusters to the frequency that is closest
351 	 * to but less than that frequency.  This isn't a particularly
352 	 * sensible method but it is easy to implement and it is hard
353 	 * to come up with something more sensible given the
354 	 * constraints of the hw.setperf sysctl interface.
355 	 */
356 	for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
357 		sc = aplcpu_cd.cd_devs[i];
358 		if (sc == NULL)
359 			continue;
360 
361 		min = sc->sc_opp_hz_min;
362 		max = sc->sc_opp_hz_max;
363 		level_hz = min + (level * (max - min)) / 100;
364 	}
365 
366 	for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
367 		sc = aplcpu_cd.cd_devs[i];
368 		if (sc == NULL)
369 			continue;
370 		if (sc->sc_perflevel == level)
371 			continue;
372 
373 		for (j = 0; j < sc->sc_nclusters; j++) {
374 			if (sc->sc_opp_table[j] == NULL)
375 				continue;
376 
377 			/* Translate performance level to a P-state. */
378 			ot = sc->sc_opp_table[j];
379 			opp_level = ot->ot_opp[0].opp_level;
380 			for (k = 0; k < ot->ot_nopp; k++) {
381 				if (ot->ot_opp[k].opp_hz <= level_hz &&
382 				    ot->ot_opp[k].opp_level >= opp_level)
383 					opp_level = ot->ot_opp[k].opp_level;
384 			}
385 
386 			/* Wait until P-state logic isn't busy. */
387 			for (timo = 100; timo > 0; timo--) {
388 				reg = bus_space_read_8(sc->sc_iot,
389 				    sc->sc_ioh[j], DVFS_CMD);
390 				if ((reg & DVFS_CMD_BUSY) == 0)
391 					break;
392 				delay(1);
393 			}
394 			if (reg & DVFS_CMD_BUSY)
395 				continue;
396 
397 			/* Set desired P-state. */
398 			reg &= ~DVFS_CMD_PS1_MASK;
399 			reg |= (opp_level << DVFS_CMD_PS1_SHIFT);
400 			reg |= DVFS_CMD_SET;
401 			bus_space_write_8(sc->sc_iot, sc->sc_ioh[j],
402 			    DVFS_CMD, reg);
403 		}
404 
405 		sc->sc_perflevel = level;
406 	}
407 }
408 
409 void
410 aplcpu_refresh_sensors(void *arg)
411 {
412 	struct aplcpu_softc *sc = arg;
413 	struct opp_table *ot;
414 	uint32_t opp_level;
415 	int i, j;
416 
417 	for (i = 0; i < sc->sc_nclusters; i++) {
418 		if (sc->sc_opp_table[i] == NULL)
419 			continue;
420 
421 		opp_level = aplcpu_opp_level(sc, i);
422 
423 		/* Translate P-state to frequency. */
424 		ot = sc->sc_opp_table[i];
425 		for (j = 0; j < ot->ot_nopp; j++) {
426 			if (ot->ot_opp[j].opp_level == opp_level) {
427 				sc->sc_sensor[i].value = ot->ot_opp[j].opp_hz;
428 				break;
429 			}
430 		}
431 	}
432 }
433