xref: /openbsd-src/sys/arch/arm64/dev/aplcpu.c (revision 5b1ba0c506e1039151dc590e0013fd3696a8678a)
1 /*	$OpenBSD: aplcpu.c,v 1.9 2024/09/29 09:25:37 jsg Exp $	*/
2 /*
3  * Copyright (c) 2022 Mark Kettenis <kettenis@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <sys/param.h>
19 #include <sys/systm.h>
20 #include <sys/device.h>
21 #include <sys/malloc.h>
22 #include <sys/sensors.h>
23 #include <sys/sysctl.h>
24 
25 #include <machine/bus.h>
26 #include <machine/fdt.h>
27 
28 #include <dev/ofw/openfirm.h>
29 #include <dev/ofw/fdt.h>
30 
31 #define DVFS_CMD			0x0020
32 #define DVFS_CMD_BUSY			(1U << 31)
33 #define DVFS_CMD_SET			(1 << 25)
34 #define DVFS_CMD_PS2_MASK		(0x1f << 12)
35 #define DVFS_CMD_PS2_SHIFT		12
36 #define DVFS_CMD_PS1_MASK		(0x1f << 0)
37 #define DVFS_CMD_PS1_SHIFT		0
38 
39 #define DVFS_STATUS			0x50
40 #define DVFS_T8103_STATUS_CUR_PS_MASK	(0xf << 4)
41 #define DVFS_T8103_STATUS_CUR_PS_SHIFT	4
42 #define DVFS_T8112_STATUS_CUR_PS_MASK	(0x1f << 5)
43 #define DVFS_T8112_STATUS_CUR_PS_SHIFT	5
44 
45 #define APLCPU_DEEP_WFI_LATENCY		10 /* microseconds */
46 
/*
 * A single operating performance point: a CPU frequency and the
 * DVFS P-state level that selects it.
 */
struct opp {
	uint64_t opp_hz;	/* frequency in Hz */
	uint32_t opp_level;	/* DVFS P-state level */
};
51 
/*
 * An "operating-points-v2" table parsed from the device tree.  Tables
 * can be shared by several clusters that reference the same phandle;
 * the OPP array is kept sorted by ascending frequency.
 */
struct opp_table {
	LIST_ENTRY(opp_table) ot_list;	/* entry in sc_opp_tables */
	uint32_t ot_phandle;		/* phandle of the DT table node */

	struct opp *ot_opp;		/* sorted array of OPPs */
	u_int ot_nopp;			/* number of entries in ot_opp */
	uint64_t ot_opp_hz_min;		/* lowest frequency in this table */
	uint64_t ot_opp_hz_max;		/* highest frequency in this table */
};
61 
62 #define APLCPU_MAX_CLUSTERS	8
63 
struct aplcpu_softc {
	struct device		sc_dev;
	bus_space_tag_t		sc_iot;
	/* One DVFS register block per cluster (one "reg" entry each). */
	bus_space_handle_t	sc_ioh[APLCPU_MAX_CLUSTERS];
	bus_size_t		sc_ios[APLCPU_MAX_CLUSTERS];

	int			sc_node;	/* our device tree node */
	u_int			sc_nclusters;	/* number of mapped clusters */
	int			sc_perflevel;	/* last level set via setperf */

	/*
	 * SoC-specific field of DVFS_STATUS holding the current P-state;
	 * zero mask means "unknown SoC, read back DVFS_CMD instead".
	 */
	uint32_t		sc_cur_ps_mask;
	u_int			sc_cur_ps_shift;

	LIST_HEAD(, opp_table)	sc_opp_tables;	/* all parsed OPP tables */
	/* Per-cluster pointer into the (possibly shared) OPP tables. */
	struct opp_table	*sc_opp_table[APLCPU_MAX_CLUSTERS];
	uint64_t		sc_opp_hz_min;	/* overall minimum frequency */
	uint64_t		sc_opp_hz_max;	/* overall maximum frequency */

	/* One frequency sensor per cluster. */
	struct ksensordev	sc_sensordev;
	struct ksensor		sc_sensor[APLCPU_MAX_CLUSTERS];
};
85 
int	aplcpu_match(struct device *, void *, void *);
void	aplcpu_attach(struct device *, struct device *, void *);

/* Autoconf glue. */
const struct cfattach aplcpu_ca = {
	sizeof (struct aplcpu_softc), aplcpu_match, aplcpu_attach
};

struct cfdriver aplcpu_cd = {
	NULL, "aplcpu", DV_DULL
};

/* Internal helpers and hooks installed into the MI cpu glue. */
void	aplcpu_opp_init(struct aplcpu_softc *, int);
uint32_t aplcpu_opp_level(struct aplcpu_softc *, int);
int	aplcpu_clockspeed(int *);
void	aplcpu_setperf(int level);
void	aplcpu_refresh_sensors(void *);
void	aplcpu_idle_cycle(void);
void	aplcpu_deep_wfi(void);
104 
105 int
106 aplcpu_match(struct device *parent, void *match, void *aux)
107 {
108 	struct fdt_attach_args *faa = aux;
109 
110 	return OF_is_compatible(faa->fa_node, "apple,soc-cpufreq") ||
111 	    OF_is_compatible(faa->fa_node, "apple,cluster-cpufreq");
112 }
113 
/*
 * Attach: map one DVFS register block per cluster, determine the
 * SoC-specific P-state status field, build OPP tables for all CPUs,
 * attach per-cluster frequency sensors and install the cpufreq and
 * idle-cycle hooks.
 */
void
aplcpu_attach(struct device *parent, struct device *self, void *aux)
{
	struct aplcpu_softc *sc = (struct aplcpu_softc *)self;
	struct fdt_attach_args *faa = aux;
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;
	int i;

	if (faa->fa_nreg < 1) {
		printf(": no registers\n");
		return;
	}

	/* sc_ioh/sc_ios are fixed-size arrays; refuse larger configs. */
	if (faa->fa_nreg > APLCPU_MAX_CLUSTERS) {
		printf(": too many registers\n");
		return;
	}

	sc->sc_iot = faa->fa_iot;
	for (i = 0; i < faa->fa_nreg; i++) {
		if (bus_space_map(sc->sc_iot, faa->fa_reg[i].addr,
		    faa->fa_reg[i].size, 0, &sc->sc_ioh[i])) {
			printf(": can't map registers\n");
			goto unmap;
		}
		/* Non-zero size marks this slot as mapped for the error path. */
		sc->sc_ios[i] = faa->fa_reg[i].size;
	}

	printf("\n");

	sc->sc_node = faa->fa_node;
	sc->sc_nclusters = faa->fa_nreg;

	/*
	 * Pick the layout of the current-P-state field in DVFS_STATUS.
	 * On unknown SoCs sc_cur_ps_mask stays zero and aplcpu_opp_level()
	 * falls back to reading the last requested P-state from DVFS_CMD.
	 */
	if (OF_is_compatible(sc->sc_node, "apple,t8103-soc-cpufreq") ||
	    OF_is_compatible(sc->sc_node, "apple,t8103-cluster-cpufreq")) {
		sc->sc_cur_ps_mask = DVFS_T8103_STATUS_CUR_PS_MASK;
		sc->sc_cur_ps_shift = DVFS_T8103_STATUS_CUR_PS_SHIFT;
	} else if (OF_is_compatible(sc->sc_node, "apple,t8112-soc-cpufreq") ||
	    OF_is_compatible(sc->sc_node, "apple,t8112-cluster-cpufreq")) {
		sc->sc_cur_ps_mask = DVFS_T8112_STATUS_CUR_PS_MASK;
		sc->sc_cur_ps_shift = DVFS_T8112_STATUS_CUR_PS_SHIFT;
	}

	/* Start with an empty range; aplcpu_opp_init() widens it. */
	sc->sc_opp_hz_min = UINT64_MAX;
	sc->sc_opp_hz_max = 0;

	LIST_INIT(&sc->sc_opp_tables);
	CPU_INFO_FOREACH(cii, ci) {
		aplcpu_opp_init(sc, ci->ci_node);
	}

	for (i = 0; i < sc->sc_nclusters; i++) {
		sc->sc_sensor[i].type = SENSOR_FREQ;
		sensor_attach(&sc->sc_sensordev, &sc->sc_sensor[i]);
	}

	/* Prime the sensors so they are valid before the task first runs. */
	aplcpu_refresh_sensors(sc);

	strlcpy(sc->sc_sensordev.xname, sc->sc_dev.dv_xname,
	    sizeof(sc->sc_sensordev.xname));
	sensordev_install(&sc->sc_sensordev);
	sensor_task_register(sc, aplcpu_refresh_sensors, 1);

	cpu_idle_cycle_fcn = aplcpu_idle_cycle;
	cpu_suspend_cycle_fcn = aplcpu_deep_wfi;
	cpu_cpuspeed = aplcpu_clockspeed;
	cpu_setperf = aplcpu_setperf;
	return;

unmap:
	/* Unmap only the slots that were successfully mapped above. */
	for (i = 0; i < faa->fa_nreg; i++) {
		if (sc->sc_ios[i] == 0)
			continue;
		bus_space_unmap(sc->sc_iot, sc->sc_ioh[i], sc->sc_ios[i]);
	}
}
191 
192 void
193 aplcpu_opp_init(struct aplcpu_softc *sc, int node)
194 {
195 	struct opp_table *ot;
196 	int count, child;
197 	uint32_t freq_domain[2], phandle;
198 	uint32_t opp_hz, opp_level;
199 	int i, j;
200 
201 	freq_domain[0] = OF_getpropint(node, "performance-domains", 0);
202 	freq_domain[1] = 0;
203 	if (freq_domain[0] == 0) {
204 		if (OF_getpropintarray(node, "apple,freq-domain", freq_domain,
205 		    sizeof(freq_domain)) != sizeof(freq_domain))
206 			return;
207 		if (freq_domain[1] > APLCPU_MAX_CLUSTERS)
208 			return;
209 	}
210 	if (freq_domain[0] != OF_getpropint(sc->sc_node, "phandle", 0))
211 		return;
212 
213 	phandle = OF_getpropint(node, "operating-points-v2", 0);
214 	if (phandle == 0)
215 		return;
216 
217 	LIST_FOREACH(ot, &sc->sc_opp_tables, ot_list) {
218 		if (ot->ot_phandle == phandle) {
219 			sc->sc_opp_table[freq_domain[1]] = ot;
220 			return;
221 		}
222 	}
223 
224 	node = OF_getnodebyphandle(phandle);
225 	if (node == 0)
226 		return;
227 
228 	if (!OF_is_compatible(node, "operating-points-v2"))
229 		return;
230 
231 	count = 0;
232 	for (child = OF_child(node); child != 0; child = OF_peer(child))
233 		count++;
234 	if (count == 0)
235 		return;
236 
237 	ot = malloc(sizeof(struct opp_table), M_DEVBUF, M_ZERO | M_WAITOK);
238 	ot->ot_phandle = phandle;
239 	ot->ot_opp = mallocarray(count, sizeof(struct opp),
240 	    M_DEVBUF, M_ZERO | M_WAITOK);
241 	ot->ot_nopp = count;
242 
243 	count = 0;
244 	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
245 		opp_hz = OF_getpropint64(child, "opp-hz", 0);
246 		opp_level = OF_getpropint(child, "opp-level", 0);
247 
248 		/* Insert into the array, keeping things sorted. */
249 		for (i = 0; i < count; i++) {
250 			if (opp_hz < ot->ot_opp[i].opp_hz)
251 				break;
252 		}
253 		for (j = count; j > i; j--)
254 			ot->ot_opp[j] = ot->ot_opp[j - 1];
255 		ot->ot_opp[i].opp_hz = opp_hz;
256 		ot->ot_opp[i].opp_level = opp_level;
257 		count++;
258 	}
259 
260 	ot->ot_opp_hz_min = ot->ot_opp[0].opp_hz;
261 	ot->ot_opp_hz_max = ot->ot_opp[count - 1].opp_hz;
262 
263 	LIST_INSERT_HEAD(&sc->sc_opp_tables, ot, ot_list);
264 	sc->sc_opp_table[freq_domain[1]] = ot;
265 
266 	/* Keep track of overall min/max frequency. */
267 	if (sc->sc_opp_hz_min > ot->ot_opp_hz_min)
268 		sc->sc_opp_hz_min = ot->ot_opp_hz_min;
269 	if (sc->sc_opp_hz_max < ot->ot_opp_hz_max)
270 		sc->sc_opp_hz_max = ot->ot_opp_hz_max;
271 }
272 
273 uint32_t
274 aplcpu_opp_level(struct aplcpu_softc *sc, int cluster)
275 {
276 	uint32_t opp_level;
277 	uint64_t pstate;
278 
279 	if (sc->sc_cur_ps_mask) {
280 		pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[cluster],
281 		    DVFS_STATUS);
282 		opp_level = (pstate & sc->sc_cur_ps_mask);
283 		opp_level >>= sc->sc_cur_ps_shift;
284 	} else {
285 		pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[cluster],
286 		    DVFS_CMD);
287 		opp_level = (pstate & DVFS_CMD_PS1_MASK);
288 		opp_level >>= DVFS_CMD_PS1_SHIFT;
289 	}
290 
291 	return opp_level;
292 }
293 
294 int
295 aplcpu_clockspeed(int *freq)
296 {
297 	struct aplcpu_softc *sc;
298 	struct opp_table *ot;
299 	uint32_t opp_hz = 0, opp_level;
300 	int i, j, k;
301 
302 	/*
303 	 * Clusters can run at different frequencies.  We report the
304 	 * highest frequency among all clusters.
305 	 */
306 
307 	for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
308 		sc = aplcpu_cd.cd_devs[i];
309 		if (sc == NULL)
310 			continue;
311 
312 		for (j = 0; j < sc->sc_nclusters; j++) {
313 			if (sc->sc_opp_table[j] == NULL)
314 				continue;
315 
316 			opp_level = aplcpu_opp_level(sc, j);
317 
318 			/* Translate P-state to frequency. */
319 			ot = sc->sc_opp_table[j];
320 			for (k = 0; k < ot->ot_nopp; k++) {
321 				if (ot->ot_opp[k].opp_level != opp_level)
322 					continue;
323 				opp_hz = MAX(opp_hz, ot->ot_opp[k].opp_hz);
324 			}
325 		}
326 	}
327 
328 	if (opp_hz == 0)
329 		return EINVAL;
330 
331 	*freq = opp_hz / 1000000;
332 	return 0;
333 }
334 
335 void
336 aplcpu_setperf(int level)
337 {
338 	struct aplcpu_softc *sc;
339 	struct opp_table *ot;
340 	uint64_t min, max;
341 	uint64_t level_hz;
342 	uint32_t opp_level;
343 	uint64_t reg;
344 	int i, j, k, timo;
345 
346 	/*
347 	 * We let the CPU performance level span the entire range
348 	 * between the lowest frequency on any of the clusters and the
349 	 * highest frequency on any of the clusters.  We pick a
350 	 * frequency within that range based on the performance level
351 	 * and set all the clusters to the frequency that is closest
352 	 * to but less than that frequency.  This isn't a particularly
353 	 * sensible method but it is easy to implement and it is hard
354 	 * to come up with something more sensible given the
355 	 * constraints of the hw.setperf sysctl interface.
356 	 */
357 	for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
358 		sc = aplcpu_cd.cd_devs[i];
359 		if (sc == NULL)
360 			continue;
361 
362 		min = sc->sc_opp_hz_min;
363 		max = sc->sc_opp_hz_max;
364 		level_hz = min + (level * (max - min)) / 100;
365 	}
366 
367 	for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
368 		sc = aplcpu_cd.cd_devs[i];
369 		if (sc == NULL)
370 			continue;
371 		if (sc->sc_perflevel == level)
372 			continue;
373 
374 		for (j = 0; j < sc->sc_nclusters; j++) {
375 			if (sc->sc_opp_table[j] == NULL)
376 				continue;
377 
378 			/* Translate performance level to a P-state. */
379 			ot = sc->sc_opp_table[j];
380 			opp_level = ot->ot_opp[0].opp_level;
381 			for (k = 0; k < ot->ot_nopp; k++) {
382 				if (ot->ot_opp[k].opp_hz <= level_hz &&
383 				    ot->ot_opp[k].opp_level >= opp_level)
384 					opp_level = ot->ot_opp[k].opp_level;
385 			}
386 
387 			/* Wait until P-state logic isn't busy. */
388 			for (timo = 100; timo > 0; timo--) {
389 				reg = bus_space_read_8(sc->sc_iot,
390 				    sc->sc_ioh[j], DVFS_CMD);
391 				if ((reg & DVFS_CMD_BUSY) == 0)
392 					break;
393 				delay(1);
394 			}
395 			if (reg & DVFS_CMD_BUSY)
396 				continue;
397 
398 			/* Set desired P-state. */
399 			reg &= ~DVFS_CMD_PS1_MASK;
400 			reg |= (opp_level << DVFS_CMD_PS1_SHIFT);
401 			reg |= DVFS_CMD_SET;
402 			bus_space_write_8(sc->sc_iot, sc->sc_ioh[j],
403 			    DVFS_CMD, reg);
404 		}
405 
406 		sc->sc_perflevel = level;
407 	}
408 }
409 
410 void
411 aplcpu_refresh_sensors(void *arg)
412 {
413 	struct aplcpu_softc *sc = arg;
414 	struct opp_table *ot;
415 	uint32_t opp_level;
416 	int i, j;
417 
418 	for (i = 0; i < sc->sc_nclusters; i++) {
419 		if (sc->sc_opp_table[i] == NULL)
420 			continue;
421 
422 		opp_level = aplcpu_opp_level(sc, i);
423 
424 		/* Translate P-state to frequency. */
425 		ot = sc->sc_opp_table[i];
426 		for (j = 0; j < ot->ot_nopp; j++) {
427 			if (ot->ot_opp[j].opp_level == opp_level) {
428 				sc->sc_sensor[i].value = ot->ot_opp[j].opp_hz;
429 				break;
430 			}
431 		}
432 	}
433 }
434 
435 void
436 aplcpu_idle_cycle(void)
437 {
438 	struct cpu_info *ci = curcpu();
439 	struct timeval start, stop;
440 	u_long itime;
441 
442 	microuptime(&start);
443 
444 	if (ci->ci_prev_sleep > 3 * APLCPU_DEEP_WFI_LATENCY)
445 		aplcpu_deep_wfi();
446 	else
447 		cpu_wfi();
448 
449 	microuptime(&stop);
450 	timersub(&stop, &start, &stop);
451 	itime = stop.tv_sec * 1000000 + stop.tv_usec;
452 
453 	ci->ci_last_itime = itime;
454 	itime >>= 1;
455 	ci->ci_prev_sleep = (ci->ci_prev_sleep + (ci->ci_prev_sleep >> 1)
456 	    + itime) >> 1;
457 }
458