xref: /openbsd-src/sys/arch/arm64/dev/aplcpu.c (revision c1a45aed656e7d5627c30c92421893a76f370ccb)
1 /*	$OpenBSD: aplcpu.c,v 1.1 2022/02/20 19:25:57 kettenis Exp $	*/
2 /*
3  * Copyright (c) 2022 Mark Kettenis <kettenis@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <sys/param.h>
19 #include <sys/systm.h>
20 #include <sys/device.h>
21 #include <sys/malloc.h>
22 #include <sys/sysctl.h>
23 
24 #include <machine/bus.h>
25 #include <machine/fdt.h>
26 
27 #include <dev/ofw/openfirm.h>
28 #include <dev/ofw/fdt.h>
29 
30 /*
31  * This driver is based on preliminary device tree bindings and will
32  * almost certainly need changes once the official bindings land in
33  * mainline Linux.  Support for these preliminary bindings will be
34  * dropped as soon as official bindings are available.
35  */
36 
/*
 * Per-cluster P-state (performance state) control register layout.
 * A single 64-bit register holds the busy flag, a "set" trigger bit
 * and two desired P-state fields that are programmed identically.
 */
#define CLUSTER_PSTATE			0x0020
#define CLUSTER_PSTATE_BUSY		(1U << 31)	/* state change in progress */
#define CLUSTER_PSTATE_SET		(1 << 25)	/* latch the desired P-state */
#define CLUSTER_PSTATE_DESIRED2_MASK	(0xf << 12)
#define CLUSTER_PSTATE_DESIRED2_SHIFT	12
#define CLUSTER_PSTATE_DESIRED1_MASK	(0xf << 0)
#define CLUSTER_PSTATE_DESIRED1_SHIFT	0
44 
/* One operating performance point: a frequency and its P-state level. */
struct opp {
	uint64_t opp_hz;	/* CPU frequency in Hz */
	uint32_t opp_level;	/* hardware P-state index for this frequency */
};

/*
 * An operating-points-v2 table parsed from the device tree.  Tables
 * are shared: clusters whose "operating-points-v2" phandle matches an
 * already-parsed table reuse it (see aplcpu_opp_init()).
 */
struct opp_table {
	LIST_ENTRY(opp_table) ot_list;	/* linkage on sc_opp_tables */
	uint32_t ot_phandle;		/* device tree phandle of the table */

	struct opp *ot_opp;		/* OPPs sorted by ascending frequency */
	u_int ot_nopp;			/* number of entries in ot_opp */
	uint64_t ot_opp_hz_min;		/* lowest frequency in the table */
	uint64_t ot_opp_hz_max;		/* highest frequency in the table */
};
59 
#define APLCPU_MAX_CLUSTERS	8

struct aplcpu_softc {
	struct device		sc_dev;
	bus_space_tag_t		sc_iot;
	/* One register window per CPU cluster, indexed by cluster id. */
	bus_space_handle_t	sc_ioh[APLCPU_MAX_CLUSTERS];
	bus_size_t		sc_ios[APLCPU_MAX_CLUSTERS];	/* 0 if unmapped */

	int			sc_node;	/* our device tree node */
	u_int			sc_nclusters;	/* number of mapped clusters */
	int			sc_perflevel;	/* last hw.setperf level applied */

	LIST_HEAD(, opp_table)	sc_opp_tables;	/* all parsed OPP tables */
	/* Per-cluster pointer into sc_opp_tables; NULL if none found. */
	struct opp_table	*sc_opp_table[APLCPU_MAX_CLUSTERS];
	uint64_t		sc_opp_hz_min;	/* min frequency over all clusters */
	uint64_t		sc_opp_hz_max;	/* max frequency over all clusters */
};
77 
/* Singleton softc, used by the cpu_cpuspeed/cpu_setperf callbacks. */
struct aplcpu_softc *aplcpu_sc;

int	aplcpu_match(struct device *, void *, void *);
void	aplcpu_attach(struct device *, struct device *, void *);

const struct cfattach aplcpu_ca = {
	sizeof (struct aplcpu_softc), aplcpu_match, aplcpu_attach
};

struct cfdriver aplcpu_cd = {
	NULL, "aplcpu", DV_DULL
};

void	aplcpu_opp_init(struct aplcpu_softc *, int);
int	aplcpu_clockspeed(int *);
void	aplcpu_setperf(int level);
94 
95 int
96 aplcpu_match(struct device *parent, void *match, void *aux)
97 {
98 	struct fdt_attach_args *faa = aux;
99 
100 	return OF_is_compatible(faa->fa_node, "apple,cluster-cpufreq");
101 }
102 
/*
 * Autoconf attach: map one register window per cluster, parse the
 * OPP tables referenced by each CPU node, and install the global
 * cpu_cpuspeed/cpu_setperf hooks.
 */
void
aplcpu_attach(struct device *parent, struct device *self, void *aux)
{
	struct aplcpu_softc *sc = (struct aplcpu_softc *)self;
	struct fdt_attach_args *faa = aux;
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;
	int i;

	if (faa->fa_nreg < 1) {
		printf(": no registers\n");
		return;
	}

	/* The per-cluster arrays in the softc are fixed size. */
	if (faa->fa_nreg > APLCPU_MAX_CLUSTERS) {
		printf(": too many registers\n");
		return;
	}

	sc->sc_iot = faa->fa_iot;
	for (i = 0; i < faa->fa_nreg; i++) {
		if (bus_space_map(sc->sc_iot, faa->fa_reg[i].addr,
		    faa->fa_reg[i].size, 0, &sc->sc_ioh[i])) {
			printf(": can't map registers\n");
			goto unmap;
		}
		/* Non-zero sc_ios[] marks this window as mapped (see unmap). */
		sc->sc_ios[i] = faa->fa_reg[i].size;
	}

	printf("\n");

	sc->sc_node = faa->fa_node;
	sc->sc_nclusters = faa->fa_nreg;

	/* Start with an empty range; aplcpu_opp_init() widens it. */
	sc->sc_opp_hz_min = UINT64_MAX;
	sc->sc_opp_hz_max = 0;

	/* Each CPU node may reference an OPP table for its cluster. */
	LIST_INIT(&sc->sc_opp_tables);
	CPU_INFO_FOREACH(cii, ci) {
		aplcpu_opp_init(sc, ci->ci_node);
	}

	/* Publish ourselves as the system frequency-scaling backend. */
	aplcpu_sc = sc;
	cpu_cpuspeed = aplcpu_clockspeed;
	cpu_setperf = aplcpu_setperf;
	return;

unmap:
	for (i = 0; i < faa->fa_nreg; i++) {
		if (sc->sc_ios[i] == 0)
			continue;
		bus_space_unmap(sc->sc_iot, sc->sc_ioh[i], sc->sc_ios[i]);
	}
}
157 
158 void
159 aplcpu_opp_init(struct aplcpu_softc *sc, int node)
160 {
161 	struct opp_table *ot;
162 	int count, child;
163 	uint32_t freq_domain[2], phandle;
164 	uint32_t opp_hz, opp_level;
165 	int i, j;
166 
167 	if (OF_getpropintarray(node, "apple,freq-domain", freq_domain,
168 	    sizeof(freq_domain)) != sizeof(freq_domain))
169 		return;
170 	if (freq_domain[0] != OF_getpropint(sc->sc_node, "phandle", 0))
171 		return;
172 	if (freq_domain[1] > APLCPU_MAX_CLUSTERS)
173 		return;
174 
175 	phandle = OF_getpropint(node, "operating-points-v2", 0);
176 	if (phandle == 0)
177 		return;
178 
179 	LIST_FOREACH(ot, &sc->sc_opp_tables, ot_list) {
180 		if (ot->ot_phandle == phandle) {
181 			sc->sc_opp_table[freq_domain[1]] = ot;
182 			return;
183 		}
184 	}
185 
186 	node = OF_getnodebyphandle(phandle);
187 	if (node == 0)
188 		return;
189 
190 	if (!OF_is_compatible(node, "operating-points-v2"))
191 		return;
192 
193 	count = 0;
194 	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
195 		if (OF_getproplen(child, "turbo-mode") == 0)
196 			continue;
197 		count++;
198 	}
199 	if (count == 0)
200 		return;
201 
202 	ot = malloc(sizeof(struct opp_table), M_DEVBUF, M_ZERO | M_WAITOK);
203 	ot->ot_phandle = phandle;
204 	ot->ot_opp = mallocarray(count, sizeof(struct opp),
205 	    M_DEVBUF, M_ZERO | M_WAITOK);
206 	ot->ot_nopp = count;
207 
208 	count = 0;
209 	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
210 		if (OF_getproplen(child, "turbo-mode") == 0)
211 			continue;
212 		opp_hz = OF_getpropint64(child, "opp-hz", 0);
213 		opp_level = OF_getpropint(child, "opp-level", 0);
214 
215 		/* Insert into the array, keeping things sorted. */
216 		for (i = 0; i < count; i++) {
217 			if (opp_hz < ot->ot_opp[i].opp_hz)
218 				break;
219 		}
220 		for (j = count; j > i; j--)
221 			ot->ot_opp[j] = ot->ot_opp[j - 1];
222 		ot->ot_opp[i].opp_hz = opp_hz;
223 		ot->ot_opp[i].opp_level = opp_level;
224 		count++;
225 	}
226 
227 	ot->ot_opp_hz_min = ot->ot_opp[0].opp_hz;
228 	ot->ot_opp_hz_max = ot->ot_opp[count - 1].opp_hz;
229 
230 	LIST_INSERT_HEAD(&sc->sc_opp_tables, ot, ot_list);
231 	sc->sc_opp_table[freq_domain[1]] = ot;
232 
233 	/* Keep track of overall min/max frequency. */
234 	if (sc->sc_opp_hz_min > ot->ot_opp_hz_min)
235 		sc->sc_opp_hz_min = ot->ot_opp_hz_min;
236 	if (sc->sc_opp_hz_max < ot->ot_opp_hz_max)
237 		sc->sc_opp_hz_max = ot->ot_opp_hz_max;
238 }
239 
240 int
241 aplcpu_clockspeed(int *freq)
242 {
243 	struct aplcpu_softc *sc = aplcpu_sc;
244 	struct opp_table *ot;
245 	uint32_t opp_hz = 0, opp_level;
246 	uint64_t pstate;
247 	int i, j;
248 
249 	/*
250 	 * Clusters can run at different frequencies.  We report the
251 	 * highest frequency among all clusters.
252 	 */
253 
254 	for (i = 0; i < sc->sc_nclusters; i++) {
255 		if (sc->sc_opp_table[i] == NULL)
256 			continue;
257 
258 		pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[i],
259 		    CLUSTER_PSTATE);
260 		opp_level = (pstate & CLUSTER_PSTATE_DESIRED1_MASK);
261 		opp_level >>= CLUSTER_PSTATE_DESIRED1_SHIFT;
262 
263 		/* Translate P-state to frequency. */
264 		ot = sc->sc_opp_table[i];
265 		for (j = 0; j < ot->ot_nopp; j++) {
266 			if (ot->ot_opp[j].opp_level == opp_level)
267 				opp_hz = MAX(opp_hz, ot->ot_opp[j].opp_hz);
268 		}
269 	}
270 	if (opp_hz == 0)
271 		return EINVAL;
272 
273 	*freq = opp_hz / 1000000;
274 	return 0;
275 }
276 
/*
 * cpu_setperf() backend: map a hw.setperf level (0-100) onto a target
 * frequency and program every cluster's P-state accordingly.
 */
void
aplcpu_setperf(int level)
{
	struct aplcpu_softc *sc = aplcpu_sc;
	struct opp_table *ot;
	uint64_t min, max;
	uint64_t level_hz;
	uint32_t opp_level;
	uint64_t reg;
	int i, j, timo;

	/* Nothing to do if we're already at the requested level. */
	if (sc->sc_perflevel == level)
		return;

	/*
	 * We let the CPU performance level span the entire range
	 * between the lowest frequency on any of the clusters and the
	 * highest frequency on any of the clusters.  We pick a
	 * frequency within that range based on the performance level
	 * and set all the clusters to the frequency that is closest
	 * to but less than that frequency.  This isn't a particularly
	 * sensible method but it is easy to implement and it is hard
	 * to come up with something more sensible given the
	 * constraints of the hw.setperf sysctl interface.
	 */
	min = sc->sc_opp_hz_min;
	max = sc->sc_opp_hz_max;
	level_hz = min + (level * (max - min)) / 100;

	for (i = 0; i < sc->sc_nclusters; i++) {
		if (sc->sc_opp_table[i] == NULL)
			continue;

		/* Translate performance level to a P-state. */
		/* Pick the highest opp_level whose frequency is <= level_hz. */
		opp_level = 0;
		ot = sc->sc_opp_table[i];
		for (j = 0; j < ot->ot_nopp; j++) {
			if (ot->ot_opp[j].opp_hz <= level_hz &&
			    ot->ot_opp[j].opp_level >= opp_level)
				opp_level = ot->ot_opp[j].opp_level;
		}

		/* Wait until P-state logic isn't busy. */
		/* The loop runs at least once, so reg is always assigned. */
		for (timo = 100; timo > 0; timo--) {
			reg = bus_space_read_8(sc->sc_iot, sc->sc_ioh[i],
			    CLUSTER_PSTATE);
			if ((reg & CLUSTER_PSTATE_BUSY) == 0)
				break;
			delay(1);
		}
		/* Still busy after ~100us: skip this cluster rather than hang. */
		if (reg & CLUSTER_PSTATE_BUSY)
			continue;

		/* Set desired P-state. */
		/* Both DESIRED fields are programmed with the same level. */
		reg &= ~CLUSTER_PSTATE_DESIRED1_MASK;
		reg &= ~CLUSTER_PSTATE_DESIRED2_MASK;
		reg |= (opp_level << CLUSTER_PSTATE_DESIRED1_SHIFT);
		reg |= (opp_level << CLUSTER_PSTATE_DESIRED2_SHIFT);
		reg |= CLUSTER_PSTATE_SET;
		bus_space_write_8(sc->sc_iot, sc->sc_ioh[i],
		    CLUSTER_PSTATE, reg);
	}

	sc->sc_perflevel = level;
}
342