xref: /openbsd-src/sys/arch/powerpc64/powerpc64/cpu.c (revision d9a6171acb5469c258797f6c715a1c8ab071b042)
1 /*	$OpenBSD: cpu.c,v 1.30 2024/11/28 18:54:36 gkoehler Exp $	*/
2 
3 /*
4  * Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/atomic.h>
21 #include <sys/device.h>
22 #include <sys/systm.h>
23 #include <sys/timeout.h>
24 
25 #include <uvm/uvm_extern.h>
26 
27 #include <machine/cpu.h>
28 #include <machine/cpufunc.h>
29 #include <machine/fdt.h>
30 #include <machine/opal.h>
31 
32 #include <dev/ofw/openfirm.h>
33 #include <dev/ofw/fdt.h>
34 
/* CPU Identification. */
#define CPU_IBMPOWER8E		0x004b
#define CPU_IBMPOWER8NVL	0x004c
#define CPU_IBMPOWER8		0x004d
#define CPU_IBMPOWER9		0x004e
#define CPU_IBMPOWER9P		0x004f

/* Accessors for the fields of the Processor Version Register (PVR). */
#define CPU_VERSION(pvr)	((pvr) >> 16)
#define CPU_REV_MAJ(pvr)	(((pvr) >> 8) & 0xf)
#define CPU_REV_MIN(pvr)	(((pvr) >> 0) & 0xf)

/* Maps a PVR version field to a human-readable model name. */
struct cpu_version {
	int		version;
	const char	*name;
};

/* Known CPU models; the table is terminated by a NULL name. */
struct cpu_version cpu_version[] = {
	{ CPU_IBMPOWER8, "IBM POWER8" },
	{ CPU_IBMPOWER8E, "IBM POWER8E" },
	{ CPU_IBMPOWER8NVL, "IBM POWER8NVL" },
	{ CPU_IBMPOWER9, "IBM POWER9" },
	{ CPU_IBMPOWER9P, "IBM POWER9P" },
	{ 0, NULL }
};
59 
/* Human-readable model string, filled in by cpu_attach(). */
char cpu_model[64];

/*
 * Timebase frequency in Hz.  The default covers POWER8/POWER9; it is
 * refined from the device tree "timebase-frequency" property at the
 * end of cpu_attach().
 */
uint64_t tb_freq = 512000000;	/* POWER8, POWER9 */

/* Per-CPU state, indexed by attach order; slot 0 is the primary CPU. */
struct cpu_info cpu_info[MAXCPUS];
struct cpu_info *cpu_info_primary = &cpu_info[0];

/* Periodic entropy collection using the POWER9 darn instruction. */
struct timeout cpu_darn_to;
void	cpu_darn(void *);

int	cpu_match(struct device *, void *, void *);
void	cpu_attach(struct device *, struct device *, void *);

/* Autoconf glue. */
const struct cfattach cpu_ca = {
	sizeof(struct device), cpu_match, cpu_attach
};

struct cfdriver cpu_cd = {
	NULL, "cpu", DV_DULL
};

/* Secondary CPU entry point handed to OPAL (see cpu_attach()). */
void	cpu_hatch(void);
/* Inter-processor interrupt handler. */
int	cpu_intr(void *);
83 
84 int
85 cpu_match(struct device *parent, void *cfdata, void *aux)
86 {
87 	struct fdt_attach_args *faa = aux;
88 	char buf[32];
89 
90 	if (OF_getprop(faa->fa_node, "device_type", buf, sizeof(buf)) <= 0 ||
91 	    strcmp(buf, "cpu") != 0)
92 		return 0;
93 
94 	if (ncpus < MAXCPUS || faa->fa_reg[0].addr == mfpir())
95 		return 1;
96 
97 	return 0;
98 }
99 
/*
 * Attach a CPU device: identify the processor, print its model,
 * clock and cache hierarchy, and (MULTIPROCESSOR) ask OPAL firmware
 * to start secondary hardware threads and wait for them to check in.
 */
void
cpu_attach(struct device *parent, struct device *dev, void *aux)
{
	struct fdt_attach_args *faa = aux;
	struct cpu_info *ci;
	const char *name = NULL;
	uint32_t pvr, clock_freq, iline, dline;
	int node, level, i;

	/* cpu_info slots are indexed by attachment order (device unit). */
	ci = &cpu_info[dev->dv_unit];
	ci->ci_dev = dev;
	ci->ci_cpuid = dev->dv_unit;
	/* The FDT "reg" value is this CPU's PIR. */
	ci->ci_pir = faa->fa_reg[0].addr;
	ci->ci_node = faa->fa_node;

	printf(" pir %x", ci->ci_pir);

	pvr = mfpvr();

	/* Translate the PVR version field into a model name. */
	for (i = 0; cpu_version[i].name; i++) {
		if (CPU_VERSION(pvr) == cpu_version[i].version) {
			name = cpu_version[i].name;
			break;
		}
	}

	if (name) {
		printf(": %s %d.%d", name, CPU_REV_MAJ(pvr), CPU_REV_MIN(pvr));
		snprintf(cpu_model, sizeof(cpu_model), "%s %d.%d",
		    name, CPU_REV_MAJ(pvr), CPU_REV_MIN(pvr));
	} else {
		printf(": Unknown, PVR 0x%x", pvr);
		strlcpy(cpu_model, "Unknown", sizeof(cpu_model));
	}

	node = faa->fa_node;
	clock_freq = OF_getpropint(node, "clock-frequency", 0);
	if (clock_freq != 0) {
		clock_freq /= 1000000; /* Hz to MHz */
		printf(", %u MHz", clock_freq);
	}

	iline = OF_getpropint(node, "i-cache-block-size", 128);
	dline = OF_getpropint(node, "d-cache-block-size", 128);
	level = 1;

	/*
	 * Walk the cache hierarchy: the CPU node describes the L1
	 * caches, and each node's "l2-cache" phandle points at the
	 * node describing the next cache level.
	 */
	while (node) {
		const char *unit = "KB";
		uint32_t isize, iways;
		uint32_t dsize, dways;
		uint32_t cache;

		isize = OF_getpropint(node, "i-cache-size", 0) / 1024;
		/*
		 * NOTE(review): "i-cache-sets"/"d-cache-sets" are
		 * printed as "n-way" associativity below — confirm
		 * against the device tree binding.
		 */
		iways = OF_getpropint(node, "i-cache-sets", 0);
		dsize = OF_getpropint(node, "d-cache-size", 0) / 1024;
		dways = OF_getpropint(node, "d-cache-sets", 0);

		/* Print large cache sizes in MB. */
		if (isize > 4096 && dsize > 4096) {
			unit = "MB";
			isize /= 1024;
			dsize /= 1024;
		}

		printf("\n%s:", dev->dv_xname);

		/* A zero-length "cache-unified" property marks a
		 * combined I/D cache. */
		if (OF_getproplen(node, "cache-unified") == 0) {
			printf(" %d%s %db/line %d-way L%d cache",
			    isize, unit, iline, iways, level);
		} else {
			printf(" %d%s %db/line %d-way L%d I-cache",
			    isize, unit, iline, iways, level);
			printf(", %d%s %db/line %d-way L%d D-cache",
			    dsize, unit, dline, dways, level);
		}

		cache = OF_getpropint(node, "l2-cache", 0);
		node = OF_getnodebyphandle(cache);
		level++;
	}

	/* Start periodic darn entropy collection once, on the primary. */
	if (CPU_IS_PRIMARY(ci) && (hwcap2 & PPC_FEATURE2_DARN)) {
		timeout_set(&cpu_darn_to, cpu_darn, NULL);
		cpu_darn(NULL);
	}

#ifdef MULTIPROCESSOR
	if (dev->dv_unit != 0) {
		int timeout = 10000;

		clockqueue_init(&ci->ci_queue);
		sched_init_cpu(ci);
		ncpus++;

		/* Fresh zeroed kernel stack for the secondary to boot on. */
		ci->ci_initstack_end = km_alloc(PAGE_SIZE, &kv_any, &kp_zero,
		    &kd_waitok) + PAGE_SIZE;

		/* Ask OPAL firmware to start the CPU at cpu_hatch. */
		if (opal_start_cpu(ci->ci_pir, (vaddr_t)cpu_hatch) ==
		    OPAL_SUCCESS) {
			atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
			membar_sync();

			/* Wait up to 10 seconds (10000 x 1ms) for the
			 * secondary to set CPUF_IDENTIFIED. */
			while ((ci->ci_flags & CPUF_IDENTIFIED) == 0 &&
			    --timeout)
				delay(1000);
			if (timeout == 0) {
				printf(" failed to identify");
				ci->ci_flags = 0;
			}
		} else {
			printf(" failed to start");
			ci->ci_flags = 0;
		}
	}
#endif

	printf("\n");

	/* Update timebase frequency to reflect reality. */
	tb_freq = OF_getpropint(faa->fa_node, "timebase-frequency", tb_freq);
}
221 
222 void
223 cpu_init_features(void)
224 {
225 	uint32_t pvr = mfpvr();
226 
227 	hwcap = PPC_FEATURE_32 | PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU |
228 	    PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_ALTIVEC |
229 	    PPC_FEATURE_HAS_VSX;
230 
231 	switch (CPU_VERSION(pvr)) {
232 	case CPU_IBMPOWER9:
233 	case CPU_IBMPOWER9P:
234 		hwcap2 |= PPC_FEATURE2_ARCH_3_00;
235 		hwcap2 |= PPC_FEATURE2_DARN;
236 		break;
237 	}
238 }
239 
/*
 * Per-CPU initialization of special purpose registers (LPCR, FSCR,
 * AMR/UAMOR/AMOR).  Called from cpu_bootstrap() on secondaries;
 * presumably also run on the primary during early boot — the caller
 * is outside this file.  Each SPR write is followed by isync to make
 * the context change visible before continuing.
 */
void
cpu_init(void)
{
	uint64_t lpcr = LPCR_LPES;

	/*
	 * NOTE(review): on ISA 3.00 CPUs additional LPCR bits are
	 * set; LPCR_HVICE looks like the hypervisor virtualization
	 * interrupt enable and LPCR_PECE the power-management exit
	 * cause enables — confirm against the Power ISA.
	 */
	if (hwcap2 & PPC_FEATURE2_ARCH_3_00)
		lpcr |= LPCR_PECE | LPCR_HVICE;

	mtlpcr(lpcr);
	isync();

	/* Clear the facility status and control register. */
	mtfscr(0);
	isync();

	/*
	 * Set AMR to inhibit loads and stores for all virtual page
	 * class keys, except for Key0 which is used for normal kernel
	 * access.  This means we can pick any other key to implement
	 * execute-only mappings.  But we pick Key1 since that allows
	 * us to use the same bit in the PTE as was used to enable the
	 * Data Access Compare mechanism on CPUs based on older
	 * versions of the architecture (such as the PowerPC 970).
	 *
	 * Set UAMOR (and AMOR just to be safe) to zero to prevent
	 * userland from modifying any bits in AMR.
	 */
	mtamr(0x3fffffffffffffff);
	mtuamor(0);
	mtamor(0);
	isync();
}
271 
/*
 * Harvest entropy from the darn ("deliver a random number")
 * instruction and reschedule ourselves to run again in 10ms.
 * Kicked off by cpu_attach() on the primary CPU when the DARN
 * feature is present.
 */
void
cpu_darn(void *arg)
{
	uint64_t value;

	/*
	 * darn with operand L=1 requests a 64-bit random value; an
	 * all-ones result indicates the hardware failed to produce
	 * one, so it is discarded.
	 */
	__asm volatile ("darn %0, 1" : "=r"(value));
	if (value != UINT64_MAX) {
		/* Feed both 32-bit halves into the entropy pool. */
		enqueue_randomness(value);
		enqueue_randomness(value >> 32);
	}

	timeout_add_msec(&cpu_darn_to, 10);
}
285 
/*
 * PSSCR value for entering an idle stop state — NOTE(review): set
 * elsewhere (not in this file); confirm semantics against the user.
 */
uint64_t cpu_idle_state_psscr;
void	cpu_idle_spin(void);
/* Idle method hook; defaults to busy spinning. */
void	(*cpu_idle_cycle_fcn)(void) = &cpu_idle_spin;
289 
290 void
291 cpu_idle_cycle(void)
292 {
293 	intr_disable();
294 
295 	if (!cpu_is_idle(curcpu())) {
296 		intr_enable();
297 		return;
298 	}
299 
300 	(*cpu_idle_cycle_fcn)();
301 
302 	intr_enable();
303 }
304 
305 #ifdef MULTIPROCESSOR
306 
/* Performance level broadcast to other CPUs via IPI_SETPERF. */
volatile int mp_perflevel;
/* Per-CPU setperf hook invoked by mp_setperf()/cpu_intr(); assigned
 * elsewhere — NOTE(review): never NULL-checked here. */
void (*ul_setperf)(int);
309 
/*
 * Early MD bring-up run on a hatching CPU itself: locate our
 * cpu_info by PIR, publish it in SPRG0, enable trap handling, run
 * per-CPU SPR and pmap setup, and finally turn on address
 * translation.  The statement order here is load-bearing.
 */
void
cpu_bootstrap(void)
{
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;
	uint32_t pir = mfpir();
	uint64_t msr;

	/* Find the cpu_info slot whose PIR cpu_attach() recorded. */
	CPU_INFO_FOREACH(cii, ci) {
		if (pir == ci->ci_pir)
			break;
	}

	/* Store pointer to our struct cpu_info. */
	__asm volatile ("mtsprg0 %0" :: "r"(ci));

	/* We're now ready to take traps. */
	msr = mfmsr();
	mtmsr(msr | (PSL_ME|PSL_RI));

	cpu_init();

	pmap_bootstrap_cpu();

	/* Enable translation. */
	msr = mfmsr();
	mtmsr(msr | (PSL_DR|PSL_IR));
	isync();
}
339 
/*
 * Main entry for a freshly hatched secondary CPU.  Performs the
 * CPUF_* flag handshake with cpu_attach() and cpu_boot_secondary(),
 * starts the clock, and enters the scheduler idle loop (does not
 * return).
 */
void
cpu_start_secondary(void)
{
	struct cpu_info *ci = curcpu();
	int s;

	atomic_setbits_int(&ci->ci_flags, CPUF_PRESENT);

	/* Wait until cpu_attach() asks us to identify ourselves. */
	while ((ci->ci_flags & CPUF_IDENTIFY) == 0)
		CPU_BUSY_CYCLE();

	atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFIED);
	membar_sync();

	/* Hold here until cpu_boot_secondary() releases us. */
	while ((ci->ci_flags & CPUF_GO) == 0)
		CPU_BUSY_CYCLE();

	/* splhigh() return deliberately unused; we drop straight to
	 * IPL_NONE below instead of restoring the previous level. */
	s = splhigh();
	cpu_startclock();

	/* Tell cpu_boot_secondary() we are up and running. */
	atomic_setbits_int(&ci->ci_flags, CPUF_RUNNING);
	membar_sync();

	spllower(IPL_NONE);

	sched_toidle();
}
367 
/*
 * Release one secondary CPU from its holding loop in
 * cpu_start_secondary() and spin until it reports CPUF_RUNNING.
 */
void
cpu_boot_secondary(struct cpu_info *ci)
{
	atomic_setbits_int(&ci->ci_flags, CPUF_GO);
	membar_sync();

	while ((ci->ci_flags & CPUF_RUNNING) == 0)
		CPU_BUSY_CYCLE();
}
377 
/*
 * Finish SMP bring-up: hook up the IPI handler on every CPU
 * (including the primary), then release each hatched secondary.
 */
void
cpu_boot_secondary_processors(void)
{
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;

	CPU_INFO_FOREACH(cii, ci) {
		/* Set up IPI handler. */
		ci->ci_ipi = fdt_intr_establish_idx_cpu(ci->ci_node, 0,
		    IPL_IPI, ci, cpu_intr, ci, ci->ci_dev->dv_xname);

		/* The primary is already running; skip CPUs that never
		 * made it through the identify handshake. */
		if (CPU_IS_PRIMARY(ci))
			continue;
		if ((ci->ci_flags & CPUF_PRESENT) == 0)
			continue;

		ci->ci_randseed = (arc4random() & 0x7fffffff) + 1;
		cpu_boot_secondary(ci);
	}
}
398 
399 int
400 cpu_intr(void *arg)
401 {
402 	struct cpu_info *ci = curcpu();
403 	int pending;
404 
405 	pending = atomic_swap_uint(&ci->ci_ipi_reason, IPI_NOP);
406 
407 	if (pending & IPI_DDB)
408 		db_enter();
409 
410 	if (pending & IPI_SETPERF)
411 		ul_setperf(mp_perflevel);
412 
413 	return 1;
414 }
415 
416 void
417 cpu_kick(struct cpu_info *ci)
418 {
419 	if (ci != curcpu())
420 		intr_send_ipi(ci, IPI_NOP);
421 }
422 
423 void
424 cpu_unidle(struct cpu_info *ci)
425 {
426 	if (ci != curcpu())
427 		intr_send_ipi(ci, IPI_NOP);
428 }
429 
430 /*
431  * Run ul_setperf(level) on every core.
432  */
433 void
434 mp_setperf(int level)
435 {
436 	int i;
437 
438 	mp_perflevel = level;
439 	ul_setperf(level);
440 	for (i = 0; i < ncpus; i++) {
441 		if (i != cpu_number())
442 			intr_send_ipi(&cpu_info[i], IPI_SETPERF);
443 	}
444 }
445 
446 #endif
447