xref: /netbsd-src/sys/arch/riscv/riscv/riscv_machdep.c (revision 0953a56a13bb59c798a0f87cb85760218a381a0d)
1 /*	$NetBSD: riscv_machdep.c,v 1.42 2025/01/04 14:23:03 skrll Exp $	*/
2 
3 /*-
4  * Copyright (c) 2014, 2019, 2022 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Matt Thomas of 3am Software Foundry, and by Nick Hudson.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ddb.h"
33 #include "opt_modular.h"
34 #include "opt_multiprocessor.h"
35 #include "opt_riscv_debug.h"
36 
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: riscv_machdep.c,v 1.42 2025/01/04 14:23:03 skrll Exp $");
39 
40 #include <sys/param.h>
41 
42 #include <sys/asan.h>
43 #include <sys/boot_flag.h>
44 #include <sys/cpu.h>
45 #include <sys/exec.h>
46 #include <sys/kmem.h>
47 #include <sys/ktrace.h>
48 #include <sys/lwp.h>
49 #include <sys/module.h>
50 #include <sys/mount.h>
51 #include <sys/msgbuf.h>
52 #include <sys/optstr.h>
53 #include <sys/proc.h>
54 #include <sys/reboot.h>
55 #include <sys/syscall.h>
56 #include <sys/sysctl.h>
57 #include <sys/systm.h>
58 
59 #include <dev/cons.h>
60 #ifdef __HAVE_MM_MD_KERNACC
61 #include <dev/mm.h>
62 #endif
63 
64 #include <uvm/uvm_extern.h>
65 
66 #include <riscv/frame.h>
67 #include <riscv/locore.h>
68 #include <riscv/machdep.h>
69 #include <riscv/pte.h>
70 #include <riscv/sbi.h>
71 #include <riscv/userret.h>
72 
73 #include <libfdt.h>
74 #include <dev/fdt/fdtvar.h>
75 #include <dev/fdt/fdt_boot.h>
76 #include <dev/fdt/fdt_memory.h>
77 #include <dev/fdt/fdt_private.h>
78 
/* NOTE(review): not referenced in this file; presumably consulted by the trap code -- confirm. */
int cpu_printfataltraps = 1;
/* Machine and architecture name strings, taken from MACHINE / MACHINE_ARCH. */
char machine[] = MACHINE;
char machine_arch[] = MACHINE_ARCH;

/*
 * Verbose early-boot tracing: VPRINTF() is printf() when the kernel is
 * built with VERBOSE_INIT_RISCV and expands to __nothing otherwise.
 */
#ifdef VERBOSE_INIT_RISCV
#define	VPRINTF(...)	printf(__VA_ARGS__)
#else
#define	VPRINTF(...)	__nothing
#endif

/* 64 should be enough, even for a ZFS UUID */
#define	MAX_BOOT_DEV_STR	64

/* Value of the "root" boot argument, filled in by parse_mi_bootargs(). */
char bootdevstr[MAX_BOOT_DEV_STR] = "";
/* Boot argument string; set from fdt_get_bootargs() in init_riscv(). */
char *boot_args = NULL;

/* Bounds reported by fdt_memory_get(); recorded in init_riscv(). */
paddr_t physical_start;
paddr_t physical_end;
97 
98 static void
99 earlyconsputc(dev_t dev, int c)
100 {
101 	uartputc(c);
102 }
103 
104 static int
105 earlyconsgetc(dev_t dev)
106 {
107 	return uartgetc();
108 }
109 
/*
 * Minimal console device installed as cn_tab at the very start of
 * init_riscv() so that printf()/panic() work before consinit() runs.
 * Only putc/getc/pollc are provided.
 */
static struct consdev earlycons = {
	.cn_putc = earlyconsputc,
	.cn_getc = earlyconsgetc,
	.cn_pollc = nullcnpollc,
};
115 
/* Submap of kernel_map used for physio; allocated in cpu_startup(). */
struct vm_map *phys_map;

/* NOTE(review): not referenced in this file; presumably the register snapshot used by DDB -- confirm. */
struct trapframe cpu_ddb_regs;
/*
 * Machine-dependent PCU operations table.  Only the FPU slot is
 * populated, and only when the kernel is configured with FPE.
 */
const pcu_ops_t * const pcu_ops_md_defs[PCU_UNIT_COUNT] = {
#ifdef FPE
	[PCU_FPU] = &pcu_fpu_ops,
#endif
};
124 
/*
 * Used by PHYSTOV and VTOPHYS -- it will be set before BSS is zeroed,
 * so keep it in the data section rather than in BSS.
 */
unsigned long kern_vtopdiff __attribute__((__section__(".data")));
130 
131 
132 /*
133  * machine dependent system variables.
134  */
/*
 * Create the permanent top-level "machdep" sysctl node (CTL_MACHDEP).
 * No leaf variables are added here.
 */
SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
{
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0,
	    CTL_MACHDEP, CTL_EOL);
}
143 
144 #ifdef MODULAR
/*
 * Hook for pushing any modules loaded by the boot loader.
 *
 * Currently a no-op: the body is empty, so nothing is registered here.
 */
void
module_init_md(void)
{
}
152 #endif /* MODULAR */
153 
154 /*
155  * Set registers on exec.
156  * Clear all registers except sp, pc.
157  * sp is set to the stack pointer passed in.  pc is set to the entry
158  * point given by the exec_package passed in.
159  */
160 void
161 setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
162 {
163 	struct trapframe * const tf = l->l_md.md_utf;
164 	struct proc * const p = l->l_proc;
165 
166 	memset(tf, 0, sizeof(*tf));
167 	tf->tf_sp = (intptr_t)stack_align(stack);
168 	tf->tf_pc = (intptr_t)pack->ep_entry & ~1;
169 #ifdef _LP64
170 	tf->tf_sr = (p->p_flag & PK_32) ? SR_USER32 : SR_USER64;
171 #else
172 	tf->tf_sr = SR_USER;
173 #endif
174 
175 	// Set up arguments for ___start(cleanup, ps_strings)
176 	tf->tf_a0 = 0;			// cleanup
177 	tf->tf_a1 = p->p_psstrp;	// ps_strings
178 
179 	/*
180 	 * Must have interrupts disabled for exception return.
181 	 * Must be switching to user mode.
182 	 * Must enable interrupts after sret.
183 	 */
184 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
185 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
186 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
187 }
188 
189 void
190 md_child_return(struct lwp *l)
191 {
192 	struct trapframe * const tf = lwp_trapframe(l);
193 
194 	tf->tf_a0 = 0;
195 	tf->tf_a1 = 1;
196 #ifdef FPE
197 	/* Disable FP as we can't be using it (yet). */
198 	tf->tf_sr &= ~SR_FS;
199 #endif
200 
201 	/*
202 	 * Must have interrupts disabled for exception return.
203 	 * Must be switching to user mode.
204 	 * Must enable interrupts after sret.
205 	 */
206 
207 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SIE) == 0);
208 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPP) == 0);
209 	KASSERT(__SHIFTOUT(tf->tf_sr, SR_SPIE) != 0);
210 
211 	userret(l);
212 }
213 
/*
 * Process the tail end of a posix_spawn() for the child.
 *
 * Nothing machine-specific is adjusted here; the LWP is simply passed to
 * userret().
 */
void
cpu_spawn_return(struct lwp *l)
{
	userret(l);
}
222 
223 /*
224  * Start a new LWP
225  */
226 void
227 startlwp(void *arg)
228 {
229 	ucontext_t * const uc = arg;
230 	lwp_t * const l = curlwp;
231 	int error __diagused;
232 
233 	error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
234 	KASSERT(error == 0);
235 
236 	kmem_free(uc, sizeof(*uc));
237 	userret(l);
238 }
239 
// We've worked hard to make sure struct reg and __gregset_t are the same.
// Ditto for struct fpreg and fregset_t.
//
// cpu_getmcontext() and cpu_setmcontext() copy between these types with
// plain struct assignment through casts, so at least their sizes must
// agree.  The general-register check is only made for _LP64 kernels.

#ifdef _LP64
CTASSERT(sizeof(struct reg) == sizeof(__gregset_t));
#endif
CTASSERT(sizeof(struct fpreg) == sizeof(__fregset_t));
247 
248 void
249 cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
250 {
251 	const struct trapframe * const tf = l->l_md.md_utf;
252 
253 	/* Save register context. */
254 	*(struct reg *)mcp->__gregs = tf->tf_regs;
255 
256 	*flags |= _UC_CPU | _UC_TLSBASE;
257 
258 	/* Save floating point register context, if any. */
259 	KASSERT(l == curlwp);
260 	if (fpu_valid_p(l)) {
261 		/*
262 		 * If this process is the current FP owner, dump its
263 		 * context to the PCB first.
264 		 */
265 		fpu_save(l);
266 
267 		struct pcb * const pcb = lwp_getpcb(l);
268 		*(struct fpreg *)mcp->__fregs = pcb->pcb_fpregs;
269 		*flags |= _UC_FPU;
270 	}
271 }
272 
273 int
274 cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
275 {
276 	/*
277 	 * Verify that at least the PC and SP are user addresses.
278 	 */
279 	if ((intptr_t) mcp->__gregs[_REG_PC] < 0
280 	    || (intptr_t) mcp->__gregs[_REG_SP] < 0
281 	    || (mcp->__gregs[_REG_PC] & 1))
282 		return EINVAL;
283 
284 	return 0;
285 }
286 
287 int
288 cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
289 {
290 	struct trapframe * const tf = l->l_md.md_utf;
291 	struct proc * const p = l->l_proc;
292 	const __greg_t * const gr = mcp->__gregs;
293 	int error;
294 
295 	/* Restore register context, if any. */
296 	if (flags & _UC_CPU) {
297 		error = cpu_mcontext_validate(l, mcp);
298 		if (error)
299 			return error;
300 
301 		/*
302 		 * Avoid updating TLS register here.
303 		 */
304 		const __greg_t saved_tp = tf->tf_reg[_REG_TP];
305 		tf->tf_regs = *(const struct reg *)gr;
306 		tf->tf_reg[_REG_TP] = saved_tp;
307 	}
308 
309 	/* Restore the private thread context */
310 	if (flags & _UC_TLSBASE) {
311 		lwp_setprivate(l, (void *)(intptr_t)mcp->__gregs[_X_TP]);
312 	}
313 
314 	/* Restore floating point register context, if any. */
315 	if (flags & _UC_FPU) {
316 		KASSERT(l == curlwp);
317 		/* Tell PCU we are replacing the FPU contents. */
318 		fpu_replace(l);
319 
320 		/*
321 		 * The PCB FP regs struct includes the FP CSR, so use the
322 		 * proper size of fpreg when copying.
323 		 */
324 		struct pcb * const pcb = lwp_getpcb(l);
325 		pcb->pcb_fpregs = *(const struct fpreg *)mcp->__fregs;
326 	}
327 
328 	mutex_enter(p->p_lock);
329 	if (flags & _UC_SETSTACK)
330 		l->l_sigstk.ss_flags |= SS_ONSTACK;
331 	if (flags & _UC_CLRSTACK)
332 		l->l_sigstk.ss_flags &= ~SS_ONSTACK;
333 	mutex_exit(p->p_lock);
334 
335 	return 0;
336 }
337 
338 void
339 cpu_need_resched(struct cpu_info *ci, struct lwp *l, int flags)
340 {
341 	KASSERT(kpreempt_disabled());
342 
343 	if ((flags & RESCHED_KPREEMPT) != 0) {
344 #ifdef __HAVE_PREEMPTION
345 		if ((flags & RESCHED_REMOTE) != 0) {
346 			cpu_send_ipi(ci, IPI_KPREEMPT);
347 		} else {
348 			softint_trigger(SOFTINT_KPREEMPT);
349 		}
350 #endif
351 		return;
352 	}
353 	if ((flags & RESCHED_REMOTE) != 0) {
354 #ifdef MULTIPROCESSOR
355 		cpu_send_ipi(ci, IPI_AST);
356 #endif
357 	} else {
358 		l->l_md.md_astpending = 1;	/* force call to ast() */
359 	}
360 }
361 
362 void
363 cpu_signotify(struct lwp *l)
364 {
365 	KASSERT(kpreempt_disabled());
366 #ifdef __HAVE_FAST_SOFTINTS
367 	KASSERT(lwp_locked(l, NULL));
368 #endif
369 
370 	if (l->l_cpu != curcpu()) {
371 #ifdef MULTIPROCESSOR
372 		cpu_send_ipi(l->l_cpu, IPI_AST);
373 #endif
374 	} else {
375 		l->l_md.md_astpending = 1; 	/* force call to ast() */
376 	}
377 }
378 
/*
 * Record that the LWP owes a profiling tick: set LP_OWEUPC in its private
 * flags and mark an AST pending so that ast() runs.
 * Must be called on the LWP's own CPU with kernel preemption disabled.
 */
void
cpu_need_proftick(struct lwp *l)
{
	KASSERT(kpreempt_disabled());
	KASSERT(l->l_cpu == curcpu());

	l->l_pflag |= LP_OWEUPC;
	l->l_md.md_astpending = 1;		/* force call to ast() */
}
388 
389 
390 /* Sync the discs, unmount the filesystems, and adjust the todr */
391 static void
392 bootsync(void)
393 {
394 	static bool bootsyncdone = false;
395 
396 	if (bootsyncdone)
397 		return;
398 
399 	bootsyncdone = true;
400 
401 	/* Make sure we can still manage to do things */
402 	if ((csr_sstatus_read() & SR_SIE) == 0) {
403 		/*
404 		 * If we get here then boot has been called without RB_NOSYNC
405 		 * and interrupts were disabled. This means the boot() call
406 		 * did not come from a user process e.g. shutdown, but must
407 		 * have come from somewhere in the kernel.
408 		 */
409 		ENABLE_INTERRUPTS();
410 		printf("Warning interrupts disabled during boot()\n");
411 	}
412 
413 	vfs_shutdown();
414 }
415 
416 
/*
 * Halt or reboot the machine.
 *
 *	howto	RB_* flags; RB_NOSYNC skips the file system sync, RB_HALT
 *		waits for a console key before resetting
 *	bootstr	not referenced in this function
 *
 * The sequence is: optional sync, PMF shutdown, raise to splhigh, run the
 * shutdown hooks, disable interrupts, optional halt prompt, then an SBI
 * cold reboot request.  If control continues past the SBI call, or no
 * console key could be read, the hart spins in wfi.
 */
void
cpu_reboot(int howto, char *bootstr)
{

	/*
	 * If RB_NOSYNC was not specified sync the discs.
	 * Note: Unless cold is set to 1 here, syslogd will die during the
	 * unmount.  It looks like syslogd is getting woken up only to find
	 * that it cannot page part of the binary in as the filesystem has
	 * been unmounted.
	 *
	 * NOTE(review): this function does not actually modify `cold';
	 * confirm whether the note above still applies.
	 */
	if ((howto & RB_NOSYNC) == 0)
		bootsync();

	/* Crash dump support is not wired up yet; kept for reference. */
#if 0
	/* Disable interrupts. */
	const int s = splhigh();

	/* Do a dump if requested. */
	if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
		dumpsys();

	splx(s);
#endif

	/*
	 * NOTE(review): this passes the global boothowto rather than the
	 * howto argument -- confirm that is intended.
	 */
	pmf_system_shutdown(boothowto);

	/* Say NO to interrupts for good */
	splhigh();

	/* Run any shutdown hooks */
	doshutdownhooks();

	/* Make sure IRQ's are disabled */
	DISABLE_INTERRUPTS();

	if (howto & RB_HALT) {
		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");
		cnpollc(true);	/* for proper keyboard command handling */
		if (cngetc() == 0) {
			/* no console attached, so just hlt */
			printf("No keyboard - cannot reboot after all.\n");
			goto spin;
		}
		cnpollc(false);
	}

	printf("rebooting...\n");

	sbi_system_reset(SBI_RESET_TYPE_COLDREBOOT, SBI_RESET_REASON_NONE);
spin:
	/* Nothing left to do: idle this hart forever. */
	for (;;) {
		asm volatile("wfi" ::: "memory");
	}
	/* NOTREACHED */
}
475 
/*
 * Configure the crash dump device.
 * Not implemented yet: the body is empty.
 */
void
cpu_dumpconf(void)
{
	// TBD!!
}
481 
482 
/*
 * Set the thread private pointer of an LWP by storing addr in the tp slot
 * of its trapframe.  Always returns 0.
 */
int
cpu_lwp_setprivate(lwp_t *l, void *addr)
{
	struct trapframe * const tf = lwp_trapframe(l);

	tf->tf_reg[_REG_TP] = (register_t)addr;

	return 0;
}
492 
493 
/*
 * Machine-dependent startup: print the banner and memory sizes, allocate
 * the physio submap, create the CPU sets on MULTIPROCESSOR kernels (marking
 * the current CPU as hatched and running), and finish FDT interrupt and
 * random-seed setup.
 */
void
cpu_startup(void)
{
	vaddr_t minaddr, maxaddr;
	char pbuf[10];	/* "999999 MB" -- But Sv39 is max 512GB */

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	printf("%s%s", copyright, version);
	format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
	printf("total memory = %s\n", pbuf);

	minaddr = 0;
	/*
	 * Allocate a submap for physio.
	 */
	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
	    VM_PHYS_SIZE, 0, FALSE, NULL);

	format_bytes(pbuf, sizeof(pbuf), ptoa(uvm_availmem(false)));
	printf("avail memory = %s\n", pbuf);

#ifdef MULTIPROCESSOR
	/* CPU state sets used for MP bookkeeping. */
	kcpuset_create(&cpus_halted, true);
	KASSERT(cpus_halted != NULL);

	kcpuset_create(&cpus_hatched, true);
	KASSERT(cpus_hatched != NULL);

	kcpuset_create(&cpus_paused, true);
	KASSERT(cpus_paused != NULL);

	kcpuset_create(&cpus_resumed, true);
	KASSERT(cpus_resumed != NULL);

	kcpuset_create(&cpus_running, true);
	KASSERT(cpus_running != NULL);

	/* The CPU executing this code is, by definition, up and running. */
	kcpuset_set(cpus_hatched, cpu_index(curcpu()));
	kcpuset_set(cpus_running, cpu_index(curcpu()));
#endif

	fdtbus_intr_init();

	fdt_setup_rndseed();
	fdt_setup_efirng();
}
542 
543 static void
544 riscv_add_memory(const struct fdt_memory *m, void *arg)
545 {
546 	paddr_t first = atop(m->start);
547 	paddr_t last = atop(m->end);
548 	int freelist = VM_FREELIST_DEFAULT;
549 
550 	VPRINTF("adding %#16" PRIxPADDR " - %#16" PRIxPADDR"  to freelist %d\n",
551 	    m->start, m->end, freelist);
552 
553 	uvm_page_physload(first, last, first, last, freelist);
554 	physmem += last - first;
555 }
556 
557 
/*
 * Early VM preparation.
 *
 * Removes the physical ranges occupied by the kernel image and by the
 * init-memory section (when non-empty) from the FDT memory list so they are
 * not later given to UVM.  On _LP64 kernels it also sets pmap_direct_base
 * and fills l2_pte[] with XSEG-sized leaf mappings for the direct map,
 * starting at memory_start rounded down to an XSEG boundary and stopping at
 * memory_end or at the end of the direct map window, whichever comes first.
 */
static void
cpu_kernel_vm_init(paddr_t memory_start, paddr_t memory_end)
{
	extern char __kernel_text[];
	extern char _end[];

	/* Page-aligned virtual and physical extent of the kernel image. */
	vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
	vaddr_t kernend = round_page((vaddr_t)_end);
	paddr_t kernstart_phys = KERN_VTOPHYS(kernstart);
	paddr_t kernend_phys = KERN_VTOPHYS(kernend);

	VPRINTF("%s: kernel phys start %#" PRIxPADDR " end %#" PRIxPADDR "\n",
	    __func__, kernstart_phys, kernend_phys);
	fdt_memory_remove_range(kernstart_phys,
	    kernend_phys - kernstart_phys);

	/*
	 * Don't give the init-memory pages to UVM either.
	 *
	 * NOTE(review): the original note here says that once this function
	 * creates proper tables, and now that the APs are started, the pages
	 * used for stacks and the L1PT could be given to UVM.  As written the
	 * range is always withheld -- confirm the intended end state.
	 */
	extern char const __start__init_memory[];
	extern char const __stop__init_memory[] __weak;
	if (&__start__init_memory[0] != &__stop__init_memory[0]) {
		const paddr_t spa = KERN_VTOPHYS((vaddr_t)__start__init_memory);
		const paddr_t epa = KERN_VTOPHYS((vaddr_t)__stop__init_memory);

		VPRINTF("%s: init   phys start %#" PRIxPADDR
		    " end %#" PRIxPADDR "\n", __func__, spa, epa);
		fdt_memory_remove_range(spa, epa - spa);
	}

#ifdef _LP64
	/* Start of physical memory, rounded down to an XSEG boundary. */
	paddr_t pa = memory_start & ~XSEGOFSET;
	pmap_direct_base = RISCV_DIRECTMAP_START;
	extern pd_entry_t l2_pte[PAGE_SIZE / sizeof(pd_entry_t)];


	const vsize_t vshift = XSEGSHIFT;
	const vaddr_t pdetab_mask = PMAP_PDETABSIZE - 1;
	const vsize_t inc = 1UL << vshift;

	/* Index range in l2_pte[] covered by the direct map window. */
	const vaddr_t sva = RISCV_DIRECTMAP_START + pa;
	const vaddr_t eva = RISCV_DIRECTMAP_END;
	const size_t sidx = (sva >> vshift) & pdetab_mask;
	const size_t eidx = (eva >> vshift) & pdetab_mask;

	/* Allocate gigapages covering all physical memory in the direct map. */
	for (size_t i = sidx; i < eidx && pa < memory_end; i++, pa += inc) {
		l2_pte[i] = PA_TO_PTE(pa) | PTE_KERN | PTE_HARDWIRED | PTE_RW;
		VPRINTF("dm:   %p :  %#" PRIxPADDR "\n", &l2_pte[i], l2_pte[i]);
	}
#endif
//	pt_dump(printf);
}
617 
618 static void
619 riscv_init_lwp0_uarea(void)
620 {
621 	extern char lwp0uspace[];
622 
623 	uvm_lwp_setuarea(&lwp0, (vaddr_t)lwp0uspace);
624 	memset(&lwp0.l_md, 0, sizeof(lwp0.l_md));
625 	memset(lwp_getpcb(&lwp0), 0, sizeof(struct pcb));
626 
627 	struct trapframe *tf = (struct trapframe *)(lwp0uspace + USPACE) - 1;
628 	memset(tf, 0, sizeof(*tf));
629 
630 	lwp0.l_md.md_utf = lwp0.l_md.md_ktf = tf;
631 }
632 
633 
/*
 * fdt_memory_foreach() callback: trace the start and size of one memory
 * range.  Produces output only when VPRINTF() is enabled; arg is not used.
 */
static void
riscv_print_memory(const struct fdt_memory *m, void *arg)
{

	VPRINTF("FDT /memory @ 0x%" PRIx64 " size 0x%" PRIx64 "\n",
	    m->start, m->end - m->start);
}
641 
642 
643 static void
644 parse_mi_bootargs(char *args)
645 {
646 	int howto;
647 	bool found, start, skipping;
648 
649 	if (args == NULL)
650 		return;
651 
652 	start = true;
653 	skipping = false;
654 	for (char *cp = args; *cp; cp++) {
655 		/* check for "words" starting with a "-" only */
656 		if (start) {
657 			if (*cp == '-') {
658 				skipping = false;
659 			} else {
660 				skipping = true;
661 			}
662 			start = false;
663 			continue;
664 		}
665 
666 		if (*cp == ' ') {
667 			start = true;
668 			skipping = false;
669 			continue;
670 		}
671 
672 		if (skipping) {
673 			continue;
674 		}
675 
676 		/* Check valid boot flags */
677 		howto = 0;
678 		BOOT_FLAG(*cp, howto);
679 		if (!howto)
680 			printf("bootflag '%c' not recognised\n", *cp);
681 		else
682 			boothowto |= howto;
683 	}
684 
685 	found = optstr_get(args, "root", bootdevstr, sizeof(bootdevstr));
686 	if (found) {
687 		bootspec = bootdevstr;
688 	}
689 }
690 
691 
692 void
693 init_riscv(register_t hartid, paddr_t dtb)
694 {
695 
696 	/* set temporally to work printf()/panic() even before consinit() */
697 	cn_tab = &earlycons;
698 
699 	/* Load FDT */
700 	const vaddr_t dtbva = VM_KERNEL_DTB_BASE + (dtb & (NBSEG - 1));
701 	void *fdt_data = (void *)dtbva;
702 	int error = fdt_check_header(fdt_data);
703 	if (error != 0)
704 	    panic("fdt_check_header failed: %s", fdt_strerror(error));
705 
706 	fdtbus_init(fdt_data);
707 
708 	/* Lookup platform specific backend */
709 	const struct fdt_platform * const plat = fdt_platform_find();
710 	if (plat == NULL)
711 		panic("Kernel does not support this device");
712 
713 	/* Early console may be available, announce ourselves. */
714 	VPRINTF("FDT<%p>\n", fdt_data);
715 
716 	boot_args = fdt_get_bootargs();
717 
718 	VPRINTF("devmap %p\n", plat->fp_devmap());
719 	pmap_devmap_bootstrap(0, plat->fp_devmap());
720 
721 	VPRINTF("bootstrap\n");
722 	plat->fp_bootstrap();
723 
724 	/*
725 	 * If stdout-path is specified on the command line, override the
726 	 * value in /chosen/stdout-path before initializing console.
727 	 */
728 	VPRINTF("stdout\n");
729 	fdt_update_stdout_path(fdt_data, boot_args);
730 
731 	/*
732 	 * Done making changes to the FDT.
733 	 */
734 	fdt_pack(fdt_data);
735 
736 	const uint32_t dtbsize = round_page(fdt_totalsize(fdt_data));
737 
738 	VPRINTF("fdt size %x/%x\n", dtbsize, fdt_totalsize(fdt_data));
739 
740 	VPRINTF("consinit ");
741 	consinit();
742 	VPRINTF("ok\n");
743 
744 	/* Talk to the user */
745 	printf("NetBSD/riscv (fdt) booting ...\n");
746 
747 #ifdef BOOT_ARGS
748 	char mi_bootargs[] = BOOT_ARGS;
749 	parse_mi_bootargs(mi_bootargs);
750 #endif
751 
752 	uint64_t memory_start, memory_end;
753 	fdt_memory_get(&memory_start, &memory_end);
754 	physical_start = memory_start;
755 	physical_end = memory_end;
756 
757 	fdt_memory_foreach(riscv_print_memory, NULL);
758 
759 	/* Cannot map memory above largest page number */
760 	const uint64_t maxppn = __SHIFTOUT_MASK(PTE_PPN) - 1;
761 	const uint64_t memory_limit = ptoa(maxppn);
762 
763 	if (memory_end > memory_limit) {
764 		fdt_memory_remove_range(memory_limit, memory_end);
765 		memory_end = memory_limit;
766 	}
767 
768 	uint64_t memory_size __unused = memory_end - memory_start;
769 
770 	VPRINTF("%s: memory start %" PRIx64 " end %" PRIx64 " (len %"
771 	    PRIx64 ")\n", __func__, memory_start, memory_end, memory_size);
772 
773 	/* Parse ramdisk, rndseed, and firmware's RNG from EFI */
774 	fdt_probe_initrd();
775 	fdt_probe_rndseed();
776 	fdt_probe_efirng();
777 
778 	fdt_memory_remove_reserved(memory_start, memory_end);
779 
780 	fdt_memory_remove_range(dtb, dtbsize);
781 	fdt_reserve_initrd();
782 	fdt_reserve_rndseed();
783 	fdt_reserve_efirng();
784 
785 	/* Perform PT build and VM init */
786 	cpu_kernel_vm_init(memory_start, memory_end);
787 
788 	VPRINTF("bootargs: %s\n", boot_args);
789 
790 	parse_mi_bootargs(boot_args);
791 
792 #ifdef DDB
793 	if (boothowto & RB_KDB) {
794 		printf("Entering DDB...\n");
795 		cpu_Debugger();
796 	}
797 #endif
798 
799 	extern char __kernel_text[];
800 	extern char _end[];
801 //	extern char __data_start[];
802 //	extern char __rodata_start[];
803 
804 	vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
805 	vaddr_t kernend = round_page((vaddr_t)_end);
806 	paddr_t kernstart_phys __unused = KERN_VTOPHYS(kernstart);
807 	paddr_t kernend_phys __unused = KERN_VTOPHYS(kernend);
808 
809 	vaddr_t kernelvmstart;
810 
811 	vaddr_t kernstart_mega __unused = MEGAPAGE_TRUNC(kernstart);
812 	vaddr_t kernend_mega = MEGAPAGE_ROUND(kernend);
813 
814 	kernelvmstart = kernend_mega;
815 
816 #if 0
817 #ifdef MODULAR
818 #define MODULE_RESERVED_MAX	(1024 * 1024 * 128)
819 #define MODULE_RESERVED_SIZE	(1024 * 1024 * 32)	/* good enough? */
820 	module_start = kernelvmstart;
821 	module_end = kernend_mega + MODULE_RESERVED_SIZE;
822 	if (module_end >= kernstart_mega + MODULE_RESERVED_MAX)
823 		module_end = kernstart_mega + MODULE_RESERVED_MAX;
824 	KASSERT(module_end > kernend_mega);
825 	kernelvmstart = module_end;
826 #endif /* MODULAR */
827 #endif
828 	KASSERT(kernelvmstart < VM_KERNEL_VM_BASE);
829 
830 	kernelvmstart = VM_KERNEL_VM_BASE;
831 
832 	/*
833 	 * msgbuf is allocated from the top of the last biggest memory block.
834 	 */
835 	paddr_t msgbufaddr = 0;
836 
837 #ifdef _LP64
838 	/* XXX check all ranges for last one with a big enough hole */
839 	msgbufaddr = memory_end - MSGBUFSIZE;
840 	KASSERT(msgbufaddr != 0);	/* no space for msgbuf */
841 	fdt_memory_remove_range(msgbufaddr, msgbufaddr + MSGBUFSIZE);
842 	msgbufaddr = RISCV_PA_TO_KVA(msgbufaddr);
843 	VPRINTF("msgbufaddr = %#lx\n", msgbufaddr);
844 	initmsgbuf((void *)msgbufaddr, MSGBUFSIZE);
845 #endif
846 
847 	KASSERT(msgbufaddr != 0);	/* no space for msgbuf */
848 #ifdef _LP64
849 	initmsgbuf((void *)RISCV_PA_TO_KVA(msgbufaddr), MSGBUFSIZE);
850 #endif
851 
852 #define	DPRINTF(v)	VPRINTF("%24s = 0x%16lx\n", #v, (unsigned long)v);
853 
854 	VPRINTF("------------------------------------------\n");
855 	DPRINTF(kern_vtopdiff);
856 	DPRINTF(memory_start);
857 	DPRINTF(memory_end);
858 	DPRINTF(memory_size);
859 	DPRINTF(kernstart_phys);
860 	DPRINTF(kernend_phys)
861 	DPRINTF(msgbufaddr);
862 //	DPRINTF(physical_end);
863 	DPRINTF(VM_MIN_KERNEL_ADDRESS);
864 	DPRINTF(kernstart_mega);
865 	DPRINTF(kernstart);
866 	DPRINTF(kernend);
867 	DPRINTF(kernend_mega);
868 #if 0
869 #ifdef MODULAR
870 	DPRINTF(module_start);
871 	DPRINTF(module_end);
872 #endif
873 #endif
874 	DPRINTF(VM_MAX_KERNEL_ADDRESS);
875 #ifdef _LP64
876 	DPRINTF(pmap_direct_base);
877 #endif
878 	VPRINTF("------------------------------------------\n");
879 
880 #undef DPRINTF
881 
882 	uvm_md_init();
883 
884 	/*
885 	 * pass memory pages to uvm
886 	 */
887 	physmem = 0;
888 	fdt_memory_foreach(riscv_add_memory, NULL);
889 
890 	pmap_bootstrap(kernelvmstart, VM_MAX_KERNEL_ADDRESS);
891 
892 	kasan_init();
893 
894 	/* Finish setting up lwp0 on our end before we call main() */
895 	riscv_init_lwp0_uarea();
896 
897 
898 	error = 0;
899 	if ((boothowto & RB_MD1) == 0) {
900 		VPRINTF("mpstart\n");
901 		if (plat->fp_mpstart)
902 			error = plat->fp_mpstart();
903 	}
904 	if (error)
905 		printf("AP startup problems\n");
906 }
907 
908 
909 #ifdef __HAVE_MM_MD_KERNACC
910 
/*
 * IN_RANGE_P(addr, start, end): true when start <= addr < end.
 * The expansion is fully parenthesized so the macro can be negated or
 * combined with other operators without changing its meaning.
 *
 * IN_DIRECTMAP_P(va): true when va lies inside the direct map window;
 * always false on kernels without _LP64.
 */
#define IN_RANGE_P(addr, start, end)	\
	(((start) <= (addr)) && ((addr) < (end)))
#ifdef _LP64
#define IN_DIRECTMAP_P(va) \
	IN_RANGE_P(va, RISCV_DIRECTMAP_START, RISCV_DIRECTMAP_END)
#else
#define IN_DIRECTMAP_P(va) false
#endif
918 
919 int
920 mm_md_kernacc(void *ptr, vm_prot_t prot, bool *handled)
921 {
922 	extern char __kernel_text[];
923 	extern char _end[];
924 	extern char __data_start[];
925 
926 	const vaddr_t kernstart = trunc_page((vaddr_t)__kernel_text);
927 	const vaddr_t kernend = round_page((vaddr_t)_end);
928 	const vaddr_t data_start = (vaddr_t)__data_start;
929 
930 	const vaddr_t va = (vaddr_t)ptr;
931 
932 	*handled = false;
933 	if (IN_RANGE_P(va, kernstart, kernend)) {
934 		*handled = true;
935 		if (va < data_start && (prot & VM_PROT_WRITE) != 0) {
936 			return EFAULT;
937 		}
938 	} else if (IN_DIRECTMAP_P(va)) {
939 		*handled = true;
940 	}
941 
942 	return 0;
943 }
944 #endif
945 
946 
947 #ifdef _LP64
948 static void
949 pte_bits(void (*pr)(const char *, ...), pt_entry_t pte)
950 {
951 	(*pr)("%c%c%c%c%c%c%c%c",
952 	    (pte & PTE_D) ? 'D' : '.',
953 	    (pte & PTE_A) ? 'A' : '.',
954 	    (pte & PTE_G) ? 'G' : '.',
955 	    (pte & PTE_U) ? 'U' : '.',
956 	    (pte & PTE_X) ? 'X' : '.',
957 	    (pte & PTE_W) ? 'W' : '.',
958 	    (pte & PTE_R) ? 'R' : '.',
959 	    (pte & PTE_V) ? 'V' : '.');
960 }
961 
/*
 * Print one page-table page and recurse into the tables it points to.
 *
 *	pdp_pa	physical address of the table; read through PMAP_DIRECT_MAP()
 *	topbit	when non-zero, va is recomputed from the index for every
 *		entry and bits 63..topbit are set if bit topbit is set;
 *		recursive calls pass 0
 *	level	level of this table; non-leaf entries found at level 0 are
 *		reported and not followed
 *	va	virtual address of entry 0 (used as-is when topbit is zero)
 *	pr	printf-like output routine
 */
static void
dump_ln_table(paddr_t pdp_pa, int topbit, int level, vaddr_t va,
    void (*pr)(const char *, ...) __printflike(1, 2))
{
	pd_entry_t *pdp = (void *)PMAP_DIRECT_MAP(pdp_pa);

	(*pr)("l%u     @  pa %#16" PRIxREGISTER "\n", level, pdp_pa);
	for (size_t i = 0; i < PAGE_SIZE / sizeof(pd_entry_t); i++) {
		pd_entry_t entry = pdp[i];

		if (topbit) {
			/* Top-level table: derive va from the index and
			 * extend it upwards when bit topbit is set. */
			va = i << (PGSHIFT + level * SEGLENGTH);
			if (va & __BIT(topbit)) {
				va |= __BITS(63, topbit);
			}
		}
		/* Entries that are entirely zero are not printed. */
		if (entry != 0) {
			paddr_t pa = __SHIFTOUT(entry, PTE_PPN) << PGSHIFT;
			// check level PPN bits.
			if (PTE_ISLEAF_P(entry)) {
				(*pr)("l%u %3zu    va 0x%016lx  pa 0x%012lx - ",
				      level, i, va, pa);
				pte_bits(pr, entry);
				(*pr)("\n");
			} else {
				(*pr)("l%u %3zu    va 0x%016lx  -> 0x%012lx - ",
				      level, i, va, pa);
				pte_bits(pr, entry);
				(*pr)("\n");
				/* A pointer entry at the last level is bogus. */
				if (level == 0) {
					(*pr)("wtf\n");
					continue;
				}
				if (pte_pde_valid_p(entry))
					dump_ln_table(pa, 0, level - 1, va, pr);
			}
		}
		/* Advance by the span one entry covers at this level. */
		va += 1UL << (PGSHIFT + level * SEGLENGTH);
	}
}
1002 
1003 void
1004 pt_dump(void (*pr)(const char *, ...) __printflike(1, 2))
1005 {
1006 	const register_t satp = csr_satp_read();
1007 	size_t topbit = sizeof(long) * NBBY - 1;
1008 
1009 #ifdef _LP64
1010 	const paddr_t satp_pa = __SHIFTOUT(satp, SATP_PPN) << PGSHIFT;
1011 	const uint8_t mode = __SHIFTOUT(satp, SATP_MODE);
1012 	u_int level = 1;
1013 
1014 	switch (mode) {
1015 	case SATP_MODE_SV39:
1016 	case SATP_MODE_SV48:
1017 		topbit = (39 - 1) + (mode - 8) * SEGLENGTH;
1018 		level = mode - 6;
1019 		break;
1020 	}
1021 #endif
1022 	(*pr)("topbit = %zu\n", topbit);
1023 
1024 	(*pr)("satp   = 0x%" PRIxREGISTER "\n", satp);
1025 #ifdef _LP64
1026 	dump_ln_table(satp_pa, topbit, level, 0, pr);
1027 #endif
1028 }
1029 #endif
1030 
1031 void
1032 consinit(void)
1033 {
1034 	static bool initialized = false;
1035 	const struct fdt_console *cons = fdtbus_get_console();
1036 	const struct fdt_platform *plat = fdt_platform_find();
1037 
1038 	if (initialized || cons == NULL)
1039 		return;
1040 
1041 	u_int uart_freq = 0;
1042 	extern struct bus_space riscv_generic_bs_tag;
1043 	struct fdt_attach_args faa = {
1044 		.faa_bst = &riscv_generic_bs_tag,
1045 	};
1046 
1047 	faa.faa_phandle = fdtbus_get_stdout_phandle();
1048 	if (plat->fp_uart_freq != NULL)
1049 		uart_freq = plat->fp_uart_freq();
1050 
1051 	cons->consinit(&faa, uart_freq);
1052 
1053 	initialized = true;
1054 }
1055