xref: /netbsd-src/sys/arch/aarch64/aarch64/locore.S (revision cef8759bd76c1b621f8eab8faa6f208faabc2e15)
1/*	$NetBSD: locore.S,v 1.71 2020/08/16 10:08:42 skrll Exp $	*/
2
3/*
4 * Copyright (c) 2017 Ryo Shimizu <ryo@nerv.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "opt_arm_debug.h"
30#include "opt_console.h"
31#include "opt_cpuoptions.h"
32#include "opt_ddb.h"
33#include "opt_fdt.h"
34#include "opt_kasan.h"
35#include "opt_multiprocessor.h"
36
37#include <aarch64/asm.h>
38#include <aarch64/hypervisor.h>
39#include "assym.h"
40
41RCSID("$NetBSD: locore.S,v 1.71 2020/08/16 10:08:42 skrll Exp $")
42
/*
 * Select the MAIR attribute used for ordinary device mappings: strongly
 * ordered (nGnRnE) if the kernel option requests it, otherwise nGnRE.
 */
#ifdef AARCH64_DEVICE_MEM_STRONGLY_ORDERED
#define	MAIR_DEVICE_MEM		MAIR_DEVICE_nGnRnE
#else
#define	MAIR_DEVICE_MEM		MAIR_DEVICE_nGnRE
#endif
#define	MAIR_DEVICE_MEM_SO	MAIR_DEVICE_nGnRnE	/* always strongly ordered */

/*#define DEBUG_LOCORE			// debug print */
/*#define DEBUG_LOCORE_PRINT_LOCK	// avoid mixing AP's output */

#define LOCORE_EL2

#define BOOT_AP_STACKSIZE	256	/* size of temporary stack for APs */
#define PMAPBOOT_PAGEALLOCMAX	(1024 * 1024)	/* reserved size from _end[] */

/* Early console output requires CONSADDR so it can be identity-mapped. */
#if (defined(VERBOSE_INIT_ARM) || defined(DEBUG_LOCORE)) && defined(EARLYCONS)
#if !defined(CONSADDR)
#error CONSADDR required with EARLYCONS
#endif
#define VERBOSE_LOCORE
#endif

/* VPRINT: boot-progress message, only when VERBOSE_LOCORE is enabled */
#ifdef VERBOSE_LOCORE
#define VPRINT(string)		PRINT(string)
#else
#define VPRINT(string)
#endif

/* The DPRINTREG/DPRINTSREG macros use x19 internally. x0-x15 may be clobbered */
#if (defined(DEBUG_LOCORE) && defined(EARLYCONS))
#define DPRINT(string)		PRINT(string)
#define DPRINTREG(str, reg)	mov x19,reg; PRINT(str); mov x0,x19; bl print_x0
#define DPRINTSREG(str, reg)	mrs x19,reg; PRINT(str); mov x0,x19; bl print_x0
#else
#define DPRINT(string)
#define DPRINTREG(str, reg)
#define DPRINTSREG(str, reg)
#endif

/*
 * PRINT embeds the string right after the "bl xprint"; xprint prints it
 * and returns past the NUL (rounded up by the ".align 2").
 */
#define PRINT(string)	bl xprint; .asciz string; .align 2
84
85	.text
86	.align	3
87ASENTRY_NP(aarch64_start)
88	/* keep lr & sp for return to bootloader if possible */
89	mov	x27, lr
90	mov	x28, sp
91
92	/* set stack pointer for boot */
93	adrl	x0, bootstk
94	mov	sp, x0
95
96	bl	clear_bss
97
98	PRINT("boot NetBSD/aarch64\n")
99
100	bl	1f
1011:	DPRINTREG("PC               = ", lr)
102	DPRINTREG("SP               = ", sp)
103	mrs	x20, CurrentEL
104	lsr	x20, x20, #2
105	DPRINTREG("CurrentEL        = ", x20)
106	cmp	x20, #2
107	bcc	1f
108	/* EL2 registers can be accessed in EL2 or higher */
109	DPRINTSREG("SCTLR_EL2        = ", sctlr_el2)
110	DPRINTSREG("HCR_EL2          = ", hcr_el2)
1111:
112	DPRINTSREG("SPSR_EL1         = ", spsr_el1)
113	DPRINTSREG("CNTFREQ_EL0      = ", cntfrq_el0)
114	DPRINTSREG("SCTLR_EL1        = ", sctlr_el1)
115	DPRINTSREG("MIDR_EL1         = ", midr_el1)
116	DPRINTSREG("MPIDR_EL1        = ", mpidr_el1)
117	DPRINTSREG("ID_AA64MPFR0_EL1 = ", id_aa64pfr0_el1)
118	DPRINTSREG("ID_AA64MPFR1_EL1 = ", id_aa64pfr1_el1)
119	DPRINTSREG("ID_AA64ISAR0_EL1 = ", id_aa64isar0_el1)
120	DPRINTSREG("ID_AA64ISAR1_EL1 = ", id_aa64isar1_el1)
121	DPRINTSREG("ID_AA64MMFR0_EL1 = ", id_aa64mmfr0_el1)
122	DPRINTSREG("ID_AA64MMFR1_EL1 = ", id_aa64mmfr1_el1)
123
124#ifdef LOCORE_EL2
125	VPRINT("Drop to EL1...")
126# include <aarch64/aarch64/locore_el2.S>
127	VPRINT("OK\n")
128	mrs	x20, CurrentEL
129	lsr	x20, x20, #2
130	DPRINTREG("CurrentEL        = ", x20)
131#endif /* LOCORE_EL2 */
132
133
134	bl	mmu_disable
135	bl	init_sysregs
136	bl	init_mmutable
137	cbnz	x0, aarch64_fatal
138	bl	save_ttbrs
139
140	VPRINT("MMU Enable...")
141	bl	mmu_enable
142	VPRINT("OK\n")
143
144	ldr	x20, =vstart	/* virtual address of vstart */
145	DPRINTSREG("SPSR_EL1         = ", spsr_el1)
146	DPRINTSREG("DAIF             = ", daif)
147	DPRINTREG("vstart           = ", x20)
148	br	x20		/* jump to the kernel virtual address */
149
150aarch64_fatal:
151	PRINT("fatal error occured while booting\n")
152	/* return to bootloader. if switched from EL2 to EL1, It might fail */
153	mov	lr, x27
154	mov	sp, x28
155	ret
156
157/*
158 * vstart is in kernel virtual address
159 */
160vstart:
161	DPRINTREG("PC               = ", x20)
162
163	/* set exception vector */
164	adrl	x0, _C_LABEL(el1_vectors)
165	msr	vbar_el1, x0
166
167	/* set lwp0 stack */
168	adrl	x0, lwp0uspace
169	add	x0, x0, #(UPAGES * PAGE_SIZE)
170	sub	x0, x0, #TF_SIZE	/* lwp0space + USPACE - TF_SIZE */
171	mov	sp, x0			/* define lwp0 ksp bottom */
172	DPRINTREG("SP(lwp0,kvm)     = ", sp)
173
174	/* lwp-private = NULL */
175	msr	tpidr_el0, xzr
176	msr	tpidrro_el0, xzr
177
178	/* set curcpu() */
179	adrl	x0, lwp0		/* curlwp is lwp0 */
180	msr	tpidr_el1, x0
181	DPRINTREG("curlwp           = ", x0);
182
183	/* init PAN if supported */
184	mov	x0, #1
185	bl	aarch64_pan_init
186
187	/* init PAC if supported */
188	mov	x0, #1
189	bl	aarch64_pac_init
190	cbnz	w0, 1f			/* if (aarch64_pac_init() == 0) */
191	mrs	x0, sctlr_el1
192	ldr	x1, sctlr_pac
193	orr	x0, x0, x1		/*  enable PAC */
194	msr	sctlr_el1, x0
1951:
196
197	/* set topology information */
198	adrl	x0, cpu_info_store	/* curcpu */
199	mrs	x1, mpidr_el1
200	mov	x2, #0
201	bl	arm_cpu_topology_set
202
203	/* get cache configuration */
204	mov	x0, xzr
205	bl	aarch64_getcacheinfo
206
207#ifdef KASAN
208	adrl	x0, lwp0uspace
209	bl	_C_LABEL(kasan_early_init)
210#endif
211
212	mov	fp, #0			/* trace back starts here */
213	PRINT("initarm\n")
214	bl	_C_LABEL(initarm)	/* Off we go */
215
216	PRINT("main\n")
217	bl	_C_LABEL(main)		/* call main() */
218
219	adr	x0, .Lmainreturned
220	b	_C_LABEL(panic)
221	/* NOTREACHED */
222ASEND(aarch64_start)
223
224.Lmainreturned:
225	.asciz	"main() returned"
226	.align 2
227
228
ASENTRY_NP(clear_bss)
	/*
	 * Zero the BSS, 16 bytes per iteration.  The region size must be
	 * a multiple of 16, which the linker script normally guarantees.
	 * An empty region (__bss_start__ == __bss_end__) is handled.
	 * clobbers: x14, x15, flags.
	 */
	adrl	x14, __bss_start__	/* x14 = write cursor */
	adrl	x15, __bss_end__	/* x15 = end of region */
1:
	cmp	x14, x15		/* finished? */
	b.hs	2f
	stp	xzr, xzr, [x14], #16	/* zero 16 bytes, post-increment */
	b	1b
2:
	ret
ASEND(clear_bss)
239
240
/*
 * init_sysregs() -- configure EL1 system registers for early boot:
 * debug/monitor state, context id, CPACR, EL0 counter access, and
 * finally unmask all DAIF exception bits.
 * clobbers: x0 (saved/restored across the body), flags.
 */
init_sysregs:
	stp	x0, lr, [sp, #-16]!

	/* init debug event */
	ldr	x0, mdscr_setting	/* literal pool value, see mdscr_setting */
	msr	mdscr_el1, x0
	msr	oslar_el1, xzr		/* OSLAR_EL1.OSLK = 0: clear the OS lock */

	/* Clear context id register */
	msr	contextidr_el1, xzr

	/* No trap system register access, and Trap FP/SIMD access */
	msr	cpacr_el1, xzr

	/* allow to read CNTVCT_EL0 and CNTFRQ_EL0 from EL0 */
	mrs	x0, cntkctl_el1
	orr	x0, x0, #CNTKCTL_EL0VCTEN
	msr	cntkctl_el1, x0

	/* any exception not masked */
	msr	daif, xzr

	ldp	x0, lr, [sp], #16
	ret
265
266
267#ifdef MULTIPROCESSOR
268
#ifdef DEBUG_LOCORE
/*
 * locore_lock_enter() -- acquire the debug-print lock.
 *
 * atomic_ops doesn't work before MMU enabled, so using Peterson's
 * (filter) algorithm with only plain stores and DSB/DMB barriers.
 * This is only used to serialize debug print and avoid mixing output.
 * Not absolutely necessary.
 *
 * x27 holds this CPU's cpuindex ("i" below).
 * clobbers: x0, x2-x5, flags.
 */
locore_lock_enter:
#ifdef DEBUG_LOCORE_PRINT_LOCK
	mov	x3, xzr			/* x3 = level */
levelloop:
	/* lock_level[] and lock_turn[] are always accessed via PA(devmap) */
	adrl	x0, kern_vtopdiff	/* VA->PA offset set up by early boot */
	ldr	x0, [x0]
	ldr	x4, =lock_level		/* link-time (virtual) address ... */
	sub	x4, x4, x0		/* ... converted to physical */
	ldr	x5, =lock_turn
	sub	x5, x5, x0

	strh	w3, [x4, x27, lsl #1]	/* lock_level[i] = level */
	dsb	sy
	strh	w27, [x5, x3, lsl #1]	/* lock_turn[level] = i */
	dsb	sy
waitloop:
	dmb	sy
	ldrh	w0, [x5, x3, lsl #1]	/* lock_turn[level] == i ? */
	cmp	x27, x0
	bne	nextlevel		/* someone else took the turn: advance */

	mov	x2, xzr			/* k = 0 */
levelcheck:
	cmp	x2, x27			/* skip our own slot */
	beq	levelcheck_next

	dmb	sy
	ldrsh	w0, [x4, x2, lsl #1]	/* lock_level[k] >= level */
	cmp	w0, w3
	bge	waitloop		/* some CPU at our level or above: spin */
levelcheck_next:
	add	x2, x2, #1		/* k++ */
	cmp	x2, #MAXCPUS
	bne	levelcheck
nextlevel:
	add	x3, x3, #1		/* passed this level; climb to the next */
	cmp	x3, #(MAXCPUS - 1)	/* MAXCPUS-1 levels -> lock is held */
	bne	levelloop
#endif /* DEBUG_LOCORE_PRINT_LOCK */
	ret
318
319
320locore_lock_exit:
321#ifdef DEBUG_LOCORE_PRINT_LOCK
322	/* lock_level[] and lock_turn[] are always accessed via PA(devmap) */
323	adrl	x0, kern_vtopdiff
324	ldr	x0, [x0]
325	ldr	x1, =lock_level
326	sub	x1, x1, x0
327	mvn	x0, xzr
328	strh	w0, [x1, x27, lsl #1]	/* lock_level[i] = -1 */
329	dsb	sy
330#endif /* DEBUG_LOCORE_PRINT_LOCK */
331	ret
332
333
/*
 * printcpu() -- print "[CPU$x27] " (x27 = cpuindex) as a debug-output
 * prefix.  Preserves x0; other caller-saved registers may be clobbered
 * by PRINT/_printdec_x0.
 *
 * (Removed the stray ";\" line continuations left over from a macro
 * version of this code -- GAS spliced the lines together via the
 * trailing backslashes, which assembled identically but was fragile
 * and misleading.)
 */
printcpu:
	stp	x0, lr, [sp, #-16]!
	PRINT("[CPU")
	mov	x0, x27
	bl	_printdec_x0
	PRINT("] ")
	ldp	x0, lr, [sp], #16
	ret
343
/* CPU_DPRINT: like DPRINT but prefixed with "[CPUn] ", serialized */
#define CPU_DPRINT(str)			\
	bl	locore_lock_enter;	\
	bl	printcpu;		\
	DPRINT(str);			\
	bl	locore_lock_exit

/*
 * The CPU_DPRINTREG macro uses x19 internally. x0-x15 may be clobbered.
 * x27 for cpuindex.
 */
#define CPU_DPRINTREG(str,reg)		\
	mov	x19, reg;		\
	bl	locore_lock_enter;	\
	bl	printcpu;		\
	PRINT(str);			\
	mov	x0, x19;		\
	bl	print_x0;		\
	bl	locore_lock_exit

/* CPU_DPRINTSREG: same, but the value comes from a system register */
#define CPU_DPRINTSREG(str, reg)	\
	mrs	x19, reg;		\
	CPU_DPRINTREG(str, x19)

#else /* DEBUG_LOCORE */

#define CPU_DPRINT(str)
#define CPU_DPRINTREG(str,reg)
#define CPU_DPRINTSREG(str, reg)

#endif /* DEBUG_LOCORE */
374
/*
 * cpu_mpstart() -- entry point for secondary (AP) processors.
 *
 * Runs at physical addresses with the MMU off.  Finds this CPU's index
 * by matching mpidr_el1 against cpu_mpidr[], sets up a small boot stack,
 * drops to EL1 if needed, loads the BP's translation tables, enables the
 * MMU, marks itself hatched, then waits for the BP's go signal before
 * entering cpu_hatch()/idle_loop().
 *
 * Register conventions: x27 = cpuindex (live throughout),
 * x28/x29 = hatched/mbox word address and bit mask.
 */
ENTRY_NP(cpu_mpstart)
	mrs	x3, mpidr_el1
	ldr	x0, =(MPIDR_AFF0 | MPIDR_AFF1 | MPIDR_AFF2 | MPIDR_AFF3)
	and	x3, x3, x0		/* x3 = affinity fields only */

	/*
	 * resolve own cpuindex. my mpidr is stored in
	 * extern uint64_t cpu_mpidr[MAXCPUS]
	 *
	 * NOTE: the pre-increment means the search starts at index 1;
	 * cpu_mpidr[0] is never matched here (presumably reserved for the
	 * boot processor -- confirm against cpu_mpidr[] initialization).
	 */
	adrl	x0, _C_LABEL(cpu_mpidr)
	mov	x1, xzr
1:
	add	x1, x1, #1
	cmp	x1, #MAXCPUS		/* cpuindex >= MAXCPUS ? */
	bge	toomanycpus
	ldr	x2, [x0, x1, lsl #3]	/* cpu_mpidr[cpuindex] */
	cmp	x2, x3			/* == mpidr_el1 & MPIDR_AFF ? */
	bne	1b

	mov	x27, x1			/* x27 = cpuindex */

	/*
	 * x27 = cpuindex
	 */

	/* set stack pointer for boot (per-CPU slice above bootstk) */
	mov	x1, #BOOT_AP_STACKSIZE
	mul	x1, x1, x27
	adrl	x0, bootstk
	add	sp, x0, x1  /* sp = bootstk + (BOOT_AP_STACKSIZE * cpuindex) */

	bl	1f			/* capture PC in lr for the debug print */
1:	CPU_DPRINTREG("PC               = ", lr)
	CPU_DPRINTREG("SP               = ", sp)
	mrs	x20, CurrentEL
	lsr	x20, x20, #2		/* CurrentEL[3:2] = EL number */
	CPU_DPRINTREG("CurrentEL        = ", x20)
	cmp	x20, #2
	bcc	1f
	/* EL2 registers can be accessed in EL2 or higher */
	CPU_DPRINTSREG("SCTLR_EL2        = ", sctlr_el2)
	CPU_DPRINTSREG("HCR_EL2          = ", hcr_el2)
1:
	CPU_DPRINTSREG("SPSR_EL1         = ", spsr_el1)
	CPU_DPRINTSREG("SCTLR_EL1        = ", sctlr_el1)
	CPU_DPRINTSREG("MIDR_EL1         = ", midr_el1)
	CPU_DPRINTSREG("MPIDR_EL1        = ", mpidr_el1)

#ifdef LOCORE_EL2
	CPU_DPRINT("Drop to EL1...\n")
	bl	drop_to_el1
	CPU_DPRINT("Drop to EL1 OK\n")
	mrs	x20, CurrentEL
	lsr	x20, x20, #2
	CPU_DPRINTREG("CurrentEL        = ", x20)
#endif /* LOCORE_EL2 */


	bl	mmu_disable
	bl	init_sysregs

	CPU_DPRINT("MMU Enable...\n")
	bl	load_ttbrs		/* reuse the BP's translation tables */
	bl	mmu_enable
	CPU_DPRINT("MMU Enable OK\n")

	/* jump to virtual address */
	ldr	x20, =mp_vstart
	br	x20

mp_vstart:
	hint	0x24		/* bti j */
	CPU_DPRINTREG("PC               = ", x20)

	/* set exception vector */
	adrl	x0, _C_LABEL(el1_vectors)
	msr	vbar_el1, x0

	/* lwp-private = NULL */
	msr	tpidr_el0, xzr
	msr	tpidrro_el0, xzr

	mov	x0, #CPU_INFO_SIZE
	mul	x0, x27, x0
	adrl	x1, _C_LABEL(cpu_info_store)
	add	x0, x0, x1		/* x0 = &cpu_info_store[cpuindex] */

	/* temporarily set tpidr_el1 to curcpu until the idle lwp is setup */
	msr	tpidr_el1, x0		/* tpidr_el1 = curcpu = x0 */

	/* fill curcpu()->ci_{midr,mpidr} */
	mrs	x1, midr_el1
	str	x1, [x0, #CI_MIDR]	/* curcpu()->ci_cpuid = midr_el1 */
	mrs	x1, mpidr_el1
	str	x1, [x0, #CI_MPIDR]	/* curcpu()->ci_mpidr = mpidr_el1 */

	/* set topology information (x0 = curcpu, x1 = mpidr from above) */
	mov	x2, #0
	bl	arm_cpu_topology_set

	/* x28 = &arm_cpu_hatched[cpuindex / (sizeof(u_long) * NBBY)] */
	adrl	x0, _C_LABEL(arm_cpu_hatched)
	// Appease clang - mov	x1, x27, lsr #6
	orr	x1, xzr, x27, lsr #6
	add	x28, x0, x1, lsl #3

	/* x29 = __BIT(cpuindex % (sizeof(u_long) * NBBY)) */
	mov	x0, #1
	and	x2, x27, #63
	lsl	x29, x0, x2

	/*
	 * atomic_or_ulong(&arm_cpu_hatched[cpuindex / (sizeof(u_long) * NBBY)],
	 *     __BIT(cpuindex % (sizeof(u_long) * NBBY)))
	 * to inform the boot processor.
	 */
	mov	x0, x28
	mov	x1, x29
	bl	_C_LABEL(atomic_or_ulong)	/* hatched! */
	dsb	sy
	sev				/* wake a BP possibly in wfe */

	/* x28 = &arm_cpu_mbox[cpuindex / (sizeof(u_long) * NBBY)] */
	adrl	x0, _C_LABEL(arm_cpu_mbox)
	// Appease clang - mov	x1, x27, lsr #6
	orr	x1, xzr, x27, lsr #6
	add	x28, x0, x1, lsl #3

	/* wait for the mailbox start bit to become true */
1:
	dmb	sy
	ldr	x20, [x28]
	tst	x20, x29		/* bne taken when our bit is set (Z==0) */
	bne	9f
	wfe
	b	1b
9:

	/*
	 * set curlwp (tpidr_el1 and curcpu()->ci_curlwp) now we know the
	 * idle lwp from curcpu()->ci_idlelwp
	 */
	mrs	x0, tpidr_el1		/* curcpu (temporarily) */
	ldr	x1, [x0, #CI_IDLELWP]	/* x1 = curcpu()->ci_idlelwp */
	msr	tpidr_el1, x1		/* tpidr_el1 = curlwp = x1 */
	str	x1, [x0, #CI_CURLWP]	/* curlwp is idlelwp */

	/* get my stack from lwp */
	ldr	x2, [x1, #L_PCB]	/* x2 = lwp_getpcb(idlelwp) */
	add	x2, x2, #(UPAGES * PAGE_SIZE)
	sub	sp, x2, #TF_SIZE	/* sp = pcb + USPACE - TF_SIZE */

	/* init PAN if supported (x0=0: secondary CPU) */
	mov	x0, #0
	bl	aarch64_pan_init

	/* init PAC if supported (x0=0: secondary CPU) */
	mov	x0, #0
	bl	aarch64_pac_init
	cbnz	w0, 1f			/* if (aarch64_pac_init() == 0) */
	mrs	x0, sctlr_el1
	ldr	x1, sctlr_pac
	orr	x0, x0, x1		/*  enable PAC */
	msr	sctlr_el1, x0
1:

	mov	fp, xzr			/* trace back starts here */
	mrs	x0, tpidr_el1		/* curlwp */
	ldr	x0, [x0, #L_CPU]	/* curlwp->l_cpu */
	bl	_C_LABEL(cpu_hatch)
	mov	x0, xzr
	b	_C_LABEL(idle_loop)	/* never to return */
END(cpu_mpstart)
548
/* AP whose MPIDR is not in cpu_mpidr[] (or index >= MAXCPUS) parks here */
toomanycpus:
	CPU_DPRINT("too many cpus, or MPIDR not exists in cpu_mpidr[]\n")
1:	wfi
	b	1b
553
554
555#else /* MULTIPROCESSOR */
556
/* MULTIPROCESSOR not configured: any secondary CPU just spins forever */
ENTRY_NP(cpu_mpstart)
1:	wfi
	b	1b
END(cpu_mpstart)
561
562#endif /* MULTIPROCESSOR */
563
564
/*
 * xprint - print strings pointed by $PC(LR)
 *          and return to the end of string.
 *          "\n" will be replaced "\r\n"
 * e.g.)
 *    bl        xprint      <- call
 *    .ascii    "Hello\n\0" <- wouldn't return here
 *    .align    2
 *    nop                   <- return to here
 *
 * clobbers: x0 and whatever uartputs clobbers.
 */
xprint:
	mov	x0, lr			/* the string starts at the return addr */
	bl	_C_LABEL(uartputs)	/* returns address just past the NUL */
	add	x0, x0, #3		/* round up to the next 4-byte boundary */
	bic	lr, x0, #3		/*   (matches the ".align 2" in PRINT) */
	ret				/* "return" to the code after the string */
582
/*
 * uartputs(str) - print strings with replacing "\n" to "\r\n".
 * returns the address after the end of the string. (x0 = next of '\0')
 */
ENTRY_NP(uartputs)
	stp	x19, lr, [sp, #-16]!
	mov	x19, x0			/* x19 = string cursor */
	ldrb	w0, [x19], #1		/* fetch first byte */
	cbz	w0, 9f			/* empty string: done */
1:
	cmp	x0, #'\n'
	bne	2f
	mov	x0, #0x0d	/* '\r' */
	bl	uartputc	/* emit CR before the LF */
	mov	x0, #'\n'
2:
	bl	uartputc
	ldrb	w0, [x19], #1	/* next byte */
	cbnz	w0, 1b
9:
	mov	x0, x19			/* x19 already points past the NUL */
	ldp	x19, lr, [sp], #16
	ret
END(uartputs)
607
/*
 * print x0 in 16 widths hexadecimal.
 *
 * x0 is preserved despite being caller saved.
 * other caller saved registers will be broken.
 */
_print_x0:
	stp	x0, lr, [sp, #-16]!
	stp	x20, x21, [sp, #-16]!

	mov	x21, x0		/* number to display */
	mov	x20, #60	/* num of shift; top nibble first */
1:
	ror	x0, x21, x20	/* bring the current nibble to bits [3:0] */
	and	x0, x0, #0xf
	cmp	x0, #10
	blt	2f
	add	x0, x0, #('a' - 10 - '0')	/* a-f (adjusted for the +'0' below) */
2:	add	x0, x0, #'0'
	bl	uartputc
	subs	x20, x20, #4	/* next nibble */
	bge	1b		/* 16 iterations: shift 60 down to 0 */

	ldp	x20, x21, [sp], #16
	ldp	x0, lr, [sp], #16
	ret
634
/*
 * print x0 in decimal.
 *
 * Builds the digit string backwards in a 32-byte on-stack buffer
 * (ending at the NUL), then prints it via uartputs.
 *
 * x0 is preserved despite being caller saved.
 * other caller saved registers will be broken.
 */
_printdec_x0:
	stp	x0, lr, [sp, #-(16+32)]!	/* 16 for x0/lr + 32-byte buffer */
	add	x8, sp, #(16+32)		/* x8 = one past end of buffer */

	strb	wzr, [x8, #-1]!			/* terminating NUL */
1:
	mov	x10, #10
	udiv	x1, x0, x10	/* x1 = x0 / 10 */
	msub	x3, x1, x10, x0	/* x3 = x0 % 10 */
	mov	x0, x1

	add	x3, x3, #'0'
	strb	w3, [x8, #-1]!	/* prepend the digit */
	cbnz	x0, 1b		/* until quotient is 0 (prints "0" for 0) */

	mov	x0, x8		/* x8 points at the first digit */
	bl	uartputs

	ldp	x0, lr, [sp], #(16+32)
	ret
661
/*
 * print x0 in 16 widths hexadecimal with crlf.
 *
 * x0 is preserved despite being caller saved.
 * other caller saved registers will be broken.
 */
print_x0:
	stp	x0, lr, [sp, #-16]!
	bl	_print_x0	/* hex digits (preserves x0) */
	PRINT("\n")		/* xprint turns this into "\r\n" */
	ldp	x0, lr, [sp], #16
	ret
674
675#ifdef VERBOSE_LOCORE
/*
 * tinyprintf() supports only maximum 7 '%x', '%d' and '%s' formats.
 * width and any modifiers are ignored. '\n' will be replaced to '\r\n'.
 *
 * '%x' will be always expanded 16 widths hexadecimal.
 * e.g., tinyprintf("Hello %s %x\n", "World", 0x12345)
 * outputs "Hello World 0000000000012345\r\n"
 *
 * Arguments x1-x8 are spilled to the stack below so the Nth conversion
 * can be fetched by index.  NOTE(review): the guard below allows index 7,
 * which fetches the x8 slot -- x8 is not an argument register in AAPCS64,
 * so an 8th conversion would print garbage; only 7 are reliable, as the
 * header comment says.
 */
tinyprintf:
	stp	x0, lr, [sp, #-16]!
	stp	x19, x20, [sp, #-16]!
	stp	x7, x8, [sp, #-16]!
	stp	x5, x6, [sp, #-16]!
	stp	x3, x4, [sp, #-16]!
	stp	x1, x2, [sp, #-16]!	/* args now at [sp + index*8] */

	mov	x20, xzr		/* x20 = argument index */
	mov	x19, x0			/* x19 = format string cursor */
	ldrb	w0, [x19], #1
	cbz	w0, tinyprintf_done

tinyprintf_loop:
	cmp	x0, #'\n'
	bne	1f
	/* '\n' -> '\r', '\n' */
	mov	x0, #0x0d	/* '\r' */
	bl	uartputc
	mov	x0, #'\n'
1:

	cmp	x0, #'%'
	bne	tinyprintf_putc
	cmp	x20, #8			/* out of saved argument slots? */
	bcs	tinyprintf_putc		/* then print '%' literally */

tinyprintf_fetch_fmt:
	ldrb	w9, [x19], #1
	cbz	w9, tinyprintf_done

	/* width and modifier are ignored */
	cmp	x9, #'h'
	beq	tinyprintf_fetch_fmt
	cmp	x9, #'l'
	beq	tinyprintf_fetch_fmt
	cmp	x9, #'j'
	beq	tinyprintf_fetch_fmt
	cmp	x9, #'t'
	beq	tinyprintf_fetch_fmt
	cmp	x9, #'z'
	beq	tinyprintf_fetch_fmt
	cmp	x9, #'0'
	bcc	1f
	cmp	x9, #'9'
	bls	tinyprintf_fetch_fmt	/* skip digits (field width) */
1:
	ldr	x0, [sp, x20, lsl #3]	/* get Nth argument */
	add	x20, x20, #1

	cmp	x9, #'x'
	bne	5f
	/* "%x" format */
	bl	_print_x0
	b	tinyprintf_next
5:
	cmp	x9, #'d'
	bne	5f
	/* "%d" format */
	bl	_printdec_x0
	b	tinyprintf_next
5:
	cmp	x9, #'s'
	bne	5f
	/* "%s" format */
	bl	_C_LABEL(uartputs)
	b	tinyprintf_next
5:
	/* unknown conversion: fall through and echo the char after '%' */

tinyprintf_putc:
	bl	uartputc
tinyprintf_next:
	ldrb	w0, [x19], #1
	cbnz	w0, tinyprintf_loop

tinyprintf_done:
	mov	x0, x19			/* return pointer past the format NUL */

	ldp	x1, x2, [sp], #16
	ldp	x3, x4, [sp], #16
	ldp	x5, x6, [sp], #16
	ldp	x7, x8, [sp], #16
	ldp	x19, x20, [sp], #16
	ldp	x0, lr, [sp], #16
	ret
769#endif /* VERBOSE_LOCORE */
770
771
/*
 * save_ttbrs() -- stash the BP's ttbr0_el1/ttbr1_el1 in ttbr_save so the
 * APs can load the same translation tables (see load_ttbrs).
 * clobbers: x0-x2.
 */
save_ttbrs:
	adrl	x2, ttbr_save
	mrs	x0, ttbr0_el1
	str	x0, [x2]		/* ttbr_save[0] = ttbr0_el1 */
	mrs	x1, ttbr1_el1
	str	x1, [x2, #8]		/* ttbr_save[1] = ttbr1_el1 */
	ret
779
/*
 * load_ttbrs() -- load ttbr0_el1/ttbr1_el1 from the values the BP saved
 * in ttbr_save (see save_ttbrs).  Called by APs before mmu_enable.
 * clobbers: x0-x2.
 */
load_ttbrs:
	adrl	x2, ttbr_save
	ldr	x0, [x2]		/* ttbr_save[0] */
	ldr	x1, [x2, #8]		/* ttbr_save[1] */
	msr	ttbr0_el1, x0
	msr	ttbr1_el1, x1
	ret
787
788
/*
 * init_mmutable() -- allocate the bootstrap L0 page tables and create
 * the mappings needed to survive turning on the MMU:
 *   - identity map of the early console (CONSADDR), if configured
 *   - identity map of the kernel image (so the PC stays valid)
 *   - identity map of the FDT blob, if configured
 *   - the real KVA mapping of the kernel image
 * returns: x0 = 0 on success, ~0 on failure.
 */
init_mmutable:
	stp	x26, lr, [sp, #-16]!

	/* first allocated page must be kernel l0pt = ARM_BOOTSTRAP_LxPT */
	bl	pmapboot_pagealloc
	cbz	x0, init_mmutable_error
	msr	ttbr1_el1, x0		/* kernel (high) translation table */

	bl	pmapboot_pagealloc
	cbz	x0, init_mmutable_error
	msr	ttbr0_el1, x0		/* user/identity (low) table */

	DPRINTSREG("TTBR0            = ", ttbr0_el1)
	DPRINTSREG("TTBR1            = ", ttbr1_el1)

	/* x26 = progress-print callback passed to pmapboot_enter */
#ifdef VERBOSE_LOCORE
	adr	x26, tinyprintf
#else
	mov	x26, xzr
#endif

	/*
	 * int
	 * pmapboot_enter(
	 *     x0: vaddr_t va,
	 *     x1: paddr_t pa,
	 *     x2: psize_t size,
	 *     x3: psize_t blocksize,  // L[123]_SIZE
	 *     x4: pt_entry_t attr,    // pte attributes. LX_BLKPAG_*
	 *     x5: void (*pr)(const char *, ...)
	 *  );
	 */

#ifdef CONSADDR
	VPRINT("Creating identity mapping for CONSADDR\n")
	ldr	x0, =CONSADDR			/* va = CONSADDR (physical) */
	mov	x1, x0				/* pa = va */
	mov	x2, #L2_SIZE			/* size */
	mov	x3, #L2_SIZE			/* blocksize */
	mov	x4, #LX_BLKPAG_ATTR_DEVICE_MEM | LX_BLKPAG_AP_RW
	orr	x4, x4, #LX_BLKPAG_UXN | LX_BLKPAG_PXN	/* attr */
	mov	x5, x26				/* pr func */
	bl	pmapboot_enter
	cbnz	x0, init_mmutable_error
#endif

	/* identity mapping for kernel image */
	VPRINT("Creating identity mapping for kernel image\n")
	adrl	x0, start			/* va = start (physical) */

	mov	x1, x0				/* pa = va */
	adrl	x2, _end
	sub	x2, x2, x1			/* size = _end - start */
	add	x2, x2, #PMAPBOOT_PAGEALLOCMAX	/* for pmapboot_pagealloc() */
	mov	x3, #L2_SIZE			/* blocksize */
	mov	x4, #LX_BLKPAG_ATTR_NORMAL_NC | LX_BLKPAG_AP_RW	/* attr */
	mov	x5, x26				/* pr func */
	bl	pmapboot_enter
	cbnz	x0, init_mmutable_error

#ifdef FDT
	VPRINT("Creating identity mapping for FDT\n")
	adrl	x8, _C_LABEL(fdt_addr_r)
	ldr	x8, [x8]			/* x8 = physical addr of the FDT */

	mov	x0, x8				/* va */
	mov	x1, x8				/* pa */
	mov	x2, #L2_SIZE			/* size */
	mov	x3, #L2_SIZE			/* blocksize */
	mov	x4, #LX_BLKPAG_ATTR_NORMAL_NC | LX_BLKPAG_AP_RW
	orr	x4, x4, #LX_BLKPAG_UXN | LX_BLKPAG_PXN	/* attr */
	mov	x5, x26				/* pr func */
	bl	pmapboot_enter
	cbnz	x0, init_mmutable_error
#endif

	VPRINT("Creating KVA=PA tables\n")
	ldr	x0, =start			/* va (link-time, virtual) */
	adrl	x1, start			/* pa = start (physical) */
	adrl	x2, _end
	sub	x2, x2, x1			/* size = _end - start */
	mov	x3, #L2_SIZE			/* blocksize */
	mov	x4, #LX_BLKPAG_ATTR_NORMAL_WB | LX_BLKPAG_AP_RW	/* attr */
	orr	x4, x4, #LX_BLKPAG_UXN
	mov	x5, x26				/* pr func */
	bl	pmapboot_enter
	cbnz	x0, init_mmutable_error

	VPRINT("OK\n");
	mov	x0, xzr				/* success */
	b	init_mmutable_done
init_mmutable_error:
	mvn	x0, xzr				/* return ~0 = failure */
init_mmutable_done:
	ldp	x26, lr, [sp], #16
	ret
885
/*
 * mmu_disable() -- clear SCTLR_EL1.M to turn off EL1 address translation.
 * clobbers: x0.
 */
mmu_disable:
	dsb	sy			/* complete outstanding accesses first */
	mrs	x0, sctlr_el1
	bic	x0, x0, SCTLR_M		/* clear MMU enable bit */
	msr	sctlr_el1, x0
	isb				/* make the change visible to fetches */
	ret
893
/*
 * mmu_enable() -- set up MAIR/TCR and turn on EL1 address translation.
 * Assumes ttbr0_el1/ttbr1_el1 are already loaded (init_mmutable or
 * load_ttbrs).  The TLBI/DSB/ISB ordering here is architecturally
 * required; do not reorder.
 * clobbers: x0, x1.
 */
mmu_enable:
	dsb	sy

	/* Invalidate all TLB */
	dsb	ishst
#ifdef MULTIPROCESSOR
	tlbi	vmalle1is		/* broadcast to the inner shareable domain */
#else
	tlbi	vmalle1
#endif
	dsb	ish
	isb

	ldr	x0, mair_setting	/* memory attribute indirection table */
	msr	mair_el1, x0


	/* TCR_EL1:IPS[34:32] = AA64MMFR0:PARange[3:0] */
	ldr	x0, tcr_setting
	mrs	x1, id_aa64mmfr0_el1
	bfi	x0, x1, #32, #3
	msr	tcr_el1, x0

	/*
	 * configure SCTLR
	 */
	mrs	x0, sctlr_el1
	ldr	x1, sctlr_clear
	bic	x0, x0, x1
	ldr	x1, sctlr_pac	/* disable PAC */
	bic	x0, x0, x1
	ldr	x1, sctlr_set
	orr	x0, x0, x1

	ldr	x1, sctlr_ee
#ifdef __AARCH64EB__
	orr	x0, x0, x1	/* set: BigEndian */
#else
	bic	x0, x0, x1	/* clear: LittleEndian */
#endif
	msr	sctlr_el1, x0	/* enabling MMU! */
	isb
	ret
938
939
	/*
	 * 64-bit literal-pool constants loaded with "ldr xN, label" above.
	 * Kept as data because several exceed the range of a mov immediate.
	 */
	.align 3
mair_setting:
	/* attr index assignments must match the LX_BLKPAG_ATTR_* users */
	.quad (						\
	    __SHIFTIN(MAIR_NORMAL_WB, MAIR_ATTR0) |	\
	    __SHIFTIN(MAIR_NORMAL_NC, MAIR_ATTR1) |	\
	    __SHIFTIN(MAIR_NORMAL_WT, MAIR_ATTR2) |	\
	    __SHIFTIN(MAIR_DEVICE_MEM, MAIR_ATTR3) |	\
	    __SHIFTIN(MAIR_DEVICE_MEM_SO, MAIR_ATTR4))

#define VIRT_BIT	48	/* virtual address width: 48-bit VA space */

#ifdef MULTIPROCESSOR
#define TCR_SHAREABLE	(TCR_SH0_INNER | TCR_SH1_INNER)
#else
#define TCR_SHAREABLE	(TCR_SH0_NONE | TCR_SH1_NONE)
#endif

tcr_setting:
	/* IPS[34:32] is patched in at runtime from ID_AA64MMFR0.PARange */
	.quad (						\
	    __SHIFTIN(64 - VIRT_BIT, TCR_T1SZ) |	\
	    __SHIFTIN(64 - VIRT_BIT, TCR_T0SZ) |	\
	    TCR_AS64K |					\
	    TCR_TG1_4KB | TCR_TG0_4KB |			\
	    TCR_ORGN0_WB_WA |				\
	    TCR_IRGN0_WB_WA |				\
	    TCR_ORGN1_WB_WA |				\
	    TCR_IRGN1_WB_WA) | TCR_SHAREABLE


#ifdef AARCH64_ALIGNMENT_CHECK
#define SCTLR_A_CONFIG		SCTLR_A
#else
#define SCTLR_A_CONFIG		0
#endif

#ifdef AARCH64_EL0_STACK_ALIGNMENT_CHECK
#define SCTLR_SA0_CONFIG	SCTLR_SA0
#else
#define SCTLR_SA0_CONFIG	0
#endif

#ifdef AARCH64_EL1_STACK_ALIGNMENT_CHECK
#define SCTLR_SA_CONFIG		SCTLR_SA
#else
#define SCTLR_SA_CONFIG		0
#endif


sctlr_ee:
	.quad (SCTLR_EE | SCTLR_EOE)	/* Endianness of Exception and EL0 */
sctlr_set:
	.quad ( \
	    SCTLR_LSMAOE |  /* Load/Store Multiple Atomicity and Ordering */ \
	    SCTLR_nTLSMD |  /* no Trap Load/Store Multiple to Device */ \
	    SCTLR_UCI |     /* Enables EL0 DC {CVAU,CIVAC,CVAC}, IC IVAU */ \
	    SCTLR_SPAN |    /* This field resets to 1 */ \
	    SCTLR_UCT |     /* Enables EL0 access to the CTR_EL0 */ \
	    SCTLR_nTWE |    /* EL0 WFE non-trapping */ \
	    SCTLR_nTWI |    /* EL0 WFI non-trapping */ \
	    SCTLR_DZE |     /* Enables access to the DC ZVA instruction */ \
	    SCTLR_I |       /* Instruction cache enable */ \
	    SCTLR_SED |     /* SETEND instruction disable */ \
	    SCTLR_C |       /* Cache enable */ \
	    SCTLR_M |       /* MMU Enable */ \
	    SCTLR_SA0_CONFIG | \
	    SCTLR_SA_CONFIG | \
	    SCTLR_A_CONFIG | \
	    0)
sctlr_clear:
	.quad ( \
	    SCTLR_IESB |    /* Enable Implicit ErrorSynchronizationBarrier */ \
	    SCTLR_WXN |     /* Write permission implies Execute Never (W^X) */ \
	    SCTLR_UMA |     /* EL0 Controls access to interrupt masks */ \
	    SCTLR_ITD |     /* IT instruction disable */ \
	    SCTLR_nAA |     /* ? */ \
	    SCTLR_CP15BEN | /* CP15 barrier enable */ \
	    SCTLR_SA0 |     /* Enable EL0 stack alignment check */ \
	    SCTLR_SA |      /* Enable SP alignment check */ \
	    SCTLR_A |       /* Alignment check enable */ \
	    0)
sctlr_pac:
	/* all four PAC instruction-enable bits; set only if pac_init == 0 */
	.quad ( \
	    SCTLR_EnIA |    /* PACIA (APIAKey_EL1) instruction enable */ \
	    SCTLR_EnIB |    /* PACIB (APIBKey_EL1) instruction enable */ \
	    SCTLR_EnDA |    /* PACDA (APDAKey_EL1) instruction enable */ \
	    SCTLR_EnDB |    /* PACDB (APDBKey_EL1) instruction enable */ \
	    0)

mdscr_setting:
	.quad ( \
	    MDSCR_TDCC |    /* Trap Debug Communications Channel access */ \
	    0)

/* NOTE(review): .L_devmap_addr is not referenced in this file -- confirm
 * whether an external user or dead code. */
.L_devmap_addr:
	.quad	VM_KERNEL_IO_ADDRESS
1035
	.data

#ifdef DEBUG_LOCORE_PRINT_LOCK
	/* Peterson's-algorithm state for locore_lock_enter/exit (int16_t) */
	.align 2
lock_level:
	.fill	MAXCPUS, 2, -1		/* -1 = not contending */
lock_turn:
	.fill	(MAXCPUS - 1), 2, -1
#endif /* DEBUG_LOCORE_PRINT_LOCK */

	/* ttbr0/ttbr1 pair saved by save_ttbrs, loaded by APs (load_ttbrs) */
	.align 3
ttbr_save:
	.space	8 * 2

	.bss

	.align PGSHIFT
	.global _C_LABEL(lwp0uspace)
_C_LABEL(lwp0uspace):
	.space	UPAGES * PAGE_SIZE
bootstk:
	/*
	 * The BP's boot stack grows down from bootstk (into the top of
	 * lwp0's uarea); AP cpuindex i gets sp = bootstk +
	 * BOOT_AP_STACKSIZE * i, growing down into the space below.
	 */

#ifdef MULTIPROCESSOR
	.space	BOOT_AP_STACKSIZE * (MAXCPUS - 1)
#endif

	/* bootstrap L0 page table, handed to ttbr1 by init_mmutable */
	.section ".init_pagetable", "aw", %nobits
	.align PGSHIFT
	.global ARM_BOOTSTRAP_LxPT
ARM_BOOTSTRAP_LxPT:
l0pt_kern:

	.section "_init_memory", "aw", %nobits
	.align PGSHIFT

	/* None currently */
1072