xref: /netbsd-src/sys/arch/i386/i386/locore.S (revision 0cf0db2c6af328a7132b12eaf2ecf4583467c2fe)
1/*	$NetBSD: locore.S,v 1.198 2024/07/31 20:05:28 andvar Exp $	*/
2
3/*
4 * Copyright-o-rama!
5 */
6
7/*
8 * Copyright (c) 1998, 2000, 2004, 2006, 2007, 2009, 2016
9 * The NetBSD Foundation, Inc., All rights reserved.
10 *
11 * This code is derived from software contributed to The NetBSD Foundation
12 * by Charles M. Hannum, by Andrew Doran and by Maxime Villard.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 *    notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 *    notice, this list of conditions and the following disclaimer in the
21 *    documentation and/or other materials provided with the distribution.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/*
37 * Copyright (c) 2006 Manuel Bouyer.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 *    notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 *    notice, this list of conditions and the following disclaimer in the
46 *    documentation and/or other materials provided with the distribution.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
49 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
50 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
51 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
52 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
53 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
54 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
55 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
56 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
57 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58 *
59 */
60
61/*
62 * Copyright (c) 2001 Wasabi Systems, Inc.
63 * All rights reserved.
64 *
65 * Written by Frank van der Linden for Wasabi Systems, Inc.
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 *    notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 *    notice, this list of conditions and the following disclaimer in the
74 *    documentation and/or other materials provided with the distribution.
75 * 3. All advertising materials mentioning features or use of this software
76 *    must display the following acknowledgement:
77 *      This product includes software developed for the NetBSD Project by
78 *      Wasabi Systems, Inc.
79 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
80 *    or promote products derived from this software without specific prior
81 *    written permission.
82 *
83 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
84 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
85 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
86 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
87 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
88 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
89 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
90 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
91 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
92 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
93 * POSSIBILITY OF SUCH DAMAGE.
94 */
95
96/*-
97 * Copyright (c) 1990 The Regents of the University of California.
98 * All rights reserved.
99 *
100 * This code is derived from software contributed to Berkeley by
101 * William Jolitz.
102 *
103 * Redistribution and use in source and binary forms, with or without
104 * modification, are permitted provided that the following conditions
105 * are met:
106 * 1. Redistributions of source code must retain the above copyright
107 *    notice, this list of conditions and the following disclaimer.
108 * 2. Redistributions in binary form must reproduce the above copyright
109 *    notice, this list of conditions and the following disclaimer in the
110 *    documentation and/or other materials provided with the distribution.
111 * 3. Neither the name of the University nor the names of its contributors
112 *    may be used to endorse or promote products derived from this software
113 *    without specific prior written permission.
114 *
115 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
116 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
117 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
118 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
119 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
120 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
121 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
122 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
123 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
124 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
125 * SUCH DAMAGE.
126 *
127 *	@(#)locore.s	7.3 (Berkeley) 5/13/91
128 */
129
130#include <machine/asm.h>
131__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.198 2024/07/31 20:05:28 andvar Exp $");
132
133#include "opt_copy_symtab.h"
134#include "opt_ddb.h"
135#include "opt_modular.h"
136#include "opt_multiboot.h"
137#include "opt_realmem.h"
138#include "opt_xen.h"
139
140#include "assym.h"
141#include "lapic.h"
142#include "ioapic.h"
143#include "ksyms.h"
144
145#include <sys/errno.h>
146#include <sys/syscall.h>
147
148#include <machine/segments.h>
149#include <machine/specialreg.h>
150#include <machine/trap.h>
151#include <machine/i82489reg.h>
152#include <machine/frameasm.h>
153#include <machine/i82489reg.h>
154#include <machine/cputypes.h>
155
156#ifndef XENPV
157#include <machine/multiboot.h>
158#endif
159
160/* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
161#include <dev/isa/isareg.h>
162
163#ifndef XENPV
/*
 * _RELOC(x): translate a linked (virtual) kernel address into the
 * physical address it occupies before paging is enabled.  On native
 * boots the kernel is linked at KERNBASE but loaded low, so subtract
 * KERNBASE; under XENPV we already execute at the linked address, so
 * the translation is the identity.
 */
164#define	_RELOC(x)	((x) - KERNBASE)
165#else
166#define	_RELOC(x)	((x))
167#endif /* XENPV */
/* RELOC(label): the same translation applied to a C-visible symbol. */
168#define	RELOC(x)	_RELOC(_C_LABEL(x))
169
170/* 32bit version of PTE_NX */
171#define PTE_NX32	0x80000000
172
/*
 * Page offsets of the pieces inside the BOOTSTRAP TABLES area (whose
 * base is kept in %esi by the start code below): with PAE an extra L3
 * page precedes the page directory, and the directory itself spans
 * PDP_SIZE pages.
 */
173#ifndef PAE
174#define	PROC0_PDIR_OFF	0
175#else
176#define PROC0_L3_OFF	0
177#define PROC0_PDIR_OFF	1 * PAGE_SIZE
178#endif
179
/* lwp0's stack follows the page directory; the first PT follows the stack. */
180#define	PROC0_STK_OFF	(PROC0_PDIR_OFF + PDP_SIZE * PAGE_SIZE)
181#define	PROC0_PTP1_OFF	(PROC0_STK_OFF + UPAGES * PAGE_SIZE)
182
183/*
184 * fillkpt - Fill in a kernel page table
185 *	eax = pte (page frame | control | status)
186 *	ebx = page table address
187 *	ecx = number of pages to map
188 *
 * On exit: %ebx points one entry past the last one written, %eax has
 * been advanced by %ecx pages, %ecx is zero, and the flags are
 * clobbered.  The uses of the local labels 1/2 are self-contained.
 *
189 * For PAE, each entry is 8 bytes long: we must set the 4 upper bytes to 0.
190 * This is done by the first instruction of fillkpt. In the non-PAE case, this
191 * instruction just clears the page table entry.
192 */
193#define fillkpt	\
194	cmpl	$0,%ecx			;	/* zero-sized? */	\
195	je 	2f			;	/* nothing to map */	\
1961:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: 0 */	\
197	movl	%eax,(%ebx)		;	/* store phys addr */	\
198	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
199	addl	$PAGE_SIZE,%eax		;	/* next phys page */	\
200	loop	1b			;	/* --ecx; repeat */	\
2012:					;
202
203/*
204 * fillkpt_nox - Same as fillkpt, but sets the NX/XD bit.
205 *
 * The NX bit lives in the upper 32 bits of a (PAE) entry, so it is
 * written from nox_flag, which the CPU-probe code below sets to
 * PTE_NX32 when CPUID reports NX support and leaves 0 otherwise.
 * nox_flag is read through RELOC(), i.e. this macro is meant to run
 * before paging is enabled.  %ebp is preserved; exit state of
 * %eax/%ebx/%ecx is as for fillkpt.
205 */
206#define fillkpt_nox \
207	cmpl	$0,%ecx			;	/* zero-sized? */	\
208	je 	2f			;	/* nothing to map */	\
209	pushl	%ebp			;	/* preserve caller's %ebp */ \
210	movl	RELOC(nox_flag),%ebp	;	/* 0 or PTE_NX32 */	\
2111:	movl	%ebp,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: NX */ \
212	movl	%eax,(%ebx)		;	/* store phys addr */	\
213	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
214	addl	$PAGE_SIZE,%eax		;	/* next phys page */	\
215	loop	1b			;	/* --ecx; repeat */	\
216	popl	%ebp			; \
2172:					;
218
219/*
220 * fillkpt_blank - Fill in a kernel page table with blank entries
221 *	ebx = page table address
222 *	ecx = number of pages to map
 *
 * Writes %ecx all-zero (not-present) entries.  On exit %ebx points
 * one entry past the last one written and %ecx is zero; %eax is not
 * touched.  Flags are clobbered.
223 */
224#define fillkpt_blank	\
225	cmpl	$0,%ecx			;	/* zero-sized? */	\
226	je 	2f			;	/* nothing to clear */	\
2271:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: 0 */	\
228	movl	$0,(%ebx)		;	/* lower 32 bits: 0 */	\
229	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
230	loop	1b			;	/* --ecx; repeat */	\
2312:					;
232
233/*
234 * killkpt - Destroy a kernel page table
235 *	ebx = page table address
236 *	ecx = number of pages to destroy
 *
 * Precondition: %ecx must be non-zero.  Unlike the fillkpt macros
 * there is no zero check, and the body executes before `loop' tests
 * the count, so %ecx == 0 would wrap and sweep the address space.
 * The caller below always passes nkptp[1], which is >= 1.
237 */
238#define killkpt \
2391:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper bits (for PAE) */ \
240	movl	$0,(%ebx)		; \
241	addl	$PDE_SIZE,%ebx		; \
242	loop	1b			;
243
244
245#ifdef XEN
246#define __ASSEMBLY__
247#include <xen/include/public/elfnote.h>
248#include <xen/include/public/xen.h>
249
/*
 * ELFNOTE(name, type, desctype, descdata...) - emit one ELF note
 * record into section ".note.<name>": the namesz/descsz/type header
 * words, the NUL-terminated name, then the descriptor (built with the
 * given data directive), each part padded to 4-byte alignment.  The
 * local labels 1..4 let the assembler compute the two sizes.
 */
250#define ELFNOTE(name, type, desctype, descdata...) \
251.pushsection .note.name			;	\
252  .align 4				;	\
253  .long 2f - 1f		/* namesz */	;	\
254  .long 4f - 3f		/* descsz */	;	\
255  .long type				;	\
2561:.asciz #name				;	\
2572:.align 4				;	\
2583:desctype descdata			;	\
2594:.align 4				;	\
260.popsection
261
262/*
263 * Xen guest identifier and loader selection
 *
 * These ELF notes describe the kernel to the Xen domain builder:
 * guest OS/version, linked virtual base, entry points (the PV `start'
 * entry vs. the PVH 32-bit physical entry), and supported features.
264 */
265.section __xen_guest
266	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "NetBSD")
267	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, "4.99")
268	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz, "xen-3.0")
269	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long,  KERNBASE)
270#ifdef XENPV
271	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  KERNBASE)
272	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long,  start)
273#else
274	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  0)
275	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,   .long,  RELOC(start_xenpvh))
276#endif /* XENPV */
277	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long,  hypercall_page)
278	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long,  HYPERVISOR_VIRT_START)
279	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz, "writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel|hvm_callback_vector")
280	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "yes")
/*
 * NOTE(review): the trailing '\' on the L1_MFN_VALID line below makes
 * the preprocessor splice the following LOADER note onto the same
 * logical line.  This looks unintentional (possibly xref-scrape
 * residue) -- verify against upstream locore.S before relying on it.
 */
281	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .quad,  PTE_P, PTE_P)\
282	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz, "generic")
283	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long,  0)
284#if NKSYMS > 0 || defined(DDB) || defined(MODULAR)
285	ELFNOTE(Xen, XEN_ELFNOTE_BSD_SYMTAB,     .asciz, "yes")
286#endif
287#endif  /* XEN */
288
289/*
290 * Initialization
 *
 * Bootstrap-time variables filled in by the start code below and read
 * later by C code (and, for PDPpaddr, by libkvm).
291 */
292	.data
293
294	.globl	_C_LABEL(tablesize)
295	.globl	_C_LABEL(nox_flag)
296	.globl	_C_LABEL(cputype)
297	.globl	_C_LABEL(cpuid_level)
	/* esym, eblob, gdt and idt are only declared global here; they
	 * are defined elsewhere in the kernel. */
298	.globl	_C_LABEL(esym)
299	.globl	_C_LABEL(eblob)
300	.globl	_C_LABEL(atdevbase)
301	.globl	_C_LABEL(PDPpaddr)
302	.globl	_C_LABEL(lwp0uarea)
303	.globl	_C_LABEL(gdt)
304	.globl	_C_LABEL(idt)
305
306	.type	_C_LABEL(tablesize), @object
307_C_LABEL(tablesize):	.long	0	/* bytes of BOOTSTRAP TABLES */
308END(tablesize)
309	.type	_C_LABEL(nox_flag), @object
310LABEL(nox_flag)		.long	0	/* 32bit NOX flag, set if supported */
311END(nox_flag)
312	.type	_C_LABEL(cputype), @object
313LABEL(cputype)		.long	0	/* are we 80486, Pentium, or.. */
314END(cputype)
315	.type	_C_LABEL(cpuid_level), @object
316LABEL(cpuid_level)	.long	-1	/* max. level accepted by cpuid instr */
317END(cpuid_level)
318	.type	_C_LABEL(atdevbase), @object
319LABEL(atdevbase)	.long	0	/* location of start of iomem in virt */
320END(atdevbase)
321	.type	_C_LABEL(lwp0uarea), @object
322LABEL(lwp0uarea)	.long	0	/* VA of lwp0's uarea, set at boot */
323END(lwp0uarea)
324	.type	_C_LABEL(PDPpaddr), @object
325LABEL(PDPpaddr)		.long	0	/* paddr of PDP, for libkvm */
326END(PDPpaddr)
327
328	/* Space for the temporary stack */
329	.globl	_C_LABEL(tmpstk)
	/* tmpstk - . == 512 here: record the 512 bytes below the label,
	 * since the stack grows down from tmpstk. */
330	.size	tmpstk, tmpstk - .
331	.space	512
332tmpstk:
333#ifdef XENPV
334	.align 		PAGE_SIZE, 0x0	/* Align on page boundary */
335LABEL(tmpgdt)
336	.space 		PAGE_SIZE	/* Xen expects a page */
337END(tmpgdt)
338#endif /* XENPV */
339
	/* Expose the link-time start of the kernel text to C code. */
340	.text
341	.globl	_C_LABEL(kernel_text)
342	.set	_C_LABEL(kernel_text),KERNTEXTOFF
343
344ENTRY(start)
345#ifndef XENPV
346
347	/* Warm boot */
348	movw	$0x1234,0x472
349
350#if defined(MULTIBOOT)
351	jmp	1f
352
353	.align	4
354	.globl	Multiboot_Header
355_C_LABEL(Multiboot_Header):
356#define MULTIBOOT_HEADER_FLAGS	(MULTIBOOT_HEADER_WANT_MEMORY)
357	.long	MULTIBOOT_HEADER_MAGIC
358	.long	MULTIBOOT_HEADER_FLAGS
359	.long	-(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
360
361	.align	8
362	.globl	Multiboot2_Header
363_C_LABEL(Multiboot2_Header):
364	.long	MULTIBOOT2_HEADER_MAGIC
365	.long	MULTIBOOT2_ARCHITECTURE_I386
366	.long	Multiboot2_Header_end - Multiboot2_Header
367	.long	-(MULTIBOOT2_HEADER_MAGIC + MULTIBOOT2_ARCHITECTURE_I386 \
368		+ (Multiboot2_Header_end - Multiboot2_Header))
369
370	.long	1	/* MULTIBOOT_HEADER_TAG_INFORMATION_REQUEST */
371	.long	12	/* sizeof(multiboot_header_tag_information_request) */
372			/* + sizeof(uint32_t) * requests */
373	.long	4	/* MULTIBOOT_TAG_TYPE_BASIC_MEMINFO */
374	.long	0	/* pad for 8 bytes alignment */
375
376	.long	8	/* MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS_EFI32 */
377	.long	12	/* sizeof(struct multiboot_tag_efi32) */
378	.long	efi_multiboot2_loader - KERNBASE
379	.long   0	/* pad for 8 bytes alignment */
380
381#if notyet
382	/*
383	 * Could be used to get an early console for debug,
384	 * but this is broken.
385	 */
386	.long	7	/* MULTIBOOT_HEADER_TAG_EFI_BS */
387	.long	8	/* sizeof(struct multiboot_tag) */
388#endif
389
390	.long	0	/* MULTIBOOT_HEADER_TAG_END */
391	.long	8	/* sizeof(struct multiboot_tag) */
392	.globl	Multiboot2_Header_end
393_C_LABEL(Multiboot2_Header_end):
394
3951:
396	/* Check if we are being executed by a Multiboot-compliant boot
397	 * loader. */
398	cmpl	$MULTIBOOT_INFO_MAGIC,%eax
399	je	multiboot1_loader
400
401	cmpl	$MULTIBOOT2_BOOTLOADER_MAGIC,%eax
402	je	multiboot2_loader
403
404	jmp	1f
405
406multiboot1_loader:
407	/*
408	 * Indeed, a multiboot-compliant boot loader executed us. We switch
409	 * to the temporary stack, and copy the received Multiboot information
410	 * structure into kernel's data space to process it later -- after we
411	 * are relocated. It will be safer to run complex C code than doing it
412	 * at this point.
413	 */
414	movl	$_RELOC(tmpstk),%esp
415	pushl	%ebx		/* Address of Multiboot information */
416	call	_C_LABEL(multiboot1_pre_reloc)
417	addl	$4,%esp
418	jmp	.Lstart_common
419
420efi_multiboot2_loader:
421	/*
422	 * EFI32 multiboot2 entry point. We are left here without
423	 * stack and with no idea of where we were loaded in memory.
424	 * The only inputs are
425	 * %eax MULTIBOOT2_BOOTLOADER_MAGIC
426	 * %ebx pointer to multiboot_info
427	 *
428	 * Here we will copy the kernel to 0x100000 (KERNTEXTOFF - KERNBASE)
429	 * as almost all the code in locore.S assume it is there. Once done,
430	 * we join the main start code. This is derived from
431	 * src/sys/arch/i386/stand/efiboot/bootia32/startprog32.S
432	 */
433
434	cli
435
436	/*
437	 * Discover our load address and store it in %edx
438	 */
439	movl	$_RELOC(tmpstk),%esp
440	call	next
441next:	popl	%edx
442	subl	$(next - efi_multiboot2_loader), %edx
443
444	/*
445	 * Save multiboot_info for later. We cannot use
446	 * temporary stack for that since we are going to
447	 * overwrite it.
448	 */
449	movl	%ebx, (multiboot2_info_ptr - efi_multiboot2_loader)(%edx)
450
451	/*
452	 * Get relocated multiboot2_loader entry point in %ebx
453	 */
454	movl	$(KERNTEXTOFF - KERNBASE), %ebx
455	addl	$(multiboot2_loader - start), %ebx
456
457        /* Copy kernel */
458        movl    $(KERNTEXTOFF - KERNBASE), %edi		/* dest */
459        movl    %edx, %esi
460	subl	$(efi_multiboot2_loader - start), %esi	/* src */
461	movl	$(__kernel_end - kernel_text), %ecx	/* size */
462#if defined(NO_OVERLAP)
463        movl    %ecx, %eax
464#else
465        movl    %edi, %eax
466        subl    %esi, %eax
467        cmpl    %ecx, %eax      /* overlapping? */
468        movl    %ecx, %eax
469        jb      .Lbackwards
470#endif
471        /* nope, copy forwards. */
472        shrl    $2, %ecx        /* copy by words */
473        rep
474        movsl
475        and     $3, %eax        /* any bytes left? */
476        jnz     .Ltrailing
477        jmp     .Lcopy_done
478
479.Ltrailing:
480        cmp     $2, %eax
481        jb      11f
482        movw    (%esi), %ax
483        movw    %ax, (%edi)
484        je      .Lcopy_done
485        movb    2(%esi), %al
486        movb    %al, 2(%edi)
487        jmp     .Lcopy_done
48811:     movb    (%esi), %al
489        movb    %al, (%edi)
490        jmp     .Lcopy_done
491
492#if !defined(NO_OVERLAP)
493.Lbackwards:
494        addl    %ecx, %edi      /* copy backwards. */
495        addl    %ecx, %esi
496        and     $3, %eax        /* any fractional bytes? */
497        jnz     .Lback_align
498.Lback_aligned:
499        shrl    $2, %ecx
500        subl    $4, %esi
501        subl    $4, %edi
502        std
503        rep
504        movsl
505        cld
506        jmp     .Lcopy_done
507
508.Lback_align:
509        sub     %eax, %esi
510        sub     %eax, %edi
511        cmp     $2, %eax
512        jb      11f
513        je      12f
514        movb    2(%esi), %al
515        movb    %al, 2(%edi)
51612:     movw    (%esi), %ax
517        movw    %ax, (%edi)
518        jmp     .Lback_aligned
51911:     movb    (%esi), %al
520        movb    %al, (%edi)
521        jmp     .Lback_aligned
522#endif
523        /* End of copy kernel */
524.Lcopy_done:
525	cld			/* LynxOS depends on it */
526
527	/* Prepare jump address */
528	lea	(efi_multiboot2_loader32a - efi_multiboot2_loader)(%edx), %eax
529	movl	%eax, (efi_multiboot2_loader32r - efi_multiboot2_loader)(%edx)
530
531	/* Setup GDT */
532	lea	(gdt - efi_multiboot2_loader)(%edx), %eax
533	movl	%eax, (gdtrr - efi_multiboot2_loader)(%edx)
534	lgdt	(gdtr - efi_multiboot2_loader)(%edx)
535
536	/* Jump to set %cs */
537	ljmp	*(efi_multiboot2_loader32r - efi_multiboot2_loader)(%edx)
538
539	.align	4
540efi_multiboot2_loader32a:
541	movl	$0x10, %eax	/* #define DATA_SEGMENT	0x10 */
542	movw	%ax, %ds
543	movw	%ax, %es
544	movw	%ax, %fs
545	movw	%ax, %gs
546	movw	%ax, %ss
547
548	/* Already set new stack pointer */
549	movl	%esp, %ebp
550
551	/* Disable Paging in CR0 */
552	movl	%cr0, %eax
553	andl	$(~CR0_PG), %eax
554	movl	%eax, %cr0
555
556	/* Disable PAE in CR4 */
557	movl	%cr4, %eax
558	andl	$(~CR4_PAE), %eax
559	movl	%eax, %cr4
560
561	jmp	efi_multiboot2_loader32b
562
563	.align	4
564efi_multiboot2_loader32b:
565	xor	%eax, %eax
566	movl	%ebx, (efi_multiboot2_loader32r - efi_multiboot2_loader)(%edx)
567	/*
568	 * Reload multiboot info from target location
569	 */
570	movl	_RELOC(multiboot2_info_ptr), %ebx
571	ljmp	*(efi_multiboot2_loader32r - efi_multiboot2_loader)(%edx)
572
573	.align	16
574efi_multiboot2_loader32r:
575	.long	0
576	.long	0x08	/* #define	CODE_SEGMENT	0x08 */
577	.align	16
578gdt:
579	.long	0, 0
580	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
581	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
582gdtr:
583	.word	gdtr - gdt
584gdtrr:
585	.quad  	0
586multiboot2_info_ptr:
587	.long	0
588
589	.align 16
590multiboot2_loader:
591	movl    $_RELOC(tmpstk),%esp
592	pushl	%ebx		/* Address of Multiboot information */
593	call	_C_LABEL(multiboot2_pre_reloc)
594	addl	$4,%esp
595	jmp	.Lstart_common
596#endif /* MULTIBOOT */
597
5981:
599	/*
600	 * At this point, we know that a NetBSD-specific boot loader
601	 * booted this kernel.
602	 *
603	 * Load parameters from the stack (32 bits):
604	 *     boothowto, [bootdev], bootinfo, esym, biosextmem, biosbasemem
605	 * We are not interested in 'bootdev'.
606	 */
607
608	addl	$4,%esp		/* Discard return address to boot loader */
609	call	_C_LABEL(native_loader)
610	addl	$24,%esp
611
612.Lstart_common:
613	/* First, reset the PSL. */
614	pushl	$PSL_MBO
615	popfl
616
617	/* Clear segment registers; always null in proc0. */
618	xorl	%eax,%eax
619	movw	%ax,%fs
620	movw	%ax,%gs
621
622	/* Find out our CPU type. */
623
624try386:	/* Try to toggle alignment check flag; does not exist on 386. */
625	pushfl
626	popl	%eax
627	movl	%eax,%ecx
628	orl	$PSL_AC,%eax
629	pushl	%eax
630	popfl
631	pushfl
632	popl	%eax
633	xorl	%ecx,%eax
634	andl	$PSL_AC,%eax
635	pushl	%ecx
636	popfl
637
638	testl	%eax,%eax
639	jnz	try486
640
641	/*
642	 * Try the test of a NexGen CPU -- ZF will not change on a DIV
643	 * instruction on a NexGen, it will on an i386.  Documented in
644	 * Nx586 Processor Recognition Application Note, NexGen, Inc.
645	 */
646	movl	$0x5555,%eax
647	xorl	%edx,%edx
648	movl	$2,%ecx
649	divl	%ecx
650	jnz	is386
651
652isnx586:
653	/*
654	 * Don't try cpuid, as Nx586s reportedly don't support the
655	 * PSL_ID bit.
656	 */
657	movl	$CPU_NX586,RELOC(cputype)
658	jmp	2f
659
660is386:
661	movl	$CPU_386,RELOC(cputype)
662	jmp	2f
663
664try486:	/* Try to toggle identification flag; does not exist on early 486s. */
665	pushfl
666	popl	%eax
667	movl	%eax,%ecx
668	xorl	$PSL_ID,%eax
669	pushl	%eax
670	popfl
671	pushfl
672	popl	%eax
673	xorl	%ecx,%eax
674	andl	$PSL_ID,%eax
675	pushl	%ecx
676	popfl
677
678	testl	%eax,%eax
679	jnz	try586
680is486:	movl	$CPU_486,RELOC(cputype)
681	/*
682	 * Check Cyrix CPU
683	 * Cyrix CPUs do not change the undefined flags following
684	 * execution of the divide instruction which divides 5 by 2.
685	 *
686	 * Note: CPUID is enabled on M2, so it passes another way.
687	 */
688	pushfl
689	movl	$0x5555, %eax
690	xorl	%edx, %edx
691	movl	$2, %ecx
692	clc
693	divl	%ecx
694	jnc	trycyrix486
695	popfl
696	jmp 2f
697trycyrix486:
698	movl	$CPU_6x86,RELOC(cputype)	/* set CPU type */
699	/*
700	 * Check for Cyrix 486 CPU by seeing if the flags change during a
701	 * divide. This is documented in the Cx486SLC/e SMM Programmer's
702	 * Guide.
703	 */
704	xorl	%edx,%edx
705	cmpl	%edx,%edx		/* set flags to known state */
706	pushfl
707	popl	%ecx			/* store flags in ecx */
708	movl	$-1,%eax
709	movl	$4,%ebx
710	divl	%ebx			/* do a long division */
711	pushfl
712	popl	%eax
713	xorl	%ecx,%eax		/* are the flags different? */
714	testl	$0x8d5,%eax		/* only check C|PF|AF|Z|N|V */
715	jne	2f			/* yes; must be Cyrix 6x86 CPU */
716	movl	$CPU_486DLC,RELOC(cputype) 	/* set CPU type */
717
718#ifndef CYRIX_CACHE_WORKS
719	/* Disable caching of the ISA hole only. */
720	invd
721	movb	$CCR0,%al	/* Configuration Register index (CCR0) */
722	outb	%al,$0x22
723	inb	$0x23,%al
724	orb	$(CCR0_NC1|CCR0_BARB),%al
725	movb	%al,%ah
726	movb	$CCR0,%al
727	outb	%al,$0x22
728	movb	%ah,%al
729	outb	%al,$0x23
730	invd
731#else /* CYRIX_CACHE_WORKS */
732	/* Set cache parameters */
733	invd			/* Start with guaranteed clean cache */
734	movb	$CCR0,%al	/* Configuration Register index (CCR0) */
735	outb	%al,$0x22
736	inb	$0x23,%al
737	andb	$~CCR0_NC0,%al
738#ifndef CYRIX_CACHE_REALLY_WORKS
739	orb	$(CCR0_NC1|CCR0_BARB),%al
740#else
741	orb	$CCR0_NC1,%al
742#endif
743	movb	%al,%ah
744	movb	$CCR0,%al
745	outb	%al,$0x22
746	movb	%ah,%al
747	outb	%al,$0x23
748	/* clear non-cacheable region 1	*/
749	movb	$(NCR1+2),%al
750	outb	%al,$0x22
751	movb	$NCR_SIZE_0K,%al
752	outb	%al,$0x23
753	/* clear non-cacheable region 2	*/
754	movb	$(NCR2+2),%al
755	outb	%al,$0x22
756	movb	$NCR_SIZE_0K,%al
757	outb	%al,$0x23
758	/* clear non-cacheable region 3	*/
759	movb	$(NCR3+2),%al
760	outb	%al,$0x22
761	movb	$NCR_SIZE_0K,%al
762	outb	%al,$0x23
763	/* clear non-cacheable region 4	*/
764	movb	$(NCR4+2),%al
765	outb	%al,$0x22
766	movb	$NCR_SIZE_0K,%al
767	outb	%al,$0x23
768	/* enable caching in CR0 */
769	movl	%cr0,%eax
770	andl	$~(CR0_CD|CR0_NW),%eax
771	movl	%eax,%cr0
772	invd
773#endif /* CYRIX_CACHE_WORKS */
774
775	jmp	2f
776
777try586:	/* Use the `cpuid' instruction. */
778	xorl	%eax,%eax
779	cpuid
780	movl	%eax,RELOC(cpuid_level)
781
782	/*
783	 * Retrieve the NX/XD flag. We use the 32bit version of PTE_NX.
784	 */
785	movl	$0x80000001,%eax
786	cpuid
787	andl	$CPUID_NOX,%edx
788	jz	no_NOX
789	movl	$PTE_NX32,RELOC(nox_flag)
790no_NOX:
791
7922:
793	/*
794	 * Finished with old stack; load new %esp now instead of later so we
795	 * can trace this code without having to worry about the trace trap
796	 * clobbering the memory test or the zeroing of the bss+bootstrap page
797	 * tables.
798	 *
799	 * The boot program should check:
800	 *	text+data <= &stack_variable - more_space_for_stack
801	 *	text+data+bss+pad+space_for_page_tables <= end_of_memory
802	 *
803	 * XXX: the gdt is in the carcass of the boot program so clearing
804	 * the rest of memory is still not possible.
805	 */
806	movl	$_RELOC(tmpstk),%esp
807
808/*
809 * There are two different layouts possible, depending on whether PAE is
810 * enabled or not.
811 *
812 * If PAE is not enabled, there are two levels of pages: PD -> PT. They will
813 * be referred to as: L2 -> L1. L2 is 1 page long. The BOOTSTRAP TABLES have
814 * the following layout:
815 * 	+-----+------------+----+
816 * 	| L2 -> PROC0 STK -> L1 |
817 * 	+-----+------------+----+
818 *
819 * If PAE is enabled, there are three levels of pages: PDP -> PD -> PT. They
820 * will be referred to as: L3 -> L2 -> L1. L3 is 1 page long, L2 is 4 page
821 * long. The BOOTSTRAP TABLES have the following layout:
822 * 	+-----+-----+------------+----+
823 * 	| L3 -> L2 -> PROC0 STK -> L1 |
824 * 	+-----+-----+------------+----+
825 *
826 * Virtual address space of the kernel in both cases:
827 * +------+--------+------+-----+--------+---------------------+-----------
828 * | TEXT | RODATA | DATA | BSS | [SYMS] | [PRELOADED MODULES] | BOOTSTRAP
829 * +------+--------+------+-----+--------+---------------------+-----------
830 *                             (1)      (2)                   (3)
831 *
832 * -------+-------------+
833 * TABLES | ISA I/O MEM |
834 * -------+-------------+
835 *       (4)
836 *
837 * PROC0 STK is obviously not linked as a page level. It just happens to be
838 * caught between L2 and L1.
839 *
840 * Important note: the kernel segments are properly 4k-aligned
841 * (see kern.ldscript), so there's no need to enforce alignment.
842 */
843
844	/* Find end of kernel image; brings us on (1). */
845	movl	$RELOC(__kernel_end),%edi
846
847#if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB)
848	/* Save the symbols (if loaded); brings us on (2). */
849	movl	RELOC(esym),%eax
850	testl	%eax,%eax
851	jz	1f
852	subl	$KERNBASE,%eax
853	movl	%eax,%edi
8541:
855#endif
856
857	/* Skip over any modules/blobs; brings us on (3). */
858	movl	RELOC(eblob),%eax
859	testl	%eax,%eax
860	jz	1f
861	subl	$KERNBASE,%eax
862	movl	%eax,%edi
8631:
864
865	/* We are on (3). Align up for BOOTSTRAP TABLES. */
866	movl	%edi,%esi
867	addl	$PGOFSET,%esi
868	andl	$~PGOFSET,%esi
869
870	/* nkptp[1] = (esi + ~L2_FRAME) >> L2_SHIFT + 1; */
871	movl	%esi,%eax
872	addl	$~L2_FRAME,%eax
873	shrl	$L2_SHIFT,%eax
874	incl	%eax		/* one more PTP for VAs stolen by bootstrap */
8751:	movl	%eax,RELOC(nkptp)+1*4
876
877	/* tablesize = (PDP_SIZE + UPAGES + nkptp[1]) << PGSHIFT; */
878	addl	$(PDP_SIZE+UPAGES),%eax
879#ifdef PAE
880	incl	%eax 		/* one more page for L3 */
881	shll	$PGSHIFT+1,%eax	/* PTP tables are twice larger with PAE */
882#else
883	shll	$PGSHIFT,%eax
884#endif
885	movl	%eax,RELOC(tablesize)
886
887	/* Ensure that nkptp[1] covers BOOTSTRAP TABLES, ie:
888	 * (esi + tablesize) >> L2_SHIFT + 1 < nkptp[1] */
889	addl	%esi,%eax
890	addl	$~L2_FRAME,%eax
891	shrl	$L2_SHIFT,%eax
892	incl	%eax
893	cmpl	%eax,RELOC(nkptp)+1*4
894	jnz	1b
895
896	/* Now, zero out the BOOTSTRAP TABLES (before filling them in). */
897	movl	%esi,%edi
898	xorl	%eax,%eax
899	cld
900	movl	RELOC(tablesize),%ecx
901	shrl	$2,%ecx
902	rep
903	stosl				/* copy eax -> edi */
904
905/*
906 * Build the page tables and levels. We go from L1 to L2/L3, and link the levels
907 * together. Note: RELOC computes &addr - KERNBASE in 32 bits; the value can't
908 * be > 4G, or we can't deal with it anyway, since we are in 32bit mode.
909 */
910	/*
911	 * Build L1.
912	 */
913	leal	(PROC0_PTP1_OFF)(%esi),%ebx
914
915	/* Skip the area below the kernel text. */
916	movl	$(KERNTEXTOFF - KERNBASE),%ecx
917	shrl	$PGSHIFT,%ecx
918	fillkpt_blank
919
920	/* Map the kernel text RX. */
921	movl	$(KERNTEXTOFF - KERNBASE),%eax	/* start of TEXT */
922	movl	$RELOC(__rodata_start),%ecx
923	subl	%eax,%ecx
924	shrl	$PGSHIFT,%ecx
925	orl	$(PTE_P),%eax
926	fillkpt
927
928	/* Map the kernel rodata R. */
929	movl	$RELOC(__rodata_start),%eax
930	movl	$RELOC(__data_start),%ecx
931	subl	%eax,%ecx
932	shrl	$PGSHIFT,%ecx
933	orl	$(PTE_P),%eax
934	fillkpt_nox
935
936	/* Map the kernel data+bss RW. */
937	movl	$RELOC(__data_start),%eax
938	movl	$RELOC(__kernel_end),%ecx
939	subl	%eax,%ecx
940	shrl	$PGSHIFT,%ecx
941	orl	$(PTE_P|PTE_W),%eax
942	fillkpt_nox
943
944	/* Map [SYMS]+[PRELOADED MODULES] RW. */
945	movl	$RELOC(__kernel_end),%eax
946	movl	%esi,%ecx		/* start of BOOTSTRAP TABLES */
947	subl	%eax,%ecx
948	shrl	$PGSHIFT,%ecx
949	orl	$(PTE_P|PTE_W),%eax
950	fillkpt_nox
951
952	/* Map the BOOTSTRAP TABLES RW. */
953	movl	%esi,%eax		/* start of BOOTSTRAP TABLES */
954	movl	RELOC(tablesize),%ecx	/* length of BOOTSTRAP TABLES */
955	shrl	$PGSHIFT,%ecx
956	orl	$(PTE_P|PTE_W),%eax
957	fillkpt_nox
958
959	/* We are on (4). Map ISA I/O MEM RW. */
960	movl	$IOM_BEGIN,%eax
961	movl	$IOM_SIZE,%ecx	/* size of ISA I/O MEM */
962	shrl	$PGSHIFT,%ecx
963	orl	$(PTE_P|PTE_W/*|PTE_PCD*/),%eax
964	fillkpt_nox
965
966	/*
967	 * Build L2 for identity mapping. Linked to L1.
968	 */
969	leal	(PROC0_PDIR_OFF)(%esi),%ebx
970	leal	(PROC0_PTP1_OFF)(%esi),%eax
971	orl	$(PTE_P|PTE_W),%eax
972	movl	RELOC(nkptp)+1*4,%ecx
973	fillkpt
974
975	/* Set up L2 entries for actual kernel mapping */
976	leal	(PROC0_PDIR_OFF + L2_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
977	leal	(PROC0_PTP1_OFF)(%esi),%eax
978	orl	$(PTE_P|PTE_W),%eax
979	movl	RELOC(nkptp)+1*4,%ecx
980	fillkpt
981
982	/* Install recursive top level PDE */
983	leal	(PROC0_PDIR_OFF + PDIR_SLOT_PTE * PDE_SIZE)(%esi),%ebx
984	leal	(PROC0_PDIR_OFF)(%esi),%eax
985	orl	$(PTE_P|PTE_W),%eax
986	movl	$PDP_SIZE,%ecx
987	fillkpt_nox
988
989#ifdef PAE
990	/*
991	 * Build L3. Linked to L2.
992	 */
993	leal	(PROC0_L3_OFF)(%esi),%ebx
994	leal	(PROC0_PDIR_OFF)(%esi),%eax
995	orl	$(PTE_P),%eax
996	movl	$PDP_SIZE,%ecx
997	fillkpt
998
999	/* Enable PAE mode */
1000	movl	%cr4,%eax
1001	orl	$CR4_PAE,%eax
1002	movl	%eax,%cr4
1003#endif
1004
1005	/* Save physical address of L2. */
1006	leal	(PROC0_PDIR_OFF)(%esi),%eax
1007	movl	%eax,RELOC(PDPpaddr)
1008
1009	/*
1010	 * Startup checklist:
1011	 * 1. Load %cr3 with pointer to L2 (or L3 for PAE).
1012	 */
1013	movl	%esi,%eax
1014	movl	%eax,%cr3
1015
1016	/*
1017	 * 2. Set NOX in EFER, if available.
1018	 */
1019	movl	RELOC(nox_flag),%ebx
1020	cmpl	$0,%ebx
1021	je 	skip_NOX
1022	movl	$MSR_EFER,%ecx
1023	rdmsr
1024	xorl	%eax,%eax
1025	orl	$(EFER_NXE),%eax
1026	wrmsr
1027skip_NOX:
1028
1029	/*
1030	 * 3. Enable paging and the rest of it.
1031	 */
1032	movl	%cr0,%eax
1033	orl	$(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP|CR0_AM),%eax
1034	movl	%eax,%cr0
1035
1036	pushl	$begin			/* jump to high mem */
1037	ret
1038
1039begin:
1040	/*
1041	 * We have arrived. There's no need anymore for the identity mapping in
1042	 * low memory, remove it.
1043	 */
1044	movl	_C_LABEL(nkptp)+1*4,%ecx
1045	leal	(PROC0_PDIR_OFF)(%esi),%ebx	/* old, phys address of PDIR */
1046	addl	$(KERNBASE), %ebx		/* new, virt address of PDIR */
1047	killkpt
1048
1049	/* Relocate atdevbase. */
1050	movl	$KERNBASE,%edx
1051	addl	_C_LABEL(tablesize),%edx
1052	addl	%esi,%edx
1053	movl	%edx,_C_LABEL(atdevbase)
1054
1055	/* Set up bootstrap stack. */
1056	leal	(PROC0_STK_OFF+KERNBASE)(%esi),%eax
1057	movl	%eax,_C_LABEL(lwp0uarea)
1058	leal	(USPACE-FRAMESIZE)(%eax),%esp
1059	movl	%esi,PCB_CR3(%eax)	/* pcb->pcb_cr3 */
1060	xorl	%ebp,%ebp		/* mark end of frames */
1061
1062#if defined(MULTIBOOT)
1063	/* It is now safe to parse the Multiboot information structure
1064	 * we saved before from C code.  Note that we cannot delay its
1065	 * parsing any more because initgdt (called below) needs to make
1066	 * use of this information.
	 * We call both multiboot 1 and 2 flavors; they know if they
	 * have something to do on their own.
1069	 */
1070	call	_C_LABEL(multiboot1_post_reloc)
1071	call 	_C_LABEL(multiboot2_post_reloc)
1072#endif
1073
1074	/*
1075	 * Initialize a temporary GDT (Global Descriptor Table) on the
	 * stack and make the segment registers use it.
1077	 *
1078	 * This creates a segment descriptor for the CPU-local segment
1079	 * and loads %fs with its segment selector to set up addressing
1080	 * for %fs.  Thus, after this point, CPUVAR(...), curcpu(), and
1081	 * curlwp will work.
1082	 *
1083	 * Later, we will replace this temporary GDT on the stack by a
1084	 * permanent GDT allocated with uvm_km in gdt_init.
1085	 *
1086	 * XXX Intel recommends ensuring the GDT address is aligned on
1087	 * an 8-byte boundary for performance.  Perhaps not an issue
1088	 * early at boot, but maybe worth doing?
1089	 *
1090	 *	Intel 64 and IA-32 Architectures, Software Developer's
1091	 *	Manual, Volume 3: System Programming Guide, Order
1092	 *	Number 325383, April 2022, Sec. 3.5.1 `Segment
1093	 *	Descriptor Tables', p. 3-15:
1094	 *
1095	 *		The base address of the GDT should be aligned
1096	 *		on an eight-byte boundary to yield the best
1097	 *		processor performance.
1098	 */
1099	subl	$NGDT*8, %esp		/* space for temporary gdt */
1100	pushl	%esp
1101	call	_C_LABEL(initgdt)
1102	addl	$4,%esp
1103
1104	movl	_C_LABEL(tablesize),%eax
1105	addl	%esi,%eax		/* skip past stack and page tables */
1106
1107#ifdef PAE
1108	pushl	$0	/* init386() expects a 64 bits paddr_t with PAE */
1109#endif
1110	pushl	%eax
1111#if defined(XEN) && !defined(XENPV)
1112        call    _C_LABEL(init_xen_early)
1113#endif
1114	call	_C_LABEL(init_bootspace)
1115	call	_C_LABEL(init386)
1116	addl	$PDE_SIZE,%esp		/* pop paddr_t */
1117	addl	$NGDT*8,%esp		/* pop temporary gdt */
1118
1119	call 	_C_LABEL(main)
1120#else /* XENPV */
1121	/* First, reset the PSL. */
1122	pushl	$PSL_MBO
1123	popfl
1124
1125	cld
1126
1127	/*
1128	 * Xen info:
1129	 * - %esp -> stack, *theoretically* the last used page by Xen bootstrap
1130	 */
1131	movl	%esp,%ebx
1132	movl	$_RELOC(tmpstk),%esp
1133
1134	/* Clear BSS. */
1135	xorl	%eax,%eax
1136	movl	$RELOC(__bss_start),%edi
1137	movl	$RELOC(_end),%ecx
1138	subl	%edi,%ecx
1139	rep
1140	stosb
1141
1142	/* Copy the necessary stuff from start_info structure. */
1143	/* We need to copy shared_info early, so that sti/cli work */
1144	movl	$RELOC(start_info_union),%edi
1145	movl	$(PAGE_SIZE / 4),%ecx
1146	rep
1147	movsl
1148
1149	/* Clear segment registers. */
1150	xorl	%eax,%eax
1151	movw	%ax,%fs
1152	movw	%ax,%gs
1153
1154	xorl	%eax,%eax
1155	cpuid
1156	movl	%eax,RELOC(cpuid_level)
1157
1158	movl    $VM_GUEST_XENPV, RELOC(vm_guest)
1159
1160	/*
1161	 * Use a temporary GDT page. We'll re-add it to uvm(9) once we're done
1162	 * using it.
1163	 */
1164	movl	$RELOC(tmpgdt),%eax
1165	pushl	%eax		/* start of temporary gdt */
1166	call	_C_LABEL(initgdt)
1167	addl	$4,%esp
1168
1169	call	xen_locore
1170
1171	/*
1172	 * The first VA available is returned by xen_locore in %eax. We
1173	 * use it as the UAREA, and set up the stack here.
1174	 */
1175	movl	%eax,%esi
1176	movl	%esi,_C_LABEL(lwp0uarea)
1177	leal	(USPACE-FRAMESIZE)(%eax),%esp
1178	xorl	%ebp,%ebp		/* mark end of frames */
1179
1180	/* Set first_avail after the DUMMY PAGE (see xen_locore). */
1181	addl	$(USPACE+PAGE_SIZE),%esi
1182	subl	$KERNBASE,%esi		/* init386 wants a physical address */
1183
1184	pushl	$0	/* init386() expects a 64 bits paddr_t with PAE */
1185	pushl	%esi
1186	call	_C_LABEL(init_bootspace)
1187	call	_C_LABEL(init386)
1188	addl	$PDE_SIZE,%esp		/* pop paddr_t */
1189	call 	_C_LABEL(main)
1190#endif /* XENPV */
1191END(start)
1192
1193#if defined(XEN)
1194#ifndef XENPV
/*
 * start_xenpvh: kernel entry point when booted as a Xen PVH guest.
 *
 * On entry %ebx holds the physical address of the hvm_start_info
 * structure (NOTE(review): assumed from the Xen PVH boot ABI --
 * confirm; the code below only shows that %ebx points at a structure
 * we record and carve pages after).  We arrive with no usable GDT or
 * stack of our own, so build both before doing anything else.
 */
ENTRY(start_xenpvh)
	/* Xen doesn't start us with a valid gdt; install a minimal one. */
	movl    $RELOC(gdtdesc_xenpvh), %eax
	lgdt    (%eax)
	/* Far jump to reload %cs from the new GDT. */
	jmp     $GSEL(GCODE_SEL, SEL_KPL), $RELOC(.Lreload_cs)

.Lreload_cs:
	/* Reload the data segment registers from the new GDT. */
	movw    $GSEL(GDATA_SEL, SEL_KPL), %ax
	movw    %ax, %ds
	movw    %ax, %es
	movw    %ax, %ss

	/* We need a valid stack. */
	movl	$RELOC(tmpstk),%esp

	/* Clear BSS. */
        xorl    %eax,%eax
	movl    $RELOC(__bss_start),%edi
	movl    $RELOC(_end),%ecx
	subl    %edi,%ecx
	rep
	stosb

	/*
	 * Save the address of the hvm_start_info structure.  This is
	 * also the end of the symbol table.
	 */
	movl	%ebx, RELOC(hvm_start_paddr)
	/* esym = the same address as a virtual address (+ KERNBASE). */
	movl	%ebx, %eax
	addl	$KERNBASE,%eax
	movl	$RELOC(esym),%ebp
	movl	%eax,(%ebp)
	/* Carve out the next page-aligned page for HYPERVISOR_shared_info. */
	addl	$PAGE_SIZE, %ebx
	addl	$PGOFSET,%ebx
	andl	$~PGOFSET,%ebx
	movl	$RELOC(HYPERVISOR_shared_info_pa),%ebp
	movl	%ebx,(%ebp)
	/* XXX assume hvm_start_info+dependent structures fit in a single page */
	addl	$PAGE_SIZE, %ebx
	addl	$PGOFSET,%ebx
	andl	$~PGOFSET,%ebx
	addl	$KERNBASE,%ebx
	/* eblob = first free virtual address past everything laid out above. */
	movl	$RELOC(eblob),%ebp
	movl	%ebx,(%ebp)
	/* Announce ourselves to the rest of the kernel. */
	movl	$VM_GUEST_XENPVH, RELOC(vm_guest)
	jmp	.Lstart_common
END(start_xenpvh)
	.align 8
/*
 * gdtdesc_xenpvh: operand for the lgdt in start_xenpvh -- 16-bit
 * limit, 32-bit (physical, hence RELOC) base address, pad word.
 */
gdtdesc_xenpvh:
	.word	gdt_xenpvhend - gdt_xenpvh
	.long	RELOC(gdt_xenpvh)
	.word	0
/*
 * gdt_xenpvh: minimal boot GDT -- null descriptor plus flat 4 GB
 * 32-bit ring-0 code and data segments.
 */
gdt_xenpvh:
	.long   0			# null descriptor
	.long   0
	.long   0x0000ffff		# %cs
	.long   0x00cf9a00
	.long   0x0000ffff		# %ds, %es, %ss
	.long   0x00cf9200
gdt_xenpvhend:
	.align 4
1259#endif /* !XENPV */
1260
1261
/*
 * Space for the hypercall call page: one page of 32-byte slots, one
 * per hypercall number.  NOTE(review): presumably overwritten with
 * real stubs by the hypervisor once registered -- confirm against the
 * Xen hypercall-page interface.  As assembled here, the slot for
 * HYPERVISOR_xen_version() simply returns -1.
 */
#define HYPERCALL_PAGE_OFFSET 0x1000
.align HYPERCALL_PAGE_OFFSET
ENTRY(hypercall_page) /* Returns -1, on HYPERVISOR_xen_version() */
/* NOP-fill up to the HYPERVISOR_xen_version slot (32 bytes per slot). */
.skip	(__HYPERVISOR_xen_version*32), 0x90
	movl	$-1, %eax
	retl
/* Pad the remainder of the page with NOPs (0x90). */
.align HYPERCALL_PAGE_OFFSET, 0x90
END(hypercall_page)
1271
1272#ifdef XENPV
/*
 * void lgdt_finish(void);
 * Finish loading a new GDT pointer (do any necessary cleanup).
 * XXX It's somewhat questionable whether reloading all the segment registers
 * is necessary, since the actual descriptor data is not changed except by
 * process creation and exit, both of which clean up via task switches.  OTOH,
 * this only happens at run time when the GDT is resized.
 */
/* LINTSTUB: Func: void lgdt_finish(void) */
ENTRY(lgdt_finish)
	/* Reload the data segment registers from the new GDT. */
	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
	movw	%ax,%ds
	movw	%ax,%es
	movw	%ax,%gs
	movw	%ax,%ss
	/* %fs addresses the CPU-local segment (GCPU_SEL). */
	movl	$GSEL(GCPU_SEL, SEL_KPL),%eax
	movw	%ax,%fs
	/* Reload code selector by doing intersegment return. */
	popl	%eax
	pushl	$GSEL(GCODE_SEL, SEL_KPL)
	pushl	%eax
	lret
END(lgdt_finish)
1296
1297#endif /* XENPV */
1298#endif /* XEN */
1299
/*
 * void lwp_trampoline(void);
 *
 * This is a trampoline function pushed onto the stack of a newly created
 * process in order to do some additional setup.  The trampoline is entered by
 * cpu_switchto()ing to the process, so we abuse the callee-saved
 * registers used by cpu_switchto() to store the information about the
 * stub to call.
 * NOTE: This function does not have a normal calling sequence!
 *
 * Register roles on entry (NOTE(review): set up by cpu_lwp_fork --
 * confirm against "struct switchframe" in <machine/frame.h>):
 *	%eax	previous lwp (cpu_switchto()'s return value)
 *	%ebp	this (new) lwp; .Lsyscall_checkast expects it in %edi
 *	%esi	stub function to call
 *	%ebx	argument for the stub
 */
ENTRY(lwp_trampoline)
	movl	%ebp,%edi	/* for .Lsyscall_checkast */
	xorl	%ebp,%ebp	/* mark end of frames */
	/* lwp_startup(%eax, %edi) -- cdecl: last push is the first arg. */
	pushl	%edi
	pushl	%eax
	call	_C_LABEL(lwp_startup)
	addl	$8,%esp
	/* Invoke the stub with its single argument. */
	pushl	%ebx
	call	*%esi
	addl	$4,%esp
	/* Return to user mode through the syscall AST-check path. */
	jmp	.Lsyscall_checkast
	/* NOTREACHED */
END(lwp_trampoline)
1323
/*
 * sigcode()
 *
 * Signal trampoline; copied to top of user stack.  Used only for
 * compatibility with old releases of NetBSD.  Runs in user mode and
 * enters the kernel via int $0x80.
 */
ENTRY(sigcode)
	/*
	 * Handler has returned here as if we called it.  The sigcontext
	 * is on the stack after the 3 args "we" pushed.
	 */
	leal	12(%esp),%eax		/* get pointer to sigcontext */
	movl	%eax,4(%esp)		/* put it in the argument slot */
					/* fake return address already there */
	movl	$SYS_compat_16___sigreturn14,%eax
	int	$0x80	 		/* enter kernel with args on stack */
	movl	$SYS_exit,%eax
	int	$0x80			/* exit if sigreturn fails */
	.globl	_C_LABEL(esigcode)
/* esigcode: end-of-sigcode marker */
_C_LABEL(esigcode):
END(sigcode)
1345
/*
 * int setjmp(label_t *l)
 *
 * Capture the callee-saved registers (%ebx/%esp/%ebp/%esi/%edi) and
 * our return address into *l, then return 0.  A later longjmp() on
 * the same label_t resumes at our caller with return value 1.
 *
 * Used primarily by DDB.
 */
ENTRY(setjmp)
	movl	4(%esp),%ecx		/* %ecx = l */
	movl	(%esp),%edx		/* grab return address */
	movl	%edx,20(%ecx)		/* save eip */
	movl	%edi,16(%ecx)		/* save edi */
	movl	%esi,12(%ecx)		/* save esi */
	movl	%ebp,8(%ecx)		/* save ebp */
	movl	%esp,4(%ecx)		/* save esp */
	movl	%ebx,(%ecx)		/* save ebx */
	xorl	%eax,%eax		/* direct return gives 0 */
	ret
END(setjmp)
1363
/*
 * int longjmp(label_t *l)
 *
 * Restore the register state captured by setjmp(l) and resume at the
 * saved return address, so the original setjmp() call appears to
 * return a second time with value 1.
 *
 * Used primarily by DDB.
 */
ENTRY(longjmp)
	movl	4(%esp),%edx		/* %edx = l */
	movl	(%edx),%ebx		/* restore ebx */
	movl	8(%edx),%ebp		/* restore ebp */
	movl	12(%edx),%esi		/* restore esi */
	movl	16(%edx),%edi		/* restore edi */
	movl	4(%edx),%esp		/* switch to the saved stack */
	movl	20(%edx),%ecx		/* saved eip ... */
	movl	%ecx,(%esp)		/* ... becomes our return address */
	movl	$1,%eax			/* setjmp's caller sees 1 */
	ret
END(longjmp)
1381
/*
 * void dumpsys(void)
 *
 * Mimic cpu_switchto() for postmortem debugging: build a fake
 * switchframe, record it in dumppcb, and run the dump.
 */
ENTRY(dumpsys)
	pushl	%ebx			/* set up fake switchframe */
	pushl	%esi			/* and save context */
	pushl	%edi
	/* Record stack/frame pointers in dumppcb for the dump code. */
	movl	%esp,_C_LABEL(dumppcb)+PCB_ESP
	movl	%ebp,_C_LABEL(dumppcb)+PCB_EBP
	call	_C_LABEL(dodumpsys)	/* dump! */
	addl	$(3*4), %esp		/* unwind switchframe */
	ret
END(dumpsys)
1397
/*
 * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp,
 *     bool returning)
 *
 *	1. save context of oldlwp.
 *	2. restore context of newlwp.
 *
 * Returns oldlwp in %eax.
 *
 * Note that the stack frame layout is known to "struct switchframe" in
 * <machine/frame.h> and to the code in cpu_lwp_fork() which initializes
 * it for a new lwp.
 *
 * Register roles below: %esi = oldlwp, %edi = newlwp, %edx = returning,
 * %ebx = newlwp's PCB.
 */
ENTRY(cpu_switchto)
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	movl	16(%esp),%esi		/* oldlwp */
	movl	20(%esp),%edi		/* newlwp */
	movl	24(%esp),%edx		/* returning */

	/* Save old context. */
	movl	L_PCB(%esi),%eax
	movl	%esp,PCB_ESP(%eax)
	movl	%ebp,PCB_EBP(%eax)

	/* Switch to newlwp's stack. */
	movl	L_PCB(%edi),%ebx
	movl	PCB_EBP(%ebx),%ebp
	movl	PCB_ESP(%ebx),%esp

	/*
	 * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
	 * order to coordinate mutex_exit on this CPU with
	 * mutex_vector_enter on another CPU.
	 *
	 * 1. Any prior mutex_exit by oldlwp must be visible to other
	 *    CPUs before we set ci_curlwp := newlwp on this one,
	 *    requiring a store-before-store barrier.
	 *
	 *    (This is always guaranteed by the x86 memory model, TSO,
	 *    but other architectures require an explicit barrier before
	 *    the store to ci->ci_curlwp.)
	 *
	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
	 *    before any subsequent mutex_exit by newlwp can even test
	 *    whether there might be waiters, requiring a
	 *    store-before-load barrier.
	 *
	 *    (This is the only ordering x86 TSO ever requires any kind
	 *    of barrier for -- in this case, we take advantage of the
	 *    sequential consistency implied by XCHG to obviate the
	 *    need for MFENCE or something.)
	 *
	 * See kern_mutex.c for details -- this is necessary for
	 * adaptive mutexes to detect whether the lwp is on the CPU in
	 * order to safely block without requiring atomic r/m/w in
	 * mutex_exit.
	 */
	movl	%edi,%ecx
	xchgl	%ecx,CPUVAR(CURLWP)

#ifdef XENPV
	/*
	 * If we are here, we're obviously not in user context.
	 * Reset ci_xen_clockf_* in case the splx() at the end of
	 * mi_switch() triggers a deferred call to xen_timer_handler().
	 */
	movb	$0, CPUVAR(XEN_CLOCKF_USERMODE)
	movl	$_C_LABEL(cpu_switchto), CPUVAR(XEN_CLOCKF_PC)
#endif

	/* Skip the rest if returning to a pinned LWP. */
	testl	%edx,%edx
	jnz	switch_return

	/* Switch ring0 stack */
#ifdef XENPV
	pushl	%edi
	call	_C_LABEL(i386_switch_context)
	addl	$4,%esp
#else
	movl	PCB_ESP0(%ebx),%eax
	movl	CPUVAR(TSS),%ecx
	movl	%eax,TSS_ESP0(%ecx)
#endif

	/* Switch the dbregs: x86_dbregs_switch(oldlwp, newlwp). */
	pushl	%edi
	pushl	%esi
	call	_C_LABEL(x86_dbregs_switch)
	addl	$8,%esp

	/* Switch the FPU; 'returning' (%edx) is preserved around the call. */
	pushl	%edx
	pushl	%edi
	pushl	%esi
	call	_C_LABEL(fpu_switch)
	addl	$8,%esp
	popl	%edx

	/* Don't bother with the rest if switching to a system process. */
	testl	$LW_SYSTEM,L_FLAG(%edi)
	jnz	switch_return

#ifndef XENPV
	/* Restore thread-private %fs/%gs descriptors into the GDT. */
	movl	CPUVAR(GDT),%ecx
	movl	PCB_FSD(%ebx),%eax
	movl	PCB_FSD+4(%ebx),%edx
	movl	%eax,(GUFS_SEL*8)(%ecx)
	movl	%edx,(GUFS_SEL*8+4)(%ecx)
	movl	PCB_GSD(%ebx),%eax
	movl	PCB_GSD+4(%ebx),%edx
	movl	%eax,(GUGS_SEL*8)(%ecx)
	movl	%edx,(GUGS_SEL*8+4)(%ecx)
#endif /* !XENPV */

	/* Switch I/O bitmap: copy the PCB's if present, else invalidate. */
	movl	PCB_IOMAP(%ebx),%eax
	orl	%eax,%eax
	jnz	.Lcopy_iobitmap
	movl	CPUVAR(TSS),%eax
	movl	$(IOMAP_INVALOFF << 16),TSS_IOBASE(%eax)
.Liobitmap_done:

	/* Is this process using RAS (restartable atomic sequences)? */
	movl	L_PROC(%edi),%eax
	cmpl	$0,P_RASLIST(%eax)
	je	no_RAS

	/*
	 * Handle restartable atomic sequences (RAS): ras_lookup() maps
	 * the trapframe's %eip to a replacement, or returns -1.
	 */
	movl	L_MD_REGS(%edi),%ecx
	pushl	TF_EIP(%ecx)
	pushl	%eax
	call	_C_LABEL(ras_lookup)
	addl	$8,%esp
	cmpl	$-1,%eax		/* -1: eip not in any sequence */
	je	no_RAS
	movl	L_MD_REGS(%edi),%ecx
	movl	%eax,TF_EIP(%ecx)
no_RAS:

#ifdef XENPV
	pushl	%edi
	call	_C_LABEL(i386_tls_switch)
	addl	$4,%esp
#endif

switch_return:
	/* Return to the new LWP, returning 'oldlwp' in %eax. */
	movl	%esi,%eax
	popl	%edi
	popl	%esi
	popl	%ebx
	ret

.Lcopy_iobitmap:
	/* Copy the PCB's I/O bitmap into the TSS and mark it valid. */
	incl	_C_LABEL(pmap_iobmp_evcnt)+EV_COUNT
	movl	$(IOMAPSIZE/4),%ecx
	pushl	%esi
	pushl	%edi
	movl	%eax,%esi		/* pcb_iomap */
	movl	CPUVAR(TSS),%edi
	leal	TSS_IOMAP(%edi),%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	movl	CPUVAR(TSS),%eax
	movl	$(IOMAP_VALIDOFF << 16),TSS_IOBASE(%eax)
	jmp	.Liobitmap_done
END(cpu_switchto)
1570
/*
 * void savectx(struct pcb *pcb);
 *
 * Update pcb, saving current processor state: the stack pointer and
 * the frame pointer.
 */
ENTRY(savectx)
	movl	4(%esp),%eax		/* %eax = pcb */
	movl	%ebp,PCB_EBP(%eax)	/* record frame pointer */
	movl	%esp,PCB_ESP(%eax)	/* record stack pointer */
	ret
END(savectx)
1582
/*
 * syscall()
 *
 * Trap gate entry for syscall.  Builds a trapframe, dispatches to the
 * process's syscall handler, then loops at .Lsyscall_checkast until no
 * ASTs or pmap loads remain before returning to user mode.
 */
IDTVEC(syscall)
	pushl	$2		/* size of instruction for restart */
	pushl	$T_ASTFLT	/* trap # for doing ASTs */
	INTRENTRY
	STI(%eax)

#ifdef DIAGNOSTIC
	/* Syscalls must enter at IPL 0; panic (message 5f) otherwise. */
	movzbl	CPUVAR(ILEVEL),%ebx
	testl	%ebx,%ebx
	jz	1f
	pushl	$5f
	call	_C_LABEL(panic)
	addl	$4,%esp
#ifdef DDB
	int	$3
#endif
1:
#endif /* DIAGNOSTIC */

	/* 64-bit increment of the per-CPU syscall counter. */
	addl	$1,CPUVAR(NSYSCALL)	/* count it atomically */
	adcl	$0,CPUVAR(NSYSCALL)+4	/* count it atomically */
	movl	CPUVAR(CURLWP),%edi	/* %edi = curlwp from here on */
	movl	L_PROC(%edi),%edx
	movl	%esp,L_MD_REGS(%edi)	/* save pointer to frame */
	pushl	%esp
	call	*P_MD_SYSCALL(%edx)	/* get pointer to syscall() function */
	addl	$4,%esp
.Lsyscall_checkast:
	/* Check for ASTs on exit to user mode. */
	CLI(%eax)
	movl	L_MD_ASTPENDING(%edi), %eax
	orl	CPUVAR(WANT_PMAPLOAD), %eax
	jnz	9f

	HANDLE_DEFERRED_FPU

#ifdef XENPV
	/* Check for pending Xen events (see STIC/stipending). */
	STIC(%eax)
	jz	14f
	call	_C_LABEL(stipending)
	testl	%eax,%eax
	jz	14f
	/* process pending interrupts */
	CLI(%eax)
	movzbl	CPUVAR(ILEVEL), %ebx
	movl	$.Lsyscall_resume, %esi /* address to resume loop at */
.Lsyscall_resume:
	movl	%ebx,%eax		/* get cpl */
	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
	andl	CPUVAR(IPENDING),%eax	/* any non-masked bits left? */
	jz	17f
	bsrl	%eax,%eax		/* highest-numbered pending source */
	btrl	%eax,CPUVAR(IPENDING)
	movl	CPUVAR(ISOURCES)(,%eax,4),%eax
	jmp	*IS_RESUME(%eax)
17:	movb	%bl, CPUVAR(ILEVEL)	/* restore cpl  */
	jmp	.Lsyscall_checkast
14:
#endif /* XENPV */

#ifdef DIAGNOSTIC
	cmpb	$IPL_NONE,CPUVAR(ILEVEL)
	jne	3f
#endif

	INTRFASTEXIT

#ifdef DIAGNOSTIC
3:	STI(%eax)
	pushl	$4f
	call	_C_LABEL(panic)
	addl	$4,%esp
	pushl	$IPL_NONE
	call	_C_LABEL(spllower)
	addl	$4,%esp
	jmp	.Lsyscall_checkast
4:	.asciz	"SPL NOT LOWERED ON SYSCALL EXIT\n"
5:	.asciz	"SPL NOT ZERO ON SYSCALL ENTRY\n"
#endif

9:
	/* An AST is pending and/or a pmap load is wanted. */
	cmpl	$0, CPUVAR(WANT_PMAPLOAD)
	jz	10f
	STI(%eax)
	call	_C_LABEL(pmap_load)
	jmp	.Lsyscall_checkast	/* re-check ASTs */
10:
	/* Always returning to user mode here. */
	movl	$0, L_MD_ASTPENDING(%edi)
	STI(%eax)
	/* Pushed T_ASTFLT into tf_trapno on entry. */
	pushl	%esp
	call	_C_LABEL(trap)
	addl	$4,%esp
	jmp	.Lsyscall_checkast	/* re-check ASTs */
IDTVEC_END(syscall)
1684
/*
 * int npx586bug1(int a, int b)
 * Used when checking for the FDIV bug on first generations pentiums.
 * Anything 120MHz or above is fine.
 *
 * Computes a - (a/b)*b with x87 arithmetic and returns it rounded to
 * an int; a buggy FDIV produces a detectably wrong value.
 */
ENTRY(npx586bug1)
	fildl	4(%esp)		/* x */
	fildl	8(%esp)		/* y */
	fld	%st(1)
	fdiv	%st(1),%st	/* x/y */
	fmulp	%st,%st(1)	/* (x/y)*y */
	fsubrp	%st,%st(1)	/* x-(x/y)*y */
	pushl	$0
	fistpl	(%esp)		/* round the result to int on the stack */
	popl	%eax
	ret
END(npx586bug1)
1702
/*
 * intrfastexit: restore the interrupted register state from the
 * trapframe at %esp and return from the trap/interrupt with iret.
 */
ENTRY(intrfastexit)
	/* Segment registers first, then the general-purpose registers. */
	movw	TF_GS(%esp),%gs
	movw	TF_FS(%esp),%fs
	movw	TF_ES(%esp),%es
	movw	TF_DS(%esp),%ds
	movl	TF_EDI(%esp),%edi
	movl	TF_ESI(%esp),%esi
	movl	TF_EBP(%esp),%ebp
	movl	TF_EBX(%esp),%ebx
	movl	TF_EDX(%esp),%edx
	movl	TF_ECX(%esp),%ecx
	movl	TF_EAX(%esp),%eax
	/* Discard the saved registers plus the two words (tf_trapno/tf_err)
	 * pushed at trap entry, leaving the iret frame on top. */
	addl	$(TF_PUSHSIZE+8),%esp
	iret
END(intrfastexit)
1718
	.section .rodata

	/*
	 * Hotpatch templates: tiny instruction sequences copied over
	 * existing kernel code at runtime.  Each template is bracketed
	 * by a start label and an _end label so the patcher can
	 * compute its size.
	 */

LABEL(hp_nolock)
	nop			/* single-byte no-op */
LABEL(hp_nolock_end)

LABEL(hp_retfence)
	lfence			/* load fence */
LABEL(hp_retfence_end)

LABEL(hp_clac)
	clac			/* clear EFLAGS.AC (SMAP) */
LABEL(hp_clac_end)

LABEL(hp_stac)
	stac			/* set EFLAGS.AC (SMAP) */
LABEL(hp_stac_end)
