/*	$NetBSD: startprog64.S,v 1.3 2017/02/11 10:23:39 nonaka Exp $	*/
/*	NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp	*/

/* starts program in protected mode / flat space
 with given stackframe
 needs global variables flatcodeseg and flatdataseg
 (gdt offsets)
  derived from: NetBSD:sys/arch/i386/boot/asm.S
 */

/*
 * Ported to boot 386BSD by Julian Elischer (julian@tfs.com) Sept 1992
 *
 * Mach Operating System
 * Copyright (c) 1992, 1991 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
  Copyright 1988, 1989, 1990, 1991, 1992
   by Intel Corporation, Santa Clara, California.

                All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appears in all
copies and that both the copyright notice and this permission notice
appear in supporting documentation, and that the name of Intel
not be used in advertising or publicity pertaining to distribution
of the software without specific, written prior permission.

INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#include <machine/asm.h>
#include <machine/specialreg.h>

#define	CODE_SEGMENT	0x08
#define	DATA_SEGMENT	0x10

	.align	16
	.globl _C_LABEL(startprog64)
_C_LABEL(startprog64):
	.quad 0

	.globl _C_LABEL(startprog64_size)
_C_LABEL(startprog64_size):
	.long startprog64_end - _C_LABEL(startprog64_start)
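
	/*
	 * startprog64 above is a pointer filled in at run time and
	 * startprog64_size is the number of bytes from startprog64_start
	 * to startprog64_end.  Presumably the boot loader uses these to
	 * copy this position-independent trampoline somewhere safe and
	 * to record where it put it; that usage is an assumption, not
	 * something spelled out in this file.
	 */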

	.text
	.p2align 4,,15

/*
 * startprog64(loadaddr, entry, stack, kern_load, kern_start, kern_size)
 */
ENTRY(startprog64_start)
start:
	/*
	 * This function calls the loaded kernel's start() in 32-bit
	 * segmented mode, switching down from x64 (long) mode.
	 * %rdi: kernel start address
	 * %rsi: loaded kernel address
	 * %rdx: stack address
	 * %rcx: loaded kernel size
	 * %r8 : loaded start address
	 * %r9 : kernel entry address
	 */
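
	/*
	 * The arguments arrive in the SysV AMD64 integer argument
	 * registers, i.e. in order: %rdi, %rsi, %rdx, %rcx, %r8 and %r9.
	 */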

	cld		/* LynxOS depends on it */

	cli

	/* Copy kernel */
	mov	%rcx, %r12		/* original kernel size */
	movq	%rdi, %r11		/* for misaligned check */

#if !defined(NO_OVERLAP)
	movq	%rdi, %r13
	subq	%rsi, %r13
#endif
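	/*
	 * %r13 = dst - src.  The comparison below is unsigned, so
	 * "dst - src < size" holds only when the destination starts
	 * inside the source buffer; in that case the copy must run
	 * backwards, otherwise copying forwards is safe.
	 */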

	shrq	$3, %rcx		/* count for copy by words */
	jz	8f			/* j if less than 8 bytes */

	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
	mov	-8(%rsi, %r12), %r15	/* get last word */
#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13		/* overlapping? */
	jb	10f
#endif

/*
 * Non-overlapping, copy forwards.
 * Newer Intel CPUs (Nehalem) will do 16-byte read/write transfers
 * if %ecx is more than 76.
 * AMD might do something similar some day.
 */
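/*
 * %rcx already holds size >> 3, so rep movsq copies every full 8-byte
 * word.  The 1-7 byte tail (if any) is handled by storing the saved
 * last word (%r15) at the last-word address (%r14); that store may
 * overlap bytes just copied, which is harmless, and avoids a byte loop.
 */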
	and	$7, %r11		/* destination misaligned ? */
	jnz	2f
	rep
	movsq
	mov	%r15, (%r14)		/* write last word */
	jmp	.Lcopy_done

/*
 * Destination misaligned.
 * AMD says it is better to align the destination (not the source).
 * This will also re-align copies if the source and destination are
 * both misaligned by the same amount.
 * (I think Nehalem will use its accelerated copy if the source
 * and destination have the same alignment.)
 */
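/*
 * Worked example (illustrative): for a 32-byte copy whose destination
 * is misaligned by 3, the first word is stored unaligned at the original
 * destination, both pointers advance by 8 - 3 = 5 bytes so the
 * destination becomes aligned, and -9(%r11, %r12) = 3 + 32 - 9 = 26
 * gives 26 >> 3 = 3 aligned words for rep movsq; the saved last word
 * covers the remaining tail.
 */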
2:
	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
	neg	%r11			/* now -1 .. -7 */
	mov	(%rsi), %r12		/* get first word */
	mov	%rdi, %r13		/* target for first word */
	lea	8(%rsi, %r11), %rsi
	lea	8(%rdi, %r11), %rdi
	shr	$3, %rcx
	rep
	movsq
	mov	%r12, (%r13)		/* write first word */
	mov	%r15, (%r14)		/* write last word */
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/* Must copy backwards.
 * A reverse copy loop would probably be easy to code faster than
 * 'rep movsq', since that requires (IIRC) an extra clock every 3
 * iterations (AMD).  However I don't suppose anything cares that much!
 * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
 * The copy is aligned with the buffer start (more likely to
 * be a multiple of 8 than the end).
 */
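/*
 * Only whole 8-byte words are copied backwards here; the final,
 * possibly partial, word was read into %r15 before anything was
 * written, so storing it at %r14 afterwards is safe even though the
 * buffers overlap.
 */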
10:
	lea	-8(%rsi, %rcx, 8), %rsi
	lea	-8(%rdi, %rcx, 8), %rdi
	std
	rep
	movsq
	cld
	mov	%r15, (%r14)	/* write last word */
	jmp	.Lcopy_done
#endif

/* Less than 8 bytes to copy, copy by bytes */
/* Intel Nehalem optimises 'rep movsb' for <= 7 bytes (9-15 clocks).
 * For longer transfers it is 50+!
 */
8:	mov	%r12, %rcx

#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	81f
#endif

	/* nope, copy forwards. */
	rep
	movsb
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/* Must copy backwards */
81:
	lea	-1(%rsi, %rcx), %rsi
	lea	-1(%rdi, %rcx), %rdi
	std
	rep
	movsb
	cld
#endif
	/* End of copy kernel */
.Lcopy_done:

	mov	%r8, %rdi	/* %rdi: loaded start address */
	mov	%r9, %rsi	/* %rsi: kernel entry address */
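
	/*
	 * Everything below runs at the address this trampoline was copied
	 * to rather than its link address, so all references are formed as
	 * (symbol - start)(%rdi), with %rdi holding the runtime start.
	 */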

	/* Prepare jump address */
	lea	(start32a - start)(%rdi), %rax
	movl	%eax, (start32r - start)(%rdi)

	/* Setup GDT */
	lea	(gdt - start)(%rdi), %rax
	mov	%rax, (gdtrr - start)(%rdi)
	lgdt	(gdtr - start)(%rdi)

	/* Jump to set %cs */
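	/*
	 * start32r (below) holds a far pointer: the 32-bit offset of
	 * start32a, stored above, followed by the CODE_SEGMENT selector.
	 * The indirect ljmp through it reloads %cs and continues at
	 * start32a as 32-bit (compatibility mode) code.
	 */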
	ljmp	*(start32r - start)(%rdi)

	.align	4
	.code32
start32a:
	movl	$DATA_SEGMENT, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

	movl	%edx, %esp	/* switch to the stack supplied by the caller */

	/* Disable Paging in CR0 */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0
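
	/*
	 * With %cs now a 32-bit (compatibility mode) segment, clearing
	 * CR0.PG deactivates long mode: the CPU returns to legacy
	 * protected mode and clears EFER.LMA by itself.  Only then is it
	 * safe to turn off PAE below.
	 */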

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4

	jmp	start32b	/* flush prefetched instructions after the mode switch */

	.align	4
start32b:
	xor	%eax, %eax
	call	*%esi		/* call the kernel's 32-bit entry point */

	.align	16
start32r:
	.long	0		/* offset of start32a, filled in at run time */
	.long	CODE_SEGMENT	/* selector for the far jump */
	.align	16
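/*
 * Temporary GDT: a null descriptor, a flat 32-bit code segment
 * (selector 0x08, CODE_SEGMENT) and a flat 32-bit data segment
 * (selector 0x10, DATA_SEGMENT), each with base 0 and a 4GB limit.
 */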
gdt:
	.long	0, 0						/* null descriptor */
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00	/* 0x08: 32-bit code */
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00	/* 0x10: 32-bit data */
gdtr:
	.word	gdtr - gdt
gdtrr:
	.quad	0	/* GDT base, filled in at run time */
start32end:
	/* Space for the stack */
	.align	16
	.space	8192
startprog64_end:
