/*	$NetBSD: startprog64.S,v 1.3 2017/02/11 10:23:39 nonaka Exp $	*/
/*	NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp	*/

/* starts program in protected mode / flat space
 with given stackframe
 needs global variables flatcodeseg and flatdataseg
  (gdt offsets)
 derived from: NetBSD:sys/arch/i386/boot/asm.S
 */

/*
 * Ported to boot 386BSD by Julian Elischer (julian@tfs.com) Sept 1992
 *
 * Mach Operating System
 * Copyright (c) 1992, 1991 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *	Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *	School of Computer Science
 *	Carnegie Mellon University
 *	Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
  Copyright 1988, 1989, 1990, 1991, 1992
   by Intel Corporation, Santa Clara, California.

		All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appears in all
copies and that both the copyright notice and this permission notice
appear in supporting documentation, and that the name of Intel
not be used in advertising or publicity pertaining to distribution
of the software without specific, written prior permission.

INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#include <machine/asm.h>
#include <machine/specialreg.h>

#define CODE_SEGMENT	0x08
#define DATA_SEGMENT	0x10

	.align	16
	.globl	_C_LABEL(startprog64)
_C_LABEL(startprog64):
	.quad	0

	.globl	_C_LABEL(startprog64_size)
_C_LABEL(startprog64_size):
	.long	startprog64_end - _C_LABEL(startprog64_start)

	.text
	.p2align 4,,15

/*
 * startprog64(loadaddr, entry, stack, kern_load, kern_start, kern_size)
 */
ENTRY(startprog64_start)
start:
	/*
	 * This function calls the loaded kernel's start() in 32-bit
	 * segment mode from x64 mode.
	 * %rdi: kernel start address
	 * %rsi: loaded kernel address
	 * %rdx: stack address
	 * %rcx: loaded kernel size
	 * %r8 : loaded start address
	 * %r9 : kernel entry address
	 */
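	/*
	 * Per the SysV AMD64 calling convention the six arguments
	 * arrive in %rdi, %rsi, %rdx, %rcx, %r8 and %r9, matching the
	 * register list above.  The copy phase below behaves roughly
	 * like the following C (illustrative sketch only, with
	 * hypothetical names for the register contents; no libc is
	 * available at this point, so it is open-coded):
	 *
	 *	memmove((void *)kernstart,	// %rdi, destination
	 *	    (const void *)loadedkern,	// %rsi, source
	 *	    kernsize);			// %rcx, byte count
	 *
	 * i.e. an overlap-safe copy done with rep movsq/movsb.
	 */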
	cld			/* LynxOS depends on it */

	cli

	/* Copy kernel */
	mov	%rcx, %r12	/* original kernel size */
	movq	%rdi, %r11	/* for misaligned check */

#if !defined(NO_OVERLAP)
	movq	%rdi, %r13
	subq	%rsi, %r13
#endif

	shrq	$3, %rcx	/* count for copy by words */
	jz	8f		/* jump if less than 8 bytes */

	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
	mov	-8(%rsi, %r12), %r15	/* get last word */
#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	10f
#endif

/*
 * Non-overlapping, copy forwards.
 * Newer Intel CPUs (Nehalem) will do 16-byte read/write transfers
 * if %ecx is more than 76.
 * AMD might do something similar some day.
 */
	and	$7, %r11	/* destination misaligned? */
	jnz	2f
	rep
	movsq
	mov	%r15, (%r14)	/* write last word */
	jmp	.Lcopy_done

/*
 * Destination misaligned.
 * AMD says it is better to align the destination (not the source).
 * This will also re-align copies if the source and dest are both
 * misaligned by the same amount.
 * (I think Nehalem will use its accelerated copy if the source
 * and destination have the same alignment.)
 */
2:
	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
	neg	%r11			/* now -1 .. -7 */
	mov	(%rsi), %r12		/* get first word */
	mov	%rdi, %r13		/* target for first word */
	lea	8(%rsi, %r11), %rsi
	lea	8(%rdi, %r11), %rdi
	shr	$3, %rcx
	rep
	movsq
	mov	%r12, (%r13)	/* write first word */
	mov	%r15, (%r14)	/* write last word */
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/*
 * Must copy backwards.
 * Reverse copy is probably easy to code faster than 'rep movs',
 * since that requires (IIRC) an extra clock every 3 iterations (AMD).
 * However I don't suppose anything cares that much!
 * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
 * The copy is aligned with the buffer start (more likely to
 * be a multiple of 8 than the end).
 */
10:
	lea	-8(%rsi, %rcx, 8), %rsi
	lea	-8(%rdi, %rcx, 8), %rdi
	std
	rep
	movsq
	cld
	mov	%r15, (%r14)	/* write last word */
	jmp	.Lcopy_done
#endif

/*
 * Less than 8 bytes to copy, copy by bytes.
 * Intel Nehalem optimises 'rep movsb' for <= 7 bytes (9-15 clocks);
 * for longer transfers it is 50+!
 */
8:	mov	%r12, %rcx

#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	81f
#endif

	/* nope, copy forwards. */
	rep
	movsb
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/* Must copy backwards */
81:
	lea	-1(%rsi, %rcx), %rsi
	lea	-1(%rdi, %rcx), %rdi
	std
	rep
	movsb
	cld
#endif
	/* End of copy kernel */
.Lcopy_done:
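	/*
	 * What follows drops the CPU from long mode back to 32-bit
	 * protected mode: a far pointer is patched at start32r and
	 * jumped through to reload %cs with CODE_SEGMENT (whose
	 * descriptor has L=0, so execution lands in compatibility
	 * mode), after which paging (CR0_PG) and PAE (CR4_PAE) are
	 * turned off to leave long mode.  The far pointer layout,
	 * sketched as illustrative C (not part of the original
	 * source):
	 *
	 *	struct farptr {
	 *		uint32_t offset;	// start32a, relocated
	 *		uint16_t selector;	// CODE_SEGMENT (0x08)
	 *	};
	 *
	 * ljmp reads a 32-bit offset followed by a 16-bit selector;
	 * the upper half of the second .long at start32r is ignored.
	 */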
	mov	%r8, %rdi	/* %rdi: loaded start address */
	mov	%r9, %rsi	/* %rsi: kernel entry address */

	/* Prepare jump address */
	lea	(start32a - start)(%rdi), %rax
	movl	%eax, (start32r - start)(%rdi)

	/* Setup GDT */
	lea	(gdt - start)(%rdi), %rax
	mov	%rax, (gdtrr - start)(%rdi)
	lgdt	(gdtr - start)(%rdi)

	/* Jump to set %cs */
	ljmp	*(start32r - start)(%rdi)

	.align	4
	.code32
start32a:
	movl	$DATA_SEGMENT, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

	movl	%edx, %esp	/* switch to the new stack */

	/* Disable Paging in CR0 */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4

	jmp	start32b	/* flush the prefetch queue */

	.align	4
start32b:
	xor	%eax, %eax
	call	*%esi		/* enter the kernel */

	.align	16
start32r:
	.long	0		/* jump offset, patched at runtime */
	.long	CODE_SEGMENT	/* %cs selector */
	.align	16
gdt:
	.long	0, 0						/* null */
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00	/* code */
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00	/* data */
gdtr:
	.word	gdtr - gdt	/* limit */
gdtrr:
	.quad	0		/* base, patched at runtime */
start32end:
	/* Space for the stack */
	.align	16
	.space	8192
startprog64_end:
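/*
 * Reference decode of the GDT entries above (assuming the standard
 * 8-byte x86 segment descriptor format; explanatory note only):
 *
 *	selector 0x00: null descriptor
 *	selector 0x08 (CODE_SEGMENT): base 0, limit 0xfffff with 4K
 *	    granularity (4 GiB flat), access 0x9f (present, DPL 0,
 *	    code), flags G=1, D=1, L=0
 *	selector 0x10 (DATA_SEGMENT): base 0, limit 0xfffff with 4K
 *	    granularity (4 GiB flat), access 0x93 (present, DPL 0,
 *	    data, writable), flags G=1, B=1
 *
 * L=0 on the code segment is what makes the ljmp above land in
 * 32-bit compatibility mode rather than 64-bit mode.
 */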