/* This file contains code for initialization of protected mode, to initialize
 * code and data segment descriptors, and to initialize global descriptors
 * for local descriptors in the process table.
 */

#include <assert.h>
#include <string.h>

#include <minix/cpufeature.h>
#include <sys/types.h>
#include <machine/multiboot.h>
#include "kernel/kernel.h"

#include "archconst.h"
#include "arch_proto.h"

#include <sys/exec.h>
#include <libexec.h>

#define INT_GATE_TYPE	(INT_286_GATE | DESC_386_BIT)
#define TSS_TYPE	(AVL_286_TSS | DESC_386_BIT)

/* This is OK initially, when the 1:1 mapping is still there. */
char *video_mem = (char *) MULTIBOOT_VIDEO_BUFFER;

/* Storage for gdt, idt and tss. */
struct segdesc_s gdt[GDT_SIZE] __aligned(DESC_SIZE);
struct gatedesc_s idt[IDT_SIZE] __aligned(DESC_SIZE);
struct tss_s tss[CONFIG_MAX_CPUS];

u32_t k_percpu_stacks[CONFIG_MAX_CPUS];

int prot_init_done = 0;

phys_bytes vir2phys(void *vir)
{
	extern char _kern_vir_base, _kern_phys_base;	/* in kernel.lds */
	u32_t offset = (vir_bytes) &_kern_vir_base -
		(vir_bytes) &_kern_phys_base;
	return (phys_bytes)vir - offset;
}

/*===========================================================================*
 *				enable_iop				     *
 *===========================================================================*/
void enable_iop(struct proc *pp)
{
	/* Allow a user process to use I/O instructions.  Change the I/O
	 * Permission Level bits in the psw.  These specify the least-privileged
	 * Current Privilege Level allowed to execute I/O instructions.  Users
	 * and servers have CPL 3; you can't have less privilege than that.
	 * The kernel has CPL 0, tasks CPL 1.
	 */
	pp->p_reg.psw |= 0x3000;
}


/*===========================================================================*
 *				sdesc					     *
 *===========================================================================*/
void sdesc(struct segdesc_s *segdp, phys_bytes base, vir_bytes size)
{
	/* Fill in the size fields (base, limit and granularity) of a descriptor. */
	segdp->base_low = base;
	segdp->base_middle = base >> BASE_MIDDLE_SHIFT;
	segdp->base_high = base >> BASE_HIGH_SHIFT;

	--size;			/* convert to a limit, 0 size means 4G */
	if (size > BYTE_GRAN_MAX) {
		segdp->limit_low = size >> PAGE_GRAN_SHIFT;
		segdp->granularity = GRANULAR | (size >>
			(PAGE_GRAN_SHIFT + GRANULARITY_SHIFT));
	} else {
		segdp->limit_low = size;
		segdp->granularity = size >> GRANULARITY_SHIFT;
	}
	segdp->granularity |= DEFAULT;	/* means BIG for data seg */
}
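
/* Example of the encoding above: init_dataseg() below passes a size of
 * 0xFFFFFFFF, so the limit becomes 0xFFFFFFFE, which exceeds BYTE_GRAN_MAX
 * and is therefore stored page-granular: limit_low receives the low 16 bits
 * of (0xFFFFFFFE >> PAGE_GRAN_SHIFT) and the granularity byte the remaining
 * four bits plus the GRANULAR flag.  With the usual i386 constants this
 * yields the 20-bit limit 0xFFFFF in 4K units, i.e. a flat segment covering
 * the whole 4 GB address space.
 */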

/*===========================================================================*
 *				init_dataseg				     *
 *===========================================================================*/
void init_param_dataseg(register struct segdesc_s *segdp,
	phys_bytes base, vir_bytes size, const int privilege)
{
	/* Build descriptor for a data segment. */
	sdesc(segdp, base, size);
	segdp->access = (privilege << DPL_SHIFT) | (PRESENT | SEGMENT |
		WRITEABLE | ACCESSED);
		/* EXECUTABLE = 0, EXPAND_DOWN = 0 */
}

void init_dataseg(int index, const int privilege)
{
	init_param_dataseg(&gdt[index], 0, 0xFFFFFFFF, privilege);
}

/*===========================================================================*
 *				init_codeseg				     *
 *===========================================================================*/
static void init_codeseg(int index, int privilege)
{
	/* Build descriptor for a code segment. */
	sdesc(&gdt[index], 0, 0xFFFFFFFF);
	gdt[index].access = (privilege << DPL_SHIFT)
		| (PRESENT | SEGMENT | EXECUTABLE | READABLE);
		/* CONFORMING = 0, ACCESSED = 0 */
}

static struct gate_table_s gate_table_pic[] = {
	{ hwint00, VECTOR( 0), INTR_PRIVILEGE },
	{ hwint01, VECTOR( 1), INTR_PRIVILEGE },
	{ hwint02, VECTOR( 2), INTR_PRIVILEGE },
	{ hwint03, VECTOR( 3), INTR_PRIVILEGE },
	{ hwint04, VECTOR( 4), INTR_PRIVILEGE },
	{ hwint05, VECTOR( 5), INTR_PRIVILEGE },
	{ hwint06, VECTOR( 6), INTR_PRIVILEGE },
	{ hwint07, VECTOR( 7), INTR_PRIVILEGE },
	{ hwint08, VECTOR( 8), INTR_PRIVILEGE },
	{ hwint09, VECTOR( 9), INTR_PRIVILEGE },
	{ hwint10, VECTOR(10), INTR_PRIVILEGE },
	{ hwint11, VECTOR(11), INTR_PRIVILEGE },
	{ hwint12, VECTOR(12), INTR_PRIVILEGE },
	{ hwint13, VECTOR(13), INTR_PRIVILEGE },
	{ hwint14, VECTOR(14), INTR_PRIVILEGE },
	{ hwint15, VECTOR(15), INTR_PRIVILEGE },
	{ NULL, 0, 0}
};

static struct gate_table_s gate_table_exceptions[] = {
	{ divide_error, DIVIDE_VECTOR, INTR_PRIVILEGE },
	{ single_step_exception, DEBUG_VECTOR, INTR_PRIVILEGE },
	{ nmi, NMI_VECTOR, INTR_PRIVILEGE },
	{ breakpoint_exception, BREAKPOINT_VECTOR, USER_PRIVILEGE },
	{ overflow, OVERFLOW_VECTOR, USER_PRIVILEGE },
	{ bounds_check, BOUNDS_VECTOR, INTR_PRIVILEGE },
	{ inval_opcode, INVAL_OP_VECTOR, INTR_PRIVILEGE },
	{ copr_not_available, COPROC_NOT_VECTOR, INTR_PRIVILEGE },
	{ double_fault, DOUBLE_FAULT_VECTOR, INTR_PRIVILEGE },
	{ copr_seg_overrun, COPROC_SEG_VECTOR, INTR_PRIVILEGE },
	{ inval_tss, INVAL_TSS_VECTOR, INTR_PRIVILEGE },
	{ segment_not_present, SEG_NOT_VECTOR, INTR_PRIVILEGE },
	{ stack_exception, STACK_FAULT_VECTOR, INTR_PRIVILEGE },
	{ general_protection, PROTECTION_VECTOR, INTR_PRIVILEGE },
	{ page_fault, PAGE_FAULT_VECTOR, INTR_PRIVILEGE },
	{ copr_error, COPROC_ERR_VECTOR, INTR_PRIVILEGE },
	{ alignment_check, ALIGNMENT_CHECK_VECTOR, INTR_PRIVILEGE },
	{ machine_check, MACHINE_CHECK_VECTOR, INTR_PRIVILEGE },
	{ simd_exception, SIMD_EXCEPTION_VECTOR, INTR_PRIVILEGE },
	{ ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE },
	{ kernel_call_entry_orig, KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
	{ ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
	{ kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
	{ NULL, 0, 0}
};

int tss_init(unsigned cpu, void * kernel_stack)
{
	struct tss_s * t = &tss[cpu];
	int index = TSS_INDEX(cpu);
	struct segdesc_s *tssgdt;

	tssgdt = &gdt[index];

	init_param_dataseg(tssgdt, (phys_bytes) t,
		sizeof(struct tss_s), INTR_PRIVILEGE);
	tssgdt->access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE;
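
	/* MINIX does not use hardware task switching; each CPU's TSS exists
	 * only so the processor can find the kernel stack (ss0:sp0) when it
	 * takes an interrupt or trap from user mode, and to declare, via
	 * iobase below, that there is no I/O permission bitmap.
	 */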
	/* Build TSS. */
	memset(t, 0, sizeof(*t));
	t->ds = t->es = t->fs = t->gs = t->ss0 = KERN_DS_SELECTOR;
	t->cs = KERN_CS_SELECTOR;
	t->iobase = sizeof(struct tss_s);	/* empty i/o permissions map */

	/*
	 * Make space for the process pointer and cpu id, and point to the
	 * first usable word.
	 */
	k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
	/*
	 * Set the cpu id at the top of the stack, so we know on which cpu
	 * this stack is in use when we trap to the kernel.
	 */
	*((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu;

	/* Set up Intel SYSENTER support if available. */
	if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
		ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR);
		ia32_msr_write(INTEL_MSR_SYSENTER_ESP, 0, t->sp0);
		ia32_msr_write(INTEL_MSR_SYSENTER_EIP, 0, (u32_t) ipc_entry_sysenter);
	}

	/* Set up AMD SYSCALL support if available. */
	if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
		u32_t msr_lo, msr_hi;

		/* set SYSCALL ENABLE bit in EFER MSR */
		ia32_msr_read(AMD_MSR_EFER, &msr_hi, &msr_lo);
		msr_lo |= AMD_EFER_SCE;
		ia32_msr_write(AMD_MSR_EFER, msr_hi, msr_lo);

		/* Set the STAR register value: the SYSRET CS/SS selector base
		 * (user) in the top word, the SYSCALL CS/SS selector base
		 * (kernel) below it, and the per-CPU 32-bit SYSCALL entry
		 * point in the low dword.
		 */
#define set_star_cpu(forcpu) if(cpu == forcpu) { \
		ia32_msr_write(AMD_MSR_STAR, \
		((u32_t)USER_CS_SELECTOR << 16) | (u32_t)KERN_CS_SELECTOR, \
		(u32_t) ipc_entry_syscall_cpu ## forcpu); }
		set_star_cpu(0);
		set_star_cpu(1);
		set_star_cpu(2);
		set_star_cpu(3);
		set_star_cpu(4);
		set_star_cpu(5);
		set_star_cpu(6);
		set_star_cpu(7);
		assert(CONFIG_MAX_CPUS <= 8);
	}

	return SEG_SELECTOR(index);
}

phys_bytes init_segdesc(int gdt_index, void *base, int size)
{
	struct desctableptr_s *dtp = (struct desctableptr_s *) &gdt[gdt_index];
	dtp->limit = size - 1;
	dtp->base = (phys_bytes) base;

	return (phys_bytes) dtp;
}

void int_gate(struct gatedesc_s *tab,
	unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
{
	/* Build descriptor for an interrupt gate. */
	register struct gatedesc_s *idp;

	idp = &tab[vec_nr];
	idp->offset_low = offset;
	idp->selector = KERN_CS_SELECTOR;
	idp->p_dpl_type = dpl_type;
	idp->offset_high = offset >> OFFSET_HIGH_SHIFT;
}

void int_gate_idt(unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
{
	int_gate(idt, vec_nr, offset, dpl_type);
}

void idt_copy_vectors(struct gate_table_s * first)
{
	struct gate_table_s *gtp;
	for (gtp = first; gtp->gate; gtp++) {
		int_gate(idt, gtp->vec_nr, (vir_bytes) gtp->gate,
			PRESENT | INT_GATE_TYPE |
			(gtp->privilege << DPL_SHIFT));
	}
}

void idt_copy_vectors_pic(void)
{
	idt_copy_vectors(gate_table_pic);
}

void idt_init(void)
{
	idt_copy_vectors_pic();
	idt_copy_vectors(gate_table_exceptions);
}

struct desctableptr_s gdt_desc, idt_desc;

void idt_reload(void)
{
	x86_lidt(&idt_desc);
}
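
/* All IDT entries installed by idt_copy_vectors() are interrupt gates, so
 * further interrupts are automatically disabled on entry.  A gate DPL of
 * INTR_PRIVILEGE means user code cannot raise the vector with an explicit
 * INT instruction (it gets a general protection fault instead); the
 * breakpoint, overflow, IPC and kernel call vectors are installed with
 * USER_PRIVILEGE in the tables above precisely so that INT3, INTO and the
 * software-interrupt system call paths keep working from user mode.
 */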
284 */ 285 for(i = NR_TASKS; i < NR_BOOT_PROCS; i++) { 286 int p; 287 p = i - NR_TASKS; 288 if(image[i].proc_nr == pnr) { 289 assert(p < MULTIBOOT_MAX_MODS); 290 assert(p < kinfo.mbi.mi_mods_count); 291 return &kinfo.module_list[p]; 292 } 293 } 294 295 panic("boot module %d not found", pnr); 296 } 297 298 int booting_cpu = 0; 299 300 void prot_load_selectors(void) 301 { 302 /* this function is called by both prot_init by the BSP and 303 * the early AP booting code in mpx.S by secondary CPU's. 304 * everything is set up the same except for the TSS that is per-CPU. 305 */ 306 x86_lgdt(&gdt_desc); /* Load gdt */ 307 idt_init(); 308 idt_reload(); 309 x86_lldt(LDT_SELECTOR); /* Load bogus ldt */ 310 x86_ltr(TSS_SELECTOR(booting_cpu)); 311 312 x86_load_kerncs(); 313 x86_load_ds(KERN_DS_SELECTOR); 314 x86_load_es(KERN_DS_SELECTOR); 315 x86_load_fs(KERN_DS_SELECTOR); 316 x86_load_gs(KERN_DS_SELECTOR); 317 x86_load_ss(KERN_DS_SELECTOR); 318 } 319 320 /*===========================================================================* 321 * prot_init * 322 *===========================================================================*/ 323 void prot_init() 324 { 325 extern char k_boot_stktop; 326 327 if(_cpufeature(_CPUF_I386_SYSENTER)) 328 minix_feature_flags |= MKF_I386_INTEL_SYSENTER; 329 if(_cpufeature(_CPUF_I386_SYSCALL)) 330 minix_feature_flags |= MKF_I386_AMD_SYSCALL; 331 332 memset(gdt, 0, sizeof(gdt)); 333 memset(idt, 0, sizeof(idt)); 334 335 /* Build GDT, IDT, IDT descriptors. */ 336 gdt_desc.base = (u32_t) gdt; 337 gdt_desc.limit = sizeof(gdt)-1; 338 idt_desc.base = (u32_t) idt; 339 idt_desc.limit = sizeof(idt)-1; 340 tss_init(0, &k_boot_stktop); 341 342 /* Build GDT */ 343 init_param_dataseg(&gdt[LDT_INDEX], 344 (phys_bytes) 0, 0, INTR_PRIVILEGE); /* unusable LDT */ 345 gdt[LDT_INDEX].access = PRESENT | LDT; 346 init_codeseg(KERN_CS_INDEX, INTR_PRIVILEGE); 347 init_dataseg(KERN_DS_INDEX, INTR_PRIVILEGE); 348 init_codeseg(USER_CS_INDEX, USER_PRIVILEGE); 349 init_dataseg(USER_DS_INDEX, USER_PRIVILEGE); 350 351 /* Currently the multiboot segments are loaded; which is fine, but 352 * let's replace them with the ones from our own GDT so we test 353 * right away whether they work as expected. 354 */ 355 prot_load_selectors(); 356 357 /* Set up a new post-relocate bootstrap pagetable so that 358 * we can map in VM, and we no longer rely on pre-relocated 359 * data. 360 */ 361 362 pg_clear(); 363 pg_identity(&kinfo); /* Still need 1:1 for lapic and video mem and such. */ 364 pg_mapkernel(); 365 pg_load(); 366 367 prot_init_done = 1; 368 } 369 370 static int alloc_for_vm = 0; 371 372 void arch_post_init(void) 373 { 374 /* Let memory mapping code know what's going on at bootstrap time */ 375 struct proc *vm; 376 vm = proc_addr(VM_PROC_NR); 377 get_cpulocal_var(ptproc) = vm; 378 pg_info(&vm->p_seg.p_cr3, &vm->p_seg.p_cr3_v); 379 } 380 381 static int libexec_pg_alloc(struct exec_info *execi, vir_bytes vaddr, size_t len) 382 { 383 pg_map(PG_ALLOCATEME, vaddr, vaddr+len, &kinfo); 384 pg_load(); 385 memset((char *) vaddr, 0, len); 386 alloc_for_vm += len; 387 return OK; 388 } 389 390 void arch_boot_proc(struct boot_image *ip, struct proc *rp) 391 { 392 multiboot_module_t *mod; 393 struct ps_strings *psp; 394 char *sp; 395 396 if(rp->p_nr < 0) return; 397 398 mod = bootmod(rp->p_nr); 399 400 /* Important special case: we put VM in the bootstrap pagetable 401 * so it can run. 
402 */ 403 404 if(rp->p_nr == VM_PROC_NR) { 405 struct exec_info execi; 406 407 memset(&execi, 0, sizeof(execi)); 408 409 /* exec parameters */ 410 execi.stack_high = kinfo.user_sp; 411 execi.stack_size = 64 * 1024; /* not too crazy as it must be preallocated */ 412 execi.proc_e = ip->endpoint; 413 execi.hdr = (char *) mod->mod_start; /* phys mem direct */ 414 execi.filesize = execi.hdr_len = mod->mod_end - mod->mod_start; 415 strlcpy(execi.progname, ip->proc_name, sizeof(execi.progname)); 416 execi.frame_len = 0; 417 418 /* callbacks for use in the kernel */ 419 execi.copymem = libexec_copy_memcpy; 420 execi.clearmem = libexec_clear_memset; 421 execi.allocmem_prealloc_junk = libexec_pg_alloc; 422 execi.allocmem_prealloc_cleared = libexec_pg_alloc; 423 execi.allocmem_ondemand = libexec_pg_alloc; 424 execi.clearproc = NULL; 425 426 /* parse VM ELF binary and alloc/map it into bootstrap pagetable */ 427 if(libexec_load_elf(&execi) != OK) 428 panic("VM loading failed"); 429 430 /* Setup a ps_strings struct on the stack, pointing to the 431 * following argv, envp. */ 432 sp = (char *)execi.stack_high; 433 sp -= sizeof(struct ps_strings); 434 psp = (struct ps_strings *) sp; 435 436 /* Take the stack pointer down three words to give startup code 437 * something to use as "argc", "argv" and "envp". 438 */ 439 sp -= (sizeof(void *) + sizeof(void *) + sizeof(int)); 440 441 // linear address space, so it is available. 442 psp->ps_argvstr = (char **)(sp + sizeof(int)); 443 psp->ps_nargvstr = 0; 444 psp->ps_envstr = psp->ps_argvstr + sizeof(void *); 445 psp->ps_nenvstr = 0; 446 447 arch_proc_init(rp, execi.pc, (vir_bytes)sp, 448 execi.stack_high - sizeof(struct ps_strings), 449 ip->proc_name); 450 451 /* Free VM blob that was just copied into existence. */ 452 add_memmap(&kinfo, mod->mod_start, mod->mod_end-mod->mod_start); 453 mod->mod_end = mod->mod_start = 0; 454 455 /* Remember them */ 456 kinfo.vm_allocated_bytes = alloc_for_vm; 457 } 458 } 459