/* This file contains code for the initialization of protected mode: it sets
 * up the code and data segment descriptors, the interrupt and trap gates in
 * the IDT, and the per-CPU TSS entries in the GDT.
 */

#include <assert.h>
#include <string.h>

#include <minix/cpufeature.h>
#include <sys/types.h>
#include "kernel/kernel.h"

#include "arch_proto.h"

#include <sys/exec.h>
#include <libexec.h>

#define INT_GATE_TYPE	(INT_286_GATE | DESC_386_BIT)
#define TSS_TYPE	(AVL_286_TSS | DESC_386_BIT)

/* This is OK initially, when the 1:1 mapping is still there. */
char *video_mem = (char *) MULTIBOOT_VIDEO_BUFFER;

/* Storage for gdt, idt and tss. */
struct segdesc_s gdt[GDT_SIZE] __aligned(DESC_SIZE);
struct gatedesc_s idt[IDT_SIZE] __aligned(DESC_SIZE);
struct tss_s tss[CONFIG_MAX_CPUS];

u32_t k_percpu_stacks[CONFIG_MAX_CPUS];

int prot_init_done = 0;

phys_bytes vir2phys(void *vir)
{
	extern char _kern_vir_base, _kern_phys_base;	/* in kernel.lds */
	u32_t offset = (vir_bytes) &_kern_vir_base -
		(vir_bytes) &_kern_phys_base;
	return (phys_bytes)vir - offset;
}
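
/*
 * Worked example (hypothetical addresses): if the kernel is linked at
 * virtual 0x80000000 (_kern_vir_base) but loaded at physical 0x00400000
 * (_kern_phys_base), the offset is 0x7FC00000 and a kernel pointer such
 * as 0x80001000 translates to physical 0x00401000.
 */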

/*===========================================================================*
 *				enable_iop				     *
 *===========================================================================*/
void enable_iop(struct proc *pp)
{
/* Allow a user process to use I/O instructions.  Change the I/O Permission
 * Level bits in the psw.  These specify the least-privileged Current
 * Privilege Level (CPL) allowed to execute I/O instructions.  Users and
 * servers have CPL 3.  You can't have less privilege than that.  The kernel
 * has CPL 0, tasks CPL 1.
 */
	pp->p_reg.psw |= 0x3000;
}
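
/*
 * Example: 0x3000 sets EFLAGS bits 12-13 (the IOPL field) to binary 11,
 * i.e. IOPL 3, so the CPL <= IOPL check passes even for CPL-3 code and
 * the process may execute IN/OUT instructions directly.
 */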

/*===========================================================================*
 *				sdesc					     *
 *===========================================================================*/
void sdesc(struct segdesc_s *segdp, phys_bytes base, vir_bytes size)
{
/* Fill in the size fields (base, limit and granularity) of a descriptor. */
	segdp->base_low = base;
	segdp->base_middle = base >> BASE_MIDDLE_SHIFT;
	segdp->base_high = base >> BASE_HIGH_SHIFT;

	--size;			/* convert to a limit, 0 size means 4G */
	if (size > BYTE_GRAN_MAX) {
		segdp->limit_low = size >> PAGE_GRAN_SHIFT;
		segdp->granularity = GRANULAR | (size >>
			(PAGE_GRAN_SHIFT + GRANULARITY_SHIFT));
	} else {
		segdp->limit_low = size;
		segdp->granularity = size >> GRANULARITY_SHIFT;
	}
	segdp->granularity |= DEFAULT;	/* means BIG for data seg */
}
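
/*
 * Worked example: the 4 GiB flat segments below call sdesc() with size
 * 0xFFFFFFFF.  After --size the limit exceeds BYTE_GRAN_MAX, so the
 * page-granular branch runs: the stored 20-bit limit becomes 0xFFFFF
 * with GRANULAR set, which the CPU interprets as 0x100000 pages of
 * 4 KiB, i.e. the full 4 GiB address space.
 */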

/*===========================================================================*
 *				init_dataseg				     *
 *===========================================================================*/
void init_param_dataseg(register struct segdesc_s *segdp,
	phys_bytes base, vir_bytes size, const int privilege)
{
/* Build descriptor for a data segment. */
	sdesc(segdp, base, size);
	segdp->access = (privilege << DPL_SHIFT) | (PRESENT | SEGMENT |
		WRITEABLE | ACCESSED);
		/* EXECUTABLE = 0, EXPAND_DOWN = 0 */
}

void init_dataseg(int index, const int privilege)
{
	init_param_dataseg(&gdt[index], 0, 0xFFFFFFFF, privilege);
}

/*===========================================================================*
 *				init_codeseg				     *
 *===========================================================================*/
static void init_codeseg(int index, int privilege)
{
/* Build descriptor for a code segment. */
	sdesc(&gdt[index], 0, 0xFFFFFFFF);
	gdt[index].access = (privilege << DPL_SHIFT)
		| (PRESENT | SEGMENT | EXECUTABLE | READABLE);
		/* CONFORMING = 0, ACCESSED = 0 */
}
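
/*
 * Worked example (assuming the conventional x86 access-byte layout:
 * PRESENT = 0x80, DPL in bits 5-6, SEGMENT = 0x10, EXECUTABLE = 0x08,
 * READABLE/WRITEABLE = 0x02, ACCESSED = 0x01): the kernel and user code
 * segments get access bytes 0x9A and 0xFA, and the kernel and user data
 * segments 0x93 and 0xF3, the classic flat-model values.
 */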

static struct gate_table_s gate_table_pic[] = {
	{ hwint00, VECTOR( 0), INTR_PRIVILEGE },
	{ hwint01, VECTOR( 1), INTR_PRIVILEGE },
	{ hwint02, VECTOR( 2), INTR_PRIVILEGE },
	{ hwint03, VECTOR( 3), INTR_PRIVILEGE },
	{ hwint04, VECTOR( 4), INTR_PRIVILEGE },
	{ hwint05, VECTOR( 5), INTR_PRIVILEGE },
	{ hwint06, VECTOR( 6), INTR_PRIVILEGE },
	{ hwint07, VECTOR( 7), INTR_PRIVILEGE },
	{ hwint08, VECTOR( 8), INTR_PRIVILEGE },
	{ hwint09, VECTOR( 9), INTR_PRIVILEGE },
	{ hwint10, VECTOR(10), INTR_PRIVILEGE },
	{ hwint11, VECTOR(11), INTR_PRIVILEGE },
	{ hwint12, VECTOR(12), INTR_PRIVILEGE },
	{ hwint13, VECTOR(13), INTR_PRIVILEGE },
	{ hwint14, VECTOR(14), INTR_PRIVILEGE },
	{ hwint15, VECTOR(15), INTR_PRIVILEGE },
	{ NULL, 0, 0}
};

static struct gate_table_s gate_table_exceptions[] = {
	{ divide_error, DIVIDE_VECTOR, INTR_PRIVILEGE },
	{ single_step_exception, DEBUG_VECTOR, INTR_PRIVILEGE },
	{ nmi, NMI_VECTOR, INTR_PRIVILEGE },
	{ breakpoint_exception, BREAKPOINT_VECTOR, USER_PRIVILEGE },
	{ overflow, OVERFLOW_VECTOR, USER_PRIVILEGE },
	{ bounds_check, BOUNDS_VECTOR, INTR_PRIVILEGE },
	{ inval_opcode, INVAL_OP_VECTOR, INTR_PRIVILEGE },
	{ copr_not_available, COPROC_NOT_VECTOR, INTR_PRIVILEGE },
	{ double_fault, DOUBLE_FAULT_VECTOR, INTR_PRIVILEGE },
	{ copr_seg_overrun, COPROC_SEG_VECTOR, INTR_PRIVILEGE },
	{ inval_tss, INVAL_TSS_VECTOR, INTR_PRIVILEGE },
	{ segment_not_present, SEG_NOT_VECTOR, INTR_PRIVILEGE },
	{ stack_exception, STACK_FAULT_VECTOR, INTR_PRIVILEGE },
	{ general_protection, PROTECTION_VECTOR, INTR_PRIVILEGE },
	{ page_fault, PAGE_FAULT_VECTOR, INTR_PRIVILEGE },
	{ copr_error, COPROC_ERR_VECTOR, INTR_PRIVILEGE },
	{ alignment_check, ALIGNMENT_CHECK_VECTOR, INTR_PRIVILEGE },
	{ machine_check, MACHINE_CHECK_VECTOR, INTR_PRIVILEGE },
	{ simd_exception, SIMD_EXCEPTION_VECTOR, INTR_PRIVILEGE },
	{ ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE },
	{ kernel_call_entry_orig, KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
	{ ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
	{ kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
	{ NULL, 0, 0}
};

int tss_init(unsigned cpu, void * kernel_stack)
{
	struct tss_s * t = &tss[cpu];
	int index = TSS_INDEX(cpu);
	struct segdesc_s *tssgdt;

	tssgdt = &gdt[index];

	init_param_dataseg(tssgdt, (phys_bytes) t,
		sizeof(struct tss_s), INTR_PRIVILEGE);
	tssgdt->access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE;

	/* Build TSS. */
	memset(t, 0, sizeof(*t));
	t->ds = t->es = t->fs = t->gs = t->ss0 = KERN_DS_SELECTOR;
	t->cs = KERN_CS_SELECTOR;
	t->iobase = sizeof(struct tss_s);	/* empty i/o permissions map */

	/*
	 * Make space for the process pointer and cpu id, and point to the
	 * first usable word.
	 */
	k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
	/*
	 * Set the cpu id at the top of the stack, so we know on which CPU
	 * this stack is in use when we trap to the kernel.
	 */
	*((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu;
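
	/*
	 * Resulting stack-top layout (a sketch, assuming
	 * X86_STACK_TOP_RESERVED covers two words):
	 *   [sp0 + 1*sizeof(reg_t)]  cpu id, stored above
	 *   [sp0 + 0]                slot for the current process pointer
	 * ESP starts at sp0 on a trap, so pushes land below the reserved
	 * words.
	 */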

	/* Set up Intel SYSENTER support if available. */
	if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
		ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR);
		ia32_msr_write(INTEL_MSR_SYSENTER_ESP, 0, t->sp0);
		ia32_msr_write(INTEL_MSR_SYSENTER_EIP, 0, (u32_t) ipc_entry_sysenter);
	}

	/* Set up AMD SYSCALL support if available. */
	if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
		u32_t msr_lo, msr_hi;

		/* set SYSCALL ENABLE bit in EFER MSR */
		ia32_msr_read(AMD_MSR_EFER, &msr_hi, &msr_lo);
		msr_lo |= AMD_EFER_SCE;
		ia32_msr_write(AMD_MSR_EFER, msr_hi, msr_lo);

		/* set STAR register value */
#define set_star_cpu(forcpu) if(cpu == forcpu) { \
		ia32_msr_write(AMD_MSR_STAR, \
		((u32_t)USER_CS_SELECTOR << 16) | (u32_t)KERN_CS_SELECTOR, \
		(u32_t) ipc_entry_syscall_cpu ## forcpu); }
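		/*
		 * Layout note: per the AMD STAR MSR definition, bits 63-48
		 * (here USER_CS_SELECTOR) hold the selector base used by
		 * SYSRET and bits 47-32 (KERN_CS_SELECTOR) the one used by
		 * SYSCALL; the low 32 bits are the SYSCALL entry point,
		 * token-pasted above so each CPU gets its own entry stub.
		 */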
		set_star_cpu(0);
		set_star_cpu(1);
		set_star_cpu(2);
		set_star_cpu(3);
		set_star_cpu(4);
		set_star_cpu(5);
		set_star_cpu(6);
		set_star_cpu(7);
		assert(CONFIG_MAX_CPUS <= 8);
	}

	return SEG_SELECTOR(index);
}

phys_bytes init_segdesc(int gdt_index, void *base, int size)
{
	struct desctableptr_s *dtp = (struct desctableptr_s *) &gdt[gdt_index];
	dtp->limit = size - 1;
	dtp->base = (phys_bytes) base;

	return (phys_bytes) dtp;
}

void int_gate(struct gatedesc_s *tab,
	unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
{
/* Build descriptor for an interrupt gate. */
	register struct gatedesc_s *idp;

	idp = &tab[vec_nr];
	idp->offset_low = offset;
	idp->selector = KERN_CS_SELECTOR;
	idp->p_dpl_type = dpl_type;
	idp->offset_high = offset >> OFFSET_HIGH_SHIFT;
}
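
/*
 * Example (mirroring idt_copy_vectors() below): a user-callable gate such
 * as the IPC trap is installed as
 *
 *	int_gate(idt, IPC_VECTOR_UM, (vir_bytes) ipc_entry_softint_um,
 *		PRESENT | INT_GATE_TYPE | (USER_PRIVILEGE << DPL_SHIFT));
 *
 * The DPL of 3 allows user code to issue the INT instruction, while the
 * handler itself always runs on KERN_CS_SELECTOR as set above.
 */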

void int_gate_idt(unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
{
	int_gate(idt, vec_nr, offset, dpl_type);
}

void idt_copy_vectors(struct gate_table_s * first)
{
	struct gate_table_s *gtp;
	for (gtp = first; gtp->gate; gtp++) {
		int_gate(idt, gtp->vec_nr, (vir_bytes) gtp->gate,
			PRESENT | INT_GATE_TYPE |
			(gtp->privilege << DPL_SHIFT));
	}
}

void idt_copy_vectors_pic(void)
{
	idt_copy_vectors(gate_table_pic);
}

void idt_init(void)
{
	idt_copy_vectors_pic();
	idt_copy_vectors(gate_table_exceptions);
}

struct desctableptr_s gdt_desc, idt_desc;

void idt_reload(void)
{
	x86_lidt(&idt_desc);
}

multiboot_module_t *bootmod(int pnr)
{
	int i;

	assert(pnr >= 0);

	/* Search for the desired process in the boot process list.  The
	 * first NR_TASKS ones do not correspond to a module, however, so
	 * we don't search those.
	 */
	for(i = NR_TASKS; i < NR_BOOT_PROCS; i++) {
		int p;
		p = i - NR_TASKS;
		if(image[i].proc_nr == pnr) {
			assert(p < MULTIBOOT_MAX_MODS);
			assert(p < kinfo.mbi.mi_mods_count);
			return &kinfo.module_list[p];
		}
	}

	panic("boot module %d not found", pnr);
}

int booting_cpu = 0;

void prot_load_selectors(void)
{
	/* This function is called both by prot_init() on the BSP and by the
	 * early AP boot code in mpx.S on secondary CPUs.  Everything is set
	 * up the same, except for the TSS, which is per-CPU.
	 */
	x86_lgdt(&gdt_desc);	/* Load gdt */
	idt_init();
	idt_reload();
	x86_lldt(LDT_SELECTOR);	/* Load bogus ldt */
	x86_ltr(TSS_SELECTOR(booting_cpu));

	x86_load_kerncs();
	x86_load_ds(KERN_DS_SELECTOR);
	x86_load_es(KERN_DS_SELECTOR);
	x86_load_fs(KERN_DS_SELECTOR);
	x86_load_gs(KERN_DS_SELECTOR);
	x86_load_ss(KERN_DS_SELECTOR);
}

/*===========================================================================*
 *				prot_init				     *
 *===========================================================================*/
void prot_init(void)
{
	extern char k_boot_stktop;

	if(_cpufeature(_CPUF_I386_SYSENTER))
		minix_feature_flags |= MKF_I386_INTEL_SYSENTER;
	if(_cpufeature(_CPUF_I386_SYSCALL))
		minix_feature_flags |= MKF_I386_AMD_SYSCALL;

	memset(gdt, 0, sizeof(gdt));
	memset(idt, 0, sizeof(idt));

	/* Build the GDT and IDT descriptors. */
	gdt_desc.base = (u32_t) gdt;
	gdt_desc.limit = sizeof(gdt)-1;
	idt_desc.base = (u32_t) idt;
	idt_desc.limit = sizeof(idt)-1;
	tss_init(0, &k_boot_stktop);

	/* Build GDT */
	init_param_dataseg(&gdt[LDT_INDEX],
		(phys_bytes) 0, 0, INTR_PRIVILEGE); /* unusable LDT */
	gdt[LDT_INDEX].access = PRESENT | LDT;
	init_codeseg(KERN_CS_INDEX, INTR_PRIVILEGE);
	init_dataseg(KERN_DS_INDEX, INTR_PRIVILEGE);
	init_codeseg(USER_CS_INDEX, USER_PRIVILEGE);
	init_dataseg(USER_DS_INDEX, USER_PRIVILEGE);

	/* Currently the multiboot segments are loaded, which is fine, but
	 * let's replace them with the ones from our own GDT so we test
	 * right away whether they work as expected.
	 */
	prot_load_selectors();

	/* Set up a new post-relocate bootstrap pagetable so that
	 * we can map in VM, and we no longer rely on pre-relocated
	 * data.
	 */

	pg_clear();
	pg_identity(&kinfo);	/* Still need 1:1 for lapic and video mem and such. */
	pg_mapkernel();
	pg_load();

	prot_init_done = 1;
}

static int alloc_for_vm = 0;

void arch_post_init(void)
{
	/* Let memory mapping code know what's going on at bootstrap time */
	struct proc *vm;
	vm = proc_addr(VM_PROC_NR);
	get_cpulocal_var(ptproc) = vm;
	pg_info(&vm->p_seg.p_cr3, &vm->p_seg.p_cr3_v);
}

static int libexec_pg_alloc(struct exec_info *execi, vir_bytes vaddr, size_t len)
{
	pg_map(PG_ALLOCATEME, vaddr, vaddr+len, &kinfo);
	pg_load();
	memset((char *) vaddr, 0, len);
	alloc_for_vm += len;
	return OK;
}

void arch_boot_proc(struct boot_image *ip, struct proc *rp)
{
	multiboot_module_t *mod;
	struct ps_strings *psp;
	char *sp;

	if(rp->p_nr < 0) return;

	mod = bootmod(rp->p_nr);

	/* Important special case: we put VM in the bootstrap pagetable
	 * so it can run.
	 */

	if(rp->p_nr == VM_PROC_NR) {
		struct exec_info execi;

		memset(&execi, 0, sizeof(execi));

		/* exec parameters */
		execi.stack_high = kinfo.user_sp;
		execi.stack_size = 64 * 1024;	/* not too crazy as it must be preallocated */
		execi.proc_e = ip->endpoint;
		execi.hdr = (char *) mod->mod_start; /* phys mem direct */
		execi.filesize = execi.hdr_len = mod->mod_end - mod->mod_start;
		strlcpy(execi.progname, ip->proc_name, sizeof(execi.progname));
		execi.frame_len = 0;

		/* callbacks for use in the kernel */
		execi.copymem = libexec_copy_memcpy;
		execi.clearmem = libexec_clear_memset;
		execi.allocmem_prealloc_junk = libexec_pg_alloc;
		execi.allocmem_prealloc_cleared = libexec_pg_alloc;
		execi.allocmem_ondemand = libexec_pg_alloc;
		execi.clearproc = NULL;

		/* parse the VM ELF binary and alloc/map it into the bootstrap
		 * pagetable */
		if(libexec_load_elf(&execi) != OK)
			panic("VM loading failed");

		/* Set up a ps_strings struct on the stack, pointing to the
		 * following argv, envp. */
		sp = (char *)execi.stack_high;
		sp -= sizeof(struct ps_strings);
		psp = (struct ps_strings *) sp;

		/* Take the stack pointer down three words to give startup code
		 * something to use as "argc", "argv" and "envp".
		 */
		sp -= (sizeof(void *) + sizeof(void *) + sizeof(int));

		/* The VM stack was just mapped into the current (bootstrap)
		 * linear address space, so psp is directly writable here.
		 */
		psp->ps_argvstr = (char **)(sp + sizeof(int));
		psp->ps_nargvstr = 0;
		psp->ps_envstr = psp->ps_argvstr + sizeof(void *);
		psp->ps_nenvstr = 0;
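
		/*
		 * Sketch of the initial VM stack built above (grows down
		 * from execi.stack_high):
		 *   execi.stack_high
		 *     struct ps_strings	<- psp
		 *     two pointer-sized words and an int, which the startup
		 *     code uses as "envp", "argv" and "argc"
		 *   sp (initial stack pointer passed to arch_proc_init below)
		 */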

		arch_proc_init(rp, execi.pc, (vir_bytes)sp,
			execi.stack_high - sizeof(struct ps_strings),
			ip->proc_name);

		/* Free the VM blob that was just copied into existence. */
		add_memmap(&kinfo, mod->mod_start, mod->mod_end-mod->mod_start);
		mod->mod_end = mod->mod_start = 0;

		/* Remember how much we allocated for VM. */
		kinfo.vm_allocated_bytes = alloc_for_vm;
	}
}