/* This file contains code for the initialization of protected mode: it sets
 * up the code and data segment descriptors, and initializes the global
 * descriptors that describe the local descriptors in the process table.
 */

#include <assert.h>
#include <string.h>

#include <minix/cpufeature.h>
#include <sys/types.h>
#include <machine/multiboot.h>
#include "kernel/kernel.h"

#include "archconst.h"
#include "arch_proto.h"

#include <sys/exec.h>
#include <libexec.h>

#define INT_GATE_TYPE	(INT_286_GATE | DESC_386_BIT)
#define TSS_TYPE	(AVL_286_TSS  | DESC_386_BIT)

/* This is OK initially, when the 1:1 mapping is still there. */
char *video_mem = (char *) MULTIBOOT_VIDEO_BUFFER;

/* Storage for gdt, idt and tss. */
struct segdesc_s gdt[GDT_SIZE] __aligned(DESC_SIZE);
struct gatedesc_s idt[IDT_SIZE] __aligned(DESC_SIZE);
struct tss_s tss[CONFIG_MAX_CPUS];

u32_t k_percpu_stacks[CONFIG_MAX_CPUS];

int prot_init_done = 0;

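/* Translate a kernel virtual address to a physical address.  The kernel is
 * linked at a high virtual address (_kern_vir_base) but loaded at a lower
 * physical one (_kern_phys_base); both symbols come from kernel.lds.  Their
 * difference is a constant, so the translation is a single subtraction.
 * Illustrative example (made-up addresses): with _kern_vir_base at
 * 0xF0400000 and _kern_phys_base at 0x00400000, virtual 0xF0401000 maps to
 * physical 0x00401000.
 */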
phys_bytes vir2phys(void *vir)
{
	extern char _kern_vir_base, _kern_phys_base;	/* in kernel.lds */
	u32_t offset = (vir_bytes) &_kern_vir_base -
		(vir_bytes) &_kern_phys_base;
	return (phys_bytes)vir - offset;
}

/*===========================================================================*
 *				enable_iop				     *
 *===========================================================================*/
void enable_iop(struct proc *pp)
{
/* Allow a user process to use I/O instructions.  Change the I/O Permission
 * Level bits in the psw.  These specify the least-privileged Current
 * Privilege Level allowed to execute I/O instructions.  Users and servers
 * have CPL 3, the least privileged level there is; the kernel has CPL 0
 * and tasks have CPL 1.
 */
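  /* IOPL is bits 12 and 13 of the EFLAGS register (psw here); 0x3000 sets
   * both bits, i.e. IOPL = 3, so code running at any CPL may execute I/O
   * instructions.
   */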
  pp->p_reg.psw |= 0x3000;
}

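/* A 386 segment descriptor encodes a 20-bit limit plus a granularity bit:
 * with G = 0 the limit counts bytes (up to 1 MB), with G = 1 it counts
 * 4 KB pages (up to 4 GB).  For example, a full 4 GB segment is encoded as
 * limit 0xFFFFF with G = 1; sdesc() below picks the granularity that fits
 * the requested size.
 */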
/*===========================================================================*
 *				sdesc					     *
 *===========================================================================*/
void sdesc(struct segdesc_s *segdp, phys_bytes base, vir_bytes size)
{
/* Fill in the size fields (base, limit and granularity) of a descriptor. */
  segdp->base_low = base;
  segdp->base_middle = base >> BASE_MIDDLE_SHIFT;
  segdp->base_high = base >> BASE_HIGH_SHIFT;

  --size;			/* convert to a limit, 0 size means 4G */
  if (size > BYTE_GRAN_MAX) {
	segdp->limit_low = size >> PAGE_GRAN_SHIFT;
	segdp->granularity = GRANULAR | (size >>
			     (PAGE_GRAN_SHIFT + GRANULARITY_SHIFT));
  } else {
	segdp->limit_low = size;
	segdp->granularity = size >> GRANULARITY_SHIFT;
  }
  segdp->granularity |= DEFAULT;	/* means BIG for data seg */
}

/*===========================================================================*
 *				init_dataseg				     *
 *===========================================================================*/
void init_param_dataseg(register struct segdesc_s *segdp,
	phys_bytes base, vir_bytes size, const int privilege)
{
	/* Build descriptor for a data segment. */
	sdesc(segdp, base, size);
	segdp->access = (privilege << DPL_SHIFT) | (PRESENT | SEGMENT |
		WRITEABLE | ACCESSED);
		/* EXECUTABLE = 0, EXPAND_DOWN = 0 */
}

void init_dataseg(int index, const int privilege)
{
	init_param_dataseg(&gdt[index], 0, 0xFFFFFFFF, privilege);
}

/*===========================================================================*
 *				init_codeseg				     *
 *===========================================================================*/
static void init_codeseg(int index, int privilege)
{
	/* Build descriptor for a code segment. */
	sdesc(&gdt[index], 0, 0xFFFFFFFF);
	gdt[index].access = (privilege << DPL_SHIFT)
	        | (PRESENT | SEGMENT | EXECUTABLE | READABLE);
		/* CONFORMING = 0, ACCESSED = 0 */
}

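/* Gate table for the 16 PIC-mapped hardware interrupt lines; the VECTOR()
 * macro translates an IRQ line number into its IDT vector number.
 */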
static struct gate_table_s gate_table_pic[] = {
	{ hwint00, VECTOR( 0), INTR_PRIVILEGE },
	{ hwint01, VECTOR( 1), INTR_PRIVILEGE },
	{ hwint02, VECTOR( 2), INTR_PRIVILEGE },
	{ hwint03, VECTOR( 3), INTR_PRIVILEGE },
	{ hwint04, VECTOR( 4), INTR_PRIVILEGE },
	{ hwint05, VECTOR( 5), INTR_PRIVILEGE },
	{ hwint06, VECTOR( 6), INTR_PRIVILEGE },
	{ hwint07, VECTOR( 7), INTR_PRIVILEGE },
	{ hwint08, VECTOR( 8), INTR_PRIVILEGE },
	{ hwint09, VECTOR( 9), INTR_PRIVILEGE },
	{ hwint10, VECTOR(10), INTR_PRIVILEGE },
	{ hwint11, VECTOR(11), INTR_PRIVILEGE },
	{ hwint12, VECTOR(12), INTR_PRIVILEGE },
	{ hwint13, VECTOR(13), INTR_PRIVILEGE },
	{ hwint14, VECTOR(14), INTR_PRIVILEGE },
	{ hwint15, VECTOR(15), INTR_PRIVILEGE },
	{ NULL, 0, 0}
};

static struct gate_table_s gate_table_exceptions[] = {
	{ divide_error, DIVIDE_VECTOR, INTR_PRIVILEGE },
	{ single_step_exception, DEBUG_VECTOR, INTR_PRIVILEGE },
	{ nmi, NMI_VECTOR, INTR_PRIVILEGE },
	{ breakpoint_exception, BREAKPOINT_VECTOR, USER_PRIVILEGE },
	{ overflow, OVERFLOW_VECTOR, USER_PRIVILEGE },
	{ bounds_check, BOUNDS_VECTOR, INTR_PRIVILEGE },
	{ inval_opcode, INVAL_OP_VECTOR, INTR_PRIVILEGE },
	{ copr_not_available, COPROC_NOT_VECTOR, INTR_PRIVILEGE },
	{ double_fault, DOUBLE_FAULT_VECTOR, INTR_PRIVILEGE },
	{ copr_seg_overrun, COPROC_SEG_VECTOR, INTR_PRIVILEGE },
	{ inval_tss, INVAL_TSS_VECTOR, INTR_PRIVILEGE },
	{ segment_not_present, SEG_NOT_VECTOR, INTR_PRIVILEGE },
	{ stack_exception, STACK_FAULT_VECTOR, INTR_PRIVILEGE },
	{ general_protection, PROTECTION_VECTOR, INTR_PRIVILEGE },
	{ page_fault, PAGE_FAULT_VECTOR, INTR_PRIVILEGE },
	{ copr_error, COPROC_ERR_VECTOR, INTR_PRIVILEGE },
	{ alignment_check, ALIGNMENT_CHECK_VECTOR, INTR_PRIVILEGE },
	{ machine_check, MACHINE_CHECK_VECTOR, INTR_PRIVILEGE },
	{ simd_exception, SIMD_EXCEPTION_VECTOR, INTR_PRIVILEGE },
	{ ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE },
	{ kernel_call_entry_orig, KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
	{ ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
	{ kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
	{ NULL, 0, 0}
};

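/* Set up the per-CPU TSS and its GDT descriptor.  On a trap from user mode
 * the CPU loads ss0:sp0 from the current TSS, so this is where each CPU
 * gets its own kernel stack pointer.  Setting iobase to the size of the
 * TSS places the I/O permission bitmap beyond the segment limit, i.e.
 * there is none.
 */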
int tss_init(unsigned cpu, void * kernel_stack)
{
	struct tss_s * t = &tss[cpu];
	int index = TSS_INDEX(cpu);
	struct segdesc_s *tssgdt;

	tssgdt = &gdt[index];

	init_param_dataseg(tssgdt, (phys_bytes) t,
			sizeof(struct tss_s), INTR_PRIVILEGE);
	tssgdt->access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE;

	/* Build TSS. */
	memset(t, 0, sizeof(*t));
	t->ds = t->es = t->fs = t->gs = t->ss0 = KERN_DS_SELECTOR;
	t->cs = KERN_CS_SELECTOR;
	t->iobase = sizeof(struct tss_s);	/* empty i/o permissions map */

	/*
	 * make space for the process pointer and cpu id and point to the
	 * first usable word
	 */
	k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
	/*
	 * set the cpu id at the top of the stack so we know on which cpu
	 * this stack is in use when we trap to the kernel
	 */
	*((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu;
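	/*
	 * Layout of the reserved words at the stack top as used here; the
	 * word at sp0 + 0 is presumably filled in with the current process
	 * pointer by the low-level trap code:
	 *
	 *	sp0 + 1*sizeof(reg_t):	cpu id (written above)
	 *	sp0 + 0*sizeof(reg_t):	process pointer slot
	 */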

	/* Set up Intel SYSENTER support if available. */
	if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
		ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR);
		ia32_msr_write(INTEL_MSR_SYSENTER_ESP, 0, t->sp0);
		ia32_msr_write(INTEL_MSR_SYSENTER_EIP, 0, (u32_t) ipc_entry_sysenter);
	}

	/* Set up AMD SYSCALL support if available. */
	if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
		u32_t msr_lo, msr_hi;

		/* set SYSCALL ENABLE bit in EFER MSR */
		ia32_msr_read(AMD_MSR_EFER, &msr_hi, &msr_lo);
		msr_lo |= AMD_EFER_SCE;
		ia32_msr_write(AMD_MSR_EFER, msr_hi, msr_lo);

		/* set STAR register value */
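		/*
		 * In legacy (32-bit) mode the STAR MSR holds the SYSCALL and
		 * SYSRET code segment selectors in its high dword and the
		 * SYSCALL target EIP in its low dword.  The target is a
		 * per-CPU stub (ipc_entry_syscall_cpuN), presumably because
		 * SYSCALL does not load a kernel stack pointer, so each
		 * CPU's stub must locate its own stack.  The ## paste below
		 * builds the stub name from a constant, hence one macro
		 * invocation per possible CPU rather than a loop.
		 */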
#define set_star_cpu(forcpu) if(cpu == forcpu) {				\
		ia32_msr_write(AMD_MSR_STAR,					\
		  ((u32_t)USER_CS_SELECTOR << 16) | (u32_t)KERN_CS_SELECTOR,	\
		  (u32_t) ipc_entry_syscall_cpu ## forcpu); }
		set_star_cpu(0);
		set_star_cpu(1);
		set_star_cpu(2);
		set_star_cpu(3);
		set_star_cpu(4);
		set_star_cpu(5);
		set_star_cpu(6);
		set_star_cpu(7);
		assert(CONFIG_MAX_CPUS <= 8);
	}

	return SEG_SELECTOR(index);
}

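/* Store a pseudo-descriptor (a limit:base pair, the operand format of the
 * LGDT/LIDT instructions) in a GDT slot and return its address.  Note that
 * this overlays a desctableptr_s on a segdesc_s slot rather than building
 * a real segment descriptor.
 */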
phys_bytes init_segdesc(int gdt_index, void *base, int size)
{
	struct desctableptr_s *dtp = (struct desctableptr_s *) &gdt[gdt_index];
	dtp->limit = size - 1;
	dtp->base = (phys_bytes) base;

	return (phys_bytes) dtp;
}

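/* An interrupt gate holds the handler address split into low and high
 * halves, a code segment selector, and a byte combining the present bit,
 * the DPL and the gate type.  The DPL determines which privilege levels
 * may invoke the vector with a software INT instruction; gates entered by
 * hardware interrupts and exceptions are not subject to that check.
 */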
void int_gate(struct gatedesc_s *tab,
	unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
{
/* Build descriptor for an interrupt gate. */
  register struct gatedesc_s *idp;

  idp = &tab[vec_nr];
  idp->offset_low = offset;
  idp->selector = KERN_CS_SELECTOR;
  idp->p_dpl_type = dpl_type;
  idp->offset_high = offset >> OFFSET_HIGH_SHIFT;
}

void int_gate_idt(unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
{
	int_gate(idt, vec_nr, offset, dpl_type);
}

void idt_copy_vectors(struct gate_table_s * first)
{
	struct gate_table_s *gtp;
	for (gtp = first; gtp->gate; gtp++) {
		int_gate(idt, gtp->vec_nr, (vir_bytes) gtp->gate,
				PRESENT | INT_GATE_TYPE |
				(gtp->privilege << DPL_SHIFT));
	}
}

void idt_copy_vectors_pic(void)
{
	idt_copy_vectors(gate_table_pic);
}

void idt_init(void)
{
	idt_copy_vectors_pic();
	idt_copy_vectors(gate_table_exceptions);
}

struct desctableptr_s gdt_desc, idt_desc;

void idt_reload(void)
{
	x86_lidt(&idt_desc);
}

multiboot_module_t *bootmod(int pnr)
{
	int i;

	assert(pnr >= 0);

	/* Search for the desired process in the boot process list.  The
	 * first NR_TASKS entries do not correspond to a module, however,
	 * so we don't search those.
	 */
	for(i = NR_TASKS; i < NR_BOOT_PROCS; i++) {
		int p;
		p = i - NR_TASKS;
		if(image[i].proc_nr == pnr) {
			assert(p < MULTIBOOT_MAX_MODS);
			assert(p < kinfo.mbi.mi_mods_count);
			return &kinfo.module_list[p];
		}
	}

	panic("boot module %d not found", pnr);
}

int booting_cpu = 0;

void prot_load_selectors(void)
{
  /* This function is called both by prot_init() on the BSP and by the
   * early AP boot code in mpx.S on secondary CPUs.  Everything is set up
   * the same, except for the TSS, which is per-CPU.
   */
  x86_lgdt(&gdt_desc);	/* Load gdt */
  idt_init();
  idt_reload();
  x86_lldt(LDT_SELECTOR);	/* Load bogus ldt */
  x86_ltr(TSS_SELECTOR(booting_cpu));

  x86_load_kerncs();
  x86_load_ds(KERN_DS_SELECTOR);
  x86_load_es(KERN_DS_SELECTOR);
  x86_load_fs(KERN_DS_SELECTOR);
  x86_load_gs(KERN_DS_SELECTOR);
  x86_load_ss(KERN_DS_SELECTOR);
}

/*===========================================================================*
 *				prot_init				     *
 *===========================================================================*/
void prot_init()
{
  extern char k_boot_stktop;

  if(_cpufeature(_CPUF_I386_SYSENTER))
	minix_feature_flags |= MKF_I386_INTEL_SYSENTER;
  if(_cpufeature(_CPUF_I386_SYSCALL))
	minix_feature_flags |= MKF_I386_AMD_SYSCALL;

  memset(gdt, 0, sizeof(gdt));
  memset(idt, 0, sizeof(idt));

  /* Build the GDT and IDT pseudo-descriptors (for lgdt/lidt). */
  gdt_desc.base = (u32_t) gdt;
  gdt_desc.limit = sizeof(gdt)-1;
  idt_desc.base = (u32_t) idt;
  idt_desc.limit = sizeof(idt)-1;
  tss_init(0, &k_boot_stktop);

  /* Build GDT */
  init_param_dataseg(&gdt[LDT_INDEX],
    (phys_bytes) 0, 0, INTR_PRIVILEGE); /* unusable LDT */
  gdt[LDT_INDEX].access = PRESENT | LDT;
  init_codeseg(KERN_CS_INDEX, INTR_PRIVILEGE);
  init_dataseg(KERN_DS_INDEX, INTR_PRIVILEGE);
  init_codeseg(USER_CS_INDEX, USER_PRIVILEGE);
  init_dataseg(USER_DS_INDEX, USER_PRIVILEGE);

  /* Currently the multiboot segments are loaded, which is fine, but let's
   * replace them with the ones from our own GDT so we test right away
   * whether they work as expected.
   */
  prot_load_selectors();

  /* Set up a new post-relocate bootstrap pagetable so that
   * we can map in VM, and we no longer rely on pre-relocated
   * data.
   */

  pg_clear();
  pg_identity(&kinfo); /* Still need 1:1 for lapic and video mem and such. */
  pg_mapkernel();
  pg_load();

  prot_init_done = 1;
}

static int alloc_for_vm = 0;

void arch_post_init(void)
{
  /* Let memory mapping code know what's going on at bootstrap time */
  struct proc *vm;
  vm = proc_addr(VM_PROC_NR);
  get_cpulocal_var(ptproc) = vm;
  pg_info(&vm->p_seg.p_cr3, &vm->p_seg.p_cr3_v);
}

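/* Page allocator callback handed to libexec when loading VM below: map
 * fresh pages into the bootstrap page table over the requested range,
 * activate the mapping and clear the memory.  The running total kept in
 * alloc_for_vm is later reported to VM via kinfo.vm_allocated_bytes.
 */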
static int libexec_pg_alloc(struct exec_info *execi, vir_bytes vaddr, size_t len)
{
	pg_map(PG_ALLOCATEME, vaddr, vaddr+len, &kinfo);
	pg_load();
	memset((char *) vaddr, 0, len);
	alloc_for_vm += len;
	return OK;
}

void arch_boot_proc(struct boot_image *ip, struct proc *rp)
{
	multiboot_module_t *mod;
	struct ps_strings *psp;
	char *sp;

	if(rp->p_nr < 0) return;

	mod = bootmod(rp->p_nr);

	/* Important special case: we put VM in the bootstrap pagetable
	 * so it can run.
	 */

	if(rp->p_nr == VM_PROC_NR) {
		struct exec_info execi;

		memset(&execi, 0, sizeof(execi));

		/* exec parameters */
		execi.stack_high = kinfo.user_sp;
		execi.stack_size = 64 * 1024;	/* not too crazy as it must be preallocated */
		execi.proc_e = ip->endpoint;
		execi.hdr = (char *) mod->mod_start; /* phys mem direct */
		execi.filesize = execi.hdr_len = mod->mod_end - mod->mod_start;
		strlcpy(execi.progname, ip->proc_name, sizeof(execi.progname));
		execi.frame_len = 0;

		/* callbacks for use in the kernel */
		execi.copymem = libexec_copy_memcpy;
		execi.clearmem = libexec_clear_memset;
		execi.allocmem_prealloc_junk = libexec_pg_alloc;
		execi.allocmem_prealloc_cleared = libexec_pg_alloc;
		execi.allocmem_ondemand = libexec_pg_alloc;
		execi.clearproc = NULL;

		/* parse VM ELF binary and alloc/map it into bootstrap pagetable */
		if(libexec_load_elf(&execi) != OK)
			panic("VM loading failed");

		/* Set up a ps_strings struct on the stack, pointing to the
		 * following argv, envp. */
		sp = (char *)execi.stack_high;
		sp -= sizeof(struct ps_strings);
		psp = (struct ps_strings *) sp;

		/* Take the stack pointer down three words to give startup code
		 * something to use as "argc", "argv" and "envp".
		 */
		sp -= (sizeof(void *) + sizeof(void *) + sizeof(int));
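		/*
		 * The stack now holds, from sp upward: the argc word, the
		 * argv and envp pointer slots, and then the ps_strings
		 * struct just below stack_high.
		 */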

		/* The kernel is running on the same bootstrap page table
		 * into which VM was just mapped, so these addresses are
		 * valid in VM's linear address space as well and can be
		 * stored directly.
		 */
		psp->ps_argvstr = (char **)(sp + sizeof(int));
		psp->ps_nargvstr = 0;
		psp->ps_envstr = psp->ps_argvstr + sizeof(void *);
		psp->ps_nenvstr = 0;

		arch_proc_init(rp, execi.pc, (vir_bytes)sp,
			execi.stack_high - sizeof(struct ps_strings),
			ip->proc_name);

		/* Free VM blob that was just copied into existence. */
		add_memmap(&kinfo, mod->mod_start, mod->mod_end-mod->mod_start);
		mod->mod_end = mod->mod_start = 0;

		/* Remember how much memory was allocated for VM. */
		kinfo.vm_allocated_bytes = alloc_for_vm;
	}
}