/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#include <vm/as.h>
#endif

#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t	*gdt0;
#if !defined(__xpv)
desctbr_t	gdt0_default_r;
#endif

gate_desc_t	*idt0;		/* interrupt descriptor table */
#if defined(__i386)
desctbr_t	idt0_default_r;	/* describes idt0 in IDTR format */
#endif

struct tss	*ktss0;		/* kernel task state structure */

#if defined(__i386)
struct tss	*dftss0;	/* #DF double-fault exception */
#endif	/* __i386 */

user_desc_t	zero_udesc;	/* base zero user desc native procs */
user_desc_t	null_udesc;	/* null user descriptor */
system_desc_t	null_sdesc;	/* null system descriptor */

#if defined(__amd64)
user_desc_t	zero_u32desc;	/* 32-bit compatibility procs */
#endif	/* __amd64 */

#if defined(__amd64)
user_desc_t	ucs_on;
user_desc_t	ucs_off;
user_desc_t	ucs32_on;
user_desc_t	ucs32_off;
#endif	/* __amd64 */

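/*
 * Stack used by the double fault (#DF) handler; it is wired up below in
 * init_tss() (via IST1 on amd64, via the dedicated dftss0 TSS on i386).
 * Under __xpv it instead serves as the default kernel stack set in
 * init_desctbls().
 */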
#pragma	align 16(dblfault_stack0)
char	dblfault_stack0[DEFAULTSTKSZ];

extern void	fast_null(void);
extern hrtime_t	get_hrtime(void);
extern hrtime_t	gethrvtime(void);
extern hrtime_t	get_hrestime(void);
extern uint64_t	getlgrp(void);

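/*
 * Dispatch table for the "fast trap" handlers reached via the gate installed
 * at vector T_FASTTRAP in init_idt_common(); it is indexed by the fast-trap
 * number requested by userland, and unimplemented slots point at fast_null.
 */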
void (*(fasttable[]))(void) = {
	fast_null,			/* T_FNULL routine */
	fast_null,			/* T_FGETFP routine (initially null) */
	fast_null,			/* T_FSETFP routine (initially null) */
	(void (*)())get_hrtime,		/* T_GETHRTIME */
	(void (*)())gethrvtime,		/* T_GETHRVTIME */
	(void (*)())get_hrestime,	/* T_GETHRESTIME */
	(void (*)())getlgrp		/* T_GETLGRP */
};

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
	int ih_inum;
	gate_desc_t ih_interp_desc;
	gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.
 */
static struct interposing_handler brand_tbl[2];

/*
 * software prototypes for default local descriptor table
 */

/*
 * Routines for loading segment descriptors in format the hardware
 * can understand.
 */

#if defined(__amd64)

/*
 * In long mode we have the new L or long mode attribute bit
 * for code segments. Only the conforming bit in type is used along
 * with descriptor priority and present bits. Default operand size must
 * be zero when in long mode. In 32-bit compatibility mode all fields
 * are treated as in legacy mode. For data segments while in long mode
 * only the present bit is loaded.
 */
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);

	/*
	 * 64-bit long mode.
	 */
	if (lmode == SDP_LONG)
		dp->usd_def32 = 0;		/* 32-bit operands only */
	else
		/*
		 * 32-bit compatibility mode.
		 */
		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */

	dp->usd_long = lmode;	/* 64-bit mode */
	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_gran = gran;	/* 0 = bytes, 1 = pages */

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;
}

#elif defined(__i386)

/*
 * Install user segment descriptor for code and data.
 */
void
set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl, uint_t gran, uint_t defopsz)
{
	dp->usd_lolimit = size;
	dp->usd_hilimit = (uintptr_t)size >> 16;

	dp->usd_lobase = (uintptr_t)base;
	dp->usd_midbase = (uintptr_t)base >> 16;
	dp->usd_hibase = (uintptr_t)base >> (16 + 8);

	dp->usd_type = type;
	dp->usd_dpl = dpl;
	dp->usd_p = 1;
	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
}

#endif	/* __i386 */

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

#if defined(__amd64)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

	dp->ssd_type = type;
	dp->ssd_zero1 = 0;	/* must be zero */
	dp->ssd_zero2 = 0;
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t	base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
	return ((void *)base);
}

#elif defined(__i386)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
	dp->ssd_lolimit = size;
	dp->ssd_hilimit = (uintptr_t)size >> 16;

	dp->ssd_lobase = (uintptr_t)base;
	dp->ssd_midbase = (uintptr_t)base >> 16;
	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);

	dp->ssd_type = type;
	dp->ssd_zero = 0;	/* must be zero */
	dp->ssd_dpl = dpl;
	dp->ssd_p = 1;
	dp->ssd_gran = 0;	/* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
	uintptr_t	base;

	base = (uintptr_t)dp->ssd_lobase |
	    (uintptr_t)dp->ssd_midbase << 16 |
	    (uintptr_t)dp->ssd_hibase << (16 + 8);
	return ((void *)base);
}

#endif	/* __i386 */

/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
 */

#if defined(__amd64)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t vector)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;
	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);

	dp->sgd_selector = (uint16_t)sel;

	/*
	 * For 64 bit native we use the IST stack mechanism
	 * for double faults. All other traps use the CPL = 0
	 * (tss_rsp0) stack.
	 */
#if !defined(__xpv)
	if (vector == T_DBLFLT)
		dp->sgd_ist = 1;
	else
#endif
		dp->sgd_ist = 0;

	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#elif defined(__i386)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t unused)
{
	dp->sgd_looffset = (uintptr_t)func;
	dp->sgd_hioffset = (uintptr_t)func >> 16;

	dp->sgd_selector = (uint16_t)sel;
	dp->sgd_stkcpy = 0;	/* always zero bytes */
	dp->sgd_type = type;
	dp->sgd_dpl = dpl;
	dp->sgd_p = 1;
}

#endif	/* __i386 */

/*
 * Updates a single user descriptor in the GDT of the current cpu.
 * Caller is responsible for preventing cpu migration.
 */

void
gdt_update_usegd(uint_t sidx, user_desc_t *udp)
{
#if defined(__xpv)

	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;

	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");

#else	/* __xpv */

	CPU->cpu_gdt[sidx] = *udp;

#endif	/* __xpv */
}

/*
 * Writes the single descriptor pointed to by udp into a process's
 * LDT entry pointed to by ldp.
 */
int
ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
{
#if defined(__xpv)

	uint64_t dpa;

	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
	    ((uintptr_t)ldp & PAGEOFFSET);

	/*
	 * The hypervisor is a little more restrictive about what it
	 * supports in the LDT.
	 */
	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
		return (EINVAL);

#else	/* __xpv */

	*ldp = *udp;

#endif	/* __xpv */
	return (0);
}

#if defined(__xpv)

/*
 * Converts hw format gate descriptor into pseudo-IDT format for the
 * hypervisor. Returns true if a valid entry was written.
 */
int
xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
{
	trap_info_t *ti = ti_arg;	/* XXPV Aargh - segments.h comment */

	/*
	 * skip holes in the IDT
	 */
	if (GATESEG_GETOFFSET(sgd) == 0)
		return (0);

	ASSERT(sgd->sgd_type == SDT_SYSIGT);
	ti->vector = vec;
	TI_SET_DPL(ti, sgd->sgd_dpl);

	/*
	 * Is this an interrupt gate?
	 */
	if (sgd->sgd_type == SDT_SYSIGT) {
		/* LINTED */
		TI_SET_IF(ti, 1);
	}
	ti->cs = sgd->sgd_selector;
#if defined(__amd64)
	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL */
#endif
	ti->address = GATESEG_GETOFFSET(sgd);
	return (1);
}

/*
 * Convert a single hw format gate descriptor and write it into our virtual
 * IDT.
 */
void
xen_idt_write(gate_desc_t *sgd, uint_t vec)
{
	trap_info_t trapinfo[2];

	bzero(trapinfo, sizeof (trapinfo));
	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
		return;
	if (xen_set_trap_table(trapinfo) != 0)
		panic("xen_idt_write: xen_set_trap_table() failed");
}

#endif	/* __xpv */

#if defined(__amd64)

/*
 * Build kernel GDT.
 */

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * 64-bit kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 64-bit kernel data segment. The limit attribute is ignored in
	 * 64-bit mode, but we set it here to 0xFFFF so that we can use the
	 * SYSRET instruction to return from system calls back to 32-bit
	 * applications. SYSRET doesn't update the base, limit, or attributes
	 * of %ss or %ds descriptors. We therefore must ensure that the kernel
	 * uses something, though it will be ignored by hardware, that is
	 * compatible with 32-bit apps. For the same reason we must set the
	 * default op size of this descriptor to 32-bit operands.
	 */
	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
	    SEL_KPL, SDP_PAGES, SDP_OP32);
	gdt[GDT_KDATA].usd_def32 = 1;

	/*
	 * 64-bit user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * 32-bit user code segment.
	 */
	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * See gdt_ucode32() and gdt_ucode_native().
	 */
	ucs_on = ucs_off = gdt[GDT_UCODE];
	ucs_off.usd_p = 0;	/* forces #np fault */

	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
	ucs32_off.usd_p = 0;	/* forces #np fault */

	/*
	 * 32 and 64 bit data segments can actually share the same descriptor.
	 * In long mode only the present bit is checked but all other fields
	 * are loaded. But in compatibility mode all fields are interpreted
	 * as in legacy mode so they must be set correctly for a 32-bit data
	 * segment.
	 */
	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

#if !defined(__xpv)

	/*
	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
	 * in the GDT is 0.
	 */

	/*
	 * Kernel TSS
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * Initialize fs and gs descriptors for 32 bit processes.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
	    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
		    SEL_UPL, SDP_PAGES, SDP_OP32);

	/*
	 * Initialize convenient zero base user descriptors for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */
	ulong_t addr;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT.
	 * On 64-bit, fixup KCS_SEL to be in ring 3.
	 * See KCS_SEL in segments.h.
	 */
	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase". So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	addr = 0x200000000ul;
	xen_set_segment_base(SEGBASE_FS, addr);
	xen_set_segment_base(SEGBASE_GS_USER, addr);
	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt.
	 * Entry 0 is the null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 * setup %gs for kernel
	 */
	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase". So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
	return (gdt0);
}

#endif	/* __xpv */

#elif defined(__i386)

static void
init_gdt_common(user_desc_t *gdt)
{
	int i;

	/*
	 * Text and data for both kernel and user span entire 32 bit
	 * address space.
	 */

	/*
	 * kernel code segment.
	 */
	set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * kernel data segment.
	 */
	set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user code segment.
	 */
	set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

	/*
	 * user data segment.
	 */
	set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
	    SDP_OP32);

#if !defined(__xpv)

	/*
	 * TSS for T_DBLFLT (double fault) handler
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
	    sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);

	/*
	 * TSS for kernel
	 */
	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif	/* !__xpv */

	/*
	 * %gs selector for kernel
	 */
	set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) - 1, SDT_MEMRWA,
	    SEL_KPL, SDP_BYTES, SDP_OP32);

	/*
	 * Initialize lwp private descriptors.
	 * Only attributes and limits are initialized, the effective
	 * base address is programmed via fsbase/gsbase.
	 */
	set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
	    SDP_PAGES, SDP_OP32);

	/*
	 * Initialize the descriptors set aside for brand usage.
	 * Only attributes and limits are initialized.
	 */
	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
		set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
		    SDP_PAGES, SDP_OP32);
	/*
	 * Initialize convenient zero base user descriptor for clearing
	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
	 * an example.
	 */
	set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
	    SDP_BYTES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 */
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}

#else	/* __xpv */

static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	/*
	 * XXX this allocation belongs in our caller, not here.
	 */
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt entries.
	 * Entry 0 is null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}

#endif	/* __xpv */
#endif	/* __i386 */

/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We also use interrupt gates for i386, even though this is not
 * required for some traps.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
static void
init_idt_common(gate_desc_t *idt)
{
	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);
	set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);
	set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
	    TRP_KPL, 0);
	set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);

	/*
	 * double fault handler.
	 *
	 * Note that on the hypervisor a guest does not receive #df faults.
	 * Instead a failsafe event is injected into the guest if its selectors
	 * and/or stack is in a broken state. See xen_failsafe_callback.
	 */
#if !defined(__xpv)
#if defined(__amd64)

	set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    T_DBLFLT);

#elif defined(__i386)

	/*
	 * task gate required.
	 */
	set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
	    0);

#endif	/* __i386 */
#endif	/* !__xpv */

	/*
	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
	 */

	set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
	    0);
	set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
	    TRP_KPL, 0);
	set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);

	/*
	 * install fast trap handler at 210.
	 */
	set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
	    0);

	/*
	 * System call handler.
	 */
#if defined(__amd64)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
	    TRP_UPL, 0);

#elif defined(__i386)
	set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
	    TRP_UPL, 0);
#endif	/* __i386 */

	/*
	 * Install the DTrace interrupt handler for the pid provider.
	 */
	set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
	    SDT_SYSIGT, TRP_UPL, 0);

	/*
	 * Prepare interposing descriptor for the syscall handler
	 * and cache copy of the default descriptor.
	 */
	brand_tbl[0].ih_inum = T_SYSCALLINT;
	brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];

#if defined(__amd64)
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#elif defined(__i386)
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#endif	/* __i386 */

	brand_tbl[1].ih_inum = 0;
}

#if defined(__xpv)

static void
init_idt(gate_desc_t *idt)
{
	init_idt_common(idt);
}

#else	/* __xpv */

static void
init_idt(gate_desc_t *idt)
{
	char	ivctname[80];
	void	(*ivctptr)(void);
	int	i;

	/*
	 * Initialize entire table with 'reserved' trap and then overwrite
	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
	 * since it can only be generated on a 386 processor. 15 is also
	 * unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++)
		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    0);

	/*
	 * 20-31 reserved
	 */
	for (i = 20; i < 32; i++)
		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    0);

	/*
	 * interrupts 32 - 255
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	}

	/*
	 * Now install the common ones. Note that it will overlay some
	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
	 */
	init_idt_common(idt);
}

#endif	/* __xpv */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	wr_ldtr(0);
#endif
}

#if !defined(__xpv)
#if defined(__amd64)

static void
init_tss(void)
{
	/*
	 * tss_rsp0 is dynamically filled in by resume() on each context
	 * switch. All exceptions but #DF will run on the thread stack.
	 * Set up the double fault stack here.
	 */
	ktss0->tss_ist1 =
	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#elif defined(__i386)

static void
init_tss(void)
{
	/*
	 * ktss0->tss_esp dynamically filled in by resume() on each
	 * context switch.
	 */
	ktss0->tss_ss0 = KDS_SEL;
	ktss0->tss_eip = (uint32_t)_start;
	ktss0->tss_ds = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
	ktss0->tss_cs = KCS_SEL;
	ktss0->tss_fs = KFS_SEL;
	ktss0->tss_gs = KGS_SEL;
	ktss0->tss_ldt = ULDT_SEL;

	/*
	 * Initialize double fault tss.
	 */
	dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_ss0 = KDS_SEL;

	/*
	 * tss_cr3 will get initialized in hat_kern_setup() once our page
	 * tables have been setup.
	 */
	dftss0->tss_eip = (uint32_t)syserrtrap;
	dftss0->tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_cs = KCS_SEL;
	dftss0->tss_ds = KDS_SEL;
	dftss0->tss_es = KDS_SEL;
	dftss0->tss_ss = KDS_SEL;
	dftss0->tss_fs = KFS_SEL;
	dftss0->tss_gs = KGS_SEL;

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#endif	/* __i386 */
#endif	/* !__xpv */

#if defined(__xpv)

void
init_desctbls(void)
{
	uint_t vec;
	user_desc_t *gdt;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();

	/*
	 * Store static pa of gdt to speed up pa_to_ma() translations
	 * on lwp context switches.
	 */
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;
	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

	/*
	 * Setup and install our IDT.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
	init_idt(idt0);
	for (vec = 0; vec < NIDT; vec++)
		xen_idt_write(&idt0[vec], vec);

	CPU->cpu_idt = idt0;

	/*
	 * set default kernel stack
	 */
	xen_stack_switch(KDS_SEL,
	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

	xen_init_callbacks();

	init_ldt();
}

#else	/* __xpv */

void
init_desctbls(void)
{
	user_desc_t *gdt;
	desctbr_t idtr;

	/*
	 * Allocate IDT and TSS structures on unique pages for better
	 * performance in virtual machines.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
#if !defined(__lint)
	ASSERT(sizeof (*ktss0) <= PAGESIZE);
#endif
	ktss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(ktss0, PAGESIZE);

#if defined(__i386)
#if !defined(__lint)
	ASSERT(sizeof (*dftss0) <= PAGESIZE);
#endif
	dftss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(dftss0, PAGESIZE);
#endif

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;

	/*
	 * Setup and install our IDT.
	 */
	init_idt(idt0);

	idtr.dtr_base = (uintptr_t)idt0;
	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
	wr_idtr(&idtr);
	CPU->cpu_idt = idt0;

#if defined(__i386)
	/*
	 * We maintain a description of idt0 in convenient IDTR format
	 * for #pf's on some older pentium processors. See pentium_pftrap().
	 */
	idt0_default_r = idtr;
#endif	/* __i386 */

	init_tss();
	CPU->cpu_tss = ktss0;
	init_ldt();
}

#endif	/* __xpv */

/*
 * In the early kernel, we need to set up a simple GDT to run on.
 *
 * XXPV	Can dboot use this too?  See dboot_gdt.s
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#elif defined(__i386)
	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#endif	/* __i386 */
}

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * Currently the hypervisor only supports 64-bit syscalls via
	 * syscall instruction. The 32-bit syscalls are handled by
	 * interrupt gate above.
	 */
	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	if (is_x86_feature(x86_featureset, X86FSET_SEP))
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * See comment above in brand_interpositioning_enable.
	 */
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	if (is_x86_feature(x86_featureset, X86FSET_SEP))
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}