xref: /onnv-gate/usr/src/uts/intel/ia32/os/desctbls.c (revision 5084:7d838c5c0eed)
10Sstevel@tonic-gate /*
23446Smrj  * CDDL HEADER START
33446Smrj  *
43446Smrj  * The contents of this file are subject to the terms of the
53446Smrj  * Common Development and Distribution License (the "License").
63446Smrj  * You may not use this file except in compliance with the License.
73446Smrj  *
83446Smrj  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93446Smrj  * or http://www.opensolaris.org/os/licensing.
103446Smrj  * See the License for the specific language governing permissions
113446Smrj  * and limitations under the License.
123446Smrj  *
133446Smrj  * When distributing Covered Code, include this CDDL HEADER in each
143446Smrj  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153446Smrj  * If applicable, add the following below this CDDL HEADER, with the
163446Smrj  * fields enclosed by brackets "[]" replaced with your own identifying
173446Smrj  * information: Portions Copyright [yyyy] [name of copyright owner]
183446Smrj  *
193446Smrj  * CDDL HEADER END
203446Smrj  */
213446Smrj 
223446Smrj /*
233446Smrj  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * Copyright (c) 1992 Terrence R. Lambert.
310Sstevel@tonic-gate  * Copyright (c) 1990 The Regents of the University of California.
320Sstevel@tonic-gate  * All rights reserved.
330Sstevel@tonic-gate  *
340Sstevel@tonic-gate  * This code is derived from software contributed to Berkeley by
350Sstevel@tonic-gate  * William Jolitz.
360Sstevel@tonic-gate  *
370Sstevel@tonic-gate  * Redistribution and use in source and binary forms, with or without
380Sstevel@tonic-gate  * modification, are permitted provided that the following conditions
390Sstevel@tonic-gate  * are met:
400Sstevel@tonic-gate  * 1. Redistributions of source code must retain the above copyright
410Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer.
420Sstevel@tonic-gate  * 2. Redistributions in binary form must reproduce the above copyright
430Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer in the
440Sstevel@tonic-gate  *    documentation and/or other materials provided with the distribution.
450Sstevel@tonic-gate  * 3. All advertising materials mentioning features or use of this software
460Sstevel@tonic-gate  *    must display the following acknowledgement:
470Sstevel@tonic-gate  *	This product includes software developed by the University of
480Sstevel@tonic-gate  *	California, Berkeley and its contributors.
490Sstevel@tonic-gate  * 4. Neither the name of the University nor the names of its contributors
500Sstevel@tonic-gate  *    may be used to endorse or promote products derived from this software
510Sstevel@tonic-gate  *    without specific prior written permission.
520Sstevel@tonic-gate  *
530Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
540Sstevel@tonic-gate  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
550Sstevel@tonic-gate  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
560Sstevel@tonic-gate  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
570Sstevel@tonic-gate  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
580Sstevel@tonic-gate  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
590Sstevel@tonic-gate  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
600Sstevel@tonic-gate  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
610Sstevel@tonic-gate  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
620Sstevel@tonic-gate  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
630Sstevel@tonic-gate  * SUCH DAMAGE.
640Sstevel@tonic-gate  *
650Sstevel@tonic-gate  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
660Sstevel@tonic-gate  */
670Sstevel@tonic-gate 
680Sstevel@tonic-gate #include <sys/types.h>
693446Smrj #include <sys/sysmacros.h>
700Sstevel@tonic-gate #include <sys/tss.h>
710Sstevel@tonic-gate #include <sys/segments.h>
720Sstevel@tonic-gate #include <sys/trap.h>
730Sstevel@tonic-gate #include <sys/cpuvar.h>
743446Smrj #include <sys/bootconf.h>
750Sstevel@tonic-gate #include <sys/x86_archext.h>
763446Smrj #include <sys/controlregs.h>
770Sstevel@tonic-gate #include <sys/archsystm.h>
780Sstevel@tonic-gate #include <sys/machsystm.h>
790Sstevel@tonic-gate #include <sys/kobj.h>
800Sstevel@tonic-gate #include <sys/cmn_err.h>
810Sstevel@tonic-gate #include <sys/reboot.h>
820Sstevel@tonic-gate #include <sys/kdi.h>
833446Smrj #include <sys/mach_mmu.h>
841217Srab #include <sys/systm.h>
85*5084Sjohnlev 
86*5084Sjohnlev #ifdef __xpv
87*5084Sjohnlev #include <sys/hypervisor.h>
88*5084Sjohnlev #include <vm/as.h>
89*5084Sjohnlev #endif
90*5084Sjohnlev 
913446Smrj #include <sys/promif.h>
923446Smrj #include <sys/bootinfo.h>
933446Smrj #include <vm/kboot_mmu.h>
94*5084Sjohnlev #include <vm/hat_pte.h>
950Sstevel@tonic-gate 
960Sstevel@tonic-gate /*
970Sstevel@tonic-gate  * cpu0 and default tables and structures.
980Sstevel@tonic-gate  */
993446Smrj user_desc_t	*gdt0;
100*5084Sjohnlev #if !defined(__xpv)
1010Sstevel@tonic-gate desctbr_t	gdt0_default_r;
102*5084Sjohnlev #endif
1030Sstevel@tonic-gate 
1040Sstevel@tonic-gate #pragma	align	16(idt0)
1050Sstevel@tonic-gate gate_desc_t	idt0[NIDT]; 		/* interrupt descriptor table */
1063446Smrj #if defined(__i386)
1070Sstevel@tonic-gate desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */
1083446Smrj #endif
1090Sstevel@tonic-gate 
1100Sstevel@tonic-gate #pragma align	16(ktss0)
1110Sstevel@tonic-gate struct tss	ktss0;			/* kernel task state structure */
1120Sstevel@tonic-gate 
1130Sstevel@tonic-gate #if defined(__i386)
1140Sstevel@tonic-gate #pragma align	16(dftss0)
1150Sstevel@tonic-gate struct tss	dftss0;			/* #DF double-fault exception */
1160Sstevel@tonic-gate #endif	/* __i386 */
1170Sstevel@tonic-gate 
1180Sstevel@tonic-gate user_desc_t	zero_udesc;		/* base zero user desc native procs */
119*5084Sjohnlev user_desc_t	null_udesc;		/* null user descriptor */
120*5084Sjohnlev system_desc_t	null_sdesc;		/* null system descriptor */
1210Sstevel@tonic-gate 
1220Sstevel@tonic-gate #if defined(__amd64)
1230Sstevel@tonic-gate user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
1240Sstevel@tonic-gate #endif	/* __amd64 */
1250Sstevel@tonic-gate 
126*5084Sjohnlev #if defined(__amd64)
127*5084Sjohnlev user_desc_t	ucs_on;
128*5084Sjohnlev user_desc_t	ucs_off;
129*5084Sjohnlev user_desc_t	ucs32_on;
130*5084Sjohnlev user_desc_t	ucs32_off;
131*5084Sjohnlev #endif	/* __amd64 */
132*5084Sjohnlev 
1330Sstevel@tonic-gate #pragma	align	16(dblfault_stack0)
1340Sstevel@tonic-gate char		dblfault_stack0[DEFAULTSTKSZ];
1350Sstevel@tonic-gate 
1360Sstevel@tonic-gate extern void	fast_null(void);
1370Sstevel@tonic-gate extern hrtime_t	get_hrtime(void);
1380Sstevel@tonic-gate extern hrtime_t	gethrvtime(void);
1390Sstevel@tonic-gate extern hrtime_t	get_hrestime(void);
1400Sstevel@tonic-gate extern uint64_t	getlgrp(void);
1410Sstevel@tonic-gate 
1420Sstevel@tonic-gate void (*(fasttable[]))(void) = {
1430Sstevel@tonic-gate 	fast_null,			/* T_FNULL routine */
1440Sstevel@tonic-gate 	fast_null,			/* T_FGETFP routine (initially null) */
1450Sstevel@tonic-gate 	fast_null,			/* T_FSETFP routine (initially null) */
1460Sstevel@tonic-gate 	(void (*)())get_hrtime,		/* T_GETHRTIME */
1470Sstevel@tonic-gate 	(void (*)())gethrvtime,		/* T_GETHRVTIME */
1480Sstevel@tonic-gate 	(void (*)())get_hrestime,	/* T_GETHRESTIME */
1490Sstevel@tonic-gate 	(void (*)())getlgrp		/* T_GETLGRP */
1500Sstevel@tonic-gate };
1510Sstevel@tonic-gate 
1520Sstevel@tonic-gate /*
1532712Snn35248  * Structure containing pre-computed descriptors to allow us to temporarily
1542712Snn35248  * interpose on a standard handler.
1552712Snn35248  */
1562712Snn35248 struct interposing_handler {
1572712Snn35248 	int ih_inum;
1582712Snn35248 	gate_desc_t ih_interp_desc;
1592712Snn35248 	gate_desc_t ih_default_desc;
1602712Snn35248 };
1612712Snn35248 
1622712Snn35248 /*
1632712Snn35248  * The brand infrastructure interposes on two handlers, and we use one as a
1642712Snn35248  * NULL signpost.
1652712Snn35248  */
1662712Snn35248 static struct interposing_handler brand_tbl[3];
1672712Snn35248 
1682712Snn35248 /*
1690Sstevel@tonic-gate  * software prototypes for default local descriptor table
1700Sstevel@tonic-gate  */
1710Sstevel@tonic-gate 
1720Sstevel@tonic-gate /*
1730Sstevel@tonic-gate  * Routines for loading segment descriptors in format the hardware
1740Sstevel@tonic-gate  * can understand.
1750Sstevel@tonic-gate  */
1760Sstevel@tonic-gate 
1770Sstevel@tonic-gate #if defined(__amd64)
1780Sstevel@tonic-gate 
1790Sstevel@tonic-gate /*
1800Sstevel@tonic-gate  * In long mode we have the new L or long mode attribute bit
1810Sstevel@tonic-gate  * for code segments. Only the conforming bit in type is used along
1820Sstevel@tonic-gate  * with descriptor priority and present bits. Default operand size must
1830Sstevel@tonic-gate  * be zero when in long mode. In 32-bit compatibility mode all fields
1840Sstevel@tonic-gate  * are treated as in legacy mode. For data segments while in long mode
1850Sstevel@tonic-gate  * only the present bit is loaded.
1860Sstevel@tonic-gate  */
1870Sstevel@tonic-gate void
1880Sstevel@tonic-gate set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
1890Sstevel@tonic-gate     uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
1900Sstevel@tonic-gate {
1910Sstevel@tonic-gate 	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 	/*
1940Sstevel@tonic-gate 	 * 64-bit long mode.
1950Sstevel@tonic-gate 	 */
1960Sstevel@tonic-gate 	if (lmode == SDP_LONG)
1970Sstevel@tonic-gate 		dp->usd_def32 = 0;		/* 32-bit operands only */
1980Sstevel@tonic-gate 	else
1990Sstevel@tonic-gate 		/*
2000Sstevel@tonic-gate 		 * 32-bit compatibility mode.
2010Sstevel@tonic-gate 		 */
2020Sstevel@tonic-gate 		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */
2030Sstevel@tonic-gate 
2040Sstevel@tonic-gate 	dp->usd_long = lmode;	/* 64-bit mode */
2050Sstevel@tonic-gate 	dp->usd_type = type;
2060Sstevel@tonic-gate 	dp->usd_dpl = dpl;
2070Sstevel@tonic-gate 	dp->usd_p = 1;
2080Sstevel@tonic-gate 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
2090Sstevel@tonic-gate 
2100Sstevel@tonic-gate 	dp->usd_lobase = (uintptr_t)base;
2110Sstevel@tonic-gate 	dp->usd_midbase = (uintptr_t)base >> 16;
2120Sstevel@tonic-gate 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
2130Sstevel@tonic-gate 	dp->usd_lolimit = size;
2140Sstevel@tonic-gate 	dp->usd_hilimit = (uintptr_t)size >> 16;
2150Sstevel@tonic-gate }
2160Sstevel@tonic-gate 
2170Sstevel@tonic-gate #elif defined(__i386)
2180Sstevel@tonic-gate 
2190Sstevel@tonic-gate /*
2200Sstevel@tonic-gate  * Install user segment descriptor for code and data.
2210Sstevel@tonic-gate  */
2220Sstevel@tonic-gate void
2230Sstevel@tonic-gate set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
2240Sstevel@tonic-gate     uint_t dpl, uint_t gran, uint_t defopsz)
2250Sstevel@tonic-gate {
2260Sstevel@tonic-gate 	dp->usd_lolimit = size;
2270Sstevel@tonic-gate 	dp->usd_hilimit = (uintptr_t)size >> 16;
2280Sstevel@tonic-gate 
2290Sstevel@tonic-gate 	dp->usd_lobase = (uintptr_t)base;
2300Sstevel@tonic-gate 	dp->usd_midbase = (uintptr_t)base >> 16;
2310Sstevel@tonic-gate 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 	dp->usd_type = type;
2340Sstevel@tonic-gate 	dp->usd_dpl = dpl;
2350Sstevel@tonic-gate 	dp->usd_p = 1;
2360Sstevel@tonic-gate 	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
2370Sstevel@tonic-gate 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
2380Sstevel@tonic-gate }
2390Sstevel@tonic-gate 
2400Sstevel@tonic-gate #endif	/* __i386 */
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate /*
2430Sstevel@tonic-gate  * Install system segment descriptor for LDT and TSS segments.
2440Sstevel@tonic-gate  */
2450Sstevel@tonic-gate 
2460Sstevel@tonic-gate #if defined(__amd64)
2470Sstevel@tonic-gate 
2480Sstevel@tonic-gate void
2490Sstevel@tonic-gate set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
2500Sstevel@tonic-gate     uint_t dpl)
2510Sstevel@tonic-gate {
2520Sstevel@tonic-gate 	dp->ssd_lolimit = size;
2530Sstevel@tonic-gate 	dp->ssd_hilimit = (uintptr_t)size >> 16;
2540Sstevel@tonic-gate 
2550Sstevel@tonic-gate 	dp->ssd_lobase = (uintptr_t)base;
2560Sstevel@tonic-gate 	dp->ssd_midbase = (uintptr_t)base >> 16;
2570Sstevel@tonic-gate 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
2580Sstevel@tonic-gate 	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
2590Sstevel@tonic-gate 
2600Sstevel@tonic-gate 	dp->ssd_type = type;
2610Sstevel@tonic-gate 	dp->ssd_zero1 = 0;	/* must be zero */
2620Sstevel@tonic-gate 	dp->ssd_zero2 = 0;
2630Sstevel@tonic-gate 	dp->ssd_dpl = dpl;
2640Sstevel@tonic-gate 	dp->ssd_p = 1;
2650Sstevel@tonic-gate 	dp->ssd_gran = 0;	/* force byte units */
2660Sstevel@tonic-gate }
2670Sstevel@tonic-gate 
268*5084Sjohnlev void *
269*5084Sjohnlev get_ssd_base(system_desc_t *dp)
270*5084Sjohnlev {
271*5084Sjohnlev 	uintptr_t	base;
272*5084Sjohnlev 
273*5084Sjohnlev 	base = (uintptr_t)dp->ssd_lobase |
274*5084Sjohnlev 	    (uintptr_t)dp->ssd_midbase << 16 |
275*5084Sjohnlev 	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
276*5084Sjohnlev 	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
277*5084Sjohnlev 	return ((void *)base);
278*5084Sjohnlev }
279*5084Sjohnlev 
2800Sstevel@tonic-gate #elif defined(__i386)
2810Sstevel@tonic-gate 
2820Sstevel@tonic-gate void
2830Sstevel@tonic-gate set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
2840Sstevel@tonic-gate     uint_t dpl)
2850Sstevel@tonic-gate {
2860Sstevel@tonic-gate 	dp->ssd_lolimit = size;
2870Sstevel@tonic-gate 	dp->ssd_hilimit = (uintptr_t)size >> 16;
2880Sstevel@tonic-gate 
2890Sstevel@tonic-gate 	dp->ssd_lobase = (uintptr_t)base;
2900Sstevel@tonic-gate 	dp->ssd_midbase = (uintptr_t)base >> 16;
2910Sstevel@tonic-gate 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
2920Sstevel@tonic-gate 
2930Sstevel@tonic-gate 	dp->ssd_type = type;
2940Sstevel@tonic-gate 	dp->ssd_zero = 0;	/* must be zero */
2950Sstevel@tonic-gate 	dp->ssd_dpl = dpl;
2960Sstevel@tonic-gate 	dp->ssd_p = 1;
2970Sstevel@tonic-gate 	dp->ssd_gran = 0;	/* force byte units */
2980Sstevel@tonic-gate }
2990Sstevel@tonic-gate 
300*5084Sjohnlev void *
301*5084Sjohnlev get_ssd_base(system_desc_t *dp)
302*5084Sjohnlev {
303*5084Sjohnlev 	uintptr_t	base;
304*5084Sjohnlev 
305*5084Sjohnlev 	base = (uintptr_t)dp->ssd_lobase |
306*5084Sjohnlev 	    (uintptr_t)dp->ssd_midbase << 16 |
307*5084Sjohnlev 	    (uintptr_t)dp->ssd_hibase << (16 + 8);
308*5084Sjohnlev 	return ((void *)base);
309*5084Sjohnlev }
310*5084Sjohnlev 
3110Sstevel@tonic-gate #endif	/* __i386 */
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate /*
3140Sstevel@tonic-gate  * Install gate segment descriptor for interrupt, trap, call and task gates.
3150Sstevel@tonic-gate  */
3160Sstevel@tonic-gate 
3170Sstevel@tonic-gate #if defined(__amd64)
3180Sstevel@tonic-gate 
3190Sstevel@tonic-gate void
3203446Smrj set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
3210Sstevel@tonic-gate     uint_t type, uint_t dpl)
3220Sstevel@tonic-gate {
3230Sstevel@tonic-gate 	dp->sgd_looffset = (uintptr_t)func;
3240Sstevel@tonic-gate 	dp->sgd_hioffset = (uintptr_t)func >> 16;
3250Sstevel@tonic-gate 	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
3260Sstevel@tonic-gate 
3270Sstevel@tonic-gate 	dp->sgd_selector =  (uint16_t)sel;
3283446Smrj 
3293446Smrj 	/*
3303446Smrj 	 * For 64 bit native we use the IST stack mechanism
3313446Smrj 	 * for double faults. All other traps use the CPL = 0
3323446Smrj 	 * (tss_rsp0) stack.
3333446Smrj 	 */
334*5084Sjohnlev #if !defined(__xpv)
3353446Smrj 	if (type == T_DBLFLT)
3363446Smrj 		dp->sgd_ist = 1;
3373446Smrj 	else
338*5084Sjohnlev #endif
3393446Smrj 		dp->sgd_ist = 0;
3403446Smrj 
3410Sstevel@tonic-gate 	dp->sgd_type = type;
3420Sstevel@tonic-gate 	dp->sgd_dpl = dpl;
3430Sstevel@tonic-gate 	dp->sgd_p = 1;
3440Sstevel@tonic-gate }
3450Sstevel@tonic-gate 
3460Sstevel@tonic-gate #elif defined(__i386)
3470Sstevel@tonic-gate 
3480Sstevel@tonic-gate void
3490Sstevel@tonic-gate set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
3503446Smrj     uint_t type, uint_t dpl)
3510Sstevel@tonic-gate {
3520Sstevel@tonic-gate 	dp->sgd_looffset = (uintptr_t)func;
3530Sstevel@tonic-gate 	dp->sgd_hioffset = (uintptr_t)func >> 16;
3540Sstevel@tonic-gate 
3550Sstevel@tonic-gate 	dp->sgd_selector =  (uint16_t)sel;
3563446Smrj 	dp->sgd_stkcpy = 0;	/* always zero bytes */
3570Sstevel@tonic-gate 	dp->sgd_type = type;
3580Sstevel@tonic-gate 	dp->sgd_dpl = dpl;
3590Sstevel@tonic-gate 	dp->sgd_p = 1;
3600Sstevel@tonic-gate }
3610Sstevel@tonic-gate 
3623446Smrj #endif	/* __i386 */
3633446Smrj 
364*5084Sjohnlev /*
365*5084Sjohnlev  * Updates a single user descriptor in the GDT of the current cpu.
366*5084Sjohnlev  * Caller is responsible for preventing cpu migration.
367*5084Sjohnlev  */
368*5084Sjohnlev 
369*5084Sjohnlev void
370*5084Sjohnlev gdt_update_usegd(uint_t sidx, user_desc_t *udp)
371*5084Sjohnlev {
372*5084Sjohnlev #if defined(__xpv)
373*5084Sjohnlev 
374*5084Sjohnlev 	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
375*5084Sjohnlev 
376*5084Sjohnlev 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
377*5084Sjohnlev 		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
378*5084Sjohnlev 
379*5084Sjohnlev #else	/* __xpv */
380*5084Sjohnlev 
381*5084Sjohnlev 	CPU->cpu_gdt[sidx] = *udp;
382*5084Sjohnlev 
383*5084Sjohnlev #endif	/* __xpv */
384*5084Sjohnlev }
385*5084Sjohnlev 
386*5084Sjohnlev /*
387*5084Sjohnlev  * Writes a single descriptor pointed to by udp into a process's
388*5084Sjohnlev  * LDT entry pointed to by ldp.
389*5084Sjohnlev  */
390*5084Sjohnlev int
391*5084Sjohnlev ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
392*5084Sjohnlev {
393*5084Sjohnlev #if defined(__xpv)
394*5084Sjohnlev 
395*5084Sjohnlev 	uint64_t dpa;
396*5084Sjohnlev 
397*5084Sjohnlev 	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
398*5084Sjohnlev 	    ((uintptr_t)ldp & PAGEOFFSET);
399*5084Sjohnlev 
400*5084Sjohnlev 	/*
401*5084Sjohnlev 	 * The hypervisor is a little more restrictive about what it
402*5084Sjohnlev 	 * supports in the LDT.
403*5084Sjohnlev 	 */
404*5084Sjohnlev 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
405*5084Sjohnlev 		return (EINVAL);
406*5084Sjohnlev 
407*5084Sjohnlev #else	/* __xpv */
408*5084Sjohnlev 
409*5084Sjohnlev 	*ldp = *udp;
410*5084Sjohnlev 
411*5084Sjohnlev #endif	/* __xpv */
412*5084Sjohnlev 	return (0);
413*5084Sjohnlev }
414*5084Sjohnlev 
415*5084Sjohnlev #if defined(__xpv)
416*5084Sjohnlev 
417*5084Sjohnlev /*
418*5084Sjohnlev  * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
419*5084Sjohnlev  * Returns true if a valid entry was written.
420*5084Sjohnlev  */
421*5084Sjohnlev int
422*5084Sjohnlev xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
423*5084Sjohnlev {
424*5084Sjohnlev 	trap_info_t *ti = ti_arg;	/* XXPV	Aargh - segments.h comment */
425*5084Sjohnlev 
426*5084Sjohnlev 	/*
427*5084Sjohnlev 	 * skip holes in the IDT
428*5084Sjohnlev 	 */
429*5084Sjohnlev 	if (GATESEG_GETOFFSET(sgd) == 0)
430*5084Sjohnlev 		return (0);
431*5084Sjohnlev 
432*5084Sjohnlev 	ASSERT(sgd->sgd_type == SDT_SYSIGT);
433*5084Sjohnlev 	ti->vector = vec;
434*5084Sjohnlev 	TI_SET_DPL(ti, sgd->sgd_dpl);
435*5084Sjohnlev 
436*5084Sjohnlev 	/*
437*5084Sjohnlev 	 * Is this an interrupt gate?
438*5084Sjohnlev 	 */
439*5084Sjohnlev 	if (sgd->sgd_type == SDT_SYSIGT) {
440*5084Sjohnlev 		/* LINTED */
441*5084Sjohnlev 		TI_SET_IF(ti, 1);
442*5084Sjohnlev 	}
443*5084Sjohnlev 	ti->cs = sgd->sgd_selector;
444*5084Sjohnlev #if defined(__amd64)
445*5084Sjohnlev 	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
446*5084Sjohnlev #endif
447*5084Sjohnlev 	ti->address = GATESEG_GETOFFSET(sgd);
448*5084Sjohnlev 	return (1);
449*5084Sjohnlev }
450*5084Sjohnlev 
451*5084Sjohnlev /*
452*5084Sjohnlev  * Convert a single hw format gate descriptor and write it into our virtual IDT.
453*5084Sjohnlev  */
454*5084Sjohnlev void
455*5084Sjohnlev xen_idt_write(gate_desc_t *sgd, uint_t vec)
456*5084Sjohnlev {
457*5084Sjohnlev 	trap_info_t trapinfo[2];
458*5084Sjohnlev 
459*5084Sjohnlev 	bzero(trapinfo, sizeof (trapinfo));
460*5084Sjohnlev 	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
461*5084Sjohnlev 		return;
462*5084Sjohnlev 	if (xen_set_trap_table(trapinfo) != 0)
463*5084Sjohnlev 		panic("xen_idt_write: xen_set_trap_table() failed");
464*5084Sjohnlev }
465*5084Sjohnlev 
466*5084Sjohnlev #endif	/* __xpv */
467*5084Sjohnlev 
4683446Smrj #if defined(__amd64)
4690Sstevel@tonic-gate 
4700Sstevel@tonic-gate /*
4710Sstevel@tonic-gate  * Build kernel GDT.
4720Sstevel@tonic-gate  */
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate static void
4753446Smrj init_gdt_common(user_desc_t *gdt)
4760Sstevel@tonic-gate {
4773446Smrj 	int i;
4780Sstevel@tonic-gate 
4790Sstevel@tonic-gate 	/*
4800Sstevel@tonic-gate 	 * 64-bit kernel code segment.
4810Sstevel@tonic-gate 	 */
4823446Smrj 	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
4830Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
4840Sstevel@tonic-gate 
4850Sstevel@tonic-gate 	/*
4860Sstevel@tonic-gate 	 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
4870Sstevel@tonic-gate 	 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
4880Sstevel@tonic-gate 	 * instruction to return from system calls back to 32-bit applications.
4890Sstevel@tonic-gate 	 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
4900Sstevel@tonic-gate 	 * descriptors. We therefore must ensure that the kernel uses something,
4910Sstevel@tonic-gate 	 * though it will be ignored by hardware, that is compatible with 32-bit
4920Sstevel@tonic-gate 	 * apps. For the same reason we must set the default op size of this
4930Sstevel@tonic-gate 	 * descriptor to 32-bit operands.
4940Sstevel@tonic-gate 	 */
4953446Smrj 	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
4960Sstevel@tonic-gate 	    SEL_KPL, SDP_PAGES, SDP_OP32);
4973446Smrj 	gdt[GDT_KDATA].usd_def32 = 1;
4980Sstevel@tonic-gate 
4990Sstevel@tonic-gate 	/*
5000Sstevel@tonic-gate 	 * 64-bit user code segment.
5010Sstevel@tonic-gate 	 */
5023446Smrj 	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
5030Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
5040Sstevel@tonic-gate 
5050Sstevel@tonic-gate 	/*
5060Sstevel@tonic-gate 	 * 32-bit user code segment.
5070Sstevel@tonic-gate 	 */
5083446Smrj 	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
5090Sstevel@tonic-gate 	    SEL_UPL, SDP_PAGES, SDP_OP32);
5100Sstevel@tonic-gate 
5110Sstevel@tonic-gate 	/*
512*5084Sjohnlev 	 * See gdt_ucode32() and gdt_ucode_native().
513*5084Sjohnlev 	 */
514*5084Sjohnlev 	ucs_on = ucs_off = gdt[GDT_UCODE];
515*5084Sjohnlev 	ucs_off.usd_p = 0;	/* forces #np fault */
516*5084Sjohnlev 
517*5084Sjohnlev 	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
518*5084Sjohnlev 	ucs32_off.usd_p = 0;	/* forces #np fault */
519*5084Sjohnlev 
520*5084Sjohnlev 	/*
5210Sstevel@tonic-gate 	 * 32 and 64 bit data segments can actually share the same descriptor.
5220Sstevel@tonic-gate 	 * In long mode only the present bit is checked but all other fields
5230Sstevel@tonic-gate 	 * are loaded. But in compatibility mode all fields are interpreted
5240Sstevel@tonic-gate 	 * as in legacy mode so they must be set correctly for a 32-bit data
5250Sstevel@tonic-gate 	 * segment.
5260Sstevel@tonic-gate 	 */
5273446Smrj 	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
5280Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
5290Sstevel@tonic-gate 
530*5084Sjohnlev #if !defined(__xpv)
531*5084Sjohnlev 
5320Sstevel@tonic-gate 	/*
5331217Srab 	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
5341217Srab 	 * in the GDT is 0.
5350Sstevel@tonic-gate 	 */
5360Sstevel@tonic-gate 
5370Sstevel@tonic-gate 	/*
5380Sstevel@tonic-gate 	 * Kernel TSS
5390Sstevel@tonic-gate 	 */
5403446Smrj 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], &ktss0,
5410Sstevel@tonic-gate 	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);
5420Sstevel@tonic-gate 
543*5084Sjohnlev #endif	/* !__xpv */
544*5084Sjohnlev 
5450Sstevel@tonic-gate 	/*
5460Sstevel@tonic-gate 	 * Initialize fs and gs descriptors for 32 bit processes.
5470Sstevel@tonic-gate 	 * Only attributes and limits are initialized, the effective
5480Sstevel@tonic-gate 	 * base address is programmed via fsbase/gsbase.
5490Sstevel@tonic-gate 	 */
5503446Smrj 	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
5510Sstevel@tonic-gate 	    SEL_UPL, SDP_PAGES, SDP_OP32);
5523446Smrj 	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
5530Sstevel@tonic-gate 	    SEL_UPL, SDP_PAGES, SDP_OP32);
5540Sstevel@tonic-gate 
5550Sstevel@tonic-gate 	/*
5562712Snn35248 	 * Initialize the descriptors set aside for brand usage.
5572712Snn35248 	 * Only attributes and limits are initialized.
5582712Snn35248 	 */
5592712Snn35248 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
5603446Smrj 		set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
5612712Snn35248 		    SEL_UPL, SDP_PAGES, SDP_OP32);
5622712Snn35248 
5632712Snn35248 	/*
5640Sstevel@tonic-gate 	 * Initialize convenient zero base user descriptors for clearing
5650Sstevel@tonic-gate 	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
5660Sstevel@tonic-gate 	 * an example.
5670Sstevel@tonic-gate 	 */
5680Sstevel@tonic-gate 	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
5690Sstevel@tonic-gate 	    SDP_BYTES, SDP_OP32);
5700Sstevel@tonic-gate 	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
5710Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
5720Sstevel@tonic-gate }
5730Sstevel@tonic-gate 
574*5084Sjohnlev #if defined(__xpv)
575*5084Sjohnlev 
576*5084Sjohnlev static user_desc_t *
577*5084Sjohnlev init_gdt(void)
578*5084Sjohnlev {
579*5084Sjohnlev 	uint64_t gdtpa;
580*5084Sjohnlev 	ulong_t ma[1];		/* XXPV should be a memory_t */
581*5084Sjohnlev 	ulong_t addr;
582*5084Sjohnlev 
583*5084Sjohnlev #if !defined(__lint)
584*5084Sjohnlev 	/*
585*5084Sjohnlev 	 * Our gdt is never larger than a single page.
586*5084Sjohnlev 	 */
587*5084Sjohnlev 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
588*5084Sjohnlev #endif
589*5084Sjohnlev 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
590*5084Sjohnlev 	    PAGESIZE, PAGESIZE);
591*5084Sjohnlev 	if (gdt0 == NULL)
592*5084Sjohnlev 		panic("init_gdt: BOP_ALLOC failed");
593*5084Sjohnlev 	bzero(gdt0, PAGESIZE);
594*5084Sjohnlev 
595*5084Sjohnlev 	init_gdt_common(gdt0);
596*5084Sjohnlev 
597*5084Sjohnlev 	/*
598*5084Sjohnlev 	 * XXX Since we never invoke kmdb until after the kernel takes
599*5084Sjohnlev 	 * over the descriptor tables why not have it use the kernel's
600*5084Sjohnlev 	 * selectors?
601*5084Sjohnlev 	 */
602*5084Sjohnlev 	if (boothowto & RB_DEBUG) {
603*5084Sjohnlev 		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
604*5084Sjohnlev 		    SEL_KPL, SDP_PAGES, SDP_OP32);
605*5084Sjohnlev 		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
606*5084Sjohnlev 		    SEL_KPL, SDP_PAGES, SDP_OP32);
607*5084Sjohnlev 	}
608*5084Sjohnlev 
609*5084Sjohnlev 	/*
610*5084Sjohnlev 	 * Clear write permission for page containing the gdt and install it.
611*5084Sjohnlev 	 */
612*5084Sjohnlev 	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
613*5084Sjohnlev 	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
614*5084Sjohnlev 	kbm_read_only((uintptr_t)gdt0, gdtpa);
615*5084Sjohnlev 	xen_set_gdt(ma, NGDT);
616*5084Sjohnlev 
617*5084Sjohnlev 	/*
618*5084Sjohnlev 	 * Reload the segment registers to use the new GDT.
619*5084Sjohnlev 	 * On 64-bit, fixup KCS_SEL to be in ring 3.
620*5084Sjohnlev 	 * See KCS_SEL in segments.h.
621*5084Sjohnlev 	 */
622*5084Sjohnlev 	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
623*5084Sjohnlev 
624*5084Sjohnlev 	/*
625*5084Sjohnlev 	 *  setup %gs for kernel
626*5084Sjohnlev 	 */
627*5084Sjohnlev 	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
628*5084Sjohnlev 
629*5084Sjohnlev 	/*
630*5084Sjohnlev 	 * XX64 We should never dereference off "other gsbase" or
631*5084Sjohnlev 	 * "fsbase".  So, we should arrange to point FSBASE and
632*5084Sjohnlev 	 * KGSBASE somewhere truly awful e.g. point it at the last
633*5084Sjohnlev 	 * valid address below the hole so that any attempts to index
634*5084Sjohnlev 	 * off them cause an exception.
635*5084Sjohnlev 	 *
636*5084Sjohnlev 	 * For now, point it at 8G -- at least it should be unmapped
637*5084Sjohnlev 	 * until some 64-bit processes run.
638*5084Sjohnlev 	 */
639*5084Sjohnlev 	addr = 0x200000000ul;
640*5084Sjohnlev 	xen_set_segment_base(SEGBASE_FS, addr);
641*5084Sjohnlev 	xen_set_segment_base(SEGBASE_GS_USER, addr);
642*5084Sjohnlev 	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
643*5084Sjohnlev 
644*5084Sjohnlev 	return (gdt0);
645*5084Sjohnlev }
646*5084Sjohnlev 
647*5084Sjohnlev #else	/* __xpv */
648*5084Sjohnlev 
6493446Smrj static user_desc_t *
6500Sstevel@tonic-gate init_gdt(void)
6510Sstevel@tonic-gate {
6520Sstevel@tonic-gate 	desctbr_t	r_bgdt, r_gdt;
6530Sstevel@tonic-gate 	user_desc_t	*bgdt;
6543446Smrj 
6553446Smrj #if !defined(__lint)
6563446Smrj 	/*
6573446Smrj 	 * Our gdt is never larger than a single page.
6583446Smrj 	 */
6593446Smrj 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
6603446Smrj #endif
6613446Smrj 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
6623446Smrj 	    PAGESIZE, PAGESIZE);
6633446Smrj 	if (gdt0 == NULL)
6643446Smrj 		panic("init_gdt: BOP_ALLOC failed");
6653446Smrj 	bzero(gdt0, PAGESIZE);
6663446Smrj 
6673446Smrj 	init_gdt_common(gdt0);
6680Sstevel@tonic-gate 
6690Sstevel@tonic-gate 	/*
6703446Smrj 	 * Copy in from boot's gdt to our gdt.
6713446Smrj 	 * Entry 0 is the null descriptor by definition.
6720Sstevel@tonic-gate 	 */
6730Sstevel@tonic-gate 	rd_gdtr(&r_bgdt);
6740Sstevel@tonic-gate 	bgdt = (user_desc_t *)r_bgdt.dtr_base;
6750Sstevel@tonic-gate 	if (bgdt == NULL)
6760Sstevel@tonic-gate 		panic("null boot gdt");
6770Sstevel@tonic-gate 
6783446Smrj 	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
6793446Smrj 	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
6803446Smrj 	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
6813446Smrj 	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
6823446Smrj 	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
6833446Smrj 
6843446Smrj 	/*
6853446Smrj 	 * Install our new GDT
6863446Smrj 	 */
6873446Smrj 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
6883446Smrj 	r_gdt.dtr_base = (uintptr_t)gdt0;
6893446Smrj 	wr_gdtr(&r_gdt);
6903446Smrj 
6913446Smrj 	/*
6923446Smrj 	 * Reload the segment registers to use the new GDT
6933446Smrj 	 */
6943446Smrj 	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
6953446Smrj 
6963446Smrj 	/*
6973446Smrj 	 *  setup %gs for kernel
6983446Smrj 	 */
6993446Smrj 	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
7003446Smrj 
7013446Smrj 	/*
7023446Smrj 	 * XX64 We should never dereference off "other gsbase" or
7033446Smrj 	 * "fsbase".  So, we should arrange to point FSBASE and
7043446Smrj 	 * KGSBASE somewhere truly awful e.g. point it at the last
7053446Smrj 	 * valid address below the hole so that any attempts to index
7063446Smrj 	 * off them cause an exception.
7073446Smrj 	 *
7083446Smrj 	 * For now, point it at 8G -- at least it should be unmapped
7093446Smrj 	 * until some 64-bit processes run.
7103446Smrj 	 */
7113446Smrj 	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
7123446Smrj 	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
7133446Smrj 	return (gdt0);
7143446Smrj }
7153446Smrj 
716*5084Sjohnlev #endif	/* __xpv */
717*5084Sjohnlev 
7183446Smrj #elif defined(__i386)
7193446Smrj 
7203446Smrj static void
7213446Smrj init_gdt_common(user_desc_t *gdt)
7223446Smrj {
7233446Smrj 	int i;
7240Sstevel@tonic-gate 
7250Sstevel@tonic-gate 	/*
7260Sstevel@tonic-gate 	 * Text and data for both kernel and user span entire 32 bit
7270Sstevel@tonic-gate 	 * address space.
7280Sstevel@tonic-gate 	 */
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 	/*
7310Sstevel@tonic-gate 	 * kernel code segment.
7320Sstevel@tonic-gate 	 */
7333446Smrj 	set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
7340Sstevel@tonic-gate 	    SDP_OP32);
7350Sstevel@tonic-gate 
7360Sstevel@tonic-gate 	/*
7370Sstevel@tonic-gate 	 * kernel data segment.
7380Sstevel@tonic-gate 	 */
7393446Smrj 	set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
7400Sstevel@tonic-gate 	    SDP_OP32);
7410Sstevel@tonic-gate 
7420Sstevel@tonic-gate 	/*
7430Sstevel@tonic-gate 	 * user code segment.
7440Sstevel@tonic-gate 	 */
7453446Smrj 	set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
7460Sstevel@tonic-gate 	    SDP_OP32);
7470Sstevel@tonic-gate 
7480Sstevel@tonic-gate 	/*
7490Sstevel@tonic-gate 	 * user data segment.
7500Sstevel@tonic-gate 	 */
7513446Smrj 	set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
7520Sstevel@tonic-gate 	    SDP_OP32);
7530Sstevel@tonic-gate 
754*5084Sjohnlev #if !defined(__xpv)
755*5084Sjohnlev 
7560Sstevel@tonic-gate 	/*
7570Sstevel@tonic-gate 	 * TSS for T_DBLFLT (double fault) handler
7580Sstevel@tonic-gate 	 */
7593446Smrj 	set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], &dftss0,
7600Sstevel@tonic-gate 	    sizeof (dftss0) - 1, SDT_SYSTSS, SEL_KPL);
7610Sstevel@tonic-gate 
7620Sstevel@tonic-gate 	/*
7630Sstevel@tonic-gate 	 * TSS for kernel
7640Sstevel@tonic-gate 	 */
7653446Smrj 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], &ktss0,
7660Sstevel@tonic-gate 	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);
7670Sstevel@tonic-gate 
768*5084Sjohnlev #endif	/* !__xpv */
769*5084Sjohnlev 
7700Sstevel@tonic-gate 	/*
7710Sstevel@tonic-gate 	 * %gs selector for kernel
7720Sstevel@tonic-gate 	 */
7733446Smrj 	set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
7740Sstevel@tonic-gate 	    SEL_KPL, SDP_BYTES, SDP_OP32);
7750Sstevel@tonic-gate 
7760Sstevel@tonic-gate 	/*
7770Sstevel@tonic-gate 	 * Initialize lwp private descriptors.
7780Sstevel@tonic-gate 	 * Only attributes and limits are initialized, the effective
7790Sstevel@tonic-gate 	 * base address is programmed via fsbase/gsbase.
7800Sstevel@tonic-gate 	 */
7813446Smrj 	set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
7820Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
7833446Smrj 	set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
7840Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
7850Sstevel@tonic-gate 
7860Sstevel@tonic-gate 	/*
7872712Snn35248 	 * Initialize the descriptors set aside for brand usage.
7882712Snn35248 	 * Only attributes and limits are initialized.
7892712Snn35248 	 */
7902712Snn35248 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
7912712Snn35248 		set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
7922712Snn35248 		    SDP_PAGES, SDP_OP32);
7933446Smrj 	/*
7943446Smrj 	 * Initialize convenient zero base user descriptor for clearing
7953446Smrj 	 * lwp  private %fs and %gs descriptors in GDT. See setregs() for
7963446Smrj 	 * an example.
7973446Smrj 	 */
7983446Smrj 	set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
7993446Smrj 	    SDP_BYTES, SDP_OP32);
8003446Smrj }
8013446Smrj 
802*5084Sjohnlev #if defined(__xpv)
803*5084Sjohnlev 
/*
 * Hypervisor (__xpv) flavor: allocate, populate and register the boot
 * cpu's GDT with Xen.  Returns the new table (also stored in gdt0).
 * The statement order matters: the page must be made read-only before
 * xen_set_gdt() will accept it, and segment registers may only be
 * reloaded once the new table is live.
 */
static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;		/* physical address of the gdt page */
	ulong_t ma[1];		/* XXPV should be a memory_t */

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	/* Page-aligned allocation at the fixed GDT_VA virtual address. */
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	if (gdt0 == NULL)
		panic("init_gdt: BOP_ALLOC failed");
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
		    SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 * Xen requires the GDT frame(s) to be read-only, identified by
	 * machine frame number.
	 */
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}
852*5084Sjohnlev 
853*5084Sjohnlev #else	/* __xpv */
854*5084Sjohnlev 
/*
 * Bare-metal i386 flavor: allocate and populate the boot cpu's GDT,
 * preserve the descriptors the boot loader is still using, then switch
 * the processor onto the new table.  Returns the new table (also
 * stored in gdt0).
 */
static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;	/* boot's and our gdt registers */
	user_desc_t	*bgdt;		/* boot loader's gdt */

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	/*
	 * XXX this allocation belongs in our caller, not here.
	 */
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	if (gdt0 == NULL)
		panic("init_gdt: BOP_ALLOC failed");
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt entries.
	 * Entry 0 is null descriptor by definition.
	 * The B16/B32 entries are kept so boot-time services (and kmdb)
	 * can keep running on their original selectors.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(
	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	return (gdt0);
}
9070Sstevel@tonic-gate 
908*5084Sjohnlev #endif	/* __xpv */
9090Sstevel@tonic-gate #endif	/* __i386 */
9100Sstevel@tonic-gate 
9110Sstevel@tonic-gate /*
9120Sstevel@tonic-gate  * Build kernel IDT.
9130Sstevel@tonic-gate  *
9143446Smrj  * Note that for amd64 we pretty much require every gate to be an interrupt
9153446Smrj  * gate which blocks interrupts atomically on entry; that's because of our
9163446Smrj  * dependency on using 'swapgs' every time we come into the kernel to find
9173446Smrj  * the cpu structure. If we get interrupted just before doing that, %cs could
9183446Smrj  * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
9193446Smrj  * %gsbase is really still pointing at something in userland. Bad things will
9203446Smrj  * ensue. We also use interrupt gates for i386 as well even though this is not
9213446Smrj  * required for some traps.
9220Sstevel@tonic-gate  *
9230Sstevel@tonic-gate  * Perhaps they should have invented a trap gate that does an atomic swapgs?
9240Sstevel@tonic-gate  */
9250Sstevel@tonic-gate static void
9263446Smrj init_idt_common(gate_desc_t *idt)
9273446Smrj {
928*5084Sjohnlev 	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
929*5084Sjohnlev 	set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
930*5084Sjohnlev 	set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL);
931*5084Sjohnlev 	set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL);
932*5084Sjohnlev 	set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL);
9333446Smrj 	set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
934*5084Sjohnlev 	    TRP_KPL);
935*5084Sjohnlev 	set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
936*5084Sjohnlev 	set_gatesegd(&idt[T_NOEXTFLT], &ndptrap,  KCS_SEL, SDT_SYSIGT, TRP_KPL);
9373446Smrj 
9383446Smrj 	/*
9393446Smrj 	 * double fault handler.
940*5084Sjohnlev 	 *
941*5084Sjohnlev 	 * Note that on the hypervisor a guest does not receive #df faults.
942*5084Sjohnlev 	 * Instead a failsafe event is injected into the guest if its selectors
943*5084Sjohnlev 	 * and/or stack is in a broken state. See xen_failsafe_callback.
9443446Smrj 	 */
945*5084Sjohnlev #if !defined(__xpv)
9463446Smrj #if defined(__amd64)
947*5084Sjohnlev 
948*5084Sjohnlev 	set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
949*5084Sjohnlev 
9503446Smrj #elif defined(__i386)
951*5084Sjohnlev 
9523446Smrj 	/*
9533446Smrj 	 * task gate required.
9543446Smrj 	 */
955*5084Sjohnlev 	set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL);
9563446Smrj 
9573446Smrj #endif	/* __i386 */
958*5084Sjohnlev #endif	/* !__xpv */
9593446Smrj 
9603446Smrj 	/*
9613446Smrj 	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
9623446Smrj 	 */
9633446Smrj 
964*5084Sjohnlev 	set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
965*5084Sjohnlev 	set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
966*5084Sjohnlev 	set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
967*5084Sjohnlev 	set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
968*5084Sjohnlev 	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
969*5084Sjohnlev 	set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL);
9703446Smrj 	set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
971*5084Sjohnlev 	    TRP_KPL);
972*5084Sjohnlev 	set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
973*5084Sjohnlev 	set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
9743446Smrj 
9753446Smrj 	/*
9763446Smrj 	 * install "int80" handler at, well, 0x80.
9773446Smrj 	 */
978*5084Sjohnlev 	set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL);
9793446Smrj 
9803446Smrj 	/*
9813446Smrj 	 * install fast trap handler at 210.
9823446Smrj 	 */
983*5084Sjohnlev 	set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL);
9843446Smrj 
9853446Smrj 	/*
9863446Smrj 	 * System call handler.
9873446Smrj 	 */
9883446Smrj #if defined(__amd64)
9893446Smrj 	set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
990*5084Sjohnlev 	    TRP_UPL);
9913446Smrj 
9923446Smrj #elif defined(__i386)
9933446Smrj 	set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
994*5084Sjohnlev 	    TRP_UPL);
9953446Smrj #endif	/* __i386 */
9963446Smrj 
9973446Smrj 	/*
9983446Smrj 	 * Install the DTrace interrupt handler for the pid provider.
9993446Smrj 	 */
10003446Smrj 	set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
1001*5084Sjohnlev 	    SDT_SYSIGT, TRP_UPL);
10023446Smrj 
10033446Smrj 	/*
10043446Smrj 	 * Prepare interposing descriptors for the branded "int80"
10053446Smrj 	 * and syscall handlers and cache copies of the default
10063446Smrj 	 * descriptors.
10073446Smrj 	 */
10083446Smrj 	brand_tbl[0].ih_inum = T_INT80;
10093446Smrj 	brand_tbl[0].ih_default_desc = idt0[T_INT80];
10103446Smrj 	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
1011*5084Sjohnlev 	    SDT_SYSIGT, TRP_UPL);
10123446Smrj 
10133446Smrj 	brand_tbl[1].ih_inum = T_SYSCALLINT;
10143446Smrj 	brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
10153446Smrj 
10163446Smrj #if defined(__amd64)
10173446Smrj 	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
1018*5084Sjohnlev 	    KCS_SEL, SDT_SYSIGT, TRP_UPL);
10193446Smrj #elif defined(__i386)
10203446Smrj 	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
1021*5084Sjohnlev 	    KCS_SEL, SDT_SYSIGT, TRP_UPL);
10223446Smrj #endif	/* __i386 */
10233446Smrj 
10243446Smrj 	brand_tbl[2].ih_inum = 0;
10253446Smrj }
10263446Smrj 
1027*5084Sjohnlev #if defined(__xpv)
1028*5084Sjohnlev 
/*
 * Hypervisor (__xpv) flavor of IDT setup.  Unlike bare metal there are
 * no hardware interrupt vectors to wire up here (the hypervisor
 * delivers events through callbacks), so only the common gates are
 * installed; the caller pushes the table to Xen via xen_idt_write().
 */
static void
init_idt(gate_desc_t *idt)
{
	/*
	 * currently nothing extra for the hypervisor
	 */
	init_idt_common(idt);
}
1037*5084Sjohnlev 
1038*5084Sjohnlev #else	/* __xpv */
1039*5084Sjohnlev 
10403446Smrj static void
10413446Smrj init_idt(gate_desc_t *idt)
10420Sstevel@tonic-gate {
10430Sstevel@tonic-gate 	char	ivctname[80];
10440Sstevel@tonic-gate 	void	(*ivctptr)(void);
10450Sstevel@tonic-gate 	int	i;
10460Sstevel@tonic-gate 
10470Sstevel@tonic-gate 	/*
10480Sstevel@tonic-gate 	 * Initialize entire table with 'reserved' trap and then overwrite
10490Sstevel@tonic-gate 	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
10500Sstevel@tonic-gate 	 * since it can only be generated on a 386 processor. 15 is also
10510Sstevel@tonic-gate 	 * unsupported and reserved.
10520Sstevel@tonic-gate 	 */
10530Sstevel@tonic-gate 	for (i = 0; i < NIDT; i++)
1054*5084Sjohnlev 		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
10550Sstevel@tonic-gate 
10560Sstevel@tonic-gate 	/*
10570Sstevel@tonic-gate 	 * 20-31 reserved
10580Sstevel@tonic-gate 	 */
10590Sstevel@tonic-gate 	for (i = 20; i < 32; i++)
1060*5084Sjohnlev 		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
10610Sstevel@tonic-gate 
10620Sstevel@tonic-gate 	/*
10630Sstevel@tonic-gate 	 * interrupts 32 - 255
10640Sstevel@tonic-gate 	 */
10650Sstevel@tonic-gate 	for (i = 32; i < 256; i++) {
10660Sstevel@tonic-gate 		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
10670Sstevel@tonic-gate 		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
10680Sstevel@tonic-gate 		if (ivctptr == NULL)
10690Sstevel@tonic-gate 			panic("kobj_getsymvalue(%s) failed", ivctname);
10700Sstevel@tonic-gate 
1071*5084Sjohnlev 		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL);
10720Sstevel@tonic-gate 	}
10730Sstevel@tonic-gate 
10740Sstevel@tonic-gate 	/*
10753446Smrj 	 * Now install the common ones. Note that it will overlay some
10763446Smrj 	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
10770Sstevel@tonic-gate 	 */
10783446Smrj 	init_idt_common(idt);
10790Sstevel@tonic-gate }
10800Sstevel@tonic-gate 
1081*5084Sjohnlev #endif	/* __xpv */
1082*5084Sjohnlev 
10830Sstevel@tonic-gate /*
10841217Srab  * The kernel does not deal with LDTs unless a user explicitly creates
10851217Srab  * one. Under normal circumstances, the LDTR contains 0. Any process attempting
10861217Srab  * to reference the LDT will therefore cause a #gp. System calls made via the
10871217Srab  * obsolete lcall mechanism are emulated by the #gp fault handler.
10880Sstevel@tonic-gate  */
/*
 * Start with no LDT installed.  A null LDT selector makes any LDT
 * reference fault with #gp, which is how obsolete lcall system calls
 * get emulated (see the block comment above).  On the hypervisor the
 * equivalent is requesting a zero-length LDT from Xen.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	wr_ldtr(0);
#endif
}
10980Sstevel@tonic-gate 
1099*5084Sjohnlev #if !defined(__xpv)
11000Sstevel@tonic-gate #if defined(__amd64)
11010Sstevel@tonic-gate 
/*
 * amd64: finish initializing the statically-allocated kernel TSS
 * (ktss0) and load it into %tr.  On amd64 the TSS carries only stack
 * pointers (RSP0/ISTs) and the I/O bitmap base — no task switching.
 */
static void
init_tss(void)
{
	/*
	 * tss_rsp0 is dynamically filled in by resume() on each context switch.
	 * All exceptions but #DF will run on the thread stack.
	 * Set up the double fault stack here.
	 */
	ktss0.tss_ist1 =
	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0.tss_bitmapbase = sizeof (ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}
11250Sstevel@tonic-gate 
11260Sstevel@tonic-gate #elif defined(__i386)
11270Sstevel@tonic-gate 
/*
 * i386: initialize the kernel TSS (ktss0) and the dedicated double
 * fault TSS (dftss0) — #df uses a task gate on i386 so it needs a
 * complete register image to switch to — then load %tr.
 */
static void
init_tss(void)
{
	/*
	 * ktss0.tss_esp dynamically filled in by resume() on each
	 * context switch.
	 */
	ktss0.tss_ss0	= KDS_SEL;
	ktss0.tss_eip	= (uint32_t)_start;
	ktss0.tss_ds	= ktss0.tss_es = ktss0.tss_ss = KDS_SEL;
	ktss0.tss_cs	= KCS_SEL;
	ktss0.tss_fs	= KFS_SEL;
	ktss0.tss_gs	= KGS_SEL;
	ktss0.tss_ldt	= ULDT_SEL;

	/*
	 * Initialize double fault tss.  The task switch loads all of
	 * these registers, giving the #df handler a known-good context
	 * and its own stack.
	 */
	dftss0.tss_esp0	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0.tss_ss0	= KDS_SEL;

	/*
	 * tss_cr3 will get initialized in hat_kern_setup() once our page
	 * tables have been setup.
	 */
	dftss0.tss_eip	= (uint32_t)syserrtrap;
	dftss0.tss_esp	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0.tss_cs	= KCS_SEL;
	dftss0.tss_ds	= KDS_SEL;
	dftss0.tss_es	= KDS_SEL;
	dftss0.tss_ss	= KDS_SEL;
	dftss0.tss_fs	= KFS_SEL;
	dftss0.tss_gs	= KGS_SEL;

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0.tss_bitmapbase = sizeof (ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}
11740Sstevel@tonic-gate 
11750Sstevel@tonic-gate #endif	/* __i386 */
1176*5084Sjohnlev #endif	/* !__xpv */
1177*5084Sjohnlev 
1178*5084Sjohnlev #if defined(__xpv)
1179*5084Sjohnlev 
/*
 * Hypervisor (__xpv) descriptor-table bring-up for the boot cpu:
 * GDT first, then the IDT (pushed to Xen entry by entry), then the
 * default kernel stack and event callbacks, and finally the (empty)
 * LDT.  This ordering is required; later steps assume the earlier
 * tables are live.
 */
void
init_desctbls(void)
{
	uint_t vec;
	user_desc_t *gdt;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();

	/*
	 * Store static pa of gdt to speed up pa_to_ma() translations
	 * on lwp context switches.
	 */
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_m.mcpu_gdt = gdt;
	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

	/*
	 * Setup and install our IDT.  There is no IDTR under Xen;
	 * each gate is registered with the hypervisor individually.
	 */
	init_idt(&idt0[0]);
	for (vec = 0; vec < NIDT; vec++)
		xen_idt_write(&idt0[vec], vec);

	CPU->cpu_m.mcpu_idt = idt0;

	/*
	 * set default kernel stack
	 */
	xen_stack_switch(KDS_SEL,
	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

	xen_init_callbacks();

	init_ldt();
}
1218*5084Sjohnlev 
1219*5084Sjohnlev #else	/* __xpv */
12200Sstevel@tonic-gate 
/*
 * Bare-metal descriptor-table bring-up for the boot cpu: install the
 * GDT, then the IDT, then the TSS and the (empty) LDT, caching each in
 * the cpu structure for later per-cpu use.
 */
void
init_desctbls(void)
{
	user_desc_t *gdt;
	desctbr_t idtr;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_m.mcpu_gdt = gdt;

	/*
	 * Setup and install our IDT.
	 */
	init_idt(&idt0[0]);

	idtr.dtr_base = (uintptr_t)idt0;
	idtr.dtr_limit = sizeof (idt0) - 1;
	wr_idtr(&idtr);
	CPU->cpu_m.mcpu_idt = idt0;

#if defined(__i386)
	/*
	 * We maintain a description of idt0 in convenient IDTR format
	 * for #pf's on some older pentium processors. See pentium_pftrap().
	 */
	idt0_default_r = idtr;
#endif	/* __i386 */

	init_tss();
	CPU->cpu_tss = &ktss0;
	init_ldt();
}
12562712Snn35248 
1257*5084Sjohnlev #endif	/* __xpv */
1258*5084Sjohnlev 
12592712Snn35248 /*
12603446Smrj  * In the early kernel, we need to set up a simple GDT to run on.
1261*5084Sjohnlev  *
1262*5084Sjohnlev  * XXPV	Can dboot use this too?  See dboot_gdt.s
12633446Smrj  */
/*
 * Populate the minimal boot-time GDT: one flat data and one flat code
 * descriptor appropriate to the architecture.  (On amd64, set_usegd()
 * takes an extra SDP_LONG/SDP_SHORT mode argument.)
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#elif defined(__i386)
	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#endif	/* __i386 */
}
12793446Smrj 
12803446Smrj /*
12812712Snn35248  * Enable interpositioning on the system call path by rewriting the
12822712Snn35248  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
12832712Snn35248  * the branded entry points.
12842712Snn35248  */
/*
 * Switch this cpu's syscall entry points (IDT gates and, where the
 * hardware uses them, syscall/sysenter MSRs) to the branded handlers
 * recorded in brand_tbl[].  Must run with preemption disabled or at
 * high pil (asserted below) since we rewrite live per-cpu state.
 */
void
brand_interpositioning_enable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int 		i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	/* brand_tbl[] is terminated by an entry with ih_inum == 0 */
	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
		/* mirror the in-memory update into the hypervisor's IDT */
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * Currently the hypervisor only supports 64-bit syscalls via
	 * syscall instruction. The 32-bit syscalls are handled by
	 * interrupt gate above.
	 */
	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	/* redirect the syscall/syscall32 MSR entry points */
	if (x86_feature & X86_ASYSC) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	/* redirect the sysenter entry point */
	if (x86_feature & X86_SEP)
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}
13252712Snn35248 
13262712Snn35248 /*
13272712Snn35248  * Disable interpositioning on the system call path by rewriting the
13282712Snn35248  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
13292712Snn35248  * the standard entry points, which bypass the interpositioning hooks.
13302712Snn35248  */
/*
 * Restore this cpu's syscall entry points (IDT gates and MSRs) to the
 * default, non-branded handlers cached in brand_tbl[].  Exact mirror
 * of brand_interpositioning_enable(); the same preemption/pil
 * constraint applies.
 */
void
brand_interpositioning_disable(void)
{
	gate_desc_t	*idt = CPU->cpu_idt;
	int i;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	/* brand_tbl[] is terminated by an entry with ih_inum == 0 */
	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
		/* mirror the in-memory update into the hypervisor's IDT */
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * See comment above in brand_interpositioning_enable.
	 */
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	/* restore the syscall/syscall32 MSR entry points */
	if (x86_feature & X86_ASYSC) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	/* restore the sysenter entry point */
	if (x86_feature & X86_SEP)
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}
1369