xref: /onnv-gate/usr/src/uts/intel/ia32/os/sysi86.c (revision 5084)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52712Snn35248  * Common Development and Distribution License (the "License").
62712Snn35248  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
223446Smrj  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
270Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
280Sstevel@tonic-gate /*	  All Rights Reserved  	*/
290Sstevel@tonic-gate 
300Sstevel@tonic-gate /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
310Sstevel@tonic-gate /*	  All Rights Reserved	*/
320Sstevel@tonic-gate 
330Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
340Sstevel@tonic-gate 
350Sstevel@tonic-gate #include <sys/param.h>
360Sstevel@tonic-gate #include <sys/types.h>
370Sstevel@tonic-gate #include <sys/sysmacros.h>
380Sstevel@tonic-gate #include <sys/systm.h>
390Sstevel@tonic-gate #include <sys/signal.h>
400Sstevel@tonic-gate #include <sys/errno.h>
410Sstevel@tonic-gate #include <sys/fault.h>
420Sstevel@tonic-gate #include <sys/syscall.h>
430Sstevel@tonic-gate #include <sys/cpuvar.h>
440Sstevel@tonic-gate #include <sys/sysi86.h>
450Sstevel@tonic-gate #include <sys/psw.h>
460Sstevel@tonic-gate #include <sys/cred.h>
470Sstevel@tonic-gate #include <sys/policy.h>
480Sstevel@tonic-gate #include <sys/thread.h>
490Sstevel@tonic-gate #include <sys/debug.h>
500Sstevel@tonic-gate #include <sys/ontrap.h>
510Sstevel@tonic-gate #include <sys/privregs.h>
520Sstevel@tonic-gate #include <sys/x86_archext.h>
530Sstevel@tonic-gate #include <sys/vmem.h>
540Sstevel@tonic-gate #include <sys/kmem.h>
550Sstevel@tonic-gate #include <sys/mman.h>
560Sstevel@tonic-gate #include <sys/archsystm.h>
570Sstevel@tonic-gate #include <vm/hat.h>
580Sstevel@tonic-gate #include <vm/as.h>
590Sstevel@tonic-gate #include <vm/seg.h>
600Sstevel@tonic-gate #include <vm/seg_kmem.h>
610Sstevel@tonic-gate #include <vm/faultcode.h>
620Sstevel@tonic-gate #include <sys/fp.h>
630Sstevel@tonic-gate #include <sys/cmn_err.h>
643446Smrj #include <sys/segments.h>
653446Smrj #include <sys/clock.h>
66*5084Sjohnlev #if defined(__xpv)
67*5084Sjohnlev #include <sys/hypervisor.h>
68*5084Sjohnlev #include <sys/note.h>
69*5084Sjohnlev #endif
700Sstevel@tonic-gate 
71*5084Sjohnlev static void ldt_alloc(proc_t *, uint_t);
72*5084Sjohnlev static void ldt_free(proc_t *);
73*5084Sjohnlev static void ldt_dup(proc_t *, proc_t *);
74*5084Sjohnlev static void ldt_grow(proc_t *, uint_t);
750Sstevel@tonic-gate 
760Sstevel@tonic-gate /*
770Sstevel@tonic-gate  * sysi86 System Call
780Sstevel@tonic-gate  */
790Sstevel@tonic-gate 
800Sstevel@tonic-gate /* ARGSUSED */
810Sstevel@tonic-gate int
820Sstevel@tonic-gate sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
830Sstevel@tonic-gate {
842712Snn35248 	struct ssd ssd;
850Sstevel@tonic-gate 	int error = 0;
860Sstevel@tonic-gate 	int c;
870Sstevel@tonic-gate 	proc_t *pp = curproc;
880Sstevel@tonic-gate 
890Sstevel@tonic-gate 	switch (cmd) {
900Sstevel@tonic-gate 
910Sstevel@tonic-gate 	/*
920Sstevel@tonic-gate 	 * The SI86V86 subsystem call of the SYSI86 system call
930Sstevel@tonic-gate 	 * supports only one subcode -- V86SC_IOPL.
940Sstevel@tonic-gate 	 */
950Sstevel@tonic-gate 	case SI86V86:
960Sstevel@tonic-gate 		if (arg1 == V86SC_IOPL) {
970Sstevel@tonic-gate 			struct regs *rp = lwptoregs(ttolwp(curthread));
980Sstevel@tonic-gate 			greg_t oldpl = rp->r_ps & PS_IOPL;
990Sstevel@tonic-gate 			greg_t newpl = arg2 & PS_IOPL;
1000Sstevel@tonic-gate 
1010Sstevel@tonic-gate 			/*
1020Sstevel@tonic-gate 			 * Must be privileged to run this system call
1030Sstevel@tonic-gate 			 * if giving more io privilege.
1040Sstevel@tonic-gate 			 */
1050Sstevel@tonic-gate 			if (newpl > oldpl && (error =
1060Sstevel@tonic-gate 			    secpolicy_sys_config(CRED(), B_FALSE)) != 0)
1070Sstevel@tonic-gate 				return (set_errno(error));
108*5084Sjohnlev #if defined(__xpv)
109*5084Sjohnlev 			kpreempt_disable();
110*5084Sjohnlev 			installctx(curthread, NULL, xen_disable_user_iopl,
111*5084Sjohnlev 			    xen_enable_user_iopl, NULL, NULL,
112*5084Sjohnlev 			    xen_disable_user_iopl, NULL);
113*5084Sjohnlev 			xen_enable_user_iopl();
114*5084Sjohnlev 			kpreempt_enable();
115*5084Sjohnlev #else
1160Sstevel@tonic-gate 			rp->r_ps ^= oldpl ^ newpl;
117*5084Sjohnlev #endif
1180Sstevel@tonic-gate 		} else
1190Sstevel@tonic-gate 			error = EINVAL;
1200Sstevel@tonic-gate 		break;
1210Sstevel@tonic-gate 
1220Sstevel@tonic-gate 	/*
1230Sstevel@tonic-gate 	 * Set a segment descriptor
1240Sstevel@tonic-gate 	 */
1250Sstevel@tonic-gate 	case SI86DSCR:
1260Sstevel@tonic-gate 		/*
1270Sstevel@tonic-gate 		 * There are considerable problems here manipulating
1280Sstevel@tonic-gate 		 * resources shared by many running lwps.  Get everyone
1290Sstevel@tonic-gate 		 * into a safe state before changing the LDT.
1300Sstevel@tonic-gate 		 */
1310Sstevel@tonic-gate 		if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
1320Sstevel@tonic-gate 			error = EINTR;
1330Sstevel@tonic-gate 			break;
1340Sstevel@tonic-gate 		}
1352712Snn35248 
1362712Snn35248 		if (get_udatamodel() == DATAMODEL_LP64) {
1372712Snn35248 			error = EINVAL;
1382712Snn35248 			break;
1392712Snn35248 		}
1402712Snn35248 
1412712Snn35248 		if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) {
1422712Snn35248 			error = EFAULT;
1432712Snn35248 			break;
1442712Snn35248 		}
1452712Snn35248 
1462712Snn35248 		error = setdscr(&ssd);
1472712Snn35248 
1480Sstevel@tonic-gate 		mutex_enter(&pp->p_lock);
1490Sstevel@tonic-gate 		if (curthread != pp->p_agenttp)
1500Sstevel@tonic-gate 			continuelwps(pp);
1510Sstevel@tonic-gate 		mutex_exit(&pp->p_lock);
1520Sstevel@tonic-gate 		break;
1530Sstevel@tonic-gate 
1540Sstevel@tonic-gate 	case SI86FPHW:
1550Sstevel@tonic-gate 		c = fp_kind & 0xff;
1560Sstevel@tonic-gate 		if (suword32((void *)arg1, c) == -1)
1570Sstevel@tonic-gate 			error = EFAULT;
1580Sstevel@tonic-gate 		break;
1590Sstevel@tonic-gate 
1600Sstevel@tonic-gate 	case SI86FPSTART:
1610Sstevel@tonic-gate 		/*
1620Sstevel@tonic-gate 		 * arg1 is the address of _fp_hw
1630Sstevel@tonic-gate 		 * arg2 is the desired x87 FCW value
1640Sstevel@tonic-gate 		 * arg3 is the desired SSE MXCSR value
1650Sstevel@tonic-gate 		 * a return value of one means SSE hardware, else none.
1660Sstevel@tonic-gate 		 */
1670Sstevel@tonic-gate 		c = fp_kind & 0xff;
1680Sstevel@tonic-gate 		if (suword32((void *)arg1, c) == -1) {
1690Sstevel@tonic-gate 			error = EFAULT;
1700Sstevel@tonic-gate 			break;
1710Sstevel@tonic-gate 		}
1720Sstevel@tonic-gate 		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
1730Sstevel@tonic-gate 		return (fp_kind == __FP_SSE ? 1 : 0);
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate 	/* real time clock management commands */
1760Sstevel@tonic-gate 
1770Sstevel@tonic-gate 	case WTODC:
1780Sstevel@tonic-gate 		if ((error = secpolicy_settime(CRED())) == 0) {
1790Sstevel@tonic-gate 			timestruc_t ts;
1800Sstevel@tonic-gate 			mutex_enter(&tod_lock);
1810Sstevel@tonic-gate 			gethrestime(&ts);
1820Sstevel@tonic-gate 			tod_set(ts);
1830Sstevel@tonic-gate 			mutex_exit(&tod_lock);
1840Sstevel@tonic-gate 		}
1850Sstevel@tonic-gate 		break;
1860Sstevel@tonic-gate 
1870Sstevel@tonic-gate /* Give some timezone playing room */
1880Sstevel@tonic-gate #define	ONEWEEK	(7 * 24 * 60 * 60)
1890Sstevel@tonic-gate 
1900Sstevel@tonic-gate 	case SGMTL:
1910Sstevel@tonic-gate 		/*
1920Sstevel@tonic-gate 		 * Called from 32 bit land, negative values
1930Sstevel@tonic-gate 		 * are not sign extended, so we do that here
1940Sstevel@tonic-gate 		 * by casting it to an int and back.  We also
1950Sstevel@tonic-gate 		 * clamp the value to within reason and detect
1960Sstevel@tonic-gate 		 * when a 64 bit call overflows an int.
1970Sstevel@tonic-gate 		 */
1980Sstevel@tonic-gate 		if ((error = secpolicy_settime(CRED())) == 0) {
1990Sstevel@tonic-gate 			int newlag = (int)arg1;
2000Sstevel@tonic-gate 
2010Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
2020Sstevel@tonic-gate 			if (get_udatamodel() == DATAMODEL_NATIVE &&
2030Sstevel@tonic-gate 			    (long)newlag != (long)arg1) {
2040Sstevel@tonic-gate 				error = EOVERFLOW;
2050Sstevel@tonic-gate 			} else
2060Sstevel@tonic-gate #endif
2070Sstevel@tonic-gate 			if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
2080Sstevel@tonic-gate 				sgmtl(newlag);
2090Sstevel@tonic-gate 			else
2100Sstevel@tonic-gate 				error = EOVERFLOW;
2110Sstevel@tonic-gate 		}
2120Sstevel@tonic-gate 		break;
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate 	case GGMTL:
2150Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
2160Sstevel@tonic-gate 			if (sulword((void *)arg1, ggmtl()) == -1)
2170Sstevel@tonic-gate 				error = EFAULT;
2180Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
2190Sstevel@tonic-gate 		} else {
2200Sstevel@tonic-gate 			time_t gmtl;
2210Sstevel@tonic-gate 
2220Sstevel@tonic-gate 			if ((gmtl = ggmtl()) > INT32_MAX) {
2230Sstevel@tonic-gate 				/*
2240Sstevel@tonic-gate 				 * Since gmt_lag can at most be
2250Sstevel@tonic-gate 				 * +/- 12 hours, something is
2260Sstevel@tonic-gate 				 * *seriously* messed up here.
2270Sstevel@tonic-gate 				 */
2280Sstevel@tonic-gate 				error = EOVERFLOW;
2290Sstevel@tonic-gate 			} else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
2300Sstevel@tonic-gate 				error = EFAULT;
2310Sstevel@tonic-gate #endif
2320Sstevel@tonic-gate 		}
2330Sstevel@tonic-gate 		break;
2340Sstevel@tonic-gate 
2350Sstevel@tonic-gate 	case RTCSYNC:
2360Sstevel@tonic-gate 		if ((error = secpolicy_settime(CRED())) == 0)
2370Sstevel@tonic-gate 			rtcsync();
2380Sstevel@tonic-gate 		break;
2390Sstevel@tonic-gate 
2400Sstevel@tonic-gate 	/* END OF real time clock management commands */
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate 	default:
2430Sstevel@tonic-gate 		error = EINVAL;
2440Sstevel@tonic-gate 		break;
2450Sstevel@tonic-gate 	}
2460Sstevel@tonic-gate 	return (error == 0 ? 0 : set_errno(error));
2470Sstevel@tonic-gate }
2480Sstevel@tonic-gate 
2490Sstevel@tonic-gate void
2500Sstevel@tonic-gate usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
2510Sstevel@tonic-gate {
2520Sstevel@tonic-gate 	ssd->bo = USEGD_GETBASE(usd);
2530Sstevel@tonic-gate 	ssd->ls = USEGD_GETLIMIT(usd);
2540Sstevel@tonic-gate 	ssd->sel = sel;
2550Sstevel@tonic-gate 
2560Sstevel@tonic-gate 	/*
2570Sstevel@tonic-gate 	 * set type, dpl and present bits.
2580Sstevel@tonic-gate 	 */
2590Sstevel@tonic-gate 	ssd->acc1 = usd->usd_type;
2600Sstevel@tonic-gate 	ssd->acc1 |= usd->usd_dpl << 5;
2610Sstevel@tonic-gate 	ssd->acc1 |= usd->usd_p << (5 + 2);
2620Sstevel@tonic-gate 
2630Sstevel@tonic-gate 	/*
2640Sstevel@tonic-gate 	 * set avl, DB and granularity bits.
2650Sstevel@tonic-gate 	 */
2660Sstevel@tonic-gate 	ssd->acc2 = usd->usd_avl;
2670Sstevel@tonic-gate 
2680Sstevel@tonic-gate #if defined(__amd64)
2690Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_long << 1;
2700Sstevel@tonic-gate #else
2710Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_reserved << 1;
2720Sstevel@tonic-gate #endif
2730Sstevel@tonic-gate 
2740Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_def32 << (1 + 1);
2750Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
2760Sstevel@tonic-gate }
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate static void
2790Sstevel@tonic-gate ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
2800Sstevel@tonic-gate {
2810Sstevel@tonic-gate 
282*5084Sjohnlev 	ASSERT(bcmp(usd, &null_udesc, sizeof (*usd)) == 0);
283*5084Sjohnlev 
2840Sstevel@tonic-gate 	USEGD_SETBASE(usd, ssd->bo);
2850Sstevel@tonic-gate 	USEGD_SETLIMIT(usd, ssd->ls);
2860Sstevel@tonic-gate 
2870Sstevel@tonic-gate 	/*
2880Sstevel@tonic-gate 	 * set type, dpl and present bits.
2890Sstevel@tonic-gate 	 */
2900Sstevel@tonic-gate 	usd->usd_type = ssd->acc1;
2910Sstevel@tonic-gate 	usd->usd_dpl = ssd->acc1 >> 5;
2920Sstevel@tonic-gate 	usd->usd_p = ssd->acc1 >> (5 + 2);
2930Sstevel@tonic-gate 
2940Sstevel@tonic-gate 	ASSERT(usd->usd_type >= SDT_MEMRO);
2950Sstevel@tonic-gate 	ASSERT(usd->usd_dpl == SEL_UPL);
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate 	/*
298*5084Sjohnlev 	 * 64-bit code selectors are never allowed in the LDT.
299*5084Sjohnlev 	 * Reserved bit is always 0 on 32-bit sytems.
300*5084Sjohnlev 	 */
301*5084Sjohnlev #if defined(__amd64)
302*5084Sjohnlev 	usd->usd_long = 0;
303*5084Sjohnlev #else
304*5084Sjohnlev 	usd->usd_reserved = 0;
305*5084Sjohnlev #endif
306*5084Sjohnlev 
307*5084Sjohnlev 	/*
3080Sstevel@tonic-gate 	 * set avl, DB and granularity bits.
3090Sstevel@tonic-gate 	 */
3100Sstevel@tonic-gate 	usd->usd_avl = ssd->acc2;
3110Sstevel@tonic-gate 	usd->usd_def32 = ssd->acc2 >> (1 + 1);
3120Sstevel@tonic-gate 	usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
3130Sstevel@tonic-gate }
3140Sstevel@tonic-gate 
315*5084Sjohnlev 
316*5084Sjohnlev #if defined(__i386)
317*5084Sjohnlev 
/*
 * Translate an exported struct ssd into a 32-bit call gate descriptor.
 * The descriptor is expected to start out zeroed (null_sdesc).
 * Only built on __i386 (see the surrounding #if).
 */
static void
ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
{

	ASSERT(bcmp(sgd, &null_sdesc, sizeof (*sgd)) == 0);

	/* the gate's target offset is split into two 16-bit halves */
	sgd->sgd_looffset = ssd->bo;
	sgd->sgd_hioffset = ssd->bo >> 16;

	/* target code segment selector comes from the ssd's "limit" slot */
	sgd->sgd_selector = ssd->ls;

	/*
	 * set type, dpl and present bits.
	 */
	sgd->sgd_type = ssd->acc1;
	sgd->sgd_dpl = ssd->acc1 >> 5;
	sgd->sgd_p = ssd->acc1 >> 7;
	ASSERT(sgd->sgd_type == SDT_SYSCGT);
	ASSERT(sgd->sgd_dpl == SEL_UPL);
	/* no parameters are copied across the privilege transition */
	sgd->sgd_stkcpy = 0;
}
3390Sstevel@tonic-gate 
340*5084Sjohnlev #endif	/* __i386 */
3410Sstevel@tonic-gate 
/*
 * Load LDT register with the current process's LDT.
 */
static void
ldt_load(void)
{
#if defined(__xpv)
	/*
	 * Under the hypervisor we cannot touch %ldtr directly; hand Xen
	 * the base address and entry count of the process's private LDT.
	 */
	xen_set_ldt(get_ssd_base(&curproc->p_ldt_desc),
	    curproc->p_ldtlimit + 1);
#else
	/* Point this CPU's GDT slot at the private LDT, then load %ldtr. */
	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
	wr_ldtr(ULDT_SEL);
#endif
}
3561217Srab 
/*
 * Store a NULL selector in the LDTR. All subsequent illegal references to
 * the LDT will result in a #gp.
 */
void
ldt_unload(void)
{
#if defined(__xpv)
	/* Tell the hypervisor there is no LDT for this domain's vcpu. */
	xen_set_ldt(NULL, 0);
#else
	/* Clear the GDT's LDT slot as well, so nothing stale remains. */
	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = null_sdesc;
	wr_ldtr(0);
#endif
}
3710Sstevel@tonic-gate 
/*
 * Context-switch "save" operation for a process with a private LDT:
 * unload the LDT and re-enable fast syscalls for whatever runs next.
 */
/*ARGSUSED*/
static void
ldt_savectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

#if defined(__amd64)
	/*
	 * The 64-bit kernel must be sure to clear any stale ldt
	 * selectors when context switching away from a process that
	 * has a private ldt. Consider the following example:
	 *
	 * 	Wine creates a ldt descriptor and points a segment register
	 * 	to it.
	 *
	 *	We then context switch away from wine lwp to kernel
	 *	thread and hit breakpoint in kernel with kmdb
	 *
	 *	When we continue and resume from kmdb we will #gp
	 * 	fault since kmdb will have saved the stale ldt selector
	 *	from wine and will try to restore it but we are no longer in
	 *	the context of the wine process and do not have our
	 *	ldtr register pointing to the private ldt.
	 */
	reset_sregs();
#endif

	ldt_unload();
	cpu_fast_syscall_enable(NULL);
}
4030Sstevel@tonic-gate 
/*
 * Context-switch "restore" operation: reload the process's private LDT
 * and disable fast syscalls (which would clobber %cs/%ss — see setdscr()).
 */
static void
ldt_restorectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

	ldt_load();
	cpu_fast_syscall_disable(NULL);
}
4131217Srab 
/*
 * When a process with a private LDT execs, fast syscalls must be enabled for
 * the new process image.
 */
/* ARGSUSED */
static void
ldt_freectx(proc_t *p, int isexec)
{
	ASSERT(p->p_ldt);

	/*
	 * On exec the new image has no private LDT, so the fast-syscall
	 * instructions become safe to use again for this CPU.
	 */
	if (isexec) {
		kpreempt_disable();
		cpu_fast_syscall_enable(NULL);
		kpreempt_enable();
	}

	/*
	 * ldt_free() will free the memory used by the private LDT, reset the
	 * process's descriptor, and re-program the LDTR.
	 */
	ldt_free(p);
}
4360Sstevel@tonic-gate 
/*
 * Install ctx op that ensures syscall/sysenter are disabled.
 * See comments below.
 *
 * When a thread with a private LDT forks, the new process
 * must have the LDT context ops installed.
 */
/* ARGSUSED */
static void
ldt_installctx(proc_t *p, proc_t *cp)
{
	proc_t		*targ = p;
	kthread_t	*t;

	/*
	 * If this is a fork, operate on the child process.
	 */
	if (cp != NULL) {
		targ = cp;
		ldt_dup(p, cp);
	}

	/*
	 * The process context ops expect the target process as their argument.
	 *
	 * NOTE(review): the removepctx() call lives inside ASSERT(), so it is
	 * presumably only a sanity check that no LDT ctx ops were already
	 * installed — on non-DEBUG kernels the call is compiled out entirely.
	 * Confirm that is the intent before relying on it for cleanup.
	 */
	ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
	    ldt_installctx, ldt_savectx, ldt_freectx) == 0);

	installpctx(targ, targ, ldt_savectx, ldt_restorectx,
	    ldt_installctx, ldt_savectx, ldt_freectx);

	/*
	 * We've just disabled fast system call and return instructions; take
	 * the slow path out to make sure we don't try to use one to return
	 * back to user. We must set t_post_sys for every thread in the
	 * process to make sure none of them escape out via fast return.
	 */

	mutex_enter(&targ->p_lock);
	t = targ->p_tlist;
	do {
		t->t_post_sys = 1;
	} while ((t = t->t_forw) != targ->p_tlist);
	mutex_exit(&targ->p_lock);
}
4820Sstevel@tonic-gate 
/*
 * Install (or clear) one descriptor in the calling process's private LDT,
 * as described by *ssd.  Allocates or grows the LDT on demand.  Returns 0
 * or an errno value.  Caller (sysi86/SI86DSCR) has already stopped the
 * process's other lwps via holdlwps().
 */
int
setdscr(struct ssd *ssd)
{
	ushort_t seli; 		/* selector index */
	user_desc_t *ldp;	/* descriptor pointer */
	user_desc_t ndesc;	/* new descriptor */
	proc_t	*pp = ttoproc(curthread);
	int	rc = 0;

	/*
	 * LDT segments: executable and data at DPL 3 only.
	 */
	if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
		return (EINVAL);

	/*
	 * check the selector index.
	 */
	seli = SELTOIDX(ssd->sel);
	if (seli >= MAXNLDT || seli < LDT_UDBASE)
		return (EINVAL);

	ndesc = null_udesc;
	mutex_enter(&pp->p_ldtlock);

	/*
	 * If this is the first time for this process then setup a
	 * private LDT for it.
	 */
	if (pp->p_ldt == NULL) {
		ldt_alloc(pp, seli);

		/*
		 * Now that this process has a private LDT, the use of
		 * the syscall/sysret and sysenter/sysexit instructions
		 * is forbidden for this process because they destroy
		 * the contents of %cs and %ss segment registers.
		 *
		 * Explicitly disable them here and add a context handler
		 * to the process. Note that disabling
		 * them here means we can't use sysret or sysexit on
		 * the way out of this system call - so we force this
		 * thread to take the slow path (which doesn't make use
		 * of sysenter or sysexit) back out.
		 */
		kpreempt_disable();
		ldt_installctx(pp, NULL);
		cpu_fast_syscall_disable(NULL);
		ASSERT(curthread->t_post_sys != 0);
		kpreempt_enable();

	} else if (seli > pp->p_ldtlimit) {

		/*
		 * Increase size of ldt to include seli.
		 */
		ldt_grow(pp, seli);
	}

	ASSERT(seli <= pp->p_ldtlimit);
	ldp = &pp->p_ldt[seli];

	/*
	 * On the 64-bit kernel, this is where things get more subtle.
	 * Recall that in the 64-bit kernel, when we enter the kernel we
	 * deliberately -don't- reload the segment selectors we came in on
	 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
	 * and the underlying descriptors are essentially ignored by the
	 * hardware in long mode - except for the base that we override with
	 * the gsbase MSRs.
	 *
	 * However, there's one unfortunate issue with this rosy picture --
	 * a descriptor that's not marked as 'present' will still generate
	 * an #np when loading a segment register.
	 *
	 * Consider this case.  An lwp creates a harmless LDT entry, points
	 * one of it's segment registers at it, then tells the kernel (here)
	 * to delete it.  In the 32-bit kernel, the #np will happen on the
	 * way back to userland where we reload the segment registers, and be
	 * handled in kern_gpfault().  In the 64-bit kernel, the same thing
	 * will happen in the normal case too.  However, if we're trying to
	 * use a debugger that wants to save and restore the segment registers,
	 * and the debugger thinks that we have valid segment registers, we
	 * have the problem that the debugger will try and restore the
	 * segment register that points at the now 'not present' descriptor
	 * and will take a #np right there.
	 *
	 * We should obviously fix the debugger to be paranoid about
	 * -not- restoring segment registers that point to bad descriptors;
	 * however we can prevent the problem here if we check to see if any
	 * of the segment registers are still pointing at the thing we're
	 * destroying; if they are, return an error instead. (That also seems
	 * a lot better failure mode than SIGKILL and a core file
	 * from kern_gpfault() too.)
	 */
	if (SI86SSD_PRES(ssd) == 0) {
		kthread_t *t;
		int bad = 0;

		/*
		 * Look carefully at the segment registers of every lwp
		 * in the process (they're all stopped by our caller).
		 * If we're about to invalidate a descriptor that's still
		 * being referenced by *any* of them, return an error,
		 * rather than having them #gp on their way out of the kernel.
		 */
		ASSERT(pp->p_lwprcnt == 1);

		mutex_enter(&pp->p_lock);
		t = pp->p_tlist;
		do {
			klwp_t *lwp = ttolwp(t);
			struct regs *rp = lwp->lwp_regs;
#if defined(__amd64)
			pcb_t *pcb = &lwp->lwp_pcb;
#endif

			/* %cs and %ss always live in the saved regs */
			if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) {
				bad = 1;
				break;
			}

#if defined(__amd64)
			/*
			 * If pcb_rupdate is set, the current user segment
			 * selectors are cached in the pcb rather than in
			 * the saved regs — check those copies instead.
			 */
			if (pcb->pcb_rupdate == 1) {
				if (ssd->sel == pcb->pcb_ds ||
				    ssd->sel == pcb->pcb_es ||
				    ssd->sel == pcb->pcb_fs ||
				    ssd->sel == pcb->pcb_gs) {
					bad = 1;
					break;
				}
			} else
#endif
			{
				if (ssd->sel == rp->r_ds ||
				    ssd->sel == rp->r_es ||
				    ssd->sel == rp->r_fs ||
				    ssd->sel == rp->r_gs) {
					bad = 1;
					break;
				}
			}

		} while ((t = t->t_forw) != pp->p_tlist);
		mutex_exit(&pp->p_lock);

		if (bad) {
			mutex_exit(&pp->p_ldtlock);
			return (EBUSY);
		}
	}

	/*
	 * If acc1 is zero, clear the descriptor (including the 'present' bit)
	 */
	if (ssd->acc1 == 0) {
		rc  = ldt_update_segd(ldp, &null_udesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}

	/*
	 * Check segment type, allow segment not present and
	 * only user DPL (3).
	 */
	if (SI86SSD_DPL(ssd) != SEL_UPL) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}

#if defined(__amd64)
	/*
	 * Do not allow 32-bit applications to create 64-bit mode code
	 * segments.
	 */
	if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
	    SI86SSD_ISLONG(ssd)) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}
#endif /* __amd64 */

	/*
	 * Set up a code or data user segment descriptor.
	 */
	if (SI86SSD_ISUSEG(ssd)) {
		ssd_to_usd(ssd, &ndesc);
		rc = ldt_update_segd(ldp, &ndesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}

#if defined(__i386)
	/*
	 * Allow a call gate only if the destination is in the LDT
	 * and the system is running in 32-bit legacy mode.
	 *
	 * In long mode 32-bit call gates are redefined as 64-bit call
	 * gates and the hw enforces that the target code selector
	 * of the call gate must be 64-bit selector. A #gp fault is
	 * generated if otherwise. Since we do not allow 32-bit processes
	 * to switch themselves to 64-bits we never allow call gates
	 * on 64-bit systems.
	 */
	if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {


		ssd_to_sgd(ssd, (gate_desc_t *)&ndesc);
		rc = ldt_update_segd(ldp, &ndesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}
#endif	/* __i386 */

	/* not a user segment and not an acceptable call gate */
	mutex_exit(&pp->p_ldtlock);
	return (EINVAL);
}
7000Sstevel@tonic-gate 
/*
 * Allocate new LDT for process just large enough to contain seli.
 * Note we allocate and grow LDT in PAGESIZE chunks. We do this
 * to simplify the implementation and because on the hypervisor it's
 * required, since the LDT must live on pages that have PROT_WRITE
 * removed and which are given to the hypervisor.
 */
static void
ldt_alloc(proc_t *pp, uint_t seli)
{
	user_desc_t	*ldt;
	size_t		ldtsz;
	uint_t		nsels;

	/* caller holds the LDT lock; the process must not yet have an LDT */
	ASSERT(MUTEX_HELD(&pp->p_ldtlock));
	ASSERT(pp->p_ldt == NULL);
	ASSERT(pp->p_ldtlimit == 0);

	/*
	 * Allocate new LDT just large enough to contain seli,
	 * rounded up to a whole number of pages.
	 */
	ldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
	nsels = ldtsz / sizeof (user_desc_t);
	ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);

	/* zeroed so unused slots are null descriptors */
	ldt = kmem_zalloc(ldtsz, KM_SLEEP);
	ASSERT(IS_P2ALIGNED(ldt, PAGESIZE));

#if defined(__xpv)
	/* the hypervisor requires LDT pages to be mapped read-only */
	if (xen_ldt_setprot(ldt, ldtsz, PROT_READ))
		panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed");
#endif

	/* publish the new LDT and build its system segment descriptor */
	pp->p_ldt = ldt;
	pp->p_ldtlimit = nsels - 1;
	set_syssegd(&pp->p_ldt_desc, ldt, ldtsz - 1, SDT_SYSLDT, SEL_KPL);

	/* if we're allocating for ourselves, activate it immediately */
	if (pp == curproc) {
		kpreempt_disable();
		ldt_load();
		kpreempt_enable();
	}
}
7440Sstevel@tonic-gate 
/*
 * Tear down and release pp's LDT.  The table is detached from the
 * process under p_ldtlock, unloaded from hardware if pp is the current
 * process, and only then (on the hypervisor) made writable again so
 * the pages can be returned to kmem.
 */
static void
ldt_free(proc_t *pp)
{
	user_desc_t	*ldt;
	size_t		ldtsz;

	ASSERT(pp->p_ldt != NULL);

	mutex_enter(&pp->p_ldtlock);
	/* Capture the table and its size before clearing the proc fields. */
	ldt = pp->p_ldt;
	ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE));

	pp->p_ldt = NULL;
	pp->p_ldtlimit = 0;
	pp->p_ldt_desc = null_sdesc;
	mutex_exit(&pp->p_ldtlock);

	if (pp == curproc) {
		kpreempt_disable();
		ldt_unload();
		kpreempt_enable();
	}

#if defined(__xpv)
	/*
	 * We are not allowed to make the ldt writable until after
	 * we tell the hypervisor to unload it.
	 */
	if (xen_ldt_setprot(ldt, ldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_free:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

	kmem_free(ldt, ldtsz);
}
7810Sstevel@tonic-gate 
/*
 * On fork copy new ldt for child.  The child is not the current
 * process (asserted below), so its freshly allocated LDT is never
 * loaded in hardware while we manipulate its page protections here.
 * Both processes' p_ldtlock are held across the copy.
 */
static void
ldt_dup(proc_t *pp, proc_t *cp)
{
	size_t	ldtsz;

	ASSERT(pp->p_ldt != NULL);
	ASSERT(cp != curproc);

	/*
	 * I assume the parent's ldt can't increase since we're in a fork.
	 */
	mutex_enter(&pp->p_ldtlock);
	mutex_enter(&cp->p_ldtlock);

	ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	/* Give the child an LDT the same size as the parent's. */
	ldt_alloc(cp, pp->p_ldtlimit);

#if defined(__xpv)
	/*
	 * Make child's ldt writable so it can be copied into from
	 * parent's ldt. This works since ldt_alloc above did not load
	 * the ldt since its for the child process. If we tried to make
	 * an LDT writable that is loaded in hw the setprot operation
	 * would fail.
	 */
	if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_dup:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

	bcopy(pp->p_ldt, cp->p_ldt, ldtsz);

#if defined(__xpv)
	/* Restore the hypervisor-mandated read-only protection. */
	if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ))
		panic("ldt_dup:xen_ldt_setprot(PROT_READ) failed");
#endif
	mutex_exit(&cp->p_ldtlock);
	mutex_exit(&pp->p_ldtlock);

}
825*5084Sjohnlev 
/*
 * Grow pp's existing LDT so that it can contain selector index seli.
 * As in ldt_alloc(), the table is sized in PAGESIZE chunks.  A new,
 * larger table is allocated, the old contents are copied in, and the
 * process's LDT descriptor is rewritten to point at the new table
 * before the old one is freed.
 */
static void
ldt_grow(proc_t *pp, uint_t seli)
{
	user_desc_t	*oldt, *nldt;
	uint_t		nsels;
	size_t		oldtsz, nldtsz;

	ASSERT(MUTEX_HELD(&pp->p_ldtlock));
	ASSERT(pp->p_ldt != NULL);
	ASSERT(pp->p_ldtlimit != 0);

	/*
	 * Allocate larger LDT just large enough to contain seli.
	 */
	nldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
	nsels = nldtsz / sizeof (user_desc_t);
	ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
	ASSERT(nsels > pp->p_ldtlimit);

	oldt = pp->p_ldt;
	oldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	nldt = kmem_zalloc(nldtsz, KM_SLEEP);
	ASSERT(IS_P2ALIGNED(nldt, PAGESIZE));

	bcopy(oldt, nldt, oldtsz);

	/*
	 * unload old ldt.
	 */
	kpreempt_disable();
	ldt_unload();
	kpreempt_enable();

#if defined(__xpv)

	/*
	 * Make old ldt writable and new ldt read only.
	 * NOTE(review): this must come after the ldt_unload() above --
	 * per the comments in ldt_free()/ldt_dup(), the hypervisor will
	 * not make a currently-loaded LDT writable.
	 */
	if (xen_ldt_setprot(oldt, oldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_grow:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");

	if (xen_ldt_setprot(nldt, nldtsz, PROT_READ))
		panic("ldt_grow:xen_ldt_setprot(PROT_READ) failed");
#endif

	pp->p_ldt = nldt;
	pp->p_ldtlimit = nsels - 1;

	/*
	 * write new ldt segment descriptor.
	 */
	set_syssegd(&pp->p_ldt_desc, nldt, nldtsz - 1, SDT_SYSLDT, SEL_KPL);

	/*
	 * load the new ldt.
	 */
	kpreempt_disable();
	ldt_load();
	kpreempt_enable();

	kmem_free(oldt, oldtsz);
}
889