xref: /onnv-gate/usr/src/uts/intel/ia32/os/sysi86.c (revision 2712)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*2712Snn35248  * Common Development and Distribution License (the "License").
6*2712Snn35248  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
221217Srab  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
270Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
280Sstevel@tonic-gate /*	  All Rights Reserved  	*/
290Sstevel@tonic-gate 
300Sstevel@tonic-gate /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
310Sstevel@tonic-gate /*	  All Rights Reserved	*/
320Sstevel@tonic-gate 
330Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
340Sstevel@tonic-gate 
350Sstevel@tonic-gate #include <sys/param.h>
360Sstevel@tonic-gate #include <sys/types.h>
370Sstevel@tonic-gate #include <sys/sysmacros.h>
380Sstevel@tonic-gate #include <sys/systm.h>
390Sstevel@tonic-gate #include <sys/signal.h>
400Sstevel@tonic-gate #include <sys/errno.h>
410Sstevel@tonic-gate #include <sys/fault.h>
420Sstevel@tonic-gate #include <sys/syscall.h>
430Sstevel@tonic-gate #include <sys/cpuvar.h>
440Sstevel@tonic-gate #include <sys/sysi86.h>
450Sstevel@tonic-gate #include <sys/psw.h>
460Sstevel@tonic-gate #include <sys/cred.h>
470Sstevel@tonic-gate #include <sys/policy.h>
480Sstevel@tonic-gate #include <sys/thread.h>
490Sstevel@tonic-gate #include <sys/debug.h>
500Sstevel@tonic-gate #include <sys/ontrap.h>
510Sstevel@tonic-gate #include <sys/privregs.h>
520Sstevel@tonic-gate #include <sys/x86_archext.h>
530Sstevel@tonic-gate #include <sys/vmem.h>
540Sstevel@tonic-gate #include <sys/kmem.h>
550Sstevel@tonic-gate #include <sys/mman.h>
560Sstevel@tonic-gate #include <sys/archsystm.h>
570Sstevel@tonic-gate #include <vm/hat.h>
580Sstevel@tonic-gate #include <vm/as.h>
590Sstevel@tonic-gate #include <vm/seg.h>
600Sstevel@tonic-gate #include <vm/seg_kmem.h>
610Sstevel@tonic-gate #include <vm/faultcode.h>
620Sstevel@tonic-gate #include <sys/fp.h>
630Sstevel@tonic-gate #include <sys/cmn_err.h>
640Sstevel@tonic-gate 
651217Srab static void setup_ldt(proc_t *pp);
660Sstevel@tonic-gate static void *ldt_map(proc_t *pp, uint_t seli);
671217Srab static void ldt_free(proc_t *pp);
680Sstevel@tonic-gate 
690Sstevel@tonic-gate extern void rtcsync(void);
700Sstevel@tonic-gate extern long ggmtl(void);
710Sstevel@tonic-gate extern void sgmtl(long);
720Sstevel@tonic-gate 
730Sstevel@tonic-gate /*
740Sstevel@tonic-gate  * sysi86 System Call
750Sstevel@tonic-gate  */
760Sstevel@tonic-gate 
770Sstevel@tonic-gate /* ARGSUSED */
780Sstevel@tonic-gate int
790Sstevel@tonic-gate sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
800Sstevel@tonic-gate {
81*2712Snn35248 	struct ssd ssd;
820Sstevel@tonic-gate 	int error = 0;
830Sstevel@tonic-gate 	int c;
840Sstevel@tonic-gate 	proc_t *pp = curproc;
850Sstevel@tonic-gate 
860Sstevel@tonic-gate 	switch (cmd) {
870Sstevel@tonic-gate 
880Sstevel@tonic-gate 	/*
890Sstevel@tonic-gate 	 * The SI86V86 subsystem call of the SYSI86 system call
900Sstevel@tonic-gate 	 * supports only one subcode -- V86SC_IOPL.
910Sstevel@tonic-gate 	 */
920Sstevel@tonic-gate 	case SI86V86:
930Sstevel@tonic-gate 		if (arg1 == V86SC_IOPL) {
940Sstevel@tonic-gate 			struct regs *rp = lwptoregs(ttolwp(curthread));
950Sstevel@tonic-gate 			greg_t oldpl = rp->r_ps & PS_IOPL;
960Sstevel@tonic-gate 			greg_t newpl = arg2 & PS_IOPL;
970Sstevel@tonic-gate 
980Sstevel@tonic-gate 			/*
990Sstevel@tonic-gate 			 * Must be privileged to run this system call
1000Sstevel@tonic-gate 			 * if giving more io privilege.
1010Sstevel@tonic-gate 			 */
1020Sstevel@tonic-gate 			if (newpl > oldpl && (error =
1030Sstevel@tonic-gate 			    secpolicy_sys_config(CRED(), B_FALSE)) != 0)
1040Sstevel@tonic-gate 				return (set_errno(error));
1050Sstevel@tonic-gate 			rp->r_ps ^= oldpl ^ newpl;
1060Sstevel@tonic-gate 		} else
1070Sstevel@tonic-gate 			error = EINVAL;
1080Sstevel@tonic-gate 		break;
1090Sstevel@tonic-gate 
1100Sstevel@tonic-gate 	/*
1110Sstevel@tonic-gate 	 * Set a segment descriptor
1120Sstevel@tonic-gate 	 */
1130Sstevel@tonic-gate 	case SI86DSCR:
1140Sstevel@tonic-gate 		/*
1150Sstevel@tonic-gate 		 * There are considerable problems here manipulating
1160Sstevel@tonic-gate 		 * resources shared by many running lwps.  Get everyone
1170Sstevel@tonic-gate 		 * into a safe state before changing the LDT.
1180Sstevel@tonic-gate 		 */
1190Sstevel@tonic-gate 		if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
1200Sstevel@tonic-gate 			error = EINTR;
1210Sstevel@tonic-gate 			break;
1220Sstevel@tonic-gate 		}
123*2712Snn35248 
124*2712Snn35248 		if (get_udatamodel() == DATAMODEL_LP64) {
125*2712Snn35248 			error = EINVAL;
126*2712Snn35248 			break;
127*2712Snn35248 		}
128*2712Snn35248 
129*2712Snn35248 		if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) {
130*2712Snn35248 			error = EFAULT;
131*2712Snn35248 			break;
132*2712Snn35248 		}
133*2712Snn35248 
134*2712Snn35248 		error = setdscr(&ssd);
135*2712Snn35248 
1360Sstevel@tonic-gate 		mutex_enter(&pp->p_lock);
1370Sstevel@tonic-gate 		if (curthread != pp->p_agenttp)
1380Sstevel@tonic-gate 			continuelwps(pp);
1390Sstevel@tonic-gate 		mutex_exit(&pp->p_lock);
1400Sstevel@tonic-gate 		break;
1410Sstevel@tonic-gate 
1420Sstevel@tonic-gate 	case SI86FPHW:
1430Sstevel@tonic-gate 		c = fp_kind & 0xff;
1440Sstevel@tonic-gate 		if (suword32((void *)arg1, c) == -1)
1450Sstevel@tonic-gate 			error = EFAULT;
1460Sstevel@tonic-gate 		break;
1470Sstevel@tonic-gate 
1480Sstevel@tonic-gate 	case SI86FPSTART:
1490Sstevel@tonic-gate 		/*
1500Sstevel@tonic-gate 		 * arg1 is the address of _fp_hw
1510Sstevel@tonic-gate 		 * arg2 is the desired x87 FCW value
1520Sstevel@tonic-gate 		 * arg3 is the desired SSE MXCSR value
1530Sstevel@tonic-gate 		 * a return value of one means SSE hardware, else none.
1540Sstevel@tonic-gate 		 */
1550Sstevel@tonic-gate 		c = fp_kind & 0xff;
1560Sstevel@tonic-gate 		if (suword32((void *)arg1, c) == -1) {
1570Sstevel@tonic-gate 			error = EFAULT;
1580Sstevel@tonic-gate 			break;
1590Sstevel@tonic-gate 		}
1600Sstevel@tonic-gate 		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
1610Sstevel@tonic-gate 		return (fp_kind == __FP_SSE ? 1 : 0);
1620Sstevel@tonic-gate 
1630Sstevel@tonic-gate 	/* real time clock management commands */
1640Sstevel@tonic-gate 
1650Sstevel@tonic-gate 	case WTODC:
1660Sstevel@tonic-gate 		if ((error = secpolicy_settime(CRED())) == 0) {
1670Sstevel@tonic-gate 			timestruc_t ts;
1680Sstevel@tonic-gate 			mutex_enter(&tod_lock);
1690Sstevel@tonic-gate 			gethrestime(&ts);
1700Sstevel@tonic-gate 			tod_set(ts);
1710Sstevel@tonic-gate 			mutex_exit(&tod_lock);
1720Sstevel@tonic-gate 		}
1730Sstevel@tonic-gate 		break;
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate /* Give some timezone playing room */
1760Sstevel@tonic-gate #define	ONEWEEK	(7 * 24 * 60 * 60)
1770Sstevel@tonic-gate 
1780Sstevel@tonic-gate 	case SGMTL:
1790Sstevel@tonic-gate 		/*
1800Sstevel@tonic-gate 		 * Called from 32 bit land, negative values
1810Sstevel@tonic-gate 		 * are not sign extended, so we do that here
1820Sstevel@tonic-gate 		 * by casting it to an int and back.  We also
1830Sstevel@tonic-gate 		 * clamp the value to within reason and detect
1840Sstevel@tonic-gate 		 * when a 64 bit call overflows an int.
1850Sstevel@tonic-gate 		 */
1860Sstevel@tonic-gate 		if ((error = secpolicy_settime(CRED())) == 0) {
1870Sstevel@tonic-gate 			int newlag = (int)arg1;
1880Sstevel@tonic-gate 
1890Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
1900Sstevel@tonic-gate 			if (get_udatamodel() == DATAMODEL_NATIVE &&
1910Sstevel@tonic-gate 			    (long)newlag != (long)arg1) {
1920Sstevel@tonic-gate 				error = EOVERFLOW;
1930Sstevel@tonic-gate 			} else
1940Sstevel@tonic-gate #endif
1950Sstevel@tonic-gate 			if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
1960Sstevel@tonic-gate 				sgmtl(newlag);
1970Sstevel@tonic-gate 			else
1980Sstevel@tonic-gate 				error = EOVERFLOW;
1990Sstevel@tonic-gate 		}
2000Sstevel@tonic-gate 		break;
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate 	case GGMTL:
2030Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
2040Sstevel@tonic-gate 			if (sulword((void *)arg1, ggmtl()) == -1)
2050Sstevel@tonic-gate 				error = EFAULT;
2060Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
2070Sstevel@tonic-gate 		} else {
2080Sstevel@tonic-gate 			time_t gmtl;
2090Sstevel@tonic-gate 
2100Sstevel@tonic-gate 			if ((gmtl = ggmtl()) > INT32_MAX) {
2110Sstevel@tonic-gate 				/*
2120Sstevel@tonic-gate 				 * Since gmt_lag can at most be
2130Sstevel@tonic-gate 				 * +/- 12 hours, something is
2140Sstevel@tonic-gate 				 * *seriously* messed up here.
2150Sstevel@tonic-gate 				 */
2160Sstevel@tonic-gate 				error = EOVERFLOW;
2170Sstevel@tonic-gate 			} else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
2180Sstevel@tonic-gate 				error = EFAULT;
2190Sstevel@tonic-gate #endif
2200Sstevel@tonic-gate 		}
2210Sstevel@tonic-gate 		break;
2220Sstevel@tonic-gate 
2230Sstevel@tonic-gate 	case RTCSYNC:
2240Sstevel@tonic-gate 		if ((error = secpolicy_settime(CRED())) == 0)
2250Sstevel@tonic-gate 			rtcsync();
2260Sstevel@tonic-gate 		break;
2270Sstevel@tonic-gate 
2280Sstevel@tonic-gate 	/* END OF real time clock management commands */
2290Sstevel@tonic-gate 
2300Sstevel@tonic-gate 	default:
2310Sstevel@tonic-gate 		error = EINVAL;
2320Sstevel@tonic-gate 		break;
2330Sstevel@tonic-gate 	}
2340Sstevel@tonic-gate 	return (error == 0 ? 0 : set_errno(error));
2350Sstevel@tonic-gate }
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate void
2380Sstevel@tonic-gate usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
2390Sstevel@tonic-gate {
2400Sstevel@tonic-gate 	ssd->bo = USEGD_GETBASE(usd);
2410Sstevel@tonic-gate 	ssd->ls = USEGD_GETLIMIT(usd);
2420Sstevel@tonic-gate 	ssd->sel = sel;
2430Sstevel@tonic-gate 
2440Sstevel@tonic-gate 	/*
2450Sstevel@tonic-gate 	 * set type, dpl and present bits.
2460Sstevel@tonic-gate 	 */
2470Sstevel@tonic-gate 	ssd->acc1 = usd->usd_type;
2480Sstevel@tonic-gate 	ssd->acc1 |= usd->usd_dpl << 5;
2490Sstevel@tonic-gate 	ssd->acc1 |= usd->usd_p << (5 + 2);
2500Sstevel@tonic-gate 
2510Sstevel@tonic-gate 	/*
2520Sstevel@tonic-gate 	 * set avl, DB and granularity bits.
2530Sstevel@tonic-gate 	 */
2540Sstevel@tonic-gate 	ssd->acc2 = usd->usd_avl;
2550Sstevel@tonic-gate 
2560Sstevel@tonic-gate #if defined(__amd64)
2570Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_long << 1;
2580Sstevel@tonic-gate #else
2590Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_reserved << 1;
2600Sstevel@tonic-gate #endif
2610Sstevel@tonic-gate 
2620Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_def32 << (1 + 1);
2630Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
2640Sstevel@tonic-gate }
2650Sstevel@tonic-gate 
2660Sstevel@tonic-gate static void
2670Sstevel@tonic-gate ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
2680Sstevel@tonic-gate {
2690Sstevel@tonic-gate 
2700Sstevel@tonic-gate 	USEGD_SETBASE(usd, ssd->bo);
2710Sstevel@tonic-gate 	USEGD_SETLIMIT(usd, ssd->ls);
2720Sstevel@tonic-gate 
2730Sstevel@tonic-gate 	/*
2740Sstevel@tonic-gate 	 * set type, dpl and present bits.
2750Sstevel@tonic-gate 	 */
2760Sstevel@tonic-gate 	usd->usd_type = ssd->acc1;
2770Sstevel@tonic-gate 	usd->usd_dpl = ssd->acc1 >> 5;
2780Sstevel@tonic-gate 	usd->usd_p = ssd->acc1 >> (5 + 2);
2790Sstevel@tonic-gate 
2800Sstevel@tonic-gate 	ASSERT(usd->usd_type >= SDT_MEMRO);
2810Sstevel@tonic-gate 	ASSERT(usd->usd_dpl == SEL_UPL);
2820Sstevel@tonic-gate 
2830Sstevel@tonic-gate 	/*
2840Sstevel@tonic-gate 	 * set avl, DB and granularity bits.
2850Sstevel@tonic-gate 	 */
2860Sstevel@tonic-gate 	usd->usd_avl = ssd->acc2;
2870Sstevel@tonic-gate 
2880Sstevel@tonic-gate #if defined(__amd64)
2890Sstevel@tonic-gate 	usd->usd_long = ssd->acc2 >> 1;
2900Sstevel@tonic-gate #else
2910Sstevel@tonic-gate 	usd->usd_reserved = ssd->acc2 >> 1;
2920Sstevel@tonic-gate #endif
2930Sstevel@tonic-gate 
2940Sstevel@tonic-gate 	usd->usd_def32 = ssd->acc2 >> (1 + 1);
2950Sstevel@tonic-gate 	usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
2960Sstevel@tonic-gate }
2970Sstevel@tonic-gate 
2980Sstevel@tonic-gate static void
2990Sstevel@tonic-gate ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
3000Sstevel@tonic-gate {
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate 	sgd->sgd_looffset = ssd->bo;
3030Sstevel@tonic-gate 	sgd->sgd_hioffset = ssd->bo >> 16;
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate 	sgd->sgd_selector = ssd->ls;
3060Sstevel@tonic-gate 	/*
3070Sstevel@tonic-gate 	 * set type, dpl and present bits.
3080Sstevel@tonic-gate 	 */
3090Sstevel@tonic-gate 	sgd->sgd_type = ssd->acc1;
3100Sstevel@tonic-gate 	sgd->sgd_dpl = ssd->acc1 >> 5;
3110Sstevel@tonic-gate 	sgd->sgd_p = ssd->acc1 >> 7;
3120Sstevel@tonic-gate 	ASSERT(sgd->sgd_type == SDT_SYSCGT);
3130Sstevel@tonic-gate 	ASSERT(sgd->sgd_dpl == SEL_UPL);
3140Sstevel@tonic-gate 
3150Sstevel@tonic-gate #if defined(__i386)	/* reserved, ignored in amd64 */
3160Sstevel@tonic-gate 	sgd->sgd_stkcpy = 0;
3170Sstevel@tonic-gate #endif
3180Sstevel@tonic-gate }
3190Sstevel@tonic-gate 
3201217Srab /*
3211217Srab  * Load LDT register with the current process's LDT.
3221217Srab  */
3231217Srab void
3241217Srab ldt_load(void)
3251217Srab {
3261217Srab 	/*
3271217Srab 	 */
3281217Srab 	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
3291217Srab 	wr_ldtr(ULDT_SEL);
3301217Srab }
3311217Srab 
3321217Srab /*
3331217Srab  * Store a NULL selector in the LDTR. All subsequent illegal references to
3341217Srab  * the LDT will result in a #gp.
3351217Srab  */
3361217Srab void
3371217Srab ldt_unload(void)
3381217Srab {
3391217Srab 	CPU->cpu_gdt[GDT_LDT] = zero_udesc;
3401217Srab 	wr_ldtr(0);
3411217Srab }
3420Sstevel@tonic-gate 
3430Sstevel@tonic-gate /*ARGSUSED*/
3440Sstevel@tonic-gate static void
3451217Srab ldt_savectx(proc_t *p)
3460Sstevel@tonic-gate {
3471217Srab 	ASSERT(p->p_ldt != NULL);
3481217Srab 	ASSERT(p == curproc);
3491217Srab 
3500Sstevel@tonic-gate #if defined(__amd64)
3510Sstevel@tonic-gate 	/*
3520Sstevel@tonic-gate 	 * The 64-bit kernel must be sure to clear any stale ldt
3530Sstevel@tonic-gate 	 * selectors when context switching away from a process that
3540Sstevel@tonic-gate 	 * has a private ldt. Consider the following example:
3550Sstevel@tonic-gate 	 *
3560Sstevel@tonic-gate 	 * 	Wine creats a ldt descriptor and points a segment register
3570Sstevel@tonic-gate 	 * 	to it.
3580Sstevel@tonic-gate 	 *
3590Sstevel@tonic-gate 	 *	We then context switch away from wine lwp to kernel
3600Sstevel@tonic-gate 	 *	thread and hit breakpoint in kernel with kmdb
3610Sstevel@tonic-gate 	 *
3620Sstevel@tonic-gate 	 *	When we continue and resume from kmdb we will #gp
3630Sstevel@tonic-gate 	 * 	fault since kmdb will have saved the stale ldt selector
3640Sstevel@tonic-gate 	 *	from wine and will try to restore it but we are no longer in
3650Sstevel@tonic-gate 	 *	the context of the wine process and do not have our
3660Sstevel@tonic-gate 	 *	ldtr register pointing to the private ldt.
3670Sstevel@tonic-gate 	 */
3680Sstevel@tonic-gate 	clr_ldt_sregs();
3690Sstevel@tonic-gate #endif
3700Sstevel@tonic-gate 
3711217Srab 	ldt_unload();
3720Sstevel@tonic-gate 	cpu_fast_syscall_enable(NULL);
3730Sstevel@tonic-gate }
3740Sstevel@tonic-gate 
3751217Srab static void
3761217Srab ldt_restorectx(proc_t *p)
3771217Srab {
3781217Srab 	ASSERT(p->p_ldt != NULL);
3791217Srab 	ASSERT(p == curproc);
3801217Srab 
3811217Srab 	ldt_load();
3821217Srab 	cpu_fast_syscall_disable(NULL);
3831217Srab }
3841217Srab 
3850Sstevel@tonic-gate /*
3861217Srab  * When a process with a private LDT execs, fast syscalls must be enabled for
3871217Srab  * the new process image.
3880Sstevel@tonic-gate  */
3890Sstevel@tonic-gate /* ARGSUSED */
3900Sstevel@tonic-gate static void
3911217Srab ldt_freectx(proc_t *p, int isexec)
3920Sstevel@tonic-gate {
3931217Srab 	ASSERT(p->p_ldt);
3941217Srab 
3950Sstevel@tonic-gate 	if (isexec) {
3960Sstevel@tonic-gate 		kpreempt_disable();
3970Sstevel@tonic-gate 		cpu_fast_syscall_enable(NULL);
3980Sstevel@tonic-gate 		kpreempt_enable();
3990Sstevel@tonic-gate 	}
4001217Srab 
4011217Srab 	/*
4021217Srab 	 * ldt_free() will free the memory used by the private LDT, reset the
4031217Srab 	 * process's descriptor, and re-program the LDTR.
4041217Srab 	 */
4051217Srab 	ldt_free(p);
4060Sstevel@tonic-gate }
4070Sstevel@tonic-gate 
4080Sstevel@tonic-gate /*
4090Sstevel@tonic-gate  * Install ctx op that ensures syscall/sysenter are disabled.
4100Sstevel@tonic-gate  * See comments below.
4110Sstevel@tonic-gate  *
4121217Srab  * When a thread with a private LDT forks, the new process
4130Sstevel@tonic-gate  * must have the LDT context ops installed.
4140Sstevel@tonic-gate  */
4150Sstevel@tonic-gate /* ARGSUSED */
4160Sstevel@tonic-gate static void
4171217Srab ldt_installctx(proc_t *p, proc_t *cp)
4180Sstevel@tonic-gate {
4191217Srab 	proc_t		*targ = p;
4201217Srab 	kthread_t	*t;
4210Sstevel@tonic-gate 
4220Sstevel@tonic-gate 	/*
4231217Srab 	 * If this is a fork, operate on the child process.
4240Sstevel@tonic-gate 	 */
4251217Srab 	if (cp != NULL) {
4261217Srab 		targ = cp;
4271217Srab 		ldt_dup(p, cp);
4281217Srab 	}
4290Sstevel@tonic-gate 
4301217Srab 	/*
4311217Srab 	 * The process context ops expect the target process as their argument.
4321217Srab 	 */
4331217Srab 	ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
4341217Srab 	    ldt_installctx, ldt_savectx, ldt_freectx) == 0);
4350Sstevel@tonic-gate 
4361217Srab 	installpctx(targ, targ, ldt_savectx, ldt_restorectx,
4371217Srab 	    ldt_installctx, ldt_savectx, ldt_freectx);
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate 	/*
4400Sstevel@tonic-gate 	 * We've just disabled fast system call and return instructions; take
4410Sstevel@tonic-gate 	 * the slow path out to make sure we don't try to use one to return
4421217Srab 	 * back to user. We must set t_post_sys for every thread in the
4431217Srab 	 * process to make sure none of them escape out via fast return.
4440Sstevel@tonic-gate 	 */
4451217Srab 
4461217Srab 	mutex_enter(&targ->p_lock);
4471217Srab 	t = targ->p_tlist;
4481217Srab 	do {
4491217Srab 		t->t_post_sys = 1;
4501217Srab 	} while ((t = t->t_forw) != targ->p_tlist);
4511217Srab 	mutex_exit(&targ->p_lock);
4520Sstevel@tonic-gate }
4530Sstevel@tonic-gate 
/*
 * Apply a userland-supplied segment descriptor (struct ssd) to the
 * calling process's private LDT, creating the LDT on first use.
 * Returns 0 on success or an errno value.  The caller (the SI86DSCR
 * case of sysi86()) must have already quiesced the process's other
 * lwps, since we may invalidate a descriptor another lwp is using.
 */
int
setdscr(struct ssd *ssd)
{
	ushort_t seli; 		/* selector index */
	user_desc_t *dscrp;	/* descriptor pointer */
	proc_t	*pp = ttoproc(curthread);

	/*
	 * LDT segments: executable and data at DPL 3 only.
	 */
	if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
		return (EINVAL);

	/*
	 * check the selector index.
	 */
	seli = SELTOIDX(ssd->sel);
	if (seli >= MAXNLDT || seli < LDT_UDBASE)
		return (EINVAL);

	mutex_enter(&pp->p_ldtlock);

	/*
	 * If this is the first time for this process then setup a
	 * private LDT for it.
	 */
	if (pp->p_ldt == NULL) {
		kpreempt_disable();
		setup_ldt(pp);

		/*
		 * Now that this process has a private LDT, the use of
		 * the syscall/sysret and sysenter/sysexit instructions
		 * is forbidden for this process because they destroy
		 * the contents of the %cs and %ss segment registers.
		 *
		 * Explicitly disable them here and add a context
		 * handler to the process.  Note that disabling
		 * them here means we can't use sysret or sysexit on
		 * the way out of this system call - so we force this
		 * thread to take the slow path (which doesn't make use
		 * of sysenter or sysexit) back out.
		 */

		ldt_installctx(pp, NULL);

		cpu_fast_syscall_disable(NULL);

		/* ldt_installctx() must have routed us to the slow path */
		ASSERT(curthread->t_post_sys != 0);
		wr_ldtr(ULDT_SEL);
		kpreempt_enable();
	}

	/* grow the LDT (mapping new pages as needed) to cover seli */
	if (ldt_map(pp, seli) == NULL) {
		mutex_exit(&pp->p_ldtlock);
		return (ENOMEM);
	}

	ASSERT(seli <= pp->p_ldtlimit);
	dscrp = &pp->p_ldt[seli];

	/*
	 * On the 64-bit kernel, this is where things get more subtle.
	 * Recall that in the 64-bit kernel, when we enter the kernel we
	 * deliberately -don't- reload the segment selectors we came in on
	 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
	 * and the underlying descriptors are essentially ignored by the
	 * hardware in long mode - except for the base that we override with
	 * the gsbase MSRs.
	 *
	 * However, there's one unfortunate issue with this rosy picture --
	 * a descriptor that's not marked as 'present' will still generate
	 * an #np when loading a segment register.
	 *
	 * Consider this case.  An lwp creates a harmless LDT entry, points
	 * one of its segment registers at it, then tells the kernel (here)
	 * to delete it.  In the 32-bit kernel, the #np will happen on the
	 * way back to userland where we reload the segment registers, and be
	 * handled in kern_gpfault().  In the 64-bit kernel, the same thing
	 * will happen in the normal case too.  However, if we're trying to
	 * use a debugger that wants to save and restore the segment registers,
	 * and the debugger thinks that we have valid segment registers, we
	 * have the problem that the debugger will try and restore the
	 * segment register that points at the now 'not present' descriptor
	 * and will take a #np right there.
	 *
	 * We should obviously fix the debugger to be paranoid about
	 * -not- restoring segment registers that point to bad descriptors;
	 * however we can prevent the problem here if we check to see if any
	 * of the segment registers are still pointing at the thing we're
	 * destroying; if they are, return an error instead. (That also seems
	 * a lot better failure mode than SIGKILL and a core file
	 * from kern_gpfault() too.)
	 */
	if (SI86SSD_PRES(ssd) == 0) {
		kthread_t *t;
		int bad = 0;

		/*
		 * Look carefully at the segment registers of every lwp
		 * in the process (they're all stopped by our caller).
		 * If we're about to invalidate a descriptor that's still
		 * being referenced by *any* of them, return an error,
		 * rather than having them #gp on their way out of the kernel.
		 */
		ASSERT(pp->p_lwprcnt == 1);

		mutex_enter(&pp->p_lock);
		t = pp->p_tlist;
		do {
			klwp_t *lwp = ttolwp(t);
			struct regs *rp = lwp->lwp_regs;
#if defined(__amd64)
			pcb_t *pcb = &lwp->lwp_pcb;
#endif

			if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) {
				bad = 1;
				break;
			}

#if defined(__amd64)
			/*
			 * With an update pending, the authoritative
			 * selector copies live in the pcb, not the
			 * saved registers.
			 */
			if (pcb->pcb_flags & RUPDATE_PENDING) {
				if (ssd->sel == pcb->pcb_ds ||
				    ssd->sel == pcb->pcb_es ||
				    ssd->sel == pcb->pcb_fs ||
				    ssd->sel == pcb->pcb_gs) {
					bad = 1;
					break;
				}
			} else
#endif
			{
				if (ssd->sel == rp->r_ds ||
				    ssd->sel == rp->r_es ||
				    ssd->sel == rp->r_fs ||
				    ssd->sel == rp->r_gs) {
					bad = 1;
					break;
				}
			}

		} while ((t = t->t_forw) != pp->p_tlist);
		mutex_exit(&pp->p_lock);

		if (bad) {
			mutex_exit(&pp->p_ldtlock);
			return (EBUSY);
		}
	}

	/*
	 * If acc1 is zero, clear the descriptor (including the 'present' bit)
	 */
	if (ssd->acc1 == 0) {
		bzero(dscrp, sizeof (*dscrp));
		mutex_exit(&pp->p_ldtlock);
		return (0);
	}

	/*
	 * Check segment type, allow segment not present and
	 * only user DPL (3).
	 */
	if (SI86SSD_DPL(ssd) != SEL_UPL) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}

#if defined(__amd64)
	/*
	 * Do not allow 32-bit applications to create 64-bit mode code
	 * segments.
	 */
	if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
	    SI86SSD_ISLONG(ssd)) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}
#endif /* __amd64 */

	/*
	 * Set up a code or data user segment descriptor.
	 */
	if (SI86SSD_ISUSEG(ssd)) {
		ssd_to_usd(ssd, dscrp);
		mutex_exit(&pp->p_ldtlock);
		return (0);
	}

	/*
	 * Allow a call gate only if the destination is in the LDT.
	 */
	if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {
		ssd_to_sgd(ssd, (gate_desc_t *)dscrp);
		mutex_exit(&pp->p_ldtlock);
		return (0);
	}

	mutex_exit(&pp->p_ldtlock);
	return (EINVAL);
}
6560Sstevel@tonic-gate 
/*
 * Allocate a private LDT for this process and initialize it with the
 * default entries.  Virtual space for the maximum-size LDT is reserved
 * up front, but only the minimum number of pages is backed with
 * physical memory; ldt_map() fills in more pages on demand.
 */
static void
setup_ldt(proc_t *pp)
{
	user_desc_t *ldtp;	/* descriptor pointer */
	pgcnt_t npages = btopr(MAXNLDT * sizeof (user_desc_t));

	/*
	 * Allocate maximum virtual space we need for this LDT.
	 */
	ldtp = vmem_alloc(heap_arena, ptob(npages), VM_SLEEP);

	/*
	 * Allocate the minimum number of physical pages for LDT.
	 */
	(void) segkmem_xalloc(NULL, ldtp, MINNLDT * sizeof (user_desc_t),
	    VM_SLEEP, 0, segkmem_page_create, NULL);

	/* all-zero descriptors are 'not present' -- a safe default */
	bzero(ldtp, ptob(btopr(MINNLDT * sizeof (user_desc_t))));

	kpreempt_disable();

	/* Update proc structure. XXX - need any locks here??? */

	set_syssegd(&pp->p_ldt_desc, ldtp, MINNLDT * sizeof (user_desc_t) - 1,
	    SDT_SYSLDT, SEL_KPL);

	pp->p_ldtlimit = MINNLDT - 1;
	pp->p_ldt = ldtp;
	/* publish the new LDT descriptor in this CPU's GDT immediately */
	if (pp == curproc)
		*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = pp->p_ldt_desc;

	kpreempt_enable();
}
6940Sstevel@tonic-gate 
/*
 * Map the page corresponding to the selector entry. If the page is
 * already mapped then it simply returns with the pointer to the entry.
 * Otherwise it allocates a physical page for it and returns the pointer
 * to the entry.  Returns 0 for errors.
 */
static void *
ldt_map(proc_t *pp, uint_t seli)
{
	caddr_t ent0_addr = (caddr_t)&pp->p_ldt[0];
	caddr_t ent_addr = (caddr_t)&pp->p_ldt[seli];
	/* page containing entry 0; volatile so the probe read isn't elided */
	volatile caddr_t page = (caddr_t)((uintptr_t)ent0_addr & (~PAGEOFFSET));
	caddr_t epage = (caddr_t)((uintptr_t)ent_addr & (~PAGEOFFSET));
	on_trap_data_t otd;

	ASSERT(pp->p_ldt != NULL);

	/* already within the current LDT limit: nothing to map */
	if (seli <= pp->p_ldtlimit)
		return (ent_addr);

	/*
	 * We are increasing the size of the process's LDT.
	 * Make sure this and all intervening pages are mapped.
	 * The on_trap() probe read distinguishes already-backed pages
	 * from ones that still need physical backing.
	 */
	while (page <= epage) {
		if (!on_trap(&otd, OT_DATA_ACCESS))
			(void) *(volatile int *)page;	/* peek at the page */
		else {		/* Allocate a physical page */
			(void) segkmem_xalloc(NULL, page, PAGESIZE, VM_SLEEP, 0,
			    segkmem_page_create, NULL);
			/* zero-fill: all descriptors start 'not present' */
			bzero(page, PAGESIZE);
		}
		no_trap();
		page += PAGESIZE;
	}

	/* XXX - need any locks to update proc_t or gdt ??? */

	ASSERT(curproc == pp);

	/* grow the recorded limit and re-load the LDTR to pick it up */
	kpreempt_disable();
	pp->p_ldtlimit = seli;
	SYSSEGD_SETLIMIT(&pp->p_ldt_desc, (seli+1) * sizeof (user_desc_t) -1);

	ldt_load();
	kpreempt_enable();

	return (ent_addr);
}
7440Sstevel@tonic-gate 
/*
 * Free up the kernel memory used for LDT of this process.
 */
static void
ldt_free(proc_t *pp)
{
	on_trap_data_t otd;
	caddr_t start, end;
	/* volatile: live across the on_trap() longjmp-style return */
	volatile caddr_t addr;

	ASSERT(pp->p_ldt != NULL);

	mutex_enter(&pp->p_ldtlock);
	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));

	/*
	 * Free the physical page(s) used for mapping LDT.
	 * Pages past the initial allocation are only backed on demand
	 * (see ldt_map()), so probe each page under on_trap() and free
	 * only those that are actually present.
	 */
	for (addr = start; addr <= end; addr += PAGESIZE) {
		if (!on_trap(&otd, OT_DATA_ACCESS)) {
			/* peek at the address */
			(void) *(volatile int *)addr;
			segkmem_free(NULL, addr, PAGESIZE);
		}
	}
	no_trap();

	/* Free up the virtual address space used for this LDT */
	vmem_free(heap_arena, pp->p_ldt,
	    ptob(btopr(MAXNLDT * sizeof (user_desc_t))));
	/*
	 * Clear the proc's LDT state with preemption disabled so the
	 * descriptor and the per-CPU unload below happen on one CPU.
	 */
	kpreempt_disable();
	pp->p_ldt = NULL;
	pp->p_ldt_desc = zero_sdesc;
	pp->p_ldtlimit = 0;

	if (pp == curproc)
		ldt_unload();	/* drop the now-stale LDT from this CPU */
	kpreempt_enable();
	mutex_exit(&pp->p_ldtlock);
}
7840Sstevel@tonic-gate 
/*
 * On fork copy new ldt for child.
 * Allocates a fresh minimum-size LDT for the child via setup_ldt(),
 * then copies the parent's entries page by page, backing extra child
 * pages on demand to mirror the parent's lazily-grown LDT.
 */
void
ldt_dup(proc_t *pp, proc_t *cp)
{
	on_trap_data_t otd;
	caddr_t start, end;
	/* volatile: live across the on_trap() longjmp-style return */
	volatile caddr_t addr, caddr;
	int	minsize;

	ASSERT(pp->p_ldt);

	setup_ldt(cp);

	mutex_enter(&pp->p_ldtlock);
	cp->p_ldtlimit = pp->p_ldtlimit;
	/* Match the child's descriptor limit to the parent's. */
	SYSSEGD_SETLIMIT(&cp->p_ldt_desc,
	    (pp->p_ldtlimit+1) * sizeof (user_desc_t) -1);
	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));
	caddr = (caddr_t)cp->p_ldt; /* child LDT start */

	/*
	 * Bytes below 'minsize' were already backed by setup_ldt();
	 * only child pages at or beyond it need fresh allocation.
	 * NOTE(review): this expression adds a full page even when
	 * MINNLDT * sizeof (user_desc_t) is already page-aligned --
	 * confirm whether plain round-up was intended.
	 */
	minsize = ((MINNLDT * sizeof (user_desc_t)) + PAGESIZE) & ~PAGEOFFSET;
	/* Walk thru the physical page(s) used for parent's LDT */
	for (addr = start; addr <= end; addr += PAGESIZE, caddr += PAGESIZE) {
		if (!on_trap(&otd, OT_DATA_ACCESS)) {
			(void) *(volatile int *)addr; /* peek at the address */
			/* allocate a page if necessary */
			if (caddr >= ((caddr_t)cp->p_ldt + minsize)) {
				(void) segkmem_xalloc(NULL, caddr, PAGESIZE,
				    VM_SLEEP, 0, segkmem_page_create, NULL);
			}
			/* Parent page is present: copy it to the child. */
			bcopy(addr, caddr, PAGESIZE);
		}
	}
	no_trap();
	mutex_exit(&pp->p_ldtlock);
}
824