xref: /onnv-gate/usr/src/uts/intel/ia32/os/sysi86.c (revision 1217:f95ffdc997b7)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
23*1217Srab  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
280Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
290Sstevel@tonic-gate /*	  All Rights Reserved  	*/
300Sstevel@tonic-gate 
310Sstevel@tonic-gate /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
320Sstevel@tonic-gate /*	  All Rights Reserved	*/
330Sstevel@tonic-gate 
340Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
350Sstevel@tonic-gate 
360Sstevel@tonic-gate #include <sys/param.h>
370Sstevel@tonic-gate #include <sys/types.h>
380Sstevel@tonic-gate #include <sys/sysmacros.h>
390Sstevel@tonic-gate #include <sys/systm.h>
400Sstevel@tonic-gate #include <sys/signal.h>
410Sstevel@tonic-gate #include <sys/errno.h>
420Sstevel@tonic-gate #include <sys/fault.h>
430Sstevel@tonic-gate #include <sys/syscall.h>
440Sstevel@tonic-gate #include <sys/cpuvar.h>
450Sstevel@tonic-gate #include <sys/sysi86.h>
460Sstevel@tonic-gate #include <sys/psw.h>
470Sstevel@tonic-gate #include <sys/cred.h>
480Sstevel@tonic-gate #include <sys/policy.h>
490Sstevel@tonic-gate #include <sys/thread.h>
500Sstevel@tonic-gate #include <sys/debug.h>
510Sstevel@tonic-gate #include <sys/ontrap.h>
520Sstevel@tonic-gate #include <sys/privregs.h>
530Sstevel@tonic-gate #include <sys/x86_archext.h>
540Sstevel@tonic-gate #include <sys/vmem.h>
550Sstevel@tonic-gate #include <sys/kmem.h>
560Sstevel@tonic-gate #include <sys/mman.h>
570Sstevel@tonic-gate #include <sys/archsystm.h>
580Sstevel@tonic-gate #include <vm/hat.h>
590Sstevel@tonic-gate #include <vm/as.h>
600Sstevel@tonic-gate #include <vm/seg.h>
610Sstevel@tonic-gate #include <vm/seg_kmem.h>
620Sstevel@tonic-gate #include <vm/faultcode.h>
630Sstevel@tonic-gate #include <sys/fp.h>
640Sstevel@tonic-gate #include <sys/cmn_err.h>
650Sstevel@tonic-gate 
660Sstevel@tonic-gate static int setdscr(caddr_t ap);
67*1217Srab static void setup_ldt(proc_t *pp);
680Sstevel@tonic-gate static void *ldt_map(proc_t *pp, uint_t seli);
69*1217Srab static void ldt_free(proc_t *pp);
700Sstevel@tonic-gate 
710Sstevel@tonic-gate extern void rtcsync(void);
720Sstevel@tonic-gate extern long ggmtl(void);
730Sstevel@tonic-gate extern void sgmtl(long);
740Sstevel@tonic-gate 
750Sstevel@tonic-gate /*
760Sstevel@tonic-gate  * sysi86 System Call
770Sstevel@tonic-gate  */
780Sstevel@tonic-gate 
/*
 * sysi86() implements the SYSI86 system call: a grab-bag of x86-specific
 * operations (IOPL manipulation, LDT descriptor management, FP hardware
 * queries, and real-time-clock management).  Returns 0 on success or
 * sets errno and returns the error via set_errno().
 */
/* ARGSUSED */
int
sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
{
	int error = 0;
	int c;
	proc_t *pp = curproc;

	switch (cmd) {

	/*
	 * The SI86V86 subsystem call of the SYSI86 system call
	 * supports only one subcode -- V86SC_IOPL.
	 */
	case SI86V86:
		if (arg1 == V86SC_IOPL) {
			struct regs *rp = lwptoregs(ttolwp(curthread));
			greg_t oldpl = rp->r_ps & PS_IOPL;
			greg_t newpl = arg2 & PS_IOPL;

			/*
			 * Must be privileged to run this system call
			 * if giving more io privilege.
			 */
			if (newpl > oldpl && (error =
			    secpolicy_sys_config(CRED(), B_FALSE)) != 0)
				return (set_errno(error));
			/* replace the old IOPL bits with the new ones */
			rp->r_ps ^= oldpl ^ newpl;
		} else
			error = EINVAL;
		break;

	/*
	 * Set a segment descriptor
	 */
	case SI86DSCR:
		/*
		 * There are considerable problems here manipulating
		 * resources shared by many running lwps.  Get everyone
		 * into a safe state before changing the LDT.
		 */
		if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
			error = EINTR;
			break;
		}
		error = setdscr((caddr_t)arg1);
		/* let the lwps we stopped above run again */
		mutex_enter(&pp->p_lock);
		if (curthread != pp->p_agenttp)
			continuelwps(pp);
		mutex_exit(&pp->p_lock);
		break;

	case SI86FPHW:
		/* report the FP hardware type to the user word at arg1 */
		c = fp_kind & 0xff;
		if (suword32((void *)arg1, c) == -1)
			error = EFAULT;
		break;

	case SI86FPSTART:
		/*
		 * arg1 is the address of _fp_hw
		 * arg2 is the desired x87 FCW value
		 * arg3 is the desired SSE MXCSR value
		 * a return value of one means SSE hardware, else none.
		 */
		c = fp_kind & 0xff;
		if (suword32((void *)arg1, c) == -1) {
			error = EFAULT;
			break;
		}
		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
		/* NB: returns directly, bypassing the common error path */
		return (fp_kind == __FP_SSE ? 1 : 0);

	/* real time clock management commands */

	case WTODC:
		/* write the current system time out to the TOD clock */
		if ((error = secpolicy_settime(CRED())) == 0) {
			timestruc_t ts;
			mutex_enter(&tod_lock);
			gethrestime(&ts);
			tod_set(ts);
			mutex_exit(&tod_lock);
		}
		break;

/* Give some timezone playing room */
#define	ONEWEEK	(7 * 24 * 60 * 60)

	case SGMTL:
		/*
		 * Called from 32 bit land, negative values
		 * are not sign extended, so we do that here
		 * by casting it to an int and back.  We also
		 * clamp the value to within reason and detect
		 * when a 64 bit call overflows an int.
		 */
		if ((error = secpolicy_settime(CRED())) == 0) {
			int newlag = (int)arg1;

#ifdef _SYSCALL32_IMPL
			if (get_udatamodel() == DATAMODEL_NATIVE &&
			    (long)newlag != (long)arg1) {
				error = EOVERFLOW;
			} else
#endif
			if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
				sgmtl(newlag);
			else
				error = EOVERFLOW;
		}
		break;

	case GGMTL:
		/* copy the GMT lag out to the user, sized per data model */
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (sulword((void *)arg1, ggmtl()) == -1)
				error = EFAULT;
#ifdef _SYSCALL32_IMPL
		} else {
			time_t gmtl;

			if ((gmtl = ggmtl()) > INT32_MAX) {
				/*
				 * Since gmt_lag can at most be
				 * +/- 12 hours, something is
				 * *seriously* messed up here.
				 */
				error = EOVERFLOW;
			} else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
				error = EFAULT;
#endif
		}
		break;

	case RTCSYNC:
		if ((error = secpolicy_settime(CRED())) == 0)
			rtcsync();
		break;

	/* END OF real time clock management commands */

	default:
		error = EINVAL;
		break;
	}
	return (error == 0 ? 0 : set_errno(error));
}
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate void
2270Sstevel@tonic-gate usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
2280Sstevel@tonic-gate {
2290Sstevel@tonic-gate 	ssd->bo = USEGD_GETBASE(usd);
2300Sstevel@tonic-gate 	ssd->ls = USEGD_GETLIMIT(usd);
2310Sstevel@tonic-gate 	ssd->sel = sel;
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 	/*
2340Sstevel@tonic-gate 	 * set type, dpl and present bits.
2350Sstevel@tonic-gate 	 */
2360Sstevel@tonic-gate 	ssd->acc1 = usd->usd_type;
2370Sstevel@tonic-gate 	ssd->acc1 |= usd->usd_dpl << 5;
2380Sstevel@tonic-gate 	ssd->acc1 |= usd->usd_p << (5 + 2);
2390Sstevel@tonic-gate 
2400Sstevel@tonic-gate 	/*
2410Sstevel@tonic-gate 	 * set avl, DB and granularity bits.
2420Sstevel@tonic-gate 	 */
2430Sstevel@tonic-gate 	ssd->acc2 = usd->usd_avl;
2440Sstevel@tonic-gate 
2450Sstevel@tonic-gate #if defined(__amd64)
2460Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_long << 1;
2470Sstevel@tonic-gate #else
2480Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_reserved << 1;
2490Sstevel@tonic-gate #endif
2500Sstevel@tonic-gate 
2510Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_def32 << (1 + 1);
2520Sstevel@tonic-gate 	ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
2530Sstevel@tonic-gate }
2540Sstevel@tonic-gate 
/*
 * Convert a struct ssd (as supplied by userland via SI86DSCR) into a
 * hardware user segment descriptor.  The bitfield assignments below
 * rely on implicit truncation: each usd_* field keeps only its low
 * bits, so shifting acc1/acc2 right positions the desired bit(s) at
 * bit 0 before the store.
 */
static void
ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
{

	USEGD_SETBASE(usd, ssd->bo);
	USEGD_SETLIMIT(usd, ssd->ls);

	/*
	 * set type, dpl and present bits.
	 */
	usd->usd_type = ssd->acc1;
	usd->usd_dpl = ssd->acc1 >> 5;
	usd->usd_p = ssd->acc1 >> (5 + 2);

	/* caller (setdscr) has already validated type and DPL */
	ASSERT(usd->usd_type >= SDT_MEMRO);
	ASSERT(usd->usd_dpl == SEL_UPL);

	/*
	 * set avl, DB and granularity bits.
	 */
	usd->usd_avl = ssd->acc2;

#if defined(__amd64)
	usd->usd_long = ssd->acc2 >> 1;
#else
	usd->usd_reserved = ssd->acc2 >> 1;
#endif

	usd->usd_def32 = ssd->acc2 >> (1 + 1);
	usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
}
2860Sstevel@tonic-gate 
/*
 * Convert a struct ssd describing a call gate into a hardware gate
 * descriptor.  For a gate, ssd->bo carries the target offset and
 * ssd->ls the target selector (rather than base/limit).  As in
 * ssd_to_usd(), the bitfield stores truncate to each field's width.
 */
static void
ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
{

	/* split the 32-bit target offset across the two offset fields */
	sgd->sgd_looffset = ssd->bo;
	sgd->sgd_hioffset = ssd->bo >> 16;

	sgd->sgd_selector = ssd->ls;
	/*
	 * set type, dpl and present bits.
	 */
	sgd->sgd_type = ssd->acc1;
	sgd->sgd_dpl = ssd->acc1 >> 5;
	sgd->sgd_p = ssd->acc1 >> 7;
	/* caller (setdscr) only routes call-gate requests here */
	ASSERT(sgd->sgd_type == SDT_SYSCGT);
	ASSERT(sgd->sgd_dpl == SEL_UPL);

#if defined(__i386)	/* reserved, ignored in amd64 */
	sgd->sgd_stkcpy = 0;
#endif
}
3080Sstevel@tonic-gate 
/*
 * Load LDT register with the current process's LDT.
 */
void
ldt_load(void)
{
	/*
	 * Copy the process's LDT descriptor into this CPU's GDT slot,
	 * then point the LDT register at that slot.  Caller must keep
	 * us bound to this CPU (e.g. via kpreempt_disable()).
	 */
	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
	wr_ldtr(ULDT_SEL);
}
320*1217Srab 
/*
 * Store a NULL selector in the LDTR. All subsequent illegal references to
 * the LDT will result in a #gp.
 */
void
ldt_unload(void)
{
	/* clear this CPU's GDT slot for the LDT, then null the LDTR */
	CPU->cpu_gdt[GDT_LDT] = zero_udesc;
	wr_ldtr(0);
}
3310Sstevel@tonic-gate 
/*
 * Process context-save operation: runs when a process with a private
 * LDT is switched off CPU.  Unloads the LDT and re-enables the fast
 * syscall instructions for whatever runs next.
 */
/*ARGSUSED*/
static void
ldt_savectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

#if defined(__amd64)
	/*
	 * The 64-bit kernel must be sure to clear any stale ldt
	 * selectors when context switching away from a process that
	 * has a private ldt. Consider the following example:
	 *
	 * 	Wine creates a ldt descriptor and points a segment register
	 * 	to it.
	 *
	 *	We then context switch away from wine lwp to kernel
	 *	thread and hit breakpoint in kernel with kmdb
	 *
	 *	When we continue and resume from kmdb we will #gp
	 * 	fault since kmdb will have saved the stale ldt selector
	 *	from wine and will try to restore it but we are no longer in
	 *	the context of the wine process and do not have our
	 *	ldtr register pointing to the private ldt.
	 */
	clr_ldt_sregs();
#endif

	ldt_unload();
	cpu_fast_syscall_enable(NULL);
}
3630Sstevel@tonic-gate 
/*
 * Process context-restore operation: runs when a process with a
 * private LDT is switched back onto a CPU.  Reloads the LDT and
 * disables the fast syscall instructions (which would clobber
 * %cs/%ss for an LDT-using process).
 */
static void
ldt_restorectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

	ldt_load();
	cpu_fast_syscall_disable(NULL);
}
373*1217Srab 
/*
 * Process context-free operation: runs at exec or exit for a process
 * with a private LDT.  When a process with a private LDT execs, fast
 * syscalls must be enabled for the new process image.
 */
/* ARGSUSED */
static void
ldt_freectx(proc_t *p, int isexec)
{
	ASSERT(p->p_ldt);

	if (isexec) {
		/* stay on this CPU while re-enabling its fast syscalls */
		kpreempt_disable();
		cpu_fast_syscall_enable(NULL);
		kpreempt_enable();
	}

	/*
	 * ldt_free() will free the memory used by the private LDT, reset the
	 * process's descriptor, and re-program the LDTR.
	 */
	ldt_free(p);
}
3960Sstevel@tonic-gate 
3970Sstevel@tonic-gate /*
3980Sstevel@tonic-gate  * Install ctx op that ensures syscall/sysenter are disabled.
3990Sstevel@tonic-gate  * See comments below.
4000Sstevel@tonic-gate  *
401*1217Srab  * When a thread with a private LDT forks, the new process
4020Sstevel@tonic-gate  * must have the LDT context ops installed.
4030Sstevel@tonic-gate  */
4040Sstevel@tonic-gate /* ARGSUSED */
4050Sstevel@tonic-gate static void
406*1217Srab ldt_installctx(proc_t *p, proc_t *cp)
4070Sstevel@tonic-gate {
408*1217Srab 	proc_t		*targ = p;
409*1217Srab 	kthread_t	*t;
4100Sstevel@tonic-gate 
4110Sstevel@tonic-gate 	/*
412*1217Srab 	 * If this is a fork, operate on the child process.
4130Sstevel@tonic-gate 	 */
414*1217Srab 	if (cp != NULL) {
415*1217Srab 		targ = cp;
416*1217Srab 		ldt_dup(p, cp);
417*1217Srab 	}
4180Sstevel@tonic-gate 
419*1217Srab 	/*
420*1217Srab 	 * The process context ops expect the target process as their argument.
421*1217Srab 	 */
422*1217Srab 	ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
423*1217Srab 	    ldt_installctx, ldt_savectx, ldt_freectx) == 0);
4240Sstevel@tonic-gate 
425*1217Srab 	installpctx(targ, targ, ldt_savectx, ldt_restorectx,
426*1217Srab 	    ldt_installctx, ldt_savectx, ldt_freectx);
4270Sstevel@tonic-gate 
4280Sstevel@tonic-gate 	/*
4290Sstevel@tonic-gate 	 * We've just disabled fast system call and return instructions; take
4300Sstevel@tonic-gate 	 * the slow path out to make sure we don't try to use one to return
431*1217Srab 	 * back to user. We must set t_post_sys for every thread in the
432*1217Srab 	 * process to make sure none of them escape out via fast return.
4330Sstevel@tonic-gate 	 */
434*1217Srab 
435*1217Srab 	mutex_enter(&targ->p_lock);
436*1217Srab 	t = targ->p_tlist;
437*1217Srab 	do {
438*1217Srab 		t->t_post_sys = 1;
439*1217Srab 	} while ((t = t->t_forw) != targ->p_tlist);
440*1217Srab 	mutex_exit(&targ->p_lock);
4410Sstevel@tonic-gate }
4420Sstevel@tonic-gate 
/*
 * setdscr() services SI86DSCR: validate a struct ssd copied in from
 * userland and install it as an entry in the calling process's private
 * LDT (creating the LDT on first use).  Returns 0 or an errno value.
 * All other lwps in the process are held by our caller (sysi86), so
 * their saved register state is stable while we inspect it.
 */
static int
setdscr(caddr_t ap)
{
	struct ssd ssd;		/* request structure buffer */
	ushort_t seli; 		/* selector index */
	user_desc_t *dscrp;	/* descriptor pointer */
	proc_t	*pp = ttoproc(curthread);

	/* 64-bit processes don't use LDT descriptors */
	if (get_udatamodel() == DATAMODEL_LP64)
		return (EINVAL);

	if (copyin(ap, &ssd, sizeof (ssd)) < 0)
		return (EFAULT);

	/*
	 * LDT segments: executable and data at DPL 3 only.
	 */
	if (!SELISLDT(ssd.sel) || !SELISUPL(ssd.sel))
		return (EINVAL);

	/*
	 * check the selector index.
	 */
	seli = SELTOIDX(ssd.sel);
	if (seli >= MAXNLDT || seli < LDT_UDBASE)
		return (EINVAL);

	/* serialize LDT updates for this process */
	mutex_enter(&pp->p_ldtlock);

	/*
	 * If this is the first time for this process then setup a
	 * private LDT for it.
	 */
	if (pp->p_ldt == NULL) {
		kpreempt_disable();
		setup_ldt(pp);

		/*
		 * Now that this process has a private LDT, the use of
		 * the syscall/sysret and sysenter/sysexit instructions
		 * is forbidden for this processes because they destroy
		 * the contents of %cs and %ss segment registers.
		 *
		 * Explicity disable them here and add a context handler
		 * to the process. Note that disabling
		 * them here means we can't use sysret or sysexit on
		 * the way out of this system call - so we force this
		 * thread to take the slow path (which doesn't make use
		 * of sysenter or sysexit) back out.
		 */

		ldt_installctx(pp, NULL);

		cpu_fast_syscall_disable(NULL);

		/* ldt_installctx() set t_post_sys on every thread */
		ASSERT(curthread->t_post_sys != 0);
		wr_ldtr(ULDT_SEL);
		kpreempt_enable();
	}

	/* ensure the page backing this LDT slot is mapped */
	if (ldt_map(pp, seli) == NULL) {
		mutex_exit(&pp->p_ldtlock);
		return (ENOMEM);
	}

	ASSERT(seli <= pp->p_ldtlimit);
	dscrp = &pp->p_ldt[seli];

	/*
	 * On the 64-bit kernel, this is where things get more subtle.
	 * Recall that in the 64-bit kernel, when we enter the kernel we
	 * deliberately -don't- reload the segment selectors we came in on
	 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
	 * and the underlying descriptors are essentially ignored by the
	 * hardware in long mode - except for the base that we override with
	 * the gsbase MSRs.
	 *
	 * However, there's one unfortunate issue with this rosy picture --
	 * a descriptor that's not marked as 'present' will still generate
	 * an #np when loading a segment register.
	 *
	 * Consider this case.  An lwp creates a harmless LDT entry, points
	 * one of it's segment registers at it, then tells the kernel (here)
	 * to delete it.  In the 32-bit kernel, the #np will happen on the
	 * way back to userland where we reload the segment registers, and be
	 * handled in kern_gpfault().  In the 64-bit kernel, the same thing
	 * will happen in the normal case too.  However, if we're trying to
	 * use a debugger that wants to save and restore the segment registers,
	 * and the debugger things that we have valid segment registers, we
	 * have the problem that the debugger will try and restore the
	 * segment register that points at the now 'not present' descriptor
	 * and will take a #np right there.
	 *
	 * We should obviously fix the debugger to be paranoid about
	 * -not- restoring segment registers that point to bad descriptors;
	 * however we can prevent the problem here if we check to see if any
	 * of the segment registers are still pointing at the thing we're
	 * destroying; if they are, return an error instead. (That also seems
	 * a lot better failure mode than SIGKILL and a core file
	 * from kern_gpfault() too.)
	 */
	if (SI86SSD_PRES(&ssd) == 0) {
		kthread_t *t;
		int bad = 0;

		/*
		 * Look carefully at the segment registers of every lwp
		 * in the process (they're all stopped by our caller).
		 * If we're about to invalidate a descriptor that's still
		 * being referenced by *any* of them, return an error,
		 * rather than having them #gp on their way out of the kernel.
		 */
		ASSERT(pp->p_lwprcnt == 1);

		mutex_enter(&pp->p_lock);
		t = pp->p_tlist;
		do {
			klwp_t *lwp = ttolwp(t);
			struct regs *rp = lwp->lwp_regs;
#if defined(__amd64)
			pcb_t *pcb = &lwp->lwp_pcb;
#endif

			if (ssd.sel == rp->r_cs || ssd.sel == rp->r_ss) {
				bad = 1;
				break;
			}

#if defined(__amd64)
			/*
			 * With a pending update, the pcb (not the regs
			 * structure) holds the authoritative selectors.
			 */
			if (pcb->pcb_flags & RUPDATE_PENDING) {
				if (ssd.sel == pcb->pcb_ds ||
				    ssd.sel == pcb->pcb_es ||
				    ssd.sel == pcb->pcb_fs ||
				    ssd.sel == pcb->pcb_gs) {
					bad = 1;
					break;
				}
			} else
#endif
			{
				if (ssd.sel == rp->r_ds ||
				    ssd.sel == rp->r_es ||
				    ssd.sel == rp->r_fs ||
				    ssd.sel == rp->r_gs) {
					bad = 1;
					break;
				}
			}

		} while ((t = t->t_forw) != pp->p_tlist);
		mutex_exit(&pp->p_lock);

		if (bad) {
			mutex_exit(&pp->p_ldtlock);
			return (EBUSY);
		}
	}

	/*
	 * If acc1 is zero, clear the descriptor (including the 'present' bit)
	 */
	if (ssd.acc1 == 0) {
		bzero(dscrp, sizeof (*dscrp));
		mutex_exit(&pp->p_ldtlock);
		return (0);
	}

	/*
	 * Check segment type, allow segment not present and
	 * only user DPL (3).
	 */
	if (SI86SSD_DPL(&ssd) != SEL_UPL) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}

#if defined(__amd64)
	/*
	 * Do not allow 32-bit applications to create 64-bit mode code segments.
	 */
	if (SI86SSD_ISUSEG(&ssd) && ((SI86SSD_TYPE(&ssd) >> 3) & 1) == 1 &&
	    SI86SSD_ISLONG(&ssd)) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}
#endif /* __amd64 */

	/*
	 * Set up a code or data user segment descriptor.
	 */
	if (SI86SSD_ISUSEG(&ssd)) {
		ssd_to_usd(&ssd, dscrp);
		mutex_exit(&pp->p_ldtlock);
		return (0);
	}

	/*
	 * Allow a call gate only if the destination is in the LDT.
	 */
	if (SI86SSD_TYPE(&ssd) == SDT_SYSCGT && SELISLDT(ssd.ls)) {
		ssd_to_sgd(&ssd, (gate_desc_t *)dscrp);
		mutex_exit(&pp->p_ldtlock);
		return (0);
	}

	mutex_exit(&pp->p_ldtlock);
	return (EINVAL);
}
6510Sstevel@tonic-gate 
/*
 * Allocate a private LDT for this process and initialize it with the
 * default entries.  Reserves virtual space for the maximum-size LDT
 * up front but only backs the minimum number of pages with memory;
 * ldt_map() faults in further pages on demand.
 */
void
setup_ldt(proc_t *pp)
{
	user_desc_t *ldtp;	/* descriptor pointer */
	pgcnt_t npages = btopr(MAXNLDT * sizeof (user_desc_t));

	/*
	 * Allocate maximum virtual space we need for this LDT.
	 */
	ldtp = vmem_alloc(heap_arena, ptob(npages), VM_SLEEP);

	/*
	 * Allocate the minimum number of physical pages for LDT.
	 */
	(void) segkmem_xalloc(NULL, ldtp, MINNLDT * sizeof (user_desc_t),
	    VM_SLEEP, 0, segkmem_page_create, NULL);

	/* zero the backed pages: a zeroed descriptor is 'not present' */
	bzero(ldtp, ptob(btopr(MINNLDT * sizeof (user_desc_t))));

	kpreempt_disable();

	/* Update proc structure. XXX - need any locks here??? */

	set_syssegd(&pp->p_ldt_desc, ldtp, MINNLDT * sizeof (user_desc_t) - 1,
	    SDT_SYSLDT, SEL_KPL);

	pp->p_ldtlimit = MINNLDT - 1;
	pp->p_ldt = ldtp;
	/* if we're building our own LDT, publish it to this CPU's GDT */
	if (pp == curproc)
		*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = pp->p_ldt_desc;

	kpreempt_enable();
}
6890Sstevel@tonic-gate 
/*
 * Map the page corresponding to the selector entry. If the page is
 * already mapped then it simply returns with the pointer to the entry.
 * Otherwise it allocates a physical page for it and returns the pointer
 * to the entry.  Returns 0 for errors.
 */
static void *
ldt_map(proc_t *pp, uint_t seli)
{
	caddr_t ent0_addr = (caddr_t)&pp->p_ldt[0];
	caddr_t ent_addr = (caddr_t)&pp->p_ldt[seli];
	volatile caddr_t page = (caddr_t)((uintptr_t)ent0_addr & (~PAGEOFFSET));
	caddr_t epage = (caddr_t)((uintptr_t)ent_addr & (~PAGEOFFSET));
	on_trap_data_t otd;

	ASSERT(pp->p_ldt != NULL);

	/* already within the current limit: page is guaranteed mapped */
	if (seli <= pp->p_ldtlimit)
		return (ent_addr);

	/*
	 * We are increasing the size of the process's LDT.
	 * Make sure this and all intervening pages are mapped.
	 */
	while (page <= epage) {
		/*
		 * Probe the page under on_trap() protection: a fault
		 * means it isn't backed yet, so allocate it.
		 */
		if (!on_trap(&otd, OT_DATA_ACCESS))
			(void) *(volatile int *)page;	/* peek at the page */
		else {		/* Allocate a physical page */
			(void) segkmem_xalloc(NULL, page, PAGESIZE, VM_SLEEP, 0,
			    segkmem_page_create, NULL);
			bzero(page, PAGESIZE);
		}
		no_trap();
		page += PAGESIZE;
	}

	/* XXX - need any locks to update proc_t or gdt ??? */

	ASSERT(curproc == pp);

	kpreempt_disable();
	/* grow the descriptor limit, then reload the LDTR to pick it up */
	pp->p_ldtlimit = seli;
	SYSSEGD_SETLIMIT(&pp->p_ldt_desc, (seli+1) * sizeof (user_desc_t) -1);

	ldt_load();
	kpreempt_enable();

	return (ent_addr);
}
7390Sstevel@tonic-gate 
/*
 * Free up the kernel memory used for LDT of this process.
 */
static void
ldt_free(proc_t *pp)
{
	on_trap_data_t otd;
	caddr_t start, end;
	volatile caddr_t addr;	/* volatile so the probe load isn't elided */

	ASSERT(pp->p_ldt != NULL);

	mutex_enter(&pp->p_ldtlock);
	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));

	/*
	 * Free the physical page(s) used for mapping LDT.  The LDT is
	 * only sparsely backed (pages are materialized on demand in
	 * ldt_map()), so probe each page under on_trap() and free only
	 * the ones that are actually present; a faulting probe means
	 * that page was never allocated and there is nothing to free.
	 */
	for (addr = start; addr <= end; addr += PAGESIZE) {
		if (!on_trap(&otd, OT_DATA_ACCESS)) {
			/* peek at the address */
			(void) *(volatile int *)addr;
			segkmem_free(NULL, addr, PAGESIZE);
		}
	}
	no_trap();

	/* Free up the virtual address space used for this LDT */
	vmem_free(heap_arena, pp->p_ldt,
	    ptob(btopr(MAXNLDT * sizeof (user_desc_t))));

	/*
	 * Clear the proc's LDT state and, if this is the current
	 * process, unload the now-stale LDT from the CPU.
	 * kpreempt_disable() keeps us on the same CPU between the
	 * curproc check and the unload.
	 */
	kpreempt_disable();
	pp->p_ldt = NULL;
	pp->p_ldt_desc = zero_sdesc;
	pp->p_ldtlimit = 0;

	if (pp == curproc)
		ldt_unload();
	kpreempt_enable();
	mutex_exit(&pp->p_ldtlock);
}
7790Sstevel@tonic-gate 
/*
 * On fork copy new ldt for child.
 */
void
ldt_dup(proc_t *pp, proc_t *cp)
{
	on_trap_data_t otd;
	caddr_t start, end;
	volatile caddr_t addr, caddr;	/* volatile: probe loads must stay */
	int	minsize;

	ASSERT(pp->p_ldt);

	/* Give the child a minimal LDT; extra pages are added below. */
	setup_ldt(cp);

	mutex_enter(&pp->p_ldtlock);
	cp->p_ldtlimit = pp->p_ldtlimit;
	SYSSEGD_SETLIMIT(&cp->p_ldt_desc,
	    (pp->p_ldtlimit+1) * sizeof (user_desc_t) -1);
	start = (caddr_t)pp->p_ldt; /* beginning of the LDT */
	end = start + (pp->p_ldtlimit * sizeof (user_desc_t));
	caddr = (caddr_t)cp->p_ldt; /* child LDT start */

	/*
	 * Bytes of the child LDT already backed by setup_ldt().
	 * NOTE(review): this rounds MINNLDT * sizeof (user_desc_t) up by
	 * adding a full PAGESIZE (rather than PAGESIZE - 1); if that
	 * product is already page-aligned the result is one page larger
	 * than what setup_ldt() actually mapped, and the "allocate a
	 * page if necessary" test below would skip allocating a page the
	 * child needs -- confirm MINNLDT makes this unreachable.
	 */
	minsize = ((MINNLDT * sizeof (user_desc_t)) + PAGESIZE) & ~PAGEOFFSET;
	/* Walk thru the physical page(s) used for parent's LDT */
	for (addr = start; addr <= end; addr += PAGESIZE, caddr += PAGESIZE) {
		/*
		 * Probe the parent's page under on_trap(); a fault means
		 * this stretch of the (sparse) parent LDT was never
		 * materialized, so there is nothing to copy here.
		 */
		if (!on_trap(&otd, OT_DATA_ACCESS)) {
			(void) *(volatile int *)addr; /* peek at the address */
			/* allocate a page if necessary */
			if (caddr >= ((caddr_t)cp->p_ldt + minsize)) {
				(void) segkmem_xalloc(NULL, caddr, PAGESIZE,
				    VM_SLEEP, 0, segkmem_page_create, NULL);
			}
			bcopy(addr, caddr, PAGESIZE);
		}
	}
	no_trap();
	mutex_exit(&pp->p_ldtlock);
}
819