10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
52712Snn35248 * Common Development and Distribution License (the "License").
62712Snn35248 * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
22*13134Skuriakose.kuruvilla@oracle.com * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate */
240Sstevel@tonic-gate
250Sstevel@tonic-gate /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
260Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
270Sstevel@tonic-gate /* All Rights Reserved */
280Sstevel@tonic-gate
290Sstevel@tonic-gate /* Copyright (c) 1987, 1988 Microsoft Corporation */
300Sstevel@tonic-gate /* All Rights Reserved */
310Sstevel@tonic-gate
320Sstevel@tonic-gate #include <sys/param.h>
330Sstevel@tonic-gate #include <sys/types.h>
340Sstevel@tonic-gate #include <sys/sysmacros.h>
350Sstevel@tonic-gate #include <sys/systm.h>
360Sstevel@tonic-gate #include <sys/signal.h>
370Sstevel@tonic-gate #include <sys/errno.h>
380Sstevel@tonic-gate #include <sys/fault.h>
390Sstevel@tonic-gate #include <sys/syscall.h>
400Sstevel@tonic-gate #include <sys/cpuvar.h>
410Sstevel@tonic-gate #include <sys/sysi86.h>
420Sstevel@tonic-gate #include <sys/psw.h>
430Sstevel@tonic-gate #include <sys/cred.h>
440Sstevel@tonic-gate #include <sys/policy.h>
450Sstevel@tonic-gate #include <sys/thread.h>
460Sstevel@tonic-gate #include <sys/debug.h>
470Sstevel@tonic-gate #include <sys/ontrap.h>
480Sstevel@tonic-gate #include <sys/privregs.h>
490Sstevel@tonic-gate #include <sys/x86_archext.h>
500Sstevel@tonic-gate #include <sys/vmem.h>
510Sstevel@tonic-gate #include <sys/kmem.h>
520Sstevel@tonic-gate #include <sys/mman.h>
530Sstevel@tonic-gate #include <sys/archsystm.h>
540Sstevel@tonic-gate #include <vm/hat.h>
550Sstevel@tonic-gate #include <vm/as.h>
560Sstevel@tonic-gate #include <vm/seg.h>
570Sstevel@tonic-gate #include <vm/seg_kmem.h>
580Sstevel@tonic-gate #include <vm/faultcode.h>
590Sstevel@tonic-gate #include <sys/fp.h>
600Sstevel@tonic-gate #include <sys/cmn_err.h>
613446Smrj #include <sys/segments.h>
623446Smrj #include <sys/clock.h>
635084Sjohnlev #if defined(__xpv)
645084Sjohnlev #include <sys/hypervisor.h>
655084Sjohnlev #include <sys/note.h>
665084Sjohnlev #endif
670Sstevel@tonic-gate
685084Sjohnlev static void ldt_alloc(proc_t *, uint_t);
695084Sjohnlev static void ldt_free(proc_t *);
705084Sjohnlev static void ldt_dup(proc_t *, proc_t *);
715084Sjohnlev static void ldt_grow(proc_t *, uint_t);
720Sstevel@tonic-gate
730Sstevel@tonic-gate /*
740Sstevel@tonic-gate * sysi86 System Call
750Sstevel@tonic-gate */
760Sstevel@tonic-gate
770Sstevel@tonic-gate /* ARGSUSED */
780Sstevel@tonic-gate int
sysi86(short cmd,uintptr_t arg1,uintptr_t arg2,uintptr_t arg3)790Sstevel@tonic-gate sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
800Sstevel@tonic-gate {
812712Snn35248 struct ssd ssd;
820Sstevel@tonic-gate int error = 0;
830Sstevel@tonic-gate int c;
840Sstevel@tonic-gate proc_t *pp = curproc;
850Sstevel@tonic-gate
860Sstevel@tonic-gate switch (cmd) {
870Sstevel@tonic-gate
880Sstevel@tonic-gate /*
890Sstevel@tonic-gate * The SI86V86 subsystem call of the SYSI86 system call
900Sstevel@tonic-gate * supports only one subcode -- V86SC_IOPL.
910Sstevel@tonic-gate */
920Sstevel@tonic-gate case SI86V86:
930Sstevel@tonic-gate if (arg1 == V86SC_IOPL) {
940Sstevel@tonic-gate struct regs *rp = lwptoregs(ttolwp(curthread));
950Sstevel@tonic-gate greg_t oldpl = rp->r_ps & PS_IOPL;
960Sstevel@tonic-gate greg_t newpl = arg2 & PS_IOPL;
970Sstevel@tonic-gate
980Sstevel@tonic-gate /*
990Sstevel@tonic-gate * Must be privileged to run this system call
1000Sstevel@tonic-gate * if giving more io privilege.
1010Sstevel@tonic-gate */
1020Sstevel@tonic-gate if (newpl > oldpl && (error =
1030Sstevel@tonic-gate secpolicy_sys_config(CRED(), B_FALSE)) != 0)
1040Sstevel@tonic-gate return (set_errno(error));
1055084Sjohnlev #if defined(__xpv)
1065084Sjohnlev kpreempt_disable();
1075084Sjohnlev installctx(curthread, NULL, xen_disable_user_iopl,
1085084Sjohnlev xen_enable_user_iopl, NULL, NULL,
1095084Sjohnlev xen_disable_user_iopl, NULL);
1105084Sjohnlev xen_enable_user_iopl();
1115084Sjohnlev kpreempt_enable();
1125084Sjohnlev #else
1130Sstevel@tonic-gate rp->r_ps ^= oldpl ^ newpl;
1145084Sjohnlev #endif
1150Sstevel@tonic-gate } else
1160Sstevel@tonic-gate error = EINVAL;
1170Sstevel@tonic-gate break;
1180Sstevel@tonic-gate
1190Sstevel@tonic-gate /*
1200Sstevel@tonic-gate * Set a segment descriptor
1210Sstevel@tonic-gate */
1220Sstevel@tonic-gate case SI86DSCR:
1230Sstevel@tonic-gate /*
1240Sstevel@tonic-gate * There are considerable problems here manipulating
1250Sstevel@tonic-gate * resources shared by many running lwps. Get everyone
1260Sstevel@tonic-gate * into a safe state before changing the LDT.
1270Sstevel@tonic-gate */
1280Sstevel@tonic-gate if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
1290Sstevel@tonic-gate error = EINTR;
1300Sstevel@tonic-gate break;
1310Sstevel@tonic-gate }
1322712Snn35248
1332712Snn35248 if (get_udatamodel() == DATAMODEL_LP64) {
1342712Snn35248 error = EINVAL;
1352712Snn35248 break;
1362712Snn35248 }
1372712Snn35248
1382712Snn35248 if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) {
1392712Snn35248 error = EFAULT;
1402712Snn35248 break;
1412712Snn35248 }
1422712Snn35248
1432712Snn35248 error = setdscr(&ssd);
1442712Snn35248
1450Sstevel@tonic-gate mutex_enter(&pp->p_lock);
1460Sstevel@tonic-gate if (curthread != pp->p_agenttp)
1470Sstevel@tonic-gate continuelwps(pp);
1480Sstevel@tonic-gate mutex_exit(&pp->p_lock);
1490Sstevel@tonic-gate break;
1500Sstevel@tonic-gate
1510Sstevel@tonic-gate case SI86FPHW:
1520Sstevel@tonic-gate c = fp_kind & 0xff;
1530Sstevel@tonic-gate if (suword32((void *)arg1, c) == -1)
1540Sstevel@tonic-gate error = EFAULT;
1550Sstevel@tonic-gate break;
1560Sstevel@tonic-gate
1570Sstevel@tonic-gate case SI86FPSTART:
1580Sstevel@tonic-gate /*
1590Sstevel@tonic-gate * arg1 is the address of _fp_hw
1600Sstevel@tonic-gate * arg2 is the desired x87 FCW value
1610Sstevel@tonic-gate * arg3 is the desired SSE MXCSR value
1620Sstevel@tonic-gate * a return value of one means SSE hardware, else none.
1630Sstevel@tonic-gate */
1640Sstevel@tonic-gate c = fp_kind & 0xff;
1650Sstevel@tonic-gate if (suword32((void *)arg1, c) == -1) {
1660Sstevel@tonic-gate error = EFAULT;
1670Sstevel@tonic-gate break;
1680Sstevel@tonic-gate }
1690Sstevel@tonic-gate fpsetcw((uint16_t)arg2, (uint32_t)arg3);
170*13134Skuriakose.kuruvilla@oracle.com return ((fp_kind & __FP_SSE) ? 1 : 0);
1710Sstevel@tonic-gate
1720Sstevel@tonic-gate /* real time clock management commands */
1730Sstevel@tonic-gate
1740Sstevel@tonic-gate case WTODC:
1750Sstevel@tonic-gate if ((error = secpolicy_settime(CRED())) == 0) {
1760Sstevel@tonic-gate timestruc_t ts;
1770Sstevel@tonic-gate mutex_enter(&tod_lock);
1780Sstevel@tonic-gate gethrestime(&ts);
1790Sstevel@tonic-gate tod_set(ts);
1800Sstevel@tonic-gate mutex_exit(&tod_lock);
1810Sstevel@tonic-gate }
1820Sstevel@tonic-gate break;
1830Sstevel@tonic-gate
1840Sstevel@tonic-gate /* Give some timezone playing room */
1850Sstevel@tonic-gate #define ONEWEEK (7 * 24 * 60 * 60)
1860Sstevel@tonic-gate
1870Sstevel@tonic-gate case SGMTL:
1880Sstevel@tonic-gate /*
1890Sstevel@tonic-gate * Called from 32 bit land, negative values
1900Sstevel@tonic-gate * are not sign extended, so we do that here
1910Sstevel@tonic-gate * by casting it to an int and back. We also
1920Sstevel@tonic-gate * clamp the value to within reason and detect
1930Sstevel@tonic-gate * when a 64 bit call overflows an int.
1940Sstevel@tonic-gate */
1950Sstevel@tonic-gate if ((error = secpolicy_settime(CRED())) == 0) {
1960Sstevel@tonic-gate int newlag = (int)arg1;
1970Sstevel@tonic-gate
1980Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
1990Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE &&
2000Sstevel@tonic-gate (long)newlag != (long)arg1) {
2010Sstevel@tonic-gate error = EOVERFLOW;
2020Sstevel@tonic-gate } else
2030Sstevel@tonic-gate #endif
2040Sstevel@tonic-gate if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
2050Sstevel@tonic-gate sgmtl(newlag);
2060Sstevel@tonic-gate else
2070Sstevel@tonic-gate error = EOVERFLOW;
2080Sstevel@tonic-gate }
2090Sstevel@tonic-gate break;
2100Sstevel@tonic-gate
2110Sstevel@tonic-gate case GGMTL:
2120Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) {
2130Sstevel@tonic-gate if (sulword((void *)arg1, ggmtl()) == -1)
2140Sstevel@tonic-gate error = EFAULT;
2150Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
2160Sstevel@tonic-gate } else {
2170Sstevel@tonic-gate time_t gmtl;
2180Sstevel@tonic-gate
2190Sstevel@tonic-gate if ((gmtl = ggmtl()) > INT32_MAX) {
2200Sstevel@tonic-gate /*
2210Sstevel@tonic-gate * Since gmt_lag can at most be
2220Sstevel@tonic-gate * +/- 12 hours, something is
2230Sstevel@tonic-gate * *seriously* messed up here.
2240Sstevel@tonic-gate */
2250Sstevel@tonic-gate error = EOVERFLOW;
2260Sstevel@tonic-gate } else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
2270Sstevel@tonic-gate error = EFAULT;
2280Sstevel@tonic-gate #endif
2290Sstevel@tonic-gate }
2300Sstevel@tonic-gate break;
2310Sstevel@tonic-gate
2320Sstevel@tonic-gate case RTCSYNC:
2330Sstevel@tonic-gate if ((error = secpolicy_settime(CRED())) == 0)
2340Sstevel@tonic-gate rtcsync();
2350Sstevel@tonic-gate break;
2360Sstevel@tonic-gate
2370Sstevel@tonic-gate /* END OF real time clock management commands */
2380Sstevel@tonic-gate
2390Sstevel@tonic-gate default:
2400Sstevel@tonic-gate error = EINVAL;
2410Sstevel@tonic-gate break;
2420Sstevel@tonic-gate }
2430Sstevel@tonic-gate return (error == 0 ? 0 : set_errno(error));
2440Sstevel@tonic-gate }
2450Sstevel@tonic-gate
2460Sstevel@tonic-gate void
usd_to_ssd(user_desc_t * usd,struct ssd * ssd,selector_t sel)2470Sstevel@tonic-gate usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
2480Sstevel@tonic-gate {
2490Sstevel@tonic-gate ssd->bo = USEGD_GETBASE(usd);
2500Sstevel@tonic-gate ssd->ls = USEGD_GETLIMIT(usd);
2510Sstevel@tonic-gate ssd->sel = sel;
2520Sstevel@tonic-gate
2530Sstevel@tonic-gate /*
2540Sstevel@tonic-gate * set type, dpl and present bits.
2550Sstevel@tonic-gate */
2560Sstevel@tonic-gate ssd->acc1 = usd->usd_type;
2570Sstevel@tonic-gate ssd->acc1 |= usd->usd_dpl << 5;
2580Sstevel@tonic-gate ssd->acc1 |= usd->usd_p << (5 + 2);
2590Sstevel@tonic-gate
2600Sstevel@tonic-gate /*
2610Sstevel@tonic-gate * set avl, DB and granularity bits.
2620Sstevel@tonic-gate */
2630Sstevel@tonic-gate ssd->acc2 = usd->usd_avl;
2640Sstevel@tonic-gate
2650Sstevel@tonic-gate #if defined(__amd64)
2660Sstevel@tonic-gate ssd->acc2 |= usd->usd_long << 1;
2670Sstevel@tonic-gate #else
2680Sstevel@tonic-gate ssd->acc2 |= usd->usd_reserved << 1;
2690Sstevel@tonic-gate #endif
2700Sstevel@tonic-gate
2710Sstevel@tonic-gate ssd->acc2 |= usd->usd_def32 << (1 + 1);
2720Sstevel@tonic-gate ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
2730Sstevel@tonic-gate }
2740Sstevel@tonic-gate
2750Sstevel@tonic-gate static void
ssd_to_usd(struct ssd * ssd,user_desc_t * usd)2760Sstevel@tonic-gate ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
2770Sstevel@tonic-gate {
2780Sstevel@tonic-gate
2795084Sjohnlev ASSERT(bcmp(usd, &null_udesc, sizeof (*usd)) == 0);
2805084Sjohnlev
2810Sstevel@tonic-gate USEGD_SETBASE(usd, ssd->bo);
2820Sstevel@tonic-gate USEGD_SETLIMIT(usd, ssd->ls);
2830Sstevel@tonic-gate
2840Sstevel@tonic-gate /*
2850Sstevel@tonic-gate * set type, dpl and present bits.
2860Sstevel@tonic-gate */
2870Sstevel@tonic-gate usd->usd_type = ssd->acc1;
2880Sstevel@tonic-gate usd->usd_dpl = ssd->acc1 >> 5;
2890Sstevel@tonic-gate usd->usd_p = ssd->acc1 >> (5 + 2);
2900Sstevel@tonic-gate
2910Sstevel@tonic-gate ASSERT(usd->usd_type >= SDT_MEMRO);
2920Sstevel@tonic-gate ASSERT(usd->usd_dpl == SEL_UPL);
2930Sstevel@tonic-gate
2940Sstevel@tonic-gate /*
2955084Sjohnlev * 64-bit code selectors are never allowed in the LDT.
2965084Sjohnlev * Reserved bit is always 0 on 32-bit sytems.
2975084Sjohnlev */
2985084Sjohnlev #if defined(__amd64)
2995084Sjohnlev usd->usd_long = 0;
3005084Sjohnlev #else
3015084Sjohnlev usd->usd_reserved = 0;
3025084Sjohnlev #endif
3035084Sjohnlev
3045084Sjohnlev /*
3050Sstevel@tonic-gate * set avl, DB and granularity bits.
3060Sstevel@tonic-gate */
3070Sstevel@tonic-gate usd->usd_avl = ssd->acc2;
3080Sstevel@tonic-gate usd->usd_def32 = ssd->acc2 >> (1 + 1);
3090Sstevel@tonic-gate usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
3100Sstevel@tonic-gate }
3110Sstevel@tonic-gate
3125084Sjohnlev
#if defined(__i386)

/*
 * Fill in a call gate descriptor from the struct ssd form (32-bit kernel
 * only).  For a gate, ssd->bo supplies the target offset and ssd->ls the
 * target selector.  The destination is asserted to start out equal to
 * null_sdesc, and the result is asserted to be a user-level SDT_SYSCGT gate.
 */
static void
ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
{
	ASSERT(bcmp(sgd, &null_sdesc, sizeof (*sgd)) == 0);

	/* target: selector plus an offset split into low and high halves */
	sgd->sgd_selector = ssd->ls;
	sgd->sgd_looffset = ssd->bo;
	sgd->sgd_hioffset = ssd->bo >> 16;

	/* acc1: type in the low bits, dpl from bit 5, present at bit 7 */
	sgd->sgd_type = ssd->acc1;
	sgd->sgd_dpl = ssd->acc1 >> 5;
	sgd->sgd_p = ssd->acc1 >> 7;

	ASSERT(sgd->sgd_type == SDT_SYSCGT);
	ASSERT(sgd->sgd_dpl == SEL_UPL);

	/* the stack-copy count is always zero */
	sgd->sgd_stkcpy = 0;
}

#endif	/* __i386 */
3380Sstevel@tonic-gate
3391217Srab /*
3401217Srab * Load LDT register with the current process's LDT.
3411217Srab */
3425084Sjohnlev static void
ldt_load(void)3431217Srab ldt_load(void)
3441217Srab {
3455084Sjohnlev #if defined(__xpv)
3465084Sjohnlev xen_set_ldt(get_ssd_base(&curproc->p_ldt_desc),
3475084Sjohnlev curproc->p_ldtlimit + 1);
3485084Sjohnlev #else
3491217Srab *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
3501217Srab wr_ldtr(ULDT_SEL);
3515084Sjohnlev #endif
3521217Srab }
3531217Srab
3541217Srab /*
3551217Srab * Store a NULL selector in the LDTR. All subsequent illegal references to
3561217Srab * the LDT will result in a #gp.
3571217Srab */
3581217Srab void
ldt_unload(void)3591217Srab ldt_unload(void)
3601217Srab {
3615084Sjohnlev #if defined(__xpv)
3625084Sjohnlev xen_set_ldt(NULL, 0);
3635084Sjohnlev #else
3645084Sjohnlev *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = null_sdesc;
3651217Srab wr_ldtr(0);
3665084Sjohnlev #endif
3671217Srab }
3680Sstevel@tonic-gate
3690Sstevel@tonic-gate /*ARGSUSED*/
3700Sstevel@tonic-gate static void
ldt_savectx(proc_t * p)3711217Srab ldt_savectx(proc_t *p)
3720Sstevel@tonic-gate {
3731217Srab ASSERT(p->p_ldt != NULL);
3741217Srab ASSERT(p == curproc);
3751217Srab
3760Sstevel@tonic-gate #if defined(__amd64)
3770Sstevel@tonic-gate /*
3780Sstevel@tonic-gate * The 64-bit kernel must be sure to clear any stale ldt
3790Sstevel@tonic-gate * selectors when context switching away from a process that
3800Sstevel@tonic-gate * has a private ldt. Consider the following example:
3810Sstevel@tonic-gate *
3820Sstevel@tonic-gate * Wine creats a ldt descriptor and points a segment register
3830Sstevel@tonic-gate * to it.
3840Sstevel@tonic-gate *
3850Sstevel@tonic-gate * We then context switch away from wine lwp to kernel
3860Sstevel@tonic-gate * thread and hit breakpoint in kernel with kmdb
3870Sstevel@tonic-gate *
3880Sstevel@tonic-gate * When we continue and resume from kmdb we will #gp
3890Sstevel@tonic-gate * fault since kmdb will have saved the stale ldt selector
3900Sstevel@tonic-gate * from wine and will try to restore it but we are no longer in
3910Sstevel@tonic-gate * the context of the wine process and do not have our
3920Sstevel@tonic-gate * ldtr register pointing to the private ldt.
3930Sstevel@tonic-gate */
3945084Sjohnlev reset_sregs();
3950Sstevel@tonic-gate #endif
3960Sstevel@tonic-gate
3971217Srab ldt_unload();
3980Sstevel@tonic-gate cpu_fast_syscall_enable(NULL);
3990Sstevel@tonic-gate }
4000Sstevel@tonic-gate
4011217Srab static void
ldt_restorectx(proc_t * p)4021217Srab ldt_restorectx(proc_t *p)
4031217Srab {
4041217Srab ASSERT(p->p_ldt != NULL);
4051217Srab ASSERT(p == curproc);
4061217Srab
4071217Srab ldt_load();
4081217Srab cpu_fast_syscall_disable(NULL);
4091217Srab }
4101217Srab
4110Sstevel@tonic-gate /*
4121217Srab * When a process with a private LDT execs, fast syscalls must be enabled for
4131217Srab * the new process image.
4140Sstevel@tonic-gate */
4150Sstevel@tonic-gate /* ARGSUSED */
4160Sstevel@tonic-gate static void
ldt_freectx(proc_t * p,int isexec)4171217Srab ldt_freectx(proc_t *p, int isexec)
4180Sstevel@tonic-gate {
4191217Srab ASSERT(p->p_ldt);
4201217Srab
4210Sstevel@tonic-gate if (isexec) {
4220Sstevel@tonic-gate kpreempt_disable();
4230Sstevel@tonic-gate cpu_fast_syscall_enable(NULL);
4240Sstevel@tonic-gate kpreempt_enable();
4250Sstevel@tonic-gate }
4261217Srab
4271217Srab /*
4281217Srab * ldt_free() will free the memory used by the private LDT, reset the
4291217Srab * process's descriptor, and re-program the LDTR.
4301217Srab */
4311217Srab ldt_free(p);
4320Sstevel@tonic-gate }
4330Sstevel@tonic-gate
4340Sstevel@tonic-gate /*
4350Sstevel@tonic-gate * Install ctx op that ensures syscall/sysenter are disabled.
4360Sstevel@tonic-gate * See comments below.
4370Sstevel@tonic-gate *
4381217Srab * When a thread with a private LDT forks, the new process
4390Sstevel@tonic-gate * must have the LDT context ops installed.
4400Sstevel@tonic-gate */
4410Sstevel@tonic-gate /* ARGSUSED */
4420Sstevel@tonic-gate static void
ldt_installctx(proc_t * p,proc_t * cp)4431217Srab ldt_installctx(proc_t *p, proc_t *cp)
4440Sstevel@tonic-gate {
4451217Srab proc_t *targ = p;
4461217Srab kthread_t *t;
4470Sstevel@tonic-gate
4480Sstevel@tonic-gate /*
4491217Srab * If this is a fork, operate on the child process.
4500Sstevel@tonic-gate */
4511217Srab if (cp != NULL) {
4521217Srab targ = cp;
4531217Srab ldt_dup(p, cp);
4541217Srab }
4550Sstevel@tonic-gate
4561217Srab /*
4571217Srab * The process context ops expect the target process as their argument.
4581217Srab */
4591217Srab ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
4601217Srab ldt_installctx, ldt_savectx, ldt_freectx) == 0);
4610Sstevel@tonic-gate
4621217Srab installpctx(targ, targ, ldt_savectx, ldt_restorectx,
4631217Srab ldt_installctx, ldt_savectx, ldt_freectx);
4640Sstevel@tonic-gate
4650Sstevel@tonic-gate /*
4660Sstevel@tonic-gate * We've just disabled fast system call and return instructions; take
4670Sstevel@tonic-gate * the slow path out to make sure we don't try to use one to return
4681217Srab * back to user. We must set t_post_sys for every thread in the
4691217Srab * process to make sure none of them escape out via fast return.
4700Sstevel@tonic-gate */
4711217Srab
4721217Srab mutex_enter(&targ->p_lock);
4731217Srab t = targ->p_tlist;
4741217Srab do {
4751217Srab t->t_post_sys = 1;
4761217Srab } while ((t = t->t_forw) != targ->p_tlist);
4771217Srab mutex_exit(&targ->p_lock);
4780Sstevel@tonic-gate }
4790Sstevel@tonic-gate
4802712Snn35248 int
setdscr(struct ssd * ssd)4812712Snn35248 setdscr(struct ssd *ssd)
4820Sstevel@tonic-gate {
4830Sstevel@tonic-gate ushort_t seli; /* selector index */
4845084Sjohnlev user_desc_t *ldp; /* descriptor pointer */
4855084Sjohnlev user_desc_t ndesc; /* new descriptor */
4860Sstevel@tonic-gate proc_t *pp = ttoproc(curthread);
4875084Sjohnlev int rc = 0;
4880Sstevel@tonic-gate
4890Sstevel@tonic-gate /*
4900Sstevel@tonic-gate * LDT segments: executable and data at DPL 3 only.
4910Sstevel@tonic-gate */
4922712Snn35248 if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
4930Sstevel@tonic-gate return (EINVAL);
4940Sstevel@tonic-gate
4950Sstevel@tonic-gate /*
4960Sstevel@tonic-gate * check the selector index.
4970Sstevel@tonic-gate */
4982712Snn35248 seli = SELTOIDX(ssd->sel);
4991217Srab if (seli >= MAXNLDT || seli < LDT_UDBASE)
5000Sstevel@tonic-gate return (EINVAL);
5010Sstevel@tonic-gate
5025084Sjohnlev ndesc = null_udesc;
5030Sstevel@tonic-gate mutex_enter(&pp->p_ldtlock);
5040Sstevel@tonic-gate
5050Sstevel@tonic-gate /*
5060Sstevel@tonic-gate * If this is the first time for this process then setup a
5070Sstevel@tonic-gate * private LDT for it.
5080Sstevel@tonic-gate */
5090Sstevel@tonic-gate if (pp->p_ldt == NULL) {
5105084Sjohnlev ldt_alloc(pp, seli);
5110Sstevel@tonic-gate
5120Sstevel@tonic-gate /*
5130Sstevel@tonic-gate * Now that this process has a private LDT, the use of
5140Sstevel@tonic-gate * the syscall/sysret and sysenter/sysexit instructions
5150Sstevel@tonic-gate * is forbidden for this processes because they destroy
5160Sstevel@tonic-gate * the contents of %cs and %ss segment registers.
5170Sstevel@tonic-gate *
5181217Srab * Explicity disable them here and add a context handler
5191217Srab * to the process. Note that disabling
5200Sstevel@tonic-gate * them here means we can't use sysret or sysexit on
5210Sstevel@tonic-gate * the way out of this system call - so we force this
5220Sstevel@tonic-gate * thread to take the slow path (which doesn't make use
5230Sstevel@tonic-gate * of sysenter or sysexit) back out.
5240Sstevel@tonic-gate */
5255084Sjohnlev kpreempt_disable();
5261217Srab ldt_installctx(pp, NULL);
5270Sstevel@tonic-gate cpu_fast_syscall_disable(NULL);
5280Sstevel@tonic-gate ASSERT(curthread->t_post_sys != 0);
5291217Srab kpreempt_enable();
5305084Sjohnlev
5315084Sjohnlev } else if (seli > pp->p_ldtlimit) {
5320Sstevel@tonic-gate
5335084Sjohnlev /*
5345084Sjohnlev * Increase size of ldt to include seli.
5355084Sjohnlev */
5365084Sjohnlev ldt_grow(pp, seli);
5370Sstevel@tonic-gate }
5380Sstevel@tonic-gate
5390Sstevel@tonic-gate ASSERT(seli <= pp->p_ldtlimit);
5405084Sjohnlev ldp = &pp->p_ldt[seli];
5410Sstevel@tonic-gate
5420Sstevel@tonic-gate /*
5430Sstevel@tonic-gate * On the 64-bit kernel, this is where things get more subtle.
5440Sstevel@tonic-gate * Recall that in the 64-bit kernel, when we enter the kernel we
5450Sstevel@tonic-gate * deliberately -don't- reload the segment selectors we came in on
5460Sstevel@tonic-gate * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
5470Sstevel@tonic-gate * and the underlying descriptors are essentially ignored by the
5480Sstevel@tonic-gate * hardware in long mode - except for the base that we override with
5490Sstevel@tonic-gate * the gsbase MSRs.
5500Sstevel@tonic-gate *
5510Sstevel@tonic-gate * However, there's one unfortunate issue with this rosy picture --
5520Sstevel@tonic-gate * a descriptor that's not marked as 'present' will still generate
5530Sstevel@tonic-gate * an #np when loading a segment register.
5540Sstevel@tonic-gate *
5550Sstevel@tonic-gate * Consider this case. An lwp creates a harmless LDT entry, points
5560Sstevel@tonic-gate * one of it's segment registers at it, then tells the kernel (here)
5570Sstevel@tonic-gate * to delete it. In the 32-bit kernel, the #np will happen on the
5580Sstevel@tonic-gate * way back to userland where we reload the segment registers, and be
5590Sstevel@tonic-gate * handled in kern_gpfault(). In the 64-bit kernel, the same thing
5600Sstevel@tonic-gate * will happen in the normal case too. However, if we're trying to
5610Sstevel@tonic-gate * use a debugger that wants to save and restore the segment registers,
5620Sstevel@tonic-gate * and the debugger things that we have valid segment registers, we
5630Sstevel@tonic-gate * have the problem that the debugger will try and restore the
5640Sstevel@tonic-gate * segment register that points at the now 'not present' descriptor
5650Sstevel@tonic-gate * and will take a #np right there.
5660Sstevel@tonic-gate *
5670Sstevel@tonic-gate * We should obviously fix the debugger to be paranoid about
5680Sstevel@tonic-gate * -not- restoring segment registers that point to bad descriptors;
5690Sstevel@tonic-gate * however we can prevent the problem here if we check to see if any
5700Sstevel@tonic-gate * of the segment registers are still pointing at the thing we're
5710Sstevel@tonic-gate * destroying; if they are, return an error instead. (That also seems
5720Sstevel@tonic-gate * a lot better failure mode than SIGKILL and a core file
5730Sstevel@tonic-gate * from kern_gpfault() too.)
5740Sstevel@tonic-gate */
5752712Snn35248 if (SI86SSD_PRES(ssd) == 0) {
5760Sstevel@tonic-gate kthread_t *t;
5770Sstevel@tonic-gate int bad = 0;
5780Sstevel@tonic-gate
5790Sstevel@tonic-gate /*
5800Sstevel@tonic-gate * Look carefully at the segment registers of every lwp
5810Sstevel@tonic-gate * in the process (they're all stopped by our caller).
5820Sstevel@tonic-gate * If we're about to invalidate a descriptor that's still
5830Sstevel@tonic-gate * being referenced by *any* of them, return an error,
5840Sstevel@tonic-gate * rather than having them #gp on their way out of the kernel.
5850Sstevel@tonic-gate */
5860Sstevel@tonic-gate ASSERT(pp->p_lwprcnt == 1);
5870Sstevel@tonic-gate
5880Sstevel@tonic-gate mutex_enter(&pp->p_lock);
5890Sstevel@tonic-gate t = pp->p_tlist;
5900Sstevel@tonic-gate do {
5910Sstevel@tonic-gate klwp_t *lwp = ttolwp(t);
5920Sstevel@tonic-gate struct regs *rp = lwp->lwp_regs;
5930Sstevel@tonic-gate #if defined(__amd64)
5940Sstevel@tonic-gate pcb_t *pcb = &lwp->lwp_pcb;
5950Sstevel@tonic-gate #endif
5960Sstevel@tonic-gate
5972712Snn35248 if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) {
5980Sstevel@tonic-gate bad = 1;
5990Sstevel@tonic-gate break;
6000Sstevel@tonic-gate }
6010Sstevel@tonic-gate
6020Sstevel@tonic-gate #if defined(__amd64)
6034503Ssudheer if (pcb->pcb_rupdate == 1) {
6042712Snn35248 if (ssd->sel == pcb->pcb_ds ||
6052712Snn35248 ssd->sel == pcb->pcb_es ||
6062712Snn35248 ssd->sel == pcb->pcb_fs ||
6072712Snn35248 ssd->sel == pcb->pcb_gs) {
6080Sstevel@tonic-gate bad = 1;
6090Sstevel@tonic-gate break;
6100Sstevel@tonic-gate }
6110Sstevel@tonic-gate } else
6120Sstevel@tonic-gate #endif
6130Sstevel@tonic-gate {
6142712Snn35248 if (ssd->sel == rp->r_ds ||
6152712Snn35248 ssd->sel == rp->r_es ||
6162712Snn35248 ssd->sel == rp->r_fs ||
6172712Snn35248 ssd->sel == rp->r_gs) {
6180Sstevel@tonic-gate bad = 1;
6190Sstevel@tonic-gate break;
6200Sstevel@tonic-gate }
6210Sstevel@tonic-gate }
6220Sstevel@tonic-gate
6230Sstevel@tonic-gate } while ((t = t->t_forw) != pp->p_tlist);
6240Sstevel@tonic-gate mutex_exit(&pp->p_lock);
6250Sstevel@tonic-gate
6260Sstevel@tonic-gate if (bad) {
6270Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock);
6280Sstevel@tonic-gate return (EBUSY);
6290Sstevel@tonic-gate }
6300Sstevel@tonic-gate }
6310Sstevel@tonic-gate
6320Sstevel@tonic-gate /*
6330Sstevel@tonic-gate * If acc1 is zero, clear the descriptor (including the 'present' bit)
6340Sstevel@tonic-gate */
6352712Snn35248 if (ssd->acc1 == 0) {
6365084Sjohnlev rc = ldt_update_segd(ldp, &null_udesc);
6370Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock);
6385084Sjohnlev return (rc);
6390Sstevel@tonic-gate }
6400Sstevel@tonic-gate
6410Sstevel@tonic-gate /*
6420Sstevel@tonic-gate * Check segment type, allow segment not present and
6430Sstevel@tonic-gate * only user DPL (3).
6440Sstevel@tonic-gate */
6452712Snn35248 if (SI86SSD_DPL(ssd) != SEL_UPL) {
6460Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock);
6470Sstevel@tonic-gate return (EINVAL);
6480Sstevel@tonic-gate }
6490Sstevel@tonic-gate
6500Sstevel@tonic-gate #if defined(__amd64)
6510Sstevel@tonic-gate /*
6522712Snn35248 * Do not allow 32-bit applications to create 64-bit mode code
6532712Snn35248 * segments.
6540Sstevel@tonic-gate */
6552712Snn35248 if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
6562712Snn35248 SI86SSD_ISLONG(ssd)) {
6570Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock);
6580Sstevel@tonic-gate return (EINVAL);
6590Sstevel@tonic-gate }
6600Sstevel@tonic-gate #endif /* __amd64 */
6610Sstevel@tonic-gate
6620Sstevel@tonic-gate /*
6630Sstevel@tonic-gate * Set up a code or data user segment descriptor.
6640Sstevel@tonic-gate */
6652712Snn35248 if (SI86SSD_ISUSEG(ssd)) {
6665084Sjohnlev ssd_to_usd(ssd, &ndesc);
6675084Sjohnlev rc = ldt_update_segd(ldp, &ndesc);
6680Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock);
6695084Sjohnlev return (rc);
6700Sstevel@tonic-gate }
6710Sstevel@tonic-gate
6725084Sjohnlev #if defined(__i386)
6730Sstevel@tonic-gate /*
6745084Sjohnlev * Allow a call gate only if the destination is in the LDT
6755084Sjohnlev * and the system is running in 32-bit legacy mode.
6765084Sjohnlev *
6775084Sjohnlev * In long mode 32-bit call gates are redefined as 64-bit call
6785084Sjohnlev * gates and the hw enforces that the target code selector
6795084Sjohnlev * of the call gate must be 64-bit selector. A #gp fault is
6805084Sjohnlev * generated if otherwise. Since we do not allow 32-bit processes
6815084Sjohnlev * to switch themselves to 64-bits we never allow call gates
6825084Sjohnlev * on 64-bit system system.
6830Sstevel@tonic-gate */
6842712Snn35248 if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {
6855084Sjohnlev
6865084Sjohnlev
6875084Sjohnlev ssd_to_sgd(ssd, (gate_desc_t *)&ndesc);
6885084Sjohnlev rc = ldt_update_segd(ldp, &ndesc);
6890Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock);
6905084Sjohnlev return (rc);
6910Sstevel@tonic-gate }
6925084Sjohnlev #endif /* __i386 */
6930Sstevel@tonic-gate
6940Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock);
6950Sstevel@tonic-gate return (EINVAL);
6960Sstevel@tonic-gate }
6970Sstevel@tonic-gate
6980Sstevel@tonic-gate /*
6995084Sjohnlev * Allocate new LDT for process just large enough to contain seli.
7005084Sjohnlev * Note we allocate and grow LDT in PAGESIZE chunks. We do this
7015084Sjohnlev * to simplify the implementation and because on the hypervisor it's
7025084Sjohnlev * required, since the LDT must live on pages that have PROT_WRITE
7035084Sjohnlev * removed and which are given to the hypervisor.
7040Sstevel@tonic-gate */
7052712Snn35248 static void
ldt_alloc(proc_t * pp,uint_t seli)7065084Sjohnlev ldt_alloc(proc_t *pp, uint_t seli)
7070Sstevel@tonic-gate {
7085084Sjohnlev user_desc_t *ldt;
7095084Sjohnlev size_t ldtsz;
7105084Sjohnlev uint_t nsels;
7110Sstevel@tonic-gate
7125084Sjohnlev ASSERT(MUTEX_HELD(&pp->p_ldtlock));
7135084Sjohnlev ASSERT(pp->p_ldt == NULL);
7145084Sjohnlev ASSERT(pp->p_ldtlimit == 0);
7150Sstevel@tonic-gate
7160Sstevel@tonic-gate /*
7175084Sjohnlev * Allocate new LDT just large enough to contain seli.
7180Sstevel@tonic-gate */
7195084Sjohnlev ldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
7205084Sjohnlev nsels = ldtsz / sizeof (user_desc_t);
7215084Sjohnlev ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
7221217Srab
7235084Sjohnlev ldt = kmem_zalloc(ldtsz, KM_SLEEP);
7245084Sjohnlev ASSERT(IS_P2ALIGNED(ldt, PAGESIZE));
7250Sstevel@tonic-gate
7265084Sjohnlev #if defined(__xpv)
7275084Sjohnlev if (xen_ldt_setprot(ldt, ldtsz, PROT_READ))
7285084Sjohnlev panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed");
7295084Sjohnlev #endif
7300Sstevel@tonic-gate
7315084Sjohnlev pp->p_ldt = ldt;
7325084Sjohnlev pp->p_ldtlimit = nsels - 1;
7335084Sjohnlev set_syssegd(&pp->p_ldt_desc, ldt, ldtsz - 1, SDT_SYSLDT, SEL_KPL);
7340Sstevel@tonic-gate
7355084Sjohnlev if (pp == curproc) {
7365084Sjohnlev kpreempt_disable();
7375084Sjohnlev ldt_load();
7385084Sjohnlev kpreempt_enable();
7395084Sjohnlev }
7400Sstevel@tonic-gate }
7410Sstevel@tonic-gate
7421217Srab static void
ldt_free(proc_t * pp)7430Sstevel@tonic-gate ldt_free(proc_t *pp)
7440Sstevel@tonic-gate {
7455084Sjohnlev user_desc_t *ldt;
7465084Sjohnlev size_t ldtsz;
7470Sstevel@tonic-gate
7480Sstevel@tonic-gate ASSERT(pp->p_ldt != NULL);
7490Sstevel@tonic-gate
7500Sstevel@tonic-gate mutex_enter(&pp->p_ldtlock);
7515084Sjohnlev ldt = pp->p_ldt;
7525084Sjohnlev ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
7535084Sjohnlev
7545084Sjohnlev ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE));
7550Sstevel@tonic-gate
7565084Sjohnlev pp->p_ldt = NULL;
7575084Sjohnlev pp->p_ldtlimit = 0;
7585084Sjohnlev pp->p_ldt_desc = null_sdesc;
7595084Sjohnlev mutex_exit(&pp->p_ldtlock);
7600Sstevel@tonic-gate
7615084Sjohnlev if (pp == curproc) {
7625084Sjohnlev kpreempt_disable();
7635084Sjohnlev ldt_unload();
7645084Sjohnlev kpreempt_enable();
7655084Sjohnlev }
7661217Srab
7675084Sjohnlev #if defined(__xpv)
7685084Sjohnlev /*
7695084Sjohnlev * We are not allowed to make the ldt writable until after
7705084Sjohnlev * we tell the hypervisor to unload it.
7715084Sjohnlev */
7725084Sjohnlev if (xen_ldt_setprot(ldt, ldtsz, PROT_READ | PROT_WRITE))
7735084Sjohnlev panic("ldt_free:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
7745084Sjohnlev #endif
7755084Sjohnlev
7765084Sjohnlev kmem_free(ldt, ldtsz);
7770Sstevel@tonic-gate }
7780Sstevel@tonic-gate
7790Sstevel@tonic-gate /*
7800Sstevel@tonic-gate * On fork copy new ldt for child.
7810Sstevel@tonic-gate */
7825084Sjohnlev static void
ldt_dup(proc_t * pp,proc_t * cp)7830Sstevel@tonic-gate ldt_dup(proc_t *pp, proc_t *cp)
7840Sstevel@tonic-gate {
7855084Sjohnlev size_t ldtsz;
7865084Sjohnlev
7875084Sjohnlev ASSERT(pp->p_ldt != NULL);
7885084Sjohnlev ASSERT(cp != curproc);
7890Sstevel@tonic-gate
7905084Sjohnlev /*
7915084Sjohnlev * I assume the parent's ldt can't increase since we're in a fork.
7925084Sjohnlev */
7935084Sjohnlev mutex_enter(&pp->p_ldtlock);
7945084Sjohnlev mutex_enter(&cp->p_ldtlock);
7955084Sjohnlev
7965084Sjohnlev ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
7975084Sjohnlev
7985084Sjohnlev ldt_alloc(cp, pp->p_ldtlimit);
7990Sstevel@tonic-gate
8005084Sjohnlev #if defined(__xpv)
8015084Sjohnlev /*
8025084Sjohnlev * Make child's ldt writable so it can be copied into from
8035084Sjohnlev * parent's ldt. This works since ldt_alloc above did not load
8045084Sjohnlev * the ldt since its for the child process. If we tried to make
8055084Sjohnlev * an LDT writable that is loaded in hw the setprot operation
8065084Sjohnlev * would fail.
8075084Sjohnlev */
8085084Sjohnlev if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ | PROT_WRITE))
8095084Sjohnlev panic("ldt_dup:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
8105084Sjohnlev #endif
8115084Sjohnlev
8125084Sjohnlev bcopy(pp->p_ldt, cp->p_ldt, ldtsz);
8130Sstevel@tonic-gate
8145084Sjohnlev #if defined(__xpv)
8155084Sjohnlev if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ))
8165084Sjohnlev panic("ldt_dup:xen_ldt_setprot(PROT_READ) failed");
8175084Sjohnlev #endif
8185084Sjohnlev mutex_exit(&cp->p_ldtlock);
8195084Sjohnlev mutex_exit(&pp->p_ldtlock);
8205084Sjohnlev
8215084Sjohnlev }
8225084Sjohnlev
8235084Sjohnlev static void
ldt_grow(proc_t * pp,uint_t seli)8245084Sjohnlev ldt_grow(proc_t *pp, uint_t seli)
8255084Sjohnlev {
8265084Sjohnlev user_desc_t *oldt, *nldt;
8275084Sjohnlev uint_t nsels;
8285084Sjohnlev size_t oldtsz, nldtsz;
8295084Sjohnlev
8305084Sjohnlev ASSERT(MUTEX_HELD(&pp->p_ldtlock));
8315084Sjohnlev ASSERT(pp->p_ldt != NULL);
8325084Sjohnlev ASSERT(pp->p_ldtlimit != 0);
8330Sstevel@tonic-gate
8345084Sjohnlev /*
8355084Sjohnlev * Allocate larger LDT just large enough to contain seli.
8365084Sjohnlev */
8375084Sjohnlev nldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
8385084Sjohnlev nsels = nldtsz / sizeof (user_desc_t);
8395084Sjohnlev ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
8405084Sjohnlev ASSERT(nsels > pp->p_ldtlimit);
8415084Sjohnlev
8425084Sjohnlev oldt = pp->p_ldt;
8435084Sjohnlev oldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
8445084Sjohnlev
8455084Sjohnlev nldt = kmem_zalloc(nldtsz, KM_SLEEP);
8465084Sjohnlev ASSERT(IS_P2ALIGNED(nldt, PAGESIZE));
8475084Sjohnlev
8485084Sjohnlev bcopy(oldt, nldt, oldtsz);
8495084Sjohnlev
8505084Sjohnlev /*
8515084Sjohnlev * unload old ldt.
8525084Sjohnlev */
8535084Sjohnlev kpreempt_disable();
8545084Sjohnlev ldt_unload();
8555084Sjohnlev kpreempt_enable();
8565084Sjohnlev
8575084Sjohnlev #if defined(__xpv)
8585084Sjohnlev
8595084Sjohnlev /*
8605084Sjohnlev * Make old ldt writable and new ldt read only.
8615084Sjohnlev */
8625084Sjohnlev if (xen_ldt_setprot(oldt, oldtsz, PROT_READ | PROT_WRITE))
8635084Sjohnlev panic("ldt_grow:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
8645084Sjohnlev
8655084Sjohnlev if (xen_ldt_setprot(nldt, nldtsz, PROT_READ))
8665084Sjohnlev panic("ldt_grow:xen_ldt_setprot(PROT_READ) failed");
8675084Sjohnlev #endif
8685084Sjohnlev
8695084Sjohnlev pp->p_ldt = nldt;
8705084Sjohnlev pp->p_ldtlimit = nsels - 1;
8715084Sjohnlev
8725084Sjohnlev /*
8735084Sjohnlev * write new ldt segment descriptor.
8745084Sjohnlev */
8755084Sjohnlev set_syssegd(&pp->p_ldt_desc, nldt, nldtsz - 1, SDT_SYSLDT, SEL_KPL);
8765084Sjohnlev
8775084Sjohnlev /*
8785084Sjohnlev * load the new ldt.
8795084Sjohnlev */
8805084Sjohnlev kpreempt_disable();
8815084Sjohnlev ldt_load();
8825084Sjohnlev kpreempt_enable();
8835084Sjohnlev
8845084Sjohnlev kmem_free(oldt, oldtsz);
8850Sstevel@tonic-gate }
886