/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
/*	  All Rights Reserved */

/*	Copyright (c) 1987, 1988 Microsoft Corporation */
/*	  All Rights Reserved */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/errno.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/cpuvar.h>
#include <sys/sysi86.h>
#include <sys/psw.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/thread.h>
#include <sys/debug.h>
#include <sys/ontrap.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>
#include <sys/vmem.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/archsystm.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/faultcode.h>
#include <sys/fp.h>
#include <sys/cmn_err.h>
#include <sys/segments.h>
#include <sys/clock.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
#include <sys/note.h>
#endif

static void ldt_alloc(proc_t *, uint_t);
static void ldt_free(proc_t *);
static void ldt_dup(proc_t *, proc_t *);
static void ldt_grow(proc_t *, uint_t);
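
/*
 * For orientation, a minimal (hypothetical) userland sketch of the
 * SI86DSCR command handled below; the actual prototype and the layout
 * of struct ssd come from <sys/sysi86.h>:
 *
 *	struct ssd ssd;
 *
 *	ssd.sel = ...;		LDT selector with DPL 3
 *	ssd.bo = ...;		segment base
 *	ssd.ls = ...;		segment limit
 *	ssd.acc1 = ...;		type, dpl and present bits
 *	ssd.acc2 = ...;		avl, db and granularity bits
 *	if (sysi86(SI86DSCR, &ssd) == -1)
 *		perror("sysi86(SI86DSCR)");
 */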

/*
 * sysi86 System Call
 */

/* ARGSUSED */
int
sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
{
	struct ssd ssd;
	int error = 0;
	int c;
	proc_t *pp = curproc;

	switch (cmd) {

	/*
	 * The SI86V86 subsystem call of the SYSI86 system call
	 * supports only one subcode -- V86SC_IOPL.
	 */
	case SI86V86:
		if (arg1 == V86SC_IOPL) {
			struct regs *rp = lwptoregs(ttolwp(curthread));
			greg_t oldpl = rp->r_ps & PS_IOPL;
			greg_t newpl = arg2 & PS_IOPL;

			/*
			 * Must be privileged to run this system call
			 * if giving more io privilege.
			 */
			if (newpl > oldpl && (error =
			    secpolicy_sys_config(CRED(), B_FALSE)) != 0)
				return (set_errno(error));
#if defined(__xpv)
			kpreempt_disable();
			installctx(curthread, NULL, xen_disable_user_iopl,
			    xen_enable_user_iopl, NULL, NULL,
			    xen_disable_user_iopl, NULL);
			xen_enable_user_iopl();
			kpreempt_enable();
#else
			rp->r_ps ^= oldpl ^ newpl;
#endif
		} else
			error = EINVAL;
		break;
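
	/*
	 * By way of (hypothetical) illustration of the case above:
	 * PS_IOPL is the two-bit IOPL field of the flags register, so
	 *
	 *	sysi86(SI86V86, V86SC_IOPL, PS_IOPL);
	 *
	 * requests IOPL 3, letting the lwp use in/out instructions
	 * directly; the privilege check applies only when raising IOPL.
	 */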

	/*
	 * Set a segment descriptor
	 */
	case SI86DSCR:
		/*
		 * There are considerable problems here manipulating
		 * resources shared by many running lwps. Get everyone
		 * into a safe state before changing the LDT.
		 */
		if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) {
			error = EINTR;
			break;
		}

		if (get_udatamodel() == DATAMODEL_LP64) {
			error = EINVAL;
			break;
		}

		if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) {
			error = EFAULT;
			break;
		}

		error = setdscr(&ssd);

		mutex_enter(&pp->p_lock);
		if (curthread != pp->p_agenttp)
			continuelwps(pp);
		mutex_exit(&pp->p_lock);
		break;

	case SI86FPHW:
		c = fp_kind & 0xff;
		if (suword32((void *)arg1, c) == -1)
			error = EFAULT;
		break;

	case SI86FPSTART:
		/*
		 * arg1 is the address of _fp_hw
		 * arg2 is the desired x87 FCW value
		 * arg3 is the desired SSE MXCSR value
		 * a return value of one means SSE hardware, else none.
		 */
		c = fp_kind & 0xff;
		if (suword32((void *)arg1, c) == -1) {
			error = EFAULT;
			break;
		}
		fpsetcw((uint16_t)arg2, (uint32_t)arg3);
		return (fp_kind == __FP_SSE ? 1 : 0);

	/* real time clock management commands */

	case WTODC:
		if ((error = secpolicy_settime(CRED())) == 0) {
			timestruc_t ts;
			mutex_enter(&tod_lock);
			gethrestime(&ts);
			tod_set(ts);
			mutex_exit(&tod_lock);
		}
		break;

/* Give some timezone playing room */
#define	ONEWEEK	(7 * 24 * 60 * 60)
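
	/*
	 * Worked example of the sign fixup in the SGMTL case below
	 * (hypothetical values): a 32-bit caller setting a lag of
	 * -3600 seconds passes the zero-extended value 0xfffff1f0 in
	 * arg1, and (int)arg1 recovers -3600. A 64-bit caller whose
	 * argument doesn't survive the round-trip through int can't
	 * be passing a sane lag, so it draws EOVERFLOW.
	 */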

	case SGMTL:
		/*
		 * Called from 32 bit land, negative values
		 * are not sign extended, so we do that here
		 * by casting it to an int and back.  We also
		 * clamp the value to within reason and detect
		 * when a 64 bit call overflows an int.
		 */
		if ((error = secpolicy_settime(CRED())) == 0) {
			int newlag = (int)arg1;

#ifdef _SYSCALL32_IMPL
			if (get_udatamodel() == DATAMODEL_NATIVE &&
			    (long)newlag != (long)arg1) {
				error = EOVERFLOW;
			} else
#endif
			if (newlag >= -ONEWEEK && newlag <= ONEWEEK)
				sgmtl(newlag);
			else
				error = EOVERFLOW;
		}
		break;

	case GGMTL:
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (sulword((void *)arg1, ggmtl()) == -1)
				error = EFAULT;
#ifdef _SYSCALL32_IMPL
		} else {
			time_t gmtl;

			if ((gmtl = ggmtl()) > INT32_MAX) {
				/*
				 * Since gmt_lag can at most be
				 * +/- 12 hours, something is
				 * *seriously* messed up here.
				 */
				error = EOVERFLOW;
			} else if (suword32((void *)arg1, (int32_t)gmtl) == -1)
				error = EFAULT;
#endif
		}
		break;

	case RTCSYNC:
		if ((error = secpolicy_settime(CRED())) == 0)
			rtcsync();
		break;

	/* END OF real time clock management commands */

	default:
		error = EINVAL;
		break;
	}
	return (error == 0 ? 0 : set_errno(error));
}

void
usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel)
{
	ssd->bo = USEGD_GETBASE(usd);
	ssd->ls = USEGD_GETLIMIT(usd);
	ssd->sel = sel;

	/*
	 * set type, dpl and present bits.
	 */
	ssd->acc1 = usd->usd_type;
	ssd->acc1 |= usd->usd_dpl << 5;
	ssd->acc1 |= usd->usd_p << (5 + 2);

	/*
	 * set avl, DB and granularity bits.
	 */
	ssd->acc2 = usd->usd_avl;

#if defined(__amd64)
	ssd->acc2 |= usd->usd_long << 1;
#else
	ssd->acc2 |= usd->usd_reserved << 1;
#endif

	ssd->acc2 |= usd->usd_def32 << (1 + 1);
	ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
}
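
/*
 * For reference, the shifts above and below pack the ssd access
 * fields like this:
 *
 *	acc1:	bit 7		bits 6-5	bits 4-0
 *		usd_p		usd_dpl		usd_type
 *
 *	acc2:	bit 3		bit 2		bit 1		bit 0
 *		usd_gran	usd_def32	long/reserved	usd_avl
 *
 * e.g. (hypothetical values) a present, DPL-3, page-granular 32-bit
 * read/write data segment (type 0x12) has acc1 == 0xf2 and acc2 == 0xc.
 */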

static void
ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
{

	ASSERT(bcmp(usd, &null_udesc, sizeof (*usd)) == 0);

	USEGD_SETBASE(usd, ssd->bo);
	USEGD_SETLIMIT(usd, ssd->ls);

	/*
	 * set type, dpl and present bits.
	 */
	usd->usd_type = ssd->acc1;
	usd->usd_dpl = ssd->acc1 >> 5;
	usd->usd_p = ssd->acc1 >> (5 + 2);

	ASSERT(usd->usd_type >= SDT_MEMRO);
	ASSERT(usd->usd_dpl == SEL_UPL);

	/*
	 * 64-bit code selectors are never allowed in the LDT.
	 * Reserved bit is always 0 on 32-bit systems.
	 */
#if defined(__amd64)
	usd->usd_long = 0;
#else
	usd->usd_reserved = 0;
#endif

	/*
	 * set avl, DB and granularity bits.
	 */
	usd->usd_avl = ssd->acc2;
	usd->usd_def32 = ssd->acc2 >> (1 + 1);
	usd->usd_gran = ssd->acc2 >> (1 + 1 + 1);
}


#if defined(__i386)

static void
ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd)
{

	ASSERT(bcmp(sgd, &null_sdesc, sizeof (*sgd)) == 0);

	sgd->sgd_looffset = ssd->bo;
	sgd->sgd_hioffset = ssd->bo >> 16;

	sgd->sgd_selector = ssd->ls;

	/*
	 * set type, dpl and present bits.
	 */
	sgd->sgd_type = ssd->acc1;
	sgd->sgd_dpl = ssd->acc1 >> 5;
	sgd->sgd_p = ssd->acc1 >> 7;
	ASSERT(sgd->sgd_type == SDT_SYSCGT);
	ASSERT(sgd->sgd_dpl == SEL_UPL);
	sgd->sgd_stkcpy = 0;
}

#endif	/* __i386 */
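
/*
 * As a (hypothetical) illustration of the conversion above: a call
 * gate arrives from userland with ssd->acc1 == 0xec, i.e. type
 * SDT_SYSCGT (0xc), DPL 3 (0x60) and present (0x80), with ssd->bo
 * carrying the entry-point offset and ssd->ls the target code
 * selector.
 */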

/*
 * Load LDT register with the current process's LDT.
 */
static void
ldt_load(void)
{
#if defined(__xpv)
	xen_set_ldt(get_ssd_base(&curproc->p_ldt_desc),
	    curproc->p_ldtlimit + 1);
#else
	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc;
	wr_ldtr(ULDT_SEL);
#endif
}

/*
 * Store a NULL selector in the LDTR. All subsequent illegal references to
 * the LDT will result in a #gp.
 */
void
ldt_unload(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	*((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = null_sdesc;
	wr_ldtr(0);
#endif
}

/*ARGSUSED*/
static void
ldt_savectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

#if defined(__amd64)
	/*
	 * The 64-bit kernel must be sure to clear any stale ldt
	 * selectors when context switching away from a process that
	 * has a private ldt. Consider the following example:
	 *
	 * Wine creates an ldt descriptor and points a segment register
	 * to it.
	 *
	 * We then context switch away from the wine lwp to a kernel
	 * thread and hit a breakpoint in the kernel with kmdb.
	 *
	 * When we continue and resume from kmdb we will #gp
	 * fault since kmdb will have saved the stale ldt selector
	 * from wine and will try to restore it but we are no longer in
	 * the context of the wine process and do not have our
	 * ldtr register pointing to the private ldt.
	 */
	reset_sregs();
#endif

	ldt_unload();
	cpu_fast_syscall_enable(NULL);
}

static void
ldt_restorectx(proc_t *p)
{
	ASSERT(p->p_ldt != NULL);
	ASSERT(p == curproc);

	ldt_load();
	cpu_fast_syscall_disable(NULL);
}
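
/*
 * To make the pairing above concrete, consider (hypothetically) a
 * switch from an lwp A whose process owns a private LDT to an
 * ordinary lwp B, and back:
 *
 *	A switches out:	ldt_savectx	LDTR cleared, fast syscalls on
 *	B runs:				sysenter/syscall usable again
 *	A switches in:	ldt_restorectx	private LDT loaded, fast
 *					syscalls off
 */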

/*
 * When a process with a private LDT execs, fast syscalls must be enabled for
 * the new process image.
 */
/* ARGSUSED */
static void
ldt_freectx(proc_t *p, int isexec)
{
	ASSERT(p->p_ldt);

	if (isexec) {
		kpreempt_disable();
		cpu_fast_syscall_enable(NULL);
		kpreempt_enable();
	}

	/*
	 * ldt_free() will free the memory used by the private LDT, reset the
	 * process's descriptor, and re-program the LDTR.
	 */
	ldt_free(p);
}

/*
 * Install ctx op that ensures syscall/sysenter are disabled.
 * See comments below.
 *
 * When a thread with a private LDT forks, the new process
 * must have the LDT context ops installed.
 */
/* ARGSUSED */
static void
ldt_installctx(proc_t *p, proc_t *cp)
{
	proc_t		*targ = p;
	kthread_t	*t;

	/*
	 * If this is a fork, operate on the child process.
	 */
	if (cp != NULL) {
		targ = cp;
		ldt_dup(p, cp);
	}

	/*
	 * The process context ops expect the target process as their argument.
	 */
	ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
	    ldt_installctx, ldt_savectx, ldt_freectx) == 0);

	installpctx(targ, targ, ldt_savectx, ldt_restorectx,
	    ldt_installctx, ldt_savectx, ldt_freectx);

	/*
	 * We've just disabled fast system call and return instructions; take
	 * the slow path out to make sure we don't try to use one to return
	 * back to user. We must set t_post_sys for every thread in the
	 * process to make sure none of them escape out via fast return.
	 */

	mutex_enter(&targ->p_lock);
	t = targ->p_tlist;
	do {
		t->t_post_sys = 1;
	} while ((t = t->t_forw) != targ->p_tlist);
	mutex_exit(&targ->p_lock);
}
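
/*
 * For reference, and going only by the argument order of the
 * installpctx() call above (arg, save, restore, fork, exit, free),
 * the LDT handlers map to process events roughly as follows:
 *
 *	switch out		ldt_savectx
 *	switch in		ldt_restorectx
 *	fork			ldt_installctx	(dups the LDT into the child)
 *	exit			ldt_savectx
 *	exec/proc free		ldt_freectx
 */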

int
setdscr(struct ssd *ssd)
{
	ushort_t seli;		/* selector index */
	user_desc_t *ldp;	/* descriptor pointer */
	user_desc_t ndesc;	/* new descriptor */
	proc_t	*pp = ttoproc(curthread);
	int	rc = 0;

	/*
	 * LDT segments: executable and data at DPL 3 only.
	 */
	if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
		return (EINVAL);

	/*
	 * check the selector index.
	 */
	seli = SELTOIDX(ssd->sel);
	if (seli >= MAXNLDT || seli < LDT_UDBASE)
		return (EINVAL);

	ndesc = null_udesc;
	mutex_enter(&pp->p_ldtlock);

	/*
	 * If this is the first time for this process then setup a
	 * private LDT for it.
	 */
	if (pp->p_ldt == NULL) {
		ldt_alloc(pp, seli);

		/*
		 * Now that this process has a private LDT, the use of
		 * the syscall/sysret and sysenter/sysexit instructions
		 * is forbidden for this process because they destroy
		 * the contents of the %cs and %ss segment registers.
		 *
		 * Explicitly disable them here and add a context handler
		 * to the process. Note that disabling
		 * them here means we can't use sysret or sysexit on
		 * the way out of this system call - so we force this
		 * thread to take the slow path (which doesn't make use
		 * of sysenter or sysexit) back out.
		 */
		kpreempt_disable();
		ldt_installctx(pp, NULL);
		cpu_fast_syscall_disable(NULL);
		ASSERT(curthread->t_post_sys != 0);
		kpreempt_enable();

	} else if (seli > pp->p_ldtlimit) {

		/*
		 * Increase size of ldt to include seli.
		 */
		ldt_grow(pp, seli);
	}

	ASSERT(seli <= pp->p_ldtlimit);
	ldp = &pp->p_ldt[seli];

	/*
	 * On the 64-bit kernel, this is where things get more subtle.
	 * Recall that in the 64-bit kernel, when we enter the kernel we
	 * deliberately -don't- reload the segment selectors we came in on
	 * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
	 * and the underlying descriptors are essentially ignored by the
	 * hardware in long mode - except for the base that we override with
	 * the gsbase MSRs.
	 *
	 * However, there's one unfortunate issue with this rosy picture --
	 * a descriptor that's not marked as 'present' will still generate
	 * an #np when loading a segment register.
	 *
	 * Consider this case.  An lwp creates a harmless LDT entry, points
	 * one of its segment registers at it, then tells the kernel (here)
	 * to delete it.  In the 32-bit kernel, the #np will happen on the
	 * way back to userland where we reload the segment registers, and be
	 * handled in kern_gpfault().  In the 64-bit kernel, the same thing
	 * will happen in the normal case too.  However, if we're trying to
	 * use a debugger that wants to save and restore the segment registers,
	 * and the debugger thinks that we have valid segment registers, we
	 * have the problem that the debugger will try and restore the
	 * segment register that points at the now 'not present' descriptor
	 * and will take a #np right there.
	 *
	 * We should obviously fix the debugger to be paranoid about
	 * -not- restoring segment registers that point to bad descriptors;
	 * however we can prevent the problem here if we check to see if any
	 * of the segment registers are still pointing at the thing we're
	 * destroying; if they are, return an error instead. (That also seems
	 * a lot better failure mode than SIGKILL and a core file
	 * from kern_gpfault() too.)
	 */
	if (SI86SSD_PRES(ssd) == 0) {
		kthread_t *t;
		int bad = 0;

		/*
		 * Look carefully at the segment registers of every lwp
		 * in the process (they're all stopped by our caller).
		 * If we're about to invalidate a descriptor that's still
		 * being referenced by *any* of them, return an error,
		 * rather than having them #gp on their way out of the kernel.
		 */
		ASSERT(pp->p_lwprcnt == 1);

		mutex_enter(&pp->p_lock);
		t = pp->p_tlist;
		do {
			klwp_t *lwp = ttolwp(t);
			struct regs *rp = lwp->lwp_regs;
#if defined(__amd64)
			pcb_t *pcb = &lwp->lwp_pcb;
#endif

			if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) {
				bad = 1;
				break;
			}

#if defined(__amd64)
			if (pcb->pcb_rupdate == 1) {
				if (ssd->sel == pcb->pcb_ds ||
				    ssd->sel == pcb->pcb_es ||
				    ssd->sel == pcb->pcb_fs ||
				    ssd->sel == pcb->pcb_gs) {
					bad = 1;
					break;
				}
			} else
#endif
			{
				if (ssd->sel == rp->r_ds ||
				    ssd->sel == rp->r_es ||
				    ssd->sel == rp->r_fs ||
				    ssd->sel == rp->r_gs) {
					bad = 1;
					break;
				}
			}

		} while ((t = t->t_forw) != pp->p_tlist);
		mutex_exit(&pp->p_lock);

		if (bad) {
			mutex_exit(&pp->p_ldtlock);
			return (EBUSY);
		}
	}

	/*
	 * If acc1 is zero, clear the descriptor (including the 'present' bit)
	 */
	if (ssd->acc1 == 0) {
		rc = ldt_update_segd(ldp, &null_udesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}

	/*
	 * Check segment type, allow segment not present and
	 * only user DPL (3).
	 */
	if (SI86SSD_DPL(ssd) != SEL_UPL) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}

#if defined(__amd64)
	/*
	 * Do not allow 32-bit applications to create 64-bit mode code
	 * segments.
	 */
	if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
	    SI86SSD_ISLONG(ssd)) {
		mutex_exit(&pp->p_ldtlock);
		return (EINVAL);
	}
#endif /* __amd64 */

	/*
	 * Set up a code or data user segment descriptor.
	 */
	if (SI86SSD_ISUSEG(ssd)) {
		ssd_to_usd(ssd, &ndesc);
		rc = ldt_update_segd(ldp, &ndesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}

#if defined(__i386)
	/*
	 * Allow a call gate only if the destination is in the LDT
	 * and the system is running in 32-bit legacy mode.
	 *
	 * In long mode 32-bit call gates are redefined as 64-bit call
	 * gates and the hw enforces that the target code selector
	 * of the call gate must be a 64-bit selector. A #gp fault is
	 * generated otherwise. Since we do not allow 32-bit processes
	 * to switch themselves to 64-bit mode, we never allow call gates
	 * on a 64-bit system.
	 */
	if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {
		ssd_to_sgd(ssd, (gate_desc_t *)&ndesc);
		rc = ldt_update_segd(ldp, &ndesc);
		mutex_exit(&pp->p_ldtlock);
		return (rc);
	}
#endif	/* __i386 */

	mutex_exit(&pp->p_ldtlock);
	return (EINVAL);
}
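
/*
 * Sizing example for ldt_alloc() below, assuming (hypothetically)
 * 8-byte user descriptors and 4K pages: seli == 100 needs
 * 101 * 8 == 808 bytes, which P2ROUNDUP grows to one 4096-byte page,
 * i.e. 512 descriptors and a p_ldtlimit of 511.
 */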

/*
 * Allocate new LDT for process just large enough to contain seli.
 * Note we allocate and grow LDT in PAGESIZE chunks. We do this
 * to simplify the implementation and because on the hypervisor it's
 * required, since the LDT must live on pages that have PROT_WRITE
 * removed and which are given to the hypervisor.
 */
static void
ldt_alloc(proc_t *pp, uint_t seli)
{
	user_desc_t	*ldt;
	size_t		ldtsz;
	uint_t		nsels;

	ASSERT(MUTEX_HELD(&pp->p_ldtlock));
	ASSERT(pp->p_ldt == NULL);
	ASSERT(pp->p_ldtlimit == 0);

	/*
	 * Allocate new LDT just large enough to contain seli.
	 */
	ldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
	nsels = ldtsz / sizeof (user_desc_t);
	ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);

	ldt = kmem_zalloc(ldtsz, KM_SLEEP);
	ASSERT(IS_P2ALIGNED(ldt, PAGESIZE));

#if defined(__xpv)
	if (xen_ldt_setprot(ldt, ldtsz, PROT_READ))
		panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed");
#endif

	pp->p_ldt = ldt;
	pp->p_ldtlimit = nsels - 1;
	set_syssegd(&pp->p_ldt_desc, ldt, ldtsz - 1, SDT_SYSLDT, SEL_KPL);

	if (pp == curproc) {
		kpreempt_disable();
		ldt_load();
		kpreempt_enable();
	}
}

static void
ldt_free(proc_t *pp)
{
	user_desc_t	*ldt;
	size_t		ldtsz;

	ASSERT(pp->p_ldt != NULL);

	mutex_enter(&pp->p_ldtlock);
	ldt = pp->p_ldt;
	ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE));

	pp->p_ldt = NULL;
	pp->p_ldtlimit = 0;
	pp->p_ldt_desc = null_sdesc;
	mutex_exit(&pp->p_ldtlock);

	if (pp == curproc) {
		kpreempt_disable();
		ldt_unload();
		kpreempt_enable();
	}

#if defined(__xpv)
	/*
	 * We are not allowed to make the ldt writable until after
	 * we tell the hypervisor to unload it.
	 */
	if (xen_ldt_setprot(ldt, ldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_free:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

	kmem_free(ldt, ldtsz);
}

/*
 * On fork copy new ldt for child.
 */
static void
ldt_dup(proc_t *pp, proc_t *cp)
{
	size_t	ldtsz;

	ASSERT(pp->p_ldt != NULL);
	ASSERT(cp != curproc);

	/*
	 * I assume the parent's ldt can't increase since we're in a fork.
	 */
	mutex_enter(&pp->p_ldtlock);
	mutex_enter(&cp->p_ldtlock);

	ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	ldt_alloc(cp, pp->p_ldtlimit);

#if defined(__xpv)
	/*
	 * Make the child's ldt writable so it can be copied into from
	 * the parent's ldt. This works since ldt_alloc above did not load
	 * the ldt, since it's for the child process. If we tried to make
	 * an LDT writable that is loaded in hw the setprot operation
	 * would fail.
	 */
	if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_dup:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

	bcopy(pp->p_ldt, cp->p_ldt, ldtsz);

#if defined(__xpv)
	if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ))
		panic("ldt_dup:xen_ldt_setprot(PROT_READ) failed");
#endif
	mutex_exit(&cp->p_ldtlock);
	mutex_exit(&pp->p_ldtlock);
}

static void
ldt_grow(proc_t *pp, uint_t seli)
{
	user_desc_t	*oldt, *nldt;
	uint_t		nsels;
	size_t		oldtsz, nldtsz;

	ASSERT(MUTEX_HELD(&pp->p_ldtlock));
	ASSERT(pp->p_ldt != NULL);
	ASSERT(pp->p_ldtlimit != 0);

	/*
	 * Allocate larger LDT just large enough to contain seli.
	 */
	nldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
	nsels = nldtsz / sizeof (user_desc_t);
	ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
	ASSERT(nsels > pp->p_ldtlimit);

	oldt = pp->p_ldt;
	oldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

	nldt = kmem_zalloc(nldtsz, KM_SLEEP);
	ASSERT(IS_P2ALIGNED(nldt, PAGESIZE));

	bcopy(oldt, nldt, oldtsz);

	/*
	 * unload old ldt.
	 */
	kpreempt_disable();
	ldt_unload();
	kpreempt_enable();

#if defined(__xpv)
	/*
	 * Make old ldt writable and new ldt read only.
	 */
	if (xen_ldt_setprot(oldt, oldtsz, PROT_READ | PROT_WRITE))
		panic("ldt_grow:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");

	if (xen_ldt_setprot(nldt, nldtsz, PROT_READ))
		panic("ldt_grow:xen_ldt_setprot(PROT_READ) failed");
#endif

	pp->p_ldt = nldt;
	pp->p_ldtlimit = nsels - 1;

	/*
	 * write new ldt segment descriptor.
	 */
	set_syssegd(&pp->p_ldt_desc, nldt, nldtsz - 1, SDT_SYSLDT, SEL_KPL);

	/*
	 * load the new ldt.
	 */
	kpreempt_disable();
	ldt_load();
	kpreempt_enable();

	kmem_free(oldt, oldtsz);
}