10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 52712Snn35248 * Common Development and Distribution License (the "License"). 62712Snn35248 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 223446Smrj * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ 270Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ 280Sstevel@tonic-gate /* All Rights Reserved */ 290Sstevel@tonic-gate 300Sstevel@tonic-gate /* Copyright (c) 1987, 1988 Microsoft Corporation */ 310Sstevel@tonic-gate /* All Rights Reserved */ 320Sstevel@tonic-gate 330Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 340Sstevel@tonic-gate 350Sstevel@tonic-gate #include <sys/param.h> 360Sstevel@tonic-gate #include <sys/types.h> 370Sstevel@tonic-gate #include <sys/sysmacros.h> 380Sstevel@tonic-gate #include <sys/systm.h> 390Sstevel@tonic-gate #include <sys/signal.h> 400Sstevel@tonic-gate #include <sys/errno.h> 410Sstevel@tonic-gate #include <sys/fault.h> 420Sstevel@tonic-gate #include <sys/syscall.h> 430Sstevel@tonic-gate #include <sys/cpuvar.h> 440Sstevel@tonic-gate #include <sys/sysi86.h> 450Sstevel@tonic-gate #include <sys/psw.h> 460Sstevel@tonic-gate #include <sys/cred.h> 470Sstevel@tonic-gate #include <sys/policy.h> 480Sstevel@tonic-gate #include <sys/thread.h> 490Sstevel@tonic-gate #include <sys/debug.h> 500Sstevel@tonic-gate #include <sys/ontrap.h> 510Sstevel@tonic-gate #include <sys/privregs.h> 520Sstevel@tonic-gate #include <sys/x86_archext.h> 530Sstevel@tonic-gate #include <sys/vmem.h> 540Sstevel@tonic-gate #include <sys/kmem.h> 550Sstevel@tonic-gate #include <sys/mman.h> 560Sstevel@tonic-gate #include <sys/archsystm.h> 570Sstevel@tonic-gate #include <vm/hat.h> 580Sstevel@tonic-gate #include <vm/as.h> 590Sstevel@tonic-gate #include <vm/seg.h> 600Sstevel@tonic-gate #include <vm/seg_kmem.h> 610Sstevel@tonic-gate #include <vm/faultcode.h> 620Sstevel@tonic-gate #include <sys/fp.h> 630Sstevel@tonic-gate #include <sys/cmn_err.h> 643446Smrj #include <sys/segments.h> 653446Smrj #include <sys/clock.h> 660Sstevel@tonic-gate 671217Srab static void setup_ldt(proc_t *pp); 680Sstevel@tonic-gate static void *ldt_map(proc_t *pp, uint_t seli); 691217Srab static void ldt_free(proc_t *pp); 700Sstevel@tonic-gate 710Sstevel@tonic-gate /* 720Sstevel@tonic-gate * sysi86 System Call 730Sstevel@tonic-gate */ 740Sstevel@tonic-gate 750Sstevel@tonic-gate /* ARGSUSED */ 760Sstevel@tonic-gate int 770Sstevel@tonic-gate sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) 780Sstevel@tonic-gate { 792712Snn35248 struct ssd ssd; 800Sstevel@tonic-gate int error = 0; 810Sstevel@tonic-gate int c; 820Sstevel@tonic-gate proc_t *pp = curproc; 830Sstevel@tonic-gate 840Sstevel@tonic-gate switch (cmd) { 850Sstevel@tonic-gate 860Sstevel@tonic-gate /* 870Sstevel@tonic-gate * The SI86V86 subsystem call of the SYSI86 system call 880Sstevel@tonic-gate * supports only one subcode -- V86SC_IOPL. 890Sstevel@tonic-gate */ 900Sstevel@tonic-gate case SI86V86: 910Sstevel@tonic-gate if (arg1 == V86SC_IOPL) { 920Sstevel@tonic-gate struct regs *rp = lwptoregs(ttolwp(curthread)); 930Sstevel@tonic-gate greg_t oldpl = rp->r_ps & PS_IOPL; 940Sstevel@tonic-gate greg_t newpl = arg2 & PS_IOPL; 950Sstevel@tonic-gate 960Sstevel@tonic-gate /* 970Sstevel@tonic-gate * Must be privileged to run this system call 980Sstevel@tonic-gate * if giving more io privilege. 990Sstevel@tonic-gate */ 1000Sstevel@tonic-gate if (newpl > oldpl && (error = 1010Sstevel@tonic-gate secpolicy_sys_config(CRED(), B_FALSE)) != 0) 1020Sstevel@tonic-gate return (set_errno(error)); 1030Sstevel@tonic-gate rp->r_ps ^= oldpl ^ newpl; 1040Sstevel@tonic-gate } else 1050Sstevel@tonic-gate error = EINVAL; 1060Sstevel@tonic-gate break; 1070Sstevel@tonic-gate 1080Sstevel@tonic-gate /* 1090Sstevel@tonic-gate * Set a segment descriptor 1100Sstevel@tonic-gate */ 1110Sstevel@tonic-gate case SI86DSCR: 1120Sstevel@tonic-gate /* 1130Sstevel@tonic-gate * There are considerable problems here manipulating 1140Sstevel@tonic-gate * resources shared by many running lwps. Get everyone 1150Sstevel@tonic-gate * into a safe state before changing the LDT. 1160Sstevel@tonic-gate */ 1170Sstevel@tonic-gate if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) { 1180Sstevel@tonic-gate error = EINTR; 1190Sstevel@tonic-gate break; 1200Sstevel@tonic-gate } 1212712Snn35248 1222712Snn35248 if (get_udatamodel() == DATAMODEL_LP64) { 1232712Snn35248 error = EINVAL; 1242712Snn35248 break; 1252712Snn35248 } 1262712Snn35248 1272712Snn35248 if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) { 1282712Snn35248 error = EFAULT; 1292712Snn35248 break; 1302712Snn35248 } 1312712Snn35248 1322712Snn35248 error = setdscr(&ssd); 1332712Snn35248 1340Sstevel@tonic-gate mutex_enter(&pp->p_lock); 1350Sstevel@tonic-gate if (curthread != pp->p_agenttp) 1360Sstevel@tonic-gate continuelwps(pp); 1370Sstevel@tonic-gate mutex_exit(&pp->p_lock); 1380Sstevel@tonic-gate break; 1390Sstevel@tonic-gate 1400Sstevel@tonic-gate case SI86FPHW: 1410Sstevel@tonic-gate c = fp_kind & 0xff; 1420Sstevel@tonic-gate if (suword32((void *)arg1, c) == -1) 1430Sstevel@tonic-gate error = EFAULT; 1440Sstevel@tonic-gate break; 1450Sstevel@tonic-gate 1460Sstevel@tonic-gate case SI86FPSTART: 1470Sstevel@tonic-gate /* 1480Sstevel@tonic-gate * arg1 is the address of _fp_hw 1490Sstevel@tonic-gate * arg2 is the desired x87 FCW value 1500Sstevel@tonic-gate * arg3 is the desired SSE MXCSR value 1510Sstevel@tonic-gate * a return value of one means SSE hardware, else none. 1520Sstevel@tonic-gate */ 1530Sstevel@tonic-gate c = fp_kind & 0xff; 1540Sstevel@tonic-gate if (suword32((void *)arg1, c) == -1) { 1550Sstevel@tonic-gate error = EFAULT; 1560Sstevel@tonic-gate break; 1570Sstevel@tonic-gate } 1580Sstevel@tonic-gate fpsetcw((uint16_t)arg2, (uint32_t)arg3); 1590Sstevel@tonic-gate return (fp_kind == __FP_SSE ? 1 : 0); 1600Sstevel@tonic-gate 1610Sstevel@tonic-gate /* real time clock management commands */ 1620Sstevel@tonic-gate 1630Sstevel@tonic-gate case WTODC: 1640Sstevel@tonic-gate if ((error = secpolicy_settime(CRED())) == 0) { 1650Sstevel@tonic-gate timestruc_t ts; 1660Sstevel@tonic-gate mutex_enter(&tod_lock); 1670Sstevel@tonic-gate gethrestime(&ts); 1680Sstevel@tonic-gate tod_set(ts); 1690Sstevel@tonic-gate mutex_exit(&tod_lock); 1700Sstevel@tonic-gate } 1710Sstevel@tonic-gate break; 1720Sstevel@tonic-gate 1730Sstevel@tonic-gate /* Give some timezone playing room */ 1740Sstevel@tonic-gate #define ONEWEEK (7 * 24 * 60 * 60) 1750Sstevel@tonic-gate 1760Sstevel@tonic-gate case SGMTL: 1770Sstevel@tonic-gate /* 1780Sstevel@tonic-gate * Called from 32 bit land, negative values 1790Sstevel@tonic-gate * are not sign extended, so we do that here 1800Sstevel@tonic-gate * by casting it to an int and back. We also 1810Sstevel@tonic-gate * clamp the value to within reason and detect 1820Sstevel@tonic-gate * when a 64 bit call overflows an int. 1830Sstevel@tonic-gate */ 1840Sstevel@tonic-gate if ((error = secpolicy_settime(CRED())) == 0) { 1850Sstevel@tonic-gate int newlag = (int)arg1; 1860Sstevel@tonic-gate 1870Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1880Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE && 1890Sstevel@tonic-gate (long)newlag != (long)arg1) { 1900Sstevel@tonic-gate error = EOVERFLOW; 1910Sstevel@tonic-gate } else 1920Sstevel@tonic-gate #endif 1930Sstevel@tonic-gate if (newlag >= -ONEWEEK && newlag <= ONEWEEK) 1940Sstevel@tonic-gate sgmtl(newlag); 1950Sstevel@tonic-gate else 1960Sstevel@tonic-gate error = EOVERFLOW; 1970Sstevel@tonic-gate } 1980Sstevel@tonic-gate break; 1990Sstevel@tonic-gate 2000Sstevel@tonic-gate case GGMTL: 2010Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) { 2020Sstevel@tonic-gate if (sulword((void *)arg1, ggmtl()) == -1) 2030Sstevel@tonic-gate error = EFAULT; 2040Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 2050Sstevel@tonic-gate } else { 2060Sstevel@tonic-gate time_t gmtl; 2070Sstevel@tonic-gate 2080Sstevel@tonic-gate if ((gmtl = ggmtl()) > INT32_MAX) { 2090Sstevel@tonic-gate /* 2100Sstevel@tonic-gate * Since gmt_lag can at most be 2110Sstevel@tonic-gate * +/- 12 hours, something is 2120Sstevel@tonic-gate * *seriously* messed up here. 2130Sstevel@tonic-gate */ 2140Sstevel@tonic-gate error = EOVERFLOW; 2150Sstevel@tonic-gate } else if (suword32((void *)arg1, (int32_t)gmtl) == -1) 2160Sstevel@tonic-gate error = EFAULT; 2170Sstevel@tonic-gate #endif 2180Sstevel@tonic-gate } 2190Sstevel@tonic-gate break; 2200Sstevel@tonic-gate 2210Sstevel@tonic-gate case RTCSYNC: 2220Sstevel@tonic-gate if ((error = secpolicy_settime(CRED())) == 0) 2230Sstevel@tonic-gate rtcsync(); 2240Sstevel@tonic-gate break; 2250Sstevel@tonic-gate 2260Sstevel@tonic-gate /* END OF real time clock management commands */ 2270Sstevel@tonic-gate 2280Sstevel@tonic-gate default: 2290Sstevel@tonic-gate error = EINVAL; 2300Sstevel@tonic-gate break; 2310Sstevel@tonic-gate } 2320Sstevel@tonic-gate return (error == 0 ? 0 : set_errno(error)); 2330Sstevel@tonic-gate } 2340Sstevel@tonic-gate 2350Sstevel@tonic-gate void 2360Sstevel@tonic-gate usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel) 2370Sstevel@tonic-gate { 2380Sstevel@tonic-gate ssd->bo = USEGD_GETBASE(usd); 2390Sstevel@tonic-gate ssd->ls = USEGD_GETLIMIT(usd); 2400Sstevel@tonic-gate ssd->sel = sel; 2410Sstevel@tonic-gate 2420Sstevel@tonic-gate /* 2430Sstevel@tonic-gate * set type, dpl and present bits. 2440Sstevel@tonic-gate */ 2450Sstevel@tonic-gate ssd->acc1 = usd->usd_type; 2460Sstevel@tonic-gate ssd->acc1 |= usd->usd_dpl << 5; 2470Sstevel@tonic-gate ssd->acc1 |= usd->usd_p << (5 + 2); 2480Sstevel@tonic-gate 2490Sstevel@tonic-gate /* 2500Sstevel@tonic-gate * set avl, DB and granularity bits. 2510Sstevel@tonic-gate */ 2520Sstevel@tonic-gate ssd->acc2 = usd->usd_avl; 2530Sstevel@tonic-gate 2540Sstevel@tonic-gate #if defined(__amd64) 2550Sstevel@tonic-gate ssd->acc2 |= usd->usd_long << 1; 2560Sstevel@tonic-gate #else 2570Sstevel@tonic-gate ssd->acc2 |= usd->usd_reserved << 1; 2580Sstevel@tonic-gate #endif 2590Sstevel@tonic-gate 2600Sstevel@tonic-gate ssd->acc2 |= usd->usd_def32 << (1 + 1); 2610Sstevel@tonic-gate ssd->acc2 |= usd->usd_gran << (1 + 1 + 1); 2620Sstevel@tonic-gate } 2630Sstevel@tonic-gate 2640Sstevel@tonic-gate static void 2650Sstevel@tonic-gate ssd_to_usd(struct ssd *ssd, user_desc_t *usd) 2660Sstevel@tonic-gate { 2670Sstevel@tonic-gate 2680Sstevel@tonic-gate USEGD_SETBASE(usd, ssd->bo); 2690Sstevel@tonic-gate USEGD_SETLIMIT(usd, ssd->ls); 2700Sstevel@tonic-gate 2710Sstevel@tonic-gate /* 2720Sstevel@tonic-gate * set type, dpl and present bits. 2730Sstevel@tonic-gate */ 2740Sstevel@tonic-gate usd->usd_type = ssd->acc1; 2750Sstevel@tonic-gate usd->usd_dpl = ssd->acc1 >> 5; 2760Sstevel@tonic-gate usd->usd_p = ssd->acc1 >> (5 + 2); 2770Sstevel@tonic-gate 2780Sstevel@tonic-gate ASSERT(usd->usd_type >= SDT_MEMRO); 2790Sstevel@tonic-gate ASSERT(usd->usd_dpl == SEL_UPL); 2800Sstevel@tonic-gate 2810Sstevel@tonic-gate /* 2820Sstevel@tonic-gate * set avl, DB and granularity bits. 2830Sstevel@tonic-gate */ 2840Sstevel@tonic-gate usd->usd_avl = ssd->acc2; 2850Sstevel@tonic-gate 2860Sstevel@tonic-gate #if defined(__amd64) 2870Sstevel@tonic-gate usd->usd_long = ssd->acc2 >> 1; 2880Sstevel@tonic-gate #else 2890Sstevel@tonic-gate usd->usd_reserved = ssd->acc2 >> 1; 2900Sstevel@tonic-gate #endif 2910Sstevel@tonic-gate 2920Sstevel@tonic-gate usd->usd_def32 = ssd->acc2 >> (1 + 1); 2930Sstevel@tonic-gate usd->usd_gran = ssd->acc2 >> (1 + 1 + 1); 2940Sstevel@tonic-gate } 2950Sstevel@tonic-gate 2960Sstevel@tonic-gate static void 2970Sstevel@tonic-gate ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd) 2980Sstevel@tonic-gate { 2990Sstevel@tonic-gate 3000Sstevel@tonic-gate sgd->sgd_looffset = ssd->bo; 3010Sstevel@tonic-gate sgd->sgd_hioffset = ssd->bo >> 16; 3020Sstevel@tonic-gate 3030Sstevel@tonic-gate sgd->sgd_selector = ssd->ls; 3040Sstevel@tonic-gate /* 3050Sstevel@tonic-gate * set type, dpl and present bits. 3060Sstevel@tonic-gate */ 3070Sstevel@tonic-gate sgd->sgd_type = ssd->acc1; 3080Sstevel@tonic-gate sgd->sgd_dpl = ssd->acc1 >> 5; 3090Sstevel@tonic-gate sgd->sgd_p = ssd->acc1 >> 7; 3100Sstevel@tonic-gate ASSERT(sgd->sgd_type == SDT_SYSCGT); 3110Sstevel@tonic-gate ASSERT(sgd->sgd_dpl == SEL_UPL); 3120Sstevel@tonic-gate 3130Sstevel@tonic-gate #if defined(__i386) /* reserved, ignored in amd64 */ 3140Sstevel@tonic-gate sgd->sgd_stkcpy = 0; 3150Sstevel@tonic-gate #endif 3160Sstevel@tonic-gate } 3170Sstevel@tonic-gate 3181217Srab /* 3191217Srab * Load LDT register with the current process's LDT. 3201217Srab */ 3211217Srab void 3221217Srab ldt_load(void) 3231217Srab { 3241217Srab /* 3251217Srab */ 3261217Srab *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc; 3271217Srab wr_ldtr(ULDT_SEL); 3281217Srab } 3291217Srab 3301217Srab /* 3311217Srab * Store a NULL selector in the LDTR. All subsequent illegal references to 3321217Srab * the LDT will result in a #gp. 3331217Srab */ 3341217Srab void 3351217Srab ldt_unload(void) 3361217Srab { 3371217Srab CPU->cpu_gdt[GDT_LDT] = zero_udesc; 3381217Srab wr_ldtr(0); 3391217Srab } 3400Sstevel@tonic-gate 3410Sstevel@tonic-gate /*ARGSUSED*/ 3420Sstevel@tonic-gate static void 3431217Srab ldt_savectx(proc_t *p) 3440Sstevel@tonic-gate { 3451217Srab ASSERT(p->p_ldt != NULL); 3461217Srab ASSERT(p == curproc); 3471217Srab 3480Sstevel@tonic-gate #if defined(__amd64) 3490Sstevel@tonic-gate /* 3500Sstevel@tonic-gate * The 64-bit kernel must be sure to clear any stale ldt 3510Sstevel@tonic-gate * selectors when context switching away from a process that 3520Sstevel@tonic-gate * has a private ldt. Consider the following example: 3530Sstevel@tonic-gate * 3540Sstevel@tonic-gate * Wine creats a ldt descriptor and points a segment register 3550Sstevel@tonic-gate * to it. 3560Sstevel@tonic-gate * 3570Sstevel@tonic-gate * We then context switch away from wine lwp to kernel 3580Sstevel@tonic-gate * thread and hit breakpoint in kernel with kmdb 3590Sstevel@tonic-gate * 3600Sstevel@tonic-gate * When we continue and resume from kmdb we will #gp 3610Sstevel@tonic-gate * fault since kmdb will have saved the stale ldt selector 3620Sstevel@tonic-gate * from wine and will try to restore it but we are no longer in 3630Sstevel@tonic-gate * the context of the wine process and do not have our 3640Sstevel@tonic-gate * ldtr register pointing to the private ldt. 3650Sstevel@tonic-gate */ 3660Sstevel@tonic-gate clr_ldt_sregs(); 3670Sstevel@tonic-gate #endif 3680Sstevel@tonic-gate 3691217Srab ldt_unload(); 3700Sstevel@tonic-gate cpu_fast_syscall_enable(NULL); 3710Sstevel@tonic-gate } 3720Sstevel@tonic-gate 3731217Srab static void 3741217Srab ldt_restorectx(proc_t *p) 3751217Srab { 3761217Srab ASSERT(p->p_ldt != NULL); 3771217Srab ASSERT(p == curproc); 3781217Srab 3791217Srab ldt_load(); 3801217Srab cpu_fast_syscall_disable(NULL); 3811217Srab } 3821217Srab 3830Sstevel@tonic-gate /* 3841217Srab * When a process with a private LDT execs, fast syscalls must be enabled for 3851217Srab * the new process image. 3860Sstevel@tonic-gate */ 3870Sstevel@tonic-gate /* ARGSUSED */ 3880Sstevel@tonic-gate static void 3891217Srab ldt_freectx(proc_t *p, int isexec) 3900Sstevel@tonic-gate { 3911217Srab ASSERT(p->p_ldt); 3921217Srab 3930Sstevel@tonic-gate if (isexec) { 3940Sstevel@tonic-gate kpreempt_disable(); 3950Sstevel@tonic-gate cpu_fast_syscall_enable(NULL); 3960Sstevel@tonic-gate kpreempt_enable(); 3970Sstevel@tonic-gate } 3981217Srab 3991217Srab /* 4001217Srab * ldt_free() will free the memory used by the private LDT, reset the 4011217Srab * process's descriptor, and re-program the LDTR. 4021217Srab */ 4031217Srab ldt_free(p); 4040Sstevel@tonic-gate } 4050Sstevel@tonic-gate 4060Sstevel@tonic-gate /* 4070Sstevel@tonic-gate * Install ctx op that ensures syscall/sysenter are disabled. 4080Sstevel@tonic-gate * See comments below. 4090Sstevel@tonic-gate * 4101217Srab * When a thread with a private LDT forks, the new process 4110Sstevel@tonic-gate * must have the LDT context ops installed. 4120Sstevel@tonic-gate */ 4130Sstevel@tonic-gate /* ARGSUSED */ 4140Sstevel@tonic-gate static void 4151217Srab ldt_installctx(proc_t *p, proc_t *cp) 4160Sstevel@tonic-gate { 4171217Srab proc_t *targ = p; 4181217Srab kthread_t *t; 4190Sstevel@tonic-gate 4200Sstevel@tonic-gate /* 4211217Srab * If this is a fork, operate on the child process. 4220Sstevel@tonic-gate */ 4231217Srab if (cp != NULL) { 4241217Srab targ = cp; 4251217Srab ldt_dup(p, cp); 4261217Srab } 4270Sstevel@tonic-gate 4281217Srab /* 4291217Srab * The process context ops expect the target process as their argument. 4301217Srab */ 4311217Srab ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx, 4321217Srab ldt_installctx, ldt_savectx, ldt_freectx) == 0); 4330Sstevel@tonic-gate 4341217Srab installpctx(targ, targ, ldt_savectx, ldt_restorectx, 4351217Srab ldt_installctx, ldt_savectx, ldt_freectx); 4360Sstevel@tonic-gate 4370Sstevel@tonic-gate /* 4380Sstevel@tonic-gate * We've just disabled fast system call and return instructions; take 4390Sstevel@tonic-gate * the slow path out to make sure we don't try to use one to return 4401217Srab * back to user. We must set t_post_sys for every thread in the 4411217Srab * process to make sure none of them escape out via fast return. 4420Sstevel@tonic-gate */ 4431217Srab 4441217Srab mutex_enter(&targ->p_lock); 4451217Srab t = targ->p_tlist; 4461217Srab do { 4471217Srab t->t_post_sys = 1; 4481217Srab } while ((t = t->t_forw) != targ->p_tlist); 4491217Srab mutex_exit(&targ->p_lock); 4500Sstevel@tonic-gate } 4510Sstevel@tonic-gate 4522712Snn35248 int 4532712Snn35248 setdscr(struct ssd *ssd) 4540Sstevel@tonic-gate { 4550Sstevel@tonic-gate ushort_t seli; /* selector index */ 4560Sstevel@tonic-gate user_desc_t *dscrp; /* descriptor pointer */ 4570Sstevel@tonic-gate proc_t *pp = ttoproc(curthread); 4580Sstevel@tonic-gate 4590Sstevel@tonic-gate /* 4600Sstevel@tonic-gate * LDT segments: executable and data at DPL 3 only. 4610Sstevel@tonic-gate */ 4622712Snn35248 if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel)) 4630Sstevel@tonic-gate return (EINVAL); 4640Sstevel@tonic-gate 4650Sstevel@tonic-gate /* 4660Sstevel@tonic-gate * check the selector index. 4670Sstevel@tonic-gate */ 4682712Snn35248 seli = SELTOIDX(ssd->sel); 4691217Srab if (seli >= MAXNLDT || seli < LDT_UDBASE) 4700Sstevel@tonic-gate return (EINVAL); 4710Sstevel@tonic-gate 4720Sstevel@tonic-gate mutex_enter(&pp->p_ldtlock); 4730Sstevel@tonic-gate 4740Sstevel@tonic-gate /* 4750Sstevel@tonic-gate * If this is the first time for this process then setup a 4760Sstevel@tonic-gate * private LDT for it. 4770Sstevel@tonic-gate */ 4780Sstevel@tonic-gate if (pp->p_ldt == NULL) { 4791217Srab kpreempt_disable(); 4801217Srab setup_ldt(pp); 4810Sstevel@tonic-gate 4820Sstevel@tonic-gate /* 4830Sstevel@tonic-gate * Now that this process has a private LDT, the use of 4840Sstevel@tonic-gate * the syscall/sysret and sysenter/sysexit instructions 4850Sstevel@tonic-gate * is forbidden for this processes because they destroy 4860Sstevel@tonic-gate * the contents of %cs and %ss segment registers. 4870Sstevel@tonic-gate * 4881217Srab * Explicity disable them here and add a context handler 4891217Srab * to the process. Note that disabling 4900Sstevel@tonic-gate * them here means we can't use sysret or sysexit on 4910Sstevel@tonic-gate * the way out of this system call - so we force this 4920Sstevel@tonic-gate * thread to take the slow path (which doesn't make use 4930Sstevel@tonic-gate * of sysenter or sysexit) back out. 4940Sstevel@tonic-gate */ 4950Sstevel@tonic-gate 4961217Srab ldt_installctx(pp, NULL); 4970Sstevel@tonic-gate 4980Sstevel@tonic-gate cpu_fast_syscall_disable(NULL); 4991217Srab 5000Sstevel@tonic-gate ASSERT(curthread->t_post_sys != 0); 5010Sstevel@tonic-gate wr_ldtr(ULDT_SEL); 5021217Srab kpreempt_enable(); 5030Sstevel@tonic-gate } 5040Sstevel@tonic-gate 5050Sstevel@tonic-gate if (ldt_map(pp, seli) == NULL) { 5060Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 5070Sstevel@tonic-gate return (ENOMEM); 5080Sstevel@tonic-gate } 5090Sstevel@tonic-gate 5100Sstevel@tonic-gate ASSERT(seli <= pp->p_ldtlimit); 5110Sstevel@tonic-gate dscrp = &pp->p_ldt[seli]; 5120Sstevel@tonic-gate 5130Sstevel@tonic-gate /* 5140Sstevel@tonic-gate * On the 64-bit kernel, this is where things get more subtle. 5150Sstevel@tonic-gate * Recall that in the 64-bit kernel, when we enter the kernel we 5160Sstevel@tonic-gate * deliberately -don't- reload the segment selectors we came in on 5170Sstevel@tonic-gate * for %ds, %es, %fs or %gs. Messing with selectors is expensive, 5180Sstevel@tonic-gate * and the underlying descriptors are essentially ignored by the 5190Sstevel@tonic-gate * hardware in long mode - except for the base that we override with 5200Sstevel@tonic-gate * the gsbase MSRs. 5210Sstevel@tonic-gate * 5220Sstevel@tonic-gate * However, there's one unfortunate issue with this rosy picture -- 5230Sstevel@tonic-gate * a descriptor that's not marked as 'present' will still generate 5240Sstevel@tonic-gate * an #np when loading a segment register. 5250Sstevel@tonic-gate * 5260Sstevel@tonic-gate * Consider this case. An lwp creates a harmless LDT entry, points 5270Sstevel@tonic-gate * one of it's segment registers at it, then tells the kernel (here) 5280Sstevel@tonic-gate * to delete it. In the 32-bit kernel, the #np will happen on the 5290Sstevel@tonic-gate * way back to userland where we reload the segment registers, and be 5300Sstevel@tonic-gate * handled in kern_gpfault(). In the 64-bit kernel, the same thing 5310Sstevel@tonic-gate * will happen in the normal case too. However, if we're trying to 5320Sstevel@tonic-gate * use a debugger that wants to save and restore the segment registers, 5330Sstevel@tonic-gate * and the debugger things that we have valid segment registers, we 5340Sstevel@tonic-gate * have the problem that the debugger will try and restore the 5350Sstevel@tonic-gate * segment register that points at the now 'not present' descriptor 5360Sstevel@tonic-gate * and will take a #np right there. 5370Sstevel@tonic-gate * 5380Sstevel@tonic-gate * We should obviously fix the debugger to be paranoid about 5390Sstevel@tonic-gate * -not- restoring segment registers that point to bad descriptors; 5400Sstevel@tonic-gate * however we can prevent the problem here if we check to see if any 5410Sstevel@tonic-gate * of the segment registers are still pointing at the thing we're 5420Sstevel@tonic-gate * destroying; if they are, return an error instead. (That also seems 5430Sstevel@tonic-gate * a lot better failure mode than SIGKILL and a core file 5440Sstevel@tonic-gate * from kern_gpfault() too.) 5450Sstevel@tonic-gate */ 5462712Snn35248 if (SI86SSD_PRES(ssd) == 0) { 5470Sstevel@tonic-gate kthread_t *t; 5480Sstevel@tonic-gate int bad = 0; 5490Sstevel@tonic-gate 5500Sstevel@tonic-gate /* 5510Sstevel@tonic-gate * Look carefully at the segment registers of every lwp 5520Sstevel@tonic-gate * in the process (they're all stopped by our caller). 5530Sstevel@tonic-gate * If we're about to invalidate a descriptor that's still 5540Sstevel@tonic-gate * being referenced by *any* of them, return an error, 5550Sstevel@tonic-gate * rather than having them #gp on their way out of the kernel. 5560Sstevel@tonic-gate */ 5570Sstevel@tonic-gate ASSERT(pp->p_lwprcnt == 1); 5580Sstevel@tonic-gate 5590Sstevel@tonic-gate mutex_enter(&pp->p_lock); 5600Sstevel@tonic-gate t = pp->p_tlist; 5610Sstevel@tonic-gate do { 5620Sstevel@tonic-gate klwp_t *lwp = ttolwp(t); 5630Sstevel@tonic-gate struct regs *rp = lwp->lwp_regs; 5640Sstevel@tonic-gate #if defined(__amd64) 5650Sstevel@tonic-gate pcb_t *pcb = &lwp->lwp_pcb; 5660Sstevel@tonic-gate #endif 5670Sstevel@tonic-gate 5682712Snn35248 if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) { 5690Sstevel@tonic-gate bad = 1; 5700Sstevel@tonic-gate break; 5710Sstevel@tonic-gate } 5720Sstevel@tonic-gate 5730Sstevel@tonic-gate #if defined(__amd64) 574*4503Ssudheer if (pcb->pcb_rupdate == 1) { 5752712Snn35248 if (ssd->sel == pcb->pcb_ds || 5762712Snn35248 ssd->sel == pcb->pcb_es || 5772712Snn35248 ssd->sel == pcb->pcb_fs || 5782712Snn35248 ssd->sel == pcb->pcb_gs) { 5790Sstevel@tonic-gate bad = 1; 5800Sstevel@tonic-gate break; 5810Sstevel@tonic-gate } 5820Sstevel@tonic-gate } else 5830Sstevel@tonic-gate #endif 5840Sstevel@tonic-gate { 5852712Snn35248 if (ssd->sel == rp->r_ds || 5862712Snn35248 ssd->sel == rp->r_es || 5872712Snn35248 ssd->sel == rp->r_fs || 5882712Snn35248 ssd->sel == rp->r_gs) { 5890Sstevel@tonic-gate bad = 1; 5900Sstevel@tonic-gate break; 5910Sstevel@tonic-gate } 5920Sstevel@tonic-gate } 5930Sstevel@tonic-gate 5940Sstevel@tonic-gate } while ((t = t->t_forw) != pp->p_tlist); 5950Sstevel@tonic-gate mutex_exit(&pp->p_lock); 5960Sstevel@tonic-gate 5970Sstevel@tonic-gate if (bad) { 5980Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 5990Sstevel@tonic-gate return (EBUSY); 6000Sstevel@tonic-gate } 6010Sstevel@tonic-gate } 6020Sstevel@tonic-gate 6030Sstevel@tonic-gate /* 6040Sstevel@tonic-gate * If acc1 is zero, clear the descriptor (including the 'present' bit) 6050Sstevel@tonic-gate */ 6062712Snn35248 if (ssd->acc1 == 0) { 6070Sstevel@tonic-gate bzero(dscrp, sizeof (*dscrp)); 6080Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6090Sstevel@tonic-gate return (0); 6100Sstevel@tonic-gate } 6110Sstevel@tonic-gate 6120Sstevel@tonic-gate /* 6130Sstevel@tonic-gate * Check segment type, allow segment not present and 6140Sstevel@tonic-gate * only user DPL (3). 6150Sstevel@tonic-gate */ 6162712Snn35248 if (SI86SSD_DPL(ssd) != SEL_UPL) { 6170Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6180Sstevel@tonic-gate return (EINVAL); 6190Sstevel@tonic-gate } 6200Sstevel@tonic-gate 6210Sstevel@tonic-gate #if defined(__amd64) 6220Sstevel@tonic-gate /* 6232712Snn35248 * Do not allow 32-bit applications to create 64-bit mode code 6242712Snn35248 * segments. 6250Sstevel@tonic-gate */ 6262712Snn35248 if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 && 6272712Snn35248 SI86SSD_ISLONG(ssd)) { 6280Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6290Sstevel@tonic-gate return (EINVAL); 6300Sstevel@tonic-gate } 6310Sstevel@tonic-gate #endif /* __amd64 */ 6320Sstevel@tonic-gate 6330Sstevel@tonic-gate /* 6340Sstevel@tonic-gate * Set up a code or data user segment descriptor. 6350Sstevel@tonic-gate */ 6362712Snn35248 if (SI86SSD_ISUSEG(ssd)) { 6372712Snn35248 ssd_to_usd(ssd, dscrp); 6380Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6390Sstevel@tonic-gate return (0); 6400Sstevel@tonic-gate } 6410Sstevel@tonic-gate 6420Sstevel@tonic-gate /* 6430Sstevel@tonic-gate * Allow a call gate only if the destination is in the LDT. 6440Sstevel@tonic-gate */ 6452712Snn35248 if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) { 6462712Snn35248 ssd_to_sgd(ssd, (gate_desc_t *)dscrp); 6470Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6480Sstevel@tonic-gate return (0); 6490Sstevel@tonic-gate } 6500Sstevel@tonic-gate 6510Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6520Sstevel@tonic-gate return (EINVAL); 6530Sstevel@tonic-gate } 6540Sstevel@tonic-gate 6550Sstevel@tonic-gate /* 6560Sstevel@tonic-gate * Allocate a private LDT for this process and initialize it with the 6571217Srab * default entries. 6580Sstevel@tonic-gate */ 6592712Snn35248 static void 6600Sstevel@tonic-gate setup_ldt(proc_t *pp) 6610Sstevel@tonic-gate { 6620Sstevel@tonic-gate user_desc_t *ldtp; /* descriptor pointer */ 6630Sstevel@tonic-gate pgcnt_t npages = btopr(MAXNLDT * sizeof (user_desc_t)); 6640Sstevel@tonic-gate 6650Sstevel@tonic-gate /* 6660Sstevel@tonic-gate * Allocate maximum virtual space we need for this LDT. 6670Sstevel@tonic-gate */ 6680Sstevel@tonic-gate ldtp = vmem_alloc(heap_arena, ptob(npages), VM_SLEEP); 6690Sstevel@tonic-gate 6700Sstevel@tonic-gate /* 6710Sstevel@tonic-gate * Allocate the minimum number of physical pages for LDT. 6720Sstevel@tonic-gate */ 6731217Srab (void) segkmem_xalloc(NULL, ldtp, MINNLDT * sizeof (user_desc_t), 6741217Srab VM_SLEEP, 0, segkmem_page_create, NULL); 6751217Srab 6760Sstevel@tonic-gate bzero(ldtp, ptob(btopr(MINNLDT * sizeof (user_desc_t)))); 6770Sstevel@tonic-gate 6780Sstevel@tonic-gate kpreempt_disable(); 6790Sstevel@tonic-gate 6800Sstevel@tonic-gate /* Update proc structure. XXX - need any locks here??? */ 6810Sstevel@tonic-gate 6820Sstevel@tonic-gate set_syssegd(&pp->p_ldt_desc, ldtp, MINNLDT * sizeof (user_desc_t) - 1, 6830Sstevel@tonic-gate SDT_SYSLDT, SEL_KPL); 6840Sstevel@tonic-gate 6850Sstevel@tonic-gate pp->p_ldtlimit = MINNLDT - 1; 6860Sstevel@tonic-gate pp->p_ldt = ldtp; 6870Sstevel@tonic-gate if (pp == curproc) 6880Sstevel@tonic-gate *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = pp->p_ldt_desc; 6890Sstevel@tonic-gate 6900Sstevel@tonic-gate kpreempt_enable(); 6910Sstevel@tonic-gate } 6920Sstevel@tonic-gate 6930Sstevel@tonic-gate /* 6940Sstevel@tonic-gate * Map the page corresponding to the selector entry. If the page is 6950Sstevel@tonic-gate * already mapped then it simply returns with the pointer to the entry. 6960Sstevel@tonic-gate * Otherwise it allocates a physical page for it and returns the pointer 6970Sstevel@tonic-gate * to the entry. Returns 0 for errors. 6980Sstevel@tonic-gate */ 6990Sstevel@tonic-gate static void * 7000Sstevel@tonic-gate ldt_map(proc_t *pp, uint_t seli) 7010Sstevel@tonic-gate { 7020Sstevel@tonic-gate caddr_t ent0_addr = (caddr_t)&pp->p_ldt[0]; 7030Sstevel@tonic-gate caddr_t ent_addr = (caddr_t)&pp->p_ldt[seli]; 7040Sstevel@tonic-gate volatile caddr_t page = (caddr_t)((uintptr_t)ent0_addr & (~PAGEOFFSET)); 7050Sstevel@tonic-gate caddr_t epage = (caddr_t)((uintptr_t)ent_addr & (~PAGEOFFSET)); 7060Sstevel@tonic-gate on_trap_data_t otd; 7070Sstevel@tonic-gate 7080Sstevel@tonic-gate ASSERT(pp->p_ldt != NULL); 7090Sstevel@tonic-gate 7100Sstevel@tonic-gate if (seli <= pp->p_ldtlimit) 7110Sstevel@tonic-gate return (ent_addr); 7120Sstevel@tonic-gate 7130Sstevel@tonic-gate /* 7140Sstevel@tonic-gate * We are increasing the size of the process's LDT. 7150Sstevel@tonic-gate * Make sure this and all intervening pages are mapped. 7160Sstevel@tonic-gate */ 7170Sstevel@tonic-gate while (page <= epage) { 7180Sstevel@tonic-gate if (!on_trap(&otd, OT_DATA_ACCESS)) 7190Sstevel@tonic-gate (void) *(volatile int *)page; /* peek at the page */ 7200Sstevel@tonic-gate else { /* Allocate a physical page */ 7211217Srab (void) segkmem_xalloc(NULL, page, PAGESIZE, VM_SLEEP, 0, 7221217Srab segkmem_page_create, NULL); 7230Sstevel@tonic-gate bzero(page, PAGESIZE); 7240Sstevel@tonic-gate } 7250Sstevel@tonic-gate no_trap(); 7260Sstevel@tonic-gate page += PAGESIZE; 7270Sstevel@tonic-gate } 7280Sstevel@tonic-gate 7290Sstevel@tonic-gate /* XXX - need any locks to update proc_t or gdt ??? */ 7300Sstevel@tonic-gate 7310Sstevel@tonic-gate ASSERT(curproc == pp); 7320Sstevel@tonic-gate 7330Sstevel@tonic-gate kpreempt_disable(); 7340Sstevel@tonic-gate pp->p_ldtlimit = seli; 7350Sstevel@tonic-gate SYSSEGD_SETLIMIT(&pp->p_ldt_desc, (seli+1) * sizeof (user_desc_t) -1); 7360Sstevel@tonic-gate 7370Sstevel@tonic-gate ldt_load(); 7380Sstevel@tonic-gate kpreempt_enable(); 7390Sstevel@tonic-gate 7400Sstevel@tonic-gate return (ent_addr); 7410Sstevel@tonic-gate } 7420Sstevel@tonic-gate 7430Sstevel@tonic-gate /* 7440Sstevel@tonic-gate * Free up the kernel memory used for LDT of this process. 7450Sstevel@tonic-gate */ 7461217Srab static void 7470Sstevel@tonic-gate ldt_free(proc_t *pp) 7480Sstevel@tonic-gate { 7490Sstevel@tonic-gate on_trap_data_t otd; 7500Sstevel@tonic-gate caddr_t start, end; 7510Sstevel@tonic-gate volatile caddr_t addr; 7520Sstevel@tonic-gate 7530Sstevel@tonic-gate ASSERT(pp->p_ldt != NULL); 7540Sstevel@tonic-gate 7550Sstevel@tonic-gate mutex_enter(&pp->p_ldtlock); 7560Sstevel@tonic-gate start = (caddr_t)pp->p_ldt; /* beginning of the LDT */ 7570Sstevel@tonic-gate end = start + (pp->p_ldtlimit * sizeof (user_desc_t)); 7580Sstevel@tonic-gate 7590Sstevel@tonic-gate /* Free the physical page(s) used for mapping LDT */ 7600Sstevel@tonic-gate for (addr = start; addr <= end; addr += PAGESIZE) { 7610Sstevel@tonic-gate if (!on_trap(&otd, OT_DATA_ACCESS)) { 7620Sstevel@tonic-gate /* peek at the address */ 7630Sstevel@tonic-gate (void) *(volatile int *)addr; 7640Sstevel@tonic-gate segkmem_free(NULL, addr, PAGESIZE); 7650Sstevel@tonic-gate } 7660Sstevel@tonic-gate } 7670Sstevel@tonic-gate no_trap(); 7680Sstevel@tonic-gate 7690Sstevel@tonic-gate /* Free up the virtual address space used for this LDT */ 7700Sstevel@tonic-gate vmem_free(heap_arena, pp->p_ldt, 7710Sstevel@tonic-gate ptob(btopr(MAXNLDT * sizeof (user_desc_t)))); 7720Sstevel@tonic-gate kpreempt_disable(); 7730Sstevel@tonic-gate pp->p_ldt = NULL; 7741217Srab pp->p_ldt_desc = zero_sdesc; 7751217Srab pp->p_ldtlimit = 0; 7761217Srab 7770Sstevel@tonic-gate if (pp == curproc) 7781217Srab ldt_unload(); 7790Sstevel@tonic-gate kpreempt_enable(); 7800Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 7810Sstevel@tonic-gate } 7820Sstevel@tonic-gate 7830Sstevel@tonic-gate /* 7840Sstevel@tonic-gate * On fork copy new ldt for child. 7850Sstevel@tonic-gate */ 7861217Srab void 7870Sstevel@tonic-gate ldt_dup(proc_t *pp, proc_t *cp) 7880Sstevel@tonic-gate { 7890Sstevel@tonic-gate on_trap_data_t otd; 7900Sstevel@tonic-gate caddr_t start, end; 7910Sstevel@tonic-gate volatile caddr_t addr, caddr; 7920Sstevel@tonic-gate int minsize; 7930Sstevel@tonic-gate 7941217Srab ASSERT(pp->p_ldt); 7950Sstevel@tonic-gate 7961217Srab setup_ldt(cp); 7970Sstevel@tonic-gate 7980Sstevel@tonic-gate mutex_enter(&pp->p_ldtlock); 7990Sstevel@tonic-gate cp->p_ldtlimit = pp->p_ldtlimit; 8000Sstevel@tonic-gate SYSSEGD_SETLIMIT(&cp->p_ldt_desc, 8010Sstevel@tonic-gate (pp->p_ldtlimit+1) * sizeof (user_desc_t) -1); 8020Sstevel@tonic-gate start = (caddr_t)pp->p_ldt; /* beginning of the LDT */ 8030Sstevel@tonic-gate end = start + (pp->p_ldtlimit * sizeof (user_desc_t)); 8040Sstevel@tonic-gate caddr = (caddr_t)cp->p_ldt; /* child LDT start */ 8050Sstevel@tonic-gate 8060Sstevel@tonic-gate minsize = ((MINNLDT * sizeof (user_desc_t)) + PAGESIZE) & ~PAGEOFFSET; 8070Sstevel@tonic-gate /* Walk thru the physical page(s) used for parent's LDT */ 8080Sstevel@tonic-gate for (addr = start; addr <= end; addr += PAGESIZE, caddr += PAGESIZE) { 8090Sstevel@tonic-gate if (!on_trap(&otd, OT_DATA_ACCESS)) { 8100Sstevel@tonic-gate (void) *(volatile int *)addr; /* peek at the address */ 8110Sstevel@tonic-gate /* allocate a page if necessary */ 8120Sstevel@tonic-gate if (caddr >= ((caddr_t)cp->p_ldt + minsize)) { 8131217Srab (void) segkmem_xalloc(NULL, caddr, PAGESIZE, 8141217Srab VM_SLEEP, 0, segkmem_page_create, NULL); 8150Sstevel@tonic-gate } 8160Sstevel@tonic-gate bcopy(addr, caddr, PAGESIZE); 8170Sstevel@tonic-gate } 8180Sstevel@tonic-gate } 8190Sstevel@tonic-gate no_trap(); 8200Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 8210Sstevel@tonic-gate } 822