10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*2712Snn35248 * Common Development and Distribution License (the "License"). 6*2712Snn35248 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 221217Srab * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ 270Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ 280Sstevel@tonic-gate /* All Rights Reserved */ 290Sstevel@tonic-gate 300Sstevel@tonic-gate /* Copyright (c) 1987, 1988 Microsoft Corporation */ 310Sstevel@tonic-gate /* All Rights Reserved */ 320Sstevel@tonic-gate 330Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 340Sstevel@tonic-gate 350Sstevel@tonic-gate #include <sys/param.h> 360Sstevel@tonic-gate #include <sys/types.h> 370Sstevel@tonic-gate #include <sys/sysmacros.h> 380Sstevel@tonic-gate #include <sys/systm.h> 390Sstevel@tonic-gate #include <sys/signal.h> 400Sstevel@tonic-gate #include <sys/errno.h> 410Sstevel@tonic-gate #include <sys/fault.h> 420Sstevel@tonic-gate #include <sys/syscall.h> 430Sstevel@tonic-gate #include <sys/cpuvar.h> 440Sstevel@tonic-gate #include <sys/sysi86.h> 450Sstevel@tonic-gate #include <sys/psw.h> 460Sstevel@tonic-gate #include <sys/cred.h> 470Sstevel@tonic-gate #include <sys/policy.h> 480Sstevel@tonic-gate #include <sys/thread.h> 490Sstevel@tonic-gate #include <sys/debug.h> 500Sstevel@tonic-gate #include <sys/ontrap.h> 510Sstevel@tonic-gate #include <sys/privregs.h> 520Sstevel@tonic-gate #include <sys/x86_archext.h> 530Sstevel@tonic-gate #include <sys/vmem.h> 540Sstevel@tonic-gate #include <sys/kmem.h> 550Sstevel@tonic-gate #include <sys/mman.h> 560Sstevel@tonic-gate #include <sys/archsystm.h> 570Sstevel@tonic-gate #include <vm/hat.h> 580Sstevel@tonic-gate #include <vm/as.h> 590Sstevel@tonic-gate #include <vm/seg.h> 600Sstevel@tonic-gate #include <vm/seg_kmem.h> 610Sstevel@tonic-gate #include <vm/faultcode.h> 620Sstevel@tonic-gate #include <sys/fp.h> 630Sstevel@tonic-gate #include <sys/cmn_err.h> 640Sstevel@tonic-gate 651217Srab static void setup_ldt(proc_t *pp); 660Sstevel@tonic-gate static void *ldt_map(proc_t *pp, uint_t seli); 671217Srab static void ldt_free(proc_t *pp); 680Sstevel@tonic-gate 690Sstevel@tonic-gate extern void rtcsync(void); 700Sstevel@tonic-gate extern long ggmtl(void); 710Sstevel@tonic-gate extern void sgmtl(long); 720Sstevel@tonic-gate 730Sstevel@tonic-gate /* 740Sstevel@tonic-gate * sysi86 System Call 750Sstevel@tonic-gate */ 760Sstevel@tonic-gate 770Sstevel@tonic-gate /* ARGSUSED */ 780Sstevel@tonic-gate int 790Sstevel@tonic-gate sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) 800Sstevel@tonic-gate { 81*2712Snn35248 struct ssd ssd; 820Sstevel@tonic-gate int error = 0; 830Sstevel@tonic-gate int c; 840Sstevel@tonic-gate proc_t *pp = curproc; 850Sstevel@tonic-gate 860Sstevel@tonic-gate switch (cmd) { 870Sstevel@tonic-gate 880Sstevel@tonic-gate /* 890Sstevel@tonic-gate * The SI86V86 subsystem call of the SYSI86 system call 900Sstevel@tonic-gate * supports only one subcode -- V86SC_IOPL. 910Sstevel@tonic-gate */ 920Sstevel@tonic-gate case SI86V86: 930Sstevel@tonic-gate if (arg1 == V86SC_IOPL) { 940Sstevel@tonic-gate struct regs *rp = lwptoregs(ttolwp(curthread)); 950Sstevel@tonic-gate greg_t oldpl = rp->r_ps & PS_IOPL; 960Sstevel@tonic-gate greg_t newpl = arg2 & PS_IOPL; 970Sstevel@tonic-gate 980Sstevel@tonic-gate /* 990Sstevel@tonic-gate * Must be privileged to run this system call 1000Sstevel@tonic-gate * if giving more io privilege. 1010Sstevel@tonic-gate */ 1020Sstevel@tonic-gate if (newpl > oldpl && (error = 1030Sstevel@tonic-gate secpolicy_sys_config(CRED(), B_FALSE)) != 0) 1040Sstevel@tonic-gate return (set_errno(error)); 1050Sstevel@tonic-gate rp->r_ps ^= oldpl ^ newpl; 1060Sstevel@tonic-gate } else 1070Sstevel@tonic-gate error = EINVAL; 1080Sstevel@tonic-gate break; 1090Sstevel@tonic-gate 1100Sstevel@tonic-gate /* 1110Sstevel@tonic-gate * Set a segment descriptor 1120Sstevel@tonic-gate */ 1130Sstevel@tonic-gate case SI86DSCR: 1140Sstevel@tonic-gate /* 1150Sstevel@tonic-gate * There are considerable problems here manipulating 1160Sstevel@tonic-gate * resources shared by many running lwps. Get everyone 1170Sstevel@tonic-gate * into a safe state before changing the LDT. 1180Sstevel@tonic-gate */ 1190Sstevel@tonic-gate if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) { 1200Sstevel@tonic-gate error = EINTR; 1210Sstevel@tonic-gate break; 1220Sstevel@tonic-gate } 123*2712Snn35248 124*2712Snn35248 if (get_udatamodel() == DATAMODEL_LP64) { 125*2712Snn35248 error = EINVAL; 126*2712Snn35248 break; 127*2712Snn35248 } 128*2712Snn35248 129*2712Snn35248 if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) { 130*2712Snn35248 error = EFAULT; 131*2712Snn35248 break; 132*2712Snn35248 } 133*2712Snn35248 134*2712Snn35248 error = setdscr(&ssd); 135*2712Snn35248 1360Sstevel@tonic-gate mutex_enter(&pp->p_lock); 1370Sstevel@tonic-gate if (curthread != pp->p_agenttp) 1380Sstevel@tonic-gate continuelwps(pp); 1390Sstevel@tonic-gate mutex_exit(&pp->p_lock); 1400Sstevel@tonic-gate break; 1410Sstevel@tonic-gate 1420Sstevel@tonic-gate case SI86FPHW: 1430Sstevel@tonic-gate c = fp_kind & 0xff; 1440Sstevel@tonic-gate if (suword32((void *)arg1, c) == -1) 1450Sstevel@tonic-gate error = EFAULT; 1460Sstevel@tonic-gate break; 1470Sstevel@tonic-gate 1480Sstevel@tonic-gate case SI86FPSTART: 1490Sstevel@tonic-gate /* 1500Sstevel@tonic-gate * arg1 is the address of _fp_hw 1510Sstevel@tonic-gate * arg2 is the desired x87 FCW value 1520Sstevel@tonic-gate * arg3 is the desired SSE MXCSR value 1530Sstevel@tonic-gate * a return value of one means SSE hardware, else none. 1540Sstevel@tonic-gate */ 1550Sstevel@tonic-gate c = fp_kind & 0xff; 1560Sstevel@tonic-gate if (suword32((void *)arg1, c) == -1) { 1570Sstevel@tonic-gate error = EFAULT; 1580Sstevel@tonic-gate break; 1590Sstevel@tonic-gate } 1600Sstevel@tonic-gate fpsetcw((uint16_t)arg2, (uint32_t)arg3); 1610Sstevel@tonic-gate return (fp_kind == __FP_SSE ? 1 : 0); 1620Sstevel@tonic-gate 1630Sstevel@tonic-gate /* real time clock management commands */ 1640Sstevel@tonic-gate 1650Sstevel@tonic-gate case WTODC: 1660Sstevel@tonic-gate if ((error = secpolicy_settime(CRED())) == 0) { 1670Sstevel@tonic-gate timestruc_t ts; 1680Sstevel@tonic-gate mutex_enter(&tod_lock); 1690Sstevel@tonic-gate gethrestime(&ts); 1700Sstevel@tonic-gate tod_set(ts); 1710Sstevel@tonic-gate mutex_exit(&tod_lock); 1720Sstevel@tonic-gate } 1730Sstevel@tonic-gate break; 1740Sstevel@tonic-gate 1750Sstevel@tonic-gate /* Give some timezone playing room */ 1760Sstevel@tonic-gate #define ONEWEEK (7 * 24 * 60 * 60) 1770Sstevel@tonic-gate 1780Sstevel@tonic-gate case SGMTL: 1790Sstevel@tonic-gate /* 1800Sstevel@tonic-gate * Called from 32 bit land, negative values 1810Sstevel@tonic-gate * are not sign extended, so we do that here 1820Sstevel@tonic-gate * by casting it to an int and back. We also 1830Sstevel@tonic-gate * clamp the value to within reason and detect 1840Sstevel@tonic-gate * when a 64 bit call overflows an int. 1850Sstevel@tonic-gate */ 1860Sstevel@tonic-gate if ((error = secpolicy_settime(CRED())) == 0) { 1870Sstevel@tonic-gate int newlag = (int)arg1; 1880Sstevel@tonic-gate 1890Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1900Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE && 1910Sstevel@tonic-gate (long)newlag != (long)arg1) { 1920Sstevel@tonic-gate error = EOVERFLOW; 1930Sstevel@tonic-gate } else 1940Sstevel@tonic-gate #endif 1950Sstevel@tonic-gate if (newlag >= -ONEWEEK && newlag <= ONEWEEK) 1960Sstevel@tonic-gate sgmtl(newlag); 1970Sstevel@tonic-gate else 1980Sstevel@tonic-gate error = EOVERFLOW; 1990Sstevel@tonic-gate } 2000Sstevel@tonic-gate break; 2010Sstevel@tonic-gate 2020Sstevel@tonic-gate case GGMTL: 2030Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) { 2040Sstevel@tonic-gate if (sulword((void *)arg1, ggmtl()) == -1) 2050Sstevel@tonic-gate error = EFAULT; 2060Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 2070Sstevel@tonic-gate } else { 2080Sstevel@tonic-gate time_t gmtl; 2090Sstevel@tonic-gate 2100Sstevel@tonic-gate if ((gmtl = ggmtl()) > INT32_MAX) { 2110Sstevel@tonic-gate /* 2120Sstevel@tonic-gate * Since gmt_lag can at most be 2130Sstevel@tonic-gate * +/- 12 hours, something is 2140Sstevel@tonic-gate * *seriously* messed up here. 2150Sstevel@tonic-gate */ 2160Sstevel@tonic-gate error = EOVERFLOW; 2170Sstevel@tonic-gate } else if (suword32((void *)arg1, (int32_t)gmtl) == -1) 2180Sstevel@tonic-gate error = EFAULT; 2190Sstevel@tonic-gate #endif 2200Sstevel@tonic-gate } 2210Sstevel@tonic-gate break; 2220Sstevel@tonic-gate 2230Sstevel@tonic-gate case RTCSYNC: 2240Sstevel@tonic-gate if ((error = secpolicy_settime(CRED())) == 0) 2250Sstevel@tonic-gate rtcsync(); 2260Sstevel@tonic-gate break; 2270Sstevel@tonic-gate 2280Sstevel@tonic-gate /* END OF real time clock management commands */ 2290Sstevel@tonic-gate 2300Sstevel@tonic-gate default: 2310Sstevel@tonic-gate error = EINVAL; 2320Sstevel@tonic-gate break; 2330Sstevel@tonic-gate } 2340Sstevel@tonic-gate return (error == 0 ? 0 : set_errno(error)); 2350Sstevel@tonic-gate } 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate void 2380Sstevel@tonic-gate usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel) 2390Sstevel@tonic-gate { 2400Sstevel@tonic-gate ssd->bo = USEGD_GETBASE(usd); 2410Sstevel@tonic-gate ssd->ls = USEGD_GETLIMIT(usd); 2420Sstevel@tonic-gate ssd->sel = sel; 2430Sstevel@tonic-gate 2440Sstevel@tonic-gate /* 2450Sstevel@tonic-gate * set type, dpl and present bits. 2460Sstevel@tonic-gate */ 2470Sstevel@tonic-gate ssd->acc1 = usd->usd_type; 2480Sstevel@tonic-gate ssd->acc1 |= usd->usd_dpl << 5; 2490Sstevel@tonic-gate ssd->acc1 |= usd->usd_p << (5 + 2); 2500Sstevel@tonic-gate 2510Sstevel@tonic-gate /* 2520Sstevel@tonic-gate * set avl, DB and granularity bits. 2530Sstevel@tonic-gate */ 2540Sstevel@tonic-gate ssd->acc2 = usd->usd_avl; 2550Sstevel@tonic-gate 2560Sstevel@tonic-gate #if defined(__amd64) 2570Sstevel@tonic-gate ssd->acc2 |= usd->usd_long << 1; 2580Sstevel@tonic-gate #else 2590Sstevel@tonic-gate ssd->acc2 |= usd->usd_reserved << 1; 2600Sstevel@tonic-gate #endif 2610Sstevel@tonic-gate 2620Sstevel@tonic-gate ssd->acc2 |= usd->usd_def32 << (1 + 1); 2630Sstevel@tonic-gate ssd->acc2 |= usd->usd_gran << (1 + 1 + 1); 2640Sstevel@tonic-gate } 2650Sstevel@tonic-gate 2660Sstevel@tonic-gate static void 2670Sstevel@tonic-gate ssd_to_usd(struct ssd *ssd, user_desc_t *usd) 2680Sstevel@tonic-gate { 2690Sstevel@tonic-gate 2700Sstevel@tonic-gate USEGD_SETBASE(usd, ssd->bo); 2710Sstevel@tonic-gate USEGD_SETLIMIT(usd, ssd->ls); 2720Sstevel@tonic-gate 2730Sstevel@tonic-gate /* 2740Sstevel@tonic-gate * set type, dpl and present bits. 2750Sstevel@tonic-gate */ 2760Sstevel@tonic-gate usd->usd_type = ssd->acc1; 2770Sstevel@tonic-gate usd->usd_dpl = ssd->acc1 >> 5; 2780Sstevel@tonic-gate usd->usd_p = ssd->acc1 >> (5 + 2); 2790Sstevel@tonic-gate 2800Sstevel@tonic-gate ASSERT(usd->usd_type >= SDT_MEMRO); 2810Sstevel@tonic-gate ASSERT(usd->usd_dpl == SEL_UPL); 2820Sstevel@tonic-gate 2830Sstevel@tonic-gate /* 2840Sstevel@tonic-gate * set avl, DB and granularity bits. 2850Sstevel@tonic-gate */ 2860Sstevel@tonic-gate usd->usd_avl = ssd->acc2; 2870Sstevel@tonic-gate 2880Sstevel@tonic-gate #if defined(__amd64) 2890Sstevel@tonic-gate usd->usd_long = ssd->acc2 >> 1; 2900Sstevel@tonic-gate #else 2910Sstevel@tonic-gate usd->usd_reserved = ssd->acc2 >> 1; 2920Sstevel@tonic-gate #endif 2930Sstevel@tonic-gate 2940Sstevel@tonic-gate usd->usd_def32 = ssd->acc2 >> (1 + 1); 2950Sstevel@tonic-gate usd->usd_gran = ssd->acc2 >> (1 + 1 + 1); 2960Sstevel@tonic-gate } 2970Sstevel@tonic-gate 2980Sstevel@tonic-gate static void 2990Sstevel@tonic-gate ssd_to_sgd(struct ssd *ssd, gate_desc_t *sgd) 3000Sstevel@tonic-gate { 3010Sstevel@tonic-gate 3020Sstevel@tonic-gate sgd->sgd_looffset = ssd->bo; 3030Sstevel@tonic-gate sgd->sgd_hioffset = ssd->bo >> 16; 3040Sstevel@tonic-gate 3050Sstevel@tonic-gate sgd->sgd_selector = ssd->ls; 3060Sstevel@tonic-gate /* 3070Sstevel@tonic-gate * set type, dpl and present bits. 3080Sstevel@tonic-gate */ 3090Sstevel@tonic-gate sgd->sgd_type = ssd->acc1; 3100Sstevel@tonic-gate sgd->sgd_dpl = ssd->acc1 >> 5; 3110Sstevel@tonic-gate sgd->sgd_p = ssd->acc1 >> 7; 3120Sstevel@tonic-gate ASSERT(sgd->sgd_type == SDT_SYSCGT); 3130Sstevel@tonic-gate ASSERT(sgd->sgd_dpl == SEL_UPL); 3140Sstevel@tonic-gate 3150Sstevel@tonic-gate #if defined(__i386) /* reserved, ignored in amd64 */ 3160Sstevel@tonic-gate sgd->sgd_stkcpy = 0; 3170Sstevel@tonic-gate #endif 3180Sstevel@tonic-gate } 3190Sstevel@tonic-gate 3201217Srab /* 3211217Srab * Load LDT register with the current process's LDT. 3221217Srab */ 3231217Srab void 3241217Srab ldt_load(void) 3251217Srab { 3261217Srab /* 3271217Srab */ 3281217Srab *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = curproc->p_ldt_desc; 3291217Srab wr_ldtr(ULDT_SEL); 3301217Srab } 3311217Srab 3321217Srab /* 3331217Srab * Store a NULL selector in the LDTR. All subsequent illegal references to 3341217Srab * the LDT will result in a #gp. 3351217Srab */ 3361217Srab void 3371217Srab ldt_unload(void) 3381217Srab { 3391217Srab CPU->cpu_gdt[GDT_LDT] = zero_udesc; 3401217Srab wr_ldtr(0); 3411217Srab } 3420Sstevel@tonic-gate 3430Sstevel@tonic-gate /*ARGSUSED*/ 3440Sstevel@tonic-gate static void 3451217Srab ldt_savectx(proc_t *p) 3460Sstevel@tonic-gate { 3471217Srab ASSERT(p->p_ldt != NULL); 3481217Srab ASSERT(p == curproc); 3491217Srab 3500Sstevel@tonic-gate #if defined(__amd64) 3510Sstevel@tonic-gate /* 3520Sstevel@tonic-gate * The 64-bit kernel must be sure to clear any stale ldt 3530Sstevel@tonic-gate * selectors when context switching away from a process that 3540Sstevel@tonic-gate * has a private ldt. Consider the following example: 3550Sstevel@tonic-gate * 3560Sstevel@tonic-gate * Wine creats a ldt descriptor and points a segment register 3570Sstevel@tonic-gate * to it. 3580Sstevel@tonic-gate * 3590Sstevel@tonic-gate * We then context switch away from wine lwp to kernel 3600Sstevel@tonic-gate * thread and hit breakpoint in kernel with kmdb 3610Sstevel@tonic-gate * 3620Sstevel@tonic-gate * When we continue and resume from kmdb we will #gp 3630Sstevel@tonic-gate * fault since kmdb will have saved the stale ldt selector 3640Sstevel@tonic-gate * from wine and will try to restore it but we are no longer in 3650Sstevel@tonic-gate * the context of the wine process and do not have our 3660Sstevel@tonic-gate * ldtr register pointing to the private ldt. 3670Sstevel@tonic-gate */ 3680Sstevel@tonic-gate clr_ldt_sregs(); 3690Sstevel@tonic-gate #endif 3700Sstevel@tonic-gate 3711217Srab ldt_unload(); 3720Sstevel@tonic-gate cpu_fast_syscall_enable(NULL); 3730Sstevel@tonic-gate } 3740Sstevel@tonic-gate 3751217Srab static void 3761217Srab ldt_restorectx(proc_t *p) 3771217Srab { 3781217Srab ASSERT(p->p_ldt != NULL); 3791217Srab ASSERT(p == curproc); 3801217Srab 3811217Srab ldt_load(); 3821217Srab cpu_fast_syscall_disable(NULL); 3831217Srab } 3841217Srab 3850Sstevel@tonic-gate /* 3861217Srab * When a process with a private LDT execs, fast syscalls must be enabled for 3871217Srab * the new process image. 3880Sstevel@tonic-gate */ 3890Sstevel@tonic-gate /* ARGSUSED */ 3900Sstevel@tonic-gate static void 3911217Srab ldt_freectx(proc_t *p, int isexec) 3920Sstevel@tonic-gate { 3931217Srab ASSERT(p->p_ldt); 3941217Srab 3950Sstevel@tonic-gate if (isexec) { 3960Sstevel@tonic-gate kpreempt_disable(); 3970Sstevel@tonic-gate cpu_fast_syscall_enable(NULL); 3980Sstevel@tonic-gate kpreempt_enable(); 3990Sstevel@tonic-gate } 4001217Srab 4011217Srab /* 4021217Srab * ldt_free() will free the memory used by the private LDT, reset the 4031217Srab * process's descriptor, and re-program the LDTR. 4041217Srab */ 4051217Srab ldt_free(p); 4060Sstevel@tonic-gate } 4070Sstevel@tonic-gate 4080Sstevel@tonic-gate /* 4090Sstevel@tonic-gate * Install ctx op that ensures syscall/sysenter are disabled. 4100Sstevel@tonic-gate * See comments below. 4110Sstevel@tonic-gate * 4121217Srab * When a thread with a private LDT forks, the new process 4130Sstevel@tonic-gate * must have the LDT context ops installed. 4140Sstevel@tonic-gate */ 4150Sstevel@tonic-gate /* ARGSUSED */ 4160Sstevel@tonic-gate static void 4171217Srab ldt_installctx(proc_t *p, proc_t *cp) 4180Sstevel@tonic-gate { 4191217Srab proc_t *targ = p; 4201217Srab kthread_t *t; 4210Sstevel@tonic-gate 4220Sstevel@tonic-gate /* 4231217Srab * If this is a fork, operate on the child process. 4240Sstevel@tonic-gate */ 4251217Srab if (cp != NULL) { 4261217Srab targ = cp; 4271217Srab ldt_dup(p, cp); 4281217Srab } 4290Sstevel@tonic-gate 4301217Srab /* 4311217Srab * The process context ops expect the target process as their argument. 4321217Srab */ 4331217Srab ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx, 4341217Srab ldt_installctx, ldt_savectx, ldt_freectx) == 0); 4350Sstevel@tonic-gate 4361217Srab installpctx(targ, targ, ldt_savectx, ldt_restorectx, 4371217Srab ldt_installctx, ldt_savectx, ldt_freectx); 4380Sstevel@tonic-gate 4390Sstevel@tonic-gate /* 4400Sstevel@tonic-gate * We've just disabled fast system call and return instructions; take 4410Sstevel@tonic-gate * the slow path out to make sure we don't try to use one to return 4421217Srab * back to user. We must set t_post_sys for every thread in the 4431217Srab * process to make sure none of them escape out via fast return. 4440Sstevel@tonic-gate */ 4451217Srab 4461217Srab mutex_enter(&targ->p_lock); 4471217Srab t = targ->p_tlist; 4481217Srab do { 4491217Srab t->t_post_sys = 1; 4501217Srab } while ((t = t->t_forw) != targ->p_tlist); 4511217Srab mutex_exit(&targ->p_lock); 4520Sstevel@tonic-gate } 4530Sstevel@tonic-gate 454*2712Snn35248 int 455*2712Snn35248 setdscr(struct ssd *ssd) 4560Sstevel@tonic-gate { 4570Sstevel@tonic-gate ushort_t seli; /* selector index */ 4580Sstevel@tonic-gate user_desc_t *dscrp; /* descriptor pointer */ 4590Sstevel@tonic-gate proc_t *pp = ttoproc(curthread); 4600Sstevel@tonic-gate 4610Sstevel@tonic-gate /* 4620Sstevel@tonic-gate * LDT segments: executable and data at DPL 3 only. 4630Sstevel@tonic-gate */ 464*2712Snn35248 if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel)) 4650Sstevel@tonic-gate return (EINVAL); 4660Sstevel@tonic-gate 4670Sstevel@tonic-gate /* 4680Sstevel@tonic-gate * check the selector index. 4690Sstevel@tonic-gate */ 470*2712Snn35248 seli = SELTOIDX(ssd->sel); 4711217Srab if (seli >= MAXNLDT || seli < LDT_UDBASE) 4720Sstevel@tonic-gate return (EINVAL); 4730Sstevel@tonic-gate 4740Sstevel@tonic-gate mutex_enter(&pp->p_ldtlock); 4750Sstevel@tonic-gate 4760Sstevel@tonic-gate /* 4770Sstevel@tonic-gate * If this is the first time for this process then setup a 4780Sstevel@tonic-gate * private LDT for it. 4790Sstevel@tonic-gate */ 4800Sstevel@tonic-gate if (pp->p_ldt == NULL) { 4811217Srab kpreempt_disable(); 4821217Srab setup_ldt(pp); 4830Sstevel@tonic-gate 4840Sstevel@tonic-gate /* 4850Sstevel@tonic-gate * Now that this process has a private LDT, the use of 4860Sstevel@tonic-gate * the syscall/sysret and sysenter/sysexit instructions 4870Sstevel@tonic-gate * is forbidden for this processes because they destroy 4880Sstevel@tonic-gate * the contents of %cs and %ss segment registers. 4890Sstevel@tonic-gate * 4901217Srab * Explicity disable them here and add a context handler 4911217Srab * to the process. Note that disabling 4920Sstevel@tonic-gate * them here means we can't use sysret or sysexit on 4930Sstevel@tonic-gate * the way out of this system call - so we force this 4940Sstevel@tonic-gate * thread to take the slow path (which doesn't make use 4950Sstevel@tonic-gate * of sysenter or sysexit) back out. 4960Sstevel@tonic-gate */ 4970Sstevel@tonic-gate 4981217Srab ldt_installctx(pp, NULL); 4990Sstevel@tonic-gate 5000Sstevel@tonic-gate cpu_fast_syscall_disable(NULL); 5011217Srab 5020Sstevel@tonic-gate ASSERT(curthread->t_post_sys != 0); 5030Sstevel@tonic-gate wr_ldtr(ULDT_SEL); 5041217Srab kpreempt_enable(); 5050Sstevel@tonic-gate } 5060Sstevel@tonic-gate 5070Sstevel@tonic-gate if (ldt_map(pp, seli) == NULL) { 5080Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 5090Sstevel@tonic-gate return (ENOMEM); 5100Sstevel@tonic-gate } 5110Sstevel@tonic-gate 5120Sstevel@tonic-gate ASSERT(seli <= pp->p_ldtlimit); 5130Sstevel@tonic-gate dscrp = &pp->p_ldt[seli]; 5140Sstevel@tonic-gate 5150Sstevel@tonic-gate /* 5160Sstevel@tonic-gate * On the 64-bit kernel, this is where things get more subtle. 5170Sstevel@tonic-gate * Recall that in the 64-bit kernel, when we enter the kernel we 5180Sstevel@tonic-gate * deliberately -don't- reload the segment selectors we came in on 5190Sstevel@tonic-gate * for %ds, %es, %fs or %gs. Messing with selectors is expensive, 5200Sstevel@tonic-gate * and the underlying descriptors are essentially ignored by the 5210Sstevel@tonic-gate * hardware in long mode - except for the base that we override with 5220Sstevel@tonic-gate * the gsbase MSRs. 5230Sstevel@tonic-gate * 5240Sstevel@tonic-gate * However, there's one unfortunate issue with this rosy picture -- 5250Sstevel@tonic-gate * a descriptor that's not marked as 'present' will still generate 5260Sstevel@tonic-gate * an #np when loading a segment register. 5270Sstevel@tonic-gate * 5280Sstevel@tonic-gate * Consider this case. An lwp creates a harmless LDT entry, points 5290Sstevel@tonic-gate * one of it's segment registers at it, then tells the kernel (here) 5300Sstevel@tonic-gate * to delete it. In the 32-bit kernel, the #np will happen on the 5310Sstevel@tonic-gate * way back to userland where we reload the segment registers, and be 5320Sstevel@tonic-gate * handled in kern_gpfault(). In the 64-bit kernel, the same thing 5330Sstevel@tonic-gate * will happen in the normal case too. However, if we're trying to 5340Sstevel@tonic-gate * use a debugger that wants to save and restore the segment registers, 5350Sstevel@tonic-gate * and the debugger things that we have valid segment registers, we 5360Sstevel@tonic-gate * have the problem that the debugger will try and restore the 5370Sstevel@tonic-gate * segment register that points at the now 'not present' descriptor 5380Sstevel@tonic-gate * and will take a #np right there. 5390Sstevel@tonic-gate * 5400Sstevel@tonic-gate * We should obviously fix the debugger to be paranoid about 5410Sstevel@tonic-gate * -not- restoring segment registers that point to bad descriptors; 5420Sstevel@tonic-gate * however we can prevent the problem here if we check to see if any 5430Sstevel@tonic-gate * of the segment registers are still pointing at the thing we're 5440Sstevel@tonic-gate * destroying; if they are, return an error instead. (That also seems 5450Sstevel@tonic-gate * a lot better failure mode than SIGKILL and a core file 5460Sstevel@tonic-gate * from kern_gpfault() too.) 5470Sstevel@tonic-gate */ 548*2712Snn35248 if (SI86SSD_PRES(ssd) == 0) { 5490Sstevel@tonic-gate kthread_t *t; 5500Sstevel@tonic-gate int bad = 0; 5510Sstevel@tonic-gate 5520Sstevel@tonic-gate /* 5530Sstevel@tonic-gate * Look carefully at the segment registers of every lwp 5540Sstevel@tonic-gate * in the process (they're all stopped by our caller). 5550Sstevel@tonic-gate * If we're about to invalidate a descriptor that's still 5560Sstevel@tonic-gate * being referenced by *any* of them, return an error, 5570Sstevel@tonic-gate * rather than having them #gp on their way out of the kernel. 5580Sstevel@tonic-gate */ 5590Sstevel@tonic-gate ASSERT(pp->p_lwprcnt == 1); 5600Sstevel@tonic-gate 5610Sstevel@tonic-gate mutex_enter(&pp->p_lock); 5620Sstevel@tonic-gate t = pp->p_tlist; 5630Sstevel@tonic-gate do { 5640Sstevel@tonic-gate klwp_t *lwp = ttolwp(t); 5650Sstevel@tonic-gate struct regs *rp = lwp->lwp_regs; 5660Sstevel@tonic-gate #if defined(__amd64) 5670Sstevel@tonic-gate pcb_t *pcb = &lwp->lwp_pcb; 5680Sstevel@tonic-gate #endif 5690Sstevel@tonic-gate 570*2712Snn35248 if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) { 5710Sstevel@tonic-gate bad = 1; 5720Sstevel@tonic-gate break; 5730Sstevel@tonic-gate } 5740Sstevel@tonic-gate 5750Sstevel@tonic-gate #if defined(__amd64) 5760Sstevel@tonic-gate if (pcb->pcb_flags & RUPDATE_PENDING) { 577*2712Snn35248 if (ssd->sel == pcb->pcb_ds || 578*2712Snn35248 ssd->sel == pcb->pcb_es || 579*2712Snn35248 ssd->sel == pcb->pcb_fs || 580*2712Snn35248 ssd->sel == pcb->pcb_gs) { 5810Sstevel@tonic-gate bad = 1; 5820Sstevel@tonic-gate break; 5830Sstevel@tonic-gate } 5840Sstevel@tonic-gate } else 5850Sstevel@tonic-gate #endif 5860Sstevel@tonic-gate { 587*2712Snn35248 if (ssd->sel == rp->r_ds || 588*2712Snn35248 ssd->sel == rp->r_es || 589*2712Snn35248 ssd->sel == rp->r_fs || 590*2712Snn35248 ssd->sel == rp->r_gs) { 5910Sstevel@tonic-gate bad = 1; 5920Sstevel@tonic-gate break; 5930Sstevel@tonic-gate } 5940Sstevel@tonic-gate } 5950Sstevel@tonic-gate 5960Sstevel@tonic-gate } while ((t = t->t_forw) != pp->p_tlist); 5970Sstevel@tonic-gate mutex_exit(&pp->p_lock); 5980Sstevel@tonic-gate 5990Sstevel@tonic-gate if (bad) { 6000Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6010Sstevel@tonic-gate return (EBUSY); 6020Sstevel@tonic-gate } 6030Sstevel@tonic-gate } 6040Sstevel@tonic-gate 6050Sstevel@tonic-gate /* 6060Sstevel@tonic-gate * If acc1 is zero, clear the descriptor (including the 'present' bit) 6070Sstevel@tonic-gate */ 608*2712Snn35248 if (ssd->acc1 == 0) { 6090Sstevel@tonic-gate bzero(dscrp, sizeof (*dscrp)); 6100Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6110Sstevel@tonic-gate return (0); 6120Sstevel@tonic-gate } 6130Sstevel@tonic-gate 6140Sstevel@tonic-gate /* 6150Sstevel@tonic-gate * Check segment type, allow segment not present and 6160Sstevel@tonic-gate * only user DPL (3). 6170Sstevel@tonic-gate */ 618*2712Snn35248 if (SI86SSD_DPL(ssd) != SEL_UPL) { 6190Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6200Sstevel@tonic-gate return (EINVAL); 6210Sstevel@tonic-gate } 6220Sstevel@tonic-gate 6230Sstevel@tonic-gate #if defined(__amd64) 6240Sstevel@tonic-gate /* 625*2712Snn35248 * Do not allow 32-bit applications to create 64-bit mode code 626*2712Snn35248 * segments. 6270Sstevel@tonic-gate */ 628*2712Snn35248 if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 && 629*2712Snn35248 SI86SSD_ISLONG(ssd)) { 6300Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6310Sstevel@tonic-gate return (EINVAL); 6320Sstevel@tonic-gate } 6330Sstevel@tonic-gate #endif /* __amd64 */ 6340Sstevel@tonic-gate 6350Sstevel@tonic-gate /* 6360Sstevel@tonic-gate * Set up a code or data user segment descriptor. 6370Sstevel@tonic-gate */ 638*2712Snn35248 if (SI86SSD_ISUSEG(ssd)) { 639*2712Snn35248 ssd_to_usd(ssd, dscrp); 6400Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6410Sstevel@tonic-gate return (0); 6420Sstevel@tonic-gate } 6430Sstevel@tonic-gate 6440Sstevel@tonic-gate /* 6450Sstevel@tonic-gate * Allow a call gate only if the destination is in the LDT. 6460Sstevel@tonic-gate */ 647*2712Snn35248 if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) { 648*2712Snn35248 ssd_to_sgd(ssd, (gate_desc_t *)dscrp); 6490Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6500Sstevel@tonic-gate return (0); 6510Sstevel@tonic-gate } 6520Sstevel@tonic-gate 6530Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 6540Sstevel@tonic-gate return (EINVAL); 6550Sstevel@tonic-gate } 6560Sstevel@tonic-gate 6570Sstevel@tonic-gate /* 6580Sstevel@tonic-gate * Allocate a private LDT for this process and initialize it with the 6591217Srab * default entries. 6600Sstevel@tonic-gate */ 661*2712Snn35248 static void 6620Sstevel@tonic-gate setup_ldt(proc_t *pp) 6630Sstevel@tonic-gate { 6640Sstevel@tonic-gate user_desc_t *ldtp; /* descriptor pointer */ 6650Sstevel@tonic-gate pgcnt_t npages = btopr(MAXNLDT * sizeof (user_desc_t)); 6660Sstevel@tonic-gate 6670Sstevel@tonic-gate /* 6680Sstevel@tonic-gate * Allocate maximum virtual space we need for this LDT. 6690Sstevel@tonic-gate */ 6700Sstevel@tonic-gate ldtp = vmem_alloc(heap_arena, ptob(npages), VM_SLEEP); 6710Sstevel@tonic-gate 6720Sstevel@tonic-gate /* 6730Sstevel@tonic-gate * Allocate the minimum number of physical pages for LDT. 6740Sstevel@tonic-gate */ 6751217Srab (void) segkmem_xalloc(NULL, ldtp, MINNLDT * sizeof (user_desc_t), 6761217Srab VM_SLEEP, 0, segkmem_page_create, NULL); 6771217Srab 6780Sstevel@tonic-gate bzero(ldtp, ptob(btopr(MINNLDT * sizeof (user_desc_t)))); 6790Sstevel@tonic-gate 6800Sstevel@tonic-gate kpreempt_disable(); 6810Sstevel@tonic-gate 6820Sstevel@tonic-gate /* Update proc structure. XXX - need any locks here??? */ 6830Sstevel@tonic-gate 6840Sstevel@tonic-gate set_syssegd(&pp->p_ldt_desc, ldtp, MINNLDT * sizeof (user_desc_t) - 1, 6850Sstevel@tonic-gate SDT_SYSLDT, SEL_KPL); 6860Sstevel@tonic-gate 6870Sstevel@tonic-gate pp->p_ldtlimit = MINNLDT - 1; 6880Sstevel@tonic-gate pp->p_ldt = ldtp; 6890Sstevel@tonic-gate if (pp == curproc) 6900Sstevel@tonic-gate *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = pp->p_ldt_desc; 6910Sstevel@tonic-gate 6920Sstevel@tonic-gate kpreempt_enable(); 6930Sstevel@tonic-gate } 6940Sstevel@tonic-gate 6950Sstevel@tonic-gate /* 6960Sstevel@tonic-gate * Map the page corresponding to the selector entry. If the page is 6970Sstevel@tonic-gate * already mapped then it simply returns with the pointer to the entry. 6980Sstevel@tonic-gate * Otherwise it allocates a physical page for it and returns the pointer 6990Sstevel@tonic-gate * to the entry. Returns 0 for errors. 7000Sstevel@tonic-gate */ 7010Sstevel@tonic-gate static void * 7020Sstevel@tonic-gate ldt_map(proc_t *pp, uint_t seli) 7030Sstevel@tonic-gate { 7040Sstevel@tonic-gate caddr_t ent0_addr = (caddr_t)&pp->p_ldt[0]; 7050Sstevel@tonic-gate caddr_t ent_addr = (caddr_t)&pp->p_ldt[seli]; 7060Sstevel@tonic-gate volatile caddr_t page = (caddr_t)((uintptr_t)ent0_addr & (~PAGEOFFSET)); 7070Sstevel@tonic-gate caddr_t epage = (caddr_t)((uintptr_t)ent_addr & (~PAGEOFFSET)); 7080Sstevel@tonic-gate on_trap_data_t otd; 7090Sstevel@tonic-gate 7100Sstevel@tonic-gate ASSERT(pp->p_ldt != NULL); 7110Sstevel@tonic-gate 7120Sstevel@tonic-gate if (seli <= pp->p_ldtlimit) 7130Sstevel@tonic-gate return (ent_addr); 7140Sstevel@tonic-gate 7150Sstevel@tonic-gate /* 7160Sstevel@tonic-gate * We are increasing the size of the process's LDT. 7170Sstevel@tonic-gate * Make sure this and all intervening pages are mapped. 7180Sstevel@tonic-gate */ 7190Sstevel@tonic-gate while (page <= epage) { 7200Sstevel@tonic-gate if (!on_trap(&otd, OT_DATA_ACCESS)) 7210Sstevel@tonic-gate (void) *(volatile int *)page; /* peek at the page */ 7220Sstevel@tonic-gate else { /* Allocate a physical page */ 7231217Srab (void) segkmem_xalloc(NULL, page, PAGESIZE, VM_SLEEP, 0, 7241217Srab segkmem_page_create, NULL); 7250Sstevel@tonic-gate bzero(page, PAGESIZE); 7260Sstevel@tonic-gate } 7270Sstevel@tonic-gate no_trap(); 7280Sstevel@tonic-gate page += PAGESIZE; 7290Sstevel@tonic-gate } 7300Sstevel@tonic-gate 7310Sstevel@tonic-gate /* XXX - need any locks to update proc_t or gdt ??? */ 7320Sstevel@tonic-gate 7330Sstevel@tonic-gate ASSERT(curproc == pp); 7340Sstevel@tonic-gate 7350Sstevel@tonic-gate kpreempt_disable(); 7360Sstevel@tonic-gate pp->p_ldtlimit = seli; 7370Sstevel@tonic-gate SYSSEGD_SETLIMIT(&pp->p_ldt_desc, (seli+1) * sizeof (user_desc_t) -1); 7380Sstevel@tonic-gate 7390Sstevel@tonic-gate ldt_load(); 7400Sstevel@tonic-gate kpreempt_enable(); 7410Sstevel@tonic-gate 7420Sstevel@tonic-gate return (ent_addr); 7430Sstevel@tonic-gate } 7440Sstevel@tonic-gate 7450Sstevel@tonic-gate /* 7460Sstevel@tonic-gate * Free up the kernel memory used for LDT of this process. 7470Sstevel@tonic-gate */ 7481217Srab static void 7490Sstevel@tonic-gate ldt_free(proc_t *pp) 7500Sstevel@tonic-gate { 7510Sstevel@tonic-gate on_trap_data_t otd; 7520Sstevel@tonic-gate caddr_t start, end; 7530Sstevel@tonic-gate volatile caddr_t addr; 7540Sstevel@tonic-gate 7550Sstevel@tonic-gate ASSERT(pp->p_ldt != NULL); 7560Sstevel@tonic-gate 7570Sstevel@tonic-gate mutex_enter(&pp->p_ldtlock); 7580Sstevel@tonic-gate start = (caddr_t)pp->p_ldt; /* beginning of the LDT */ 7590Sstevel@tonic-gate end = start + (pp->p_ldtlimit * sizeof (user_desc_t)); 7600Sstevel@tonic-gate 7610Sstevel@tonic-gate /* Free the physical page(s) used for mapping LDT */ 7620Sstevel@tonic-gate for (addr = start; addr <= end; addr += PAGESIZE) { 7630Sstevel@tonic-gate if (!on_trap(&otd, OT_DATA_ACCESS)) { 7640Sstevel@tonic-gate /* peek at the address */ 7650Sstevel@tonic-gate (void) *(volatile int *)addr; 7660Sstevel@tonic-gate segkmem_free(NULL, addr, PAGESIZE); 7670Sstevel@tonic-gate } 7680Sstevel@tonic-gate } 7690Sstevel@tonic-gate no_trap(); 7700Sstevel@tonic-gate 7710Sstevel@tonic-gate /* Free up the virtual address space used for this LDT */ 7720Sstevel@tonic-gate vmem_free(heap_arena, pp->p_ldt, 7730Sstevel@tonic-gate ptob(btopr(MAXNLDT * sizeof (user_desc_t)))); 7740Sstevel@tonic-gate kpreempt_disable(); 7750Sstevel@tonic-gate pp->p_ldt = NULL; 7761217Srab pp->p_ldt_desc = zero_sdesc; 7771217Srab pp->p_ldtlimit = 0; 7781217Srab 7790Sstevel@tonic-gate if (pp == curproc) 7801217Srab ldt_unload(); 7810Sstevel@tonic-gate kpreempt_enable(); 7820Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 7830Sstevel@tonic-gate } 7840Sstevel@tonic-gate 7850Sstevel@tonic-gate /* 7860Sstevel@tonic-gate * On fork copy new ldt for child. 7870Sstevel@tonic-gate */ 7881217Srab void 7890Sstevel@tonic-gate ldt_dup(proc_t *pp, proc_t *cp) 7900Sstevel@tonic-gate { 7910Sstevel@tonic-gate on_trap_data_t otd; 7920Sstevel@tonic-gate caddr_t start, end; 7930Sstevel@tonic-gate volatile caddr_t addr, caddr; 7940Sstevel@tonic-gate int minsize; 7950Sstevel@tonic-gate 7961217Srab ASSERT(pp->p_ldt); 7970Sstevel@tonic-gate 7981217Srab setup_ldt(cp); 7990Sstevel@tonic-gate 8000Sstevel@tonic-gate mutex_enter(&pp->p_ldtlock); 8010Sstevel@tonic-gate cp->p_ldtlimit = pp->p_ldtlimit; 8020Sstevel@tonic-gate SYSSEGD_SETLIMIT(&cp->p_ldt_desc, 8030Sstevel@tonic-gate (pp->p_ldtlimit+1) * sizeof (user_desc_t) -1); 8040Sstevel@tonic-gate start = (caddr_t)pp->p_ldt; /* beginning of the LDT */ 8050Sstevel@tonic-gate end = start + (pp->p_ldtlimit * sizeof (user_desc_t)); 8060Sstevel@tonic-gate caddr = (caddr_t)cp->p_ldt; /* child LDT start */ 8070Sstevel@tonic-gate 8080Sstevel@tonic-gate minsize = ((MINNLDT * sizeof (user_desc_t)) + PAGESIZE) & ~PAGEOFFSET; 8090Sstevel@tonic-gate /* Walk thru the physical page(s) used for parent's LDT */ 8100Sstevel@tonic-gate for (addr = start; addr <= end; addr += PAGESIZE, caddr += PAGESIZE) { 8110Sstevel@tonic-gate if (!on_trap(&otd, OT_DATA_ACCESS)) { 8120Sstevel@tonic-gate (void) *(volatile int *)addr; /* peek at the address */ 8130Sstevel@tonic-gate /* allocate a page if necessary */ 8140Sstevel@tonic-gate if (caddr >= ((caddr_t)cp->p_ldt + minsize)) { 8151217Srab (void) segkmem_xalloc(NULL, caddr, PAGESIZE, 8161217Srab VM_SLEEP, 0, segkmem_page_create, NULL); 8170Sstevel@tonic-gate } 8180Sstevel@tonic-gate bcopy(addr, caddr, PAGESIZE); 8190Sstevel@tonic-gate } 8200Sstevel@tonic-gate } 8210Sstevel@tonic-gate no_trap(); 8220Sstevel@tonic-gate mutex_exit(&pp->p_ldtlock); 8230Sstevel@tonic-gate } 824