1 /* $NetBSD: linux_exec_machdep.c,v 1.12 2009/03/29 01:02:50 mrg Exp $ */ 2 3 /*- 4 * Copyright (c) 2004 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christos Zoulas. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: linux_exec_machdep.c,v 1.12 2009/03/29 01:02:50 mrg Exp $"); 34 35 #if defined(_KERNEL_OPT) 36 #include "opt_vm86.h" 37 #include "opt_user_ldt.h" 38 #endif 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/resource.h> 43 #include <sys/proc.h> 44 #include <sys/conf.h> 45 #include <sys/exec.h> 46 #include <sys/exec_elf.h> 47 #include <sys/vnode.h> 48 #include <sys/lwp.h> 49 50 #include <sys/cpu.h> 51 #include <machine/vmparam.h> 52 53 #include <uvm/uvm.h> 54 55 #include <sys/syscallargs.h> 56 57 #ifndef DEBUG_LINUX 58 #define DPRINTF(a) 59 #else 60 #define DPRINTF(a) uprintf a 61 #endif 62 63 #include <compat/linux/common/linux_types.h> 64 #include <compat/linux/common/linux_signal.h> 65 #include <compat/linux/common/linux_machdep.h> 66 #include <compat/linux/common/linux_util.h> 67 #include <compat/linux/common/linux_ioctl.h> 68 #include <compat/linux/common/linux_hdio.h> 69 #include <compat/linux/common/linux_exec.h> 70 #include <compat/linux/common/linux_errno.h> 71 #include <compat/linux/linux_syscallargs.h> 72 73 74 int 75 linux_exec_setup_stack(struct lwp *l, struct exec_package *epp) 76 { 77 u_long max_stack_size; 78 u_long access_linear_min, access_size; 79 u_long noaccess_linear_min, noaccess_size; 80 81 #ifndef USRSTACK32 82 #define USRSTACK32 (0x00000000ffffffffL&~PGOFSET) 83 #endif 84 85 if (epp->ep_flags & EXEC_32) { 86 epp->ep_minsaddr = USRSTACK32; 87 max_stack_size = MAXSSIZ; 88 } else { 89 epp->ep_minsaddr = USRSTACK; 90 max_stack_size = MAXSSIZ; 91 } 92 93 if (epp->ep_minsaddr > LINUX_USRSTACK) 94 epp->ep_minsaddr = LINUX_USRSTACK; 95 #ifdef DEBUG_LINUX 96 else { 97 /* 98 * Someone needs to make KERNBASE and TEXTADDR 99 * java versions < 1.4.2 need the stack to be 100 * at 0xC0000000 101 */ 102 uprintf("Cannot setup stack to 0xC0000000, " 103 "java will not work properly\n"); 104 } 105 #endif 106 epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr, 107 max_stack_size); 108 epp->ep_ssize = l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur; 109 110 /* 111 * set up commands for stack. note that this takes *two*, one to 112 * map the part of the stack which we can access, and one to map 113 * the part which we can't. 114 * 115 * arguably, it could be made into one, but that would require the 116 * addition of another mapping proc, which is unnecessary 117 */ 118 access_size = epp->ep_ssize; 119 access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size); 120 noaccess_size = max_stack_size - access_size; 121 noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr, 122 access_size), noaccess_size); 123 if (noaccess_size > 0) { 124 NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size, 125 noaccess_linear_min, NULLVP, 0, VM_PROT_NONE, VMCMD_STACK); 126 } 127 KASSERT(access_size > 0); 128 NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, access_size, 129 access_linear_min, NULLVP, 0, VM_PROT_READ | VM_PROT_WRITE, 130 VMCMD_STACK); 131 132 return 0; 133 } 134 135 136 #ifdef LINUX_NPTL 137 static __inline void 138 load_gs(u_int sel) 139 { 140 __asm __volatile("movl %0,%%gs" : : "rm" (sel)); 141 } 142 143 144 int 145 linux_init_thread_area(struct lwp *l, struct lwp *l2) 146 { 147 struct trapframe *tf = l->l_md.md_regs, *tf2 = l2->l_md.md_regs; 148 struct pcb *pcb2 = &l2->l_addr->u_pcb; 149 struct linux_user_desc info; 150 struct segment_descriptor sd; 151 int error, idx, a[2]; 152 153 error = copyin((void *)tf->tf_esi, &info, sizeof(info)); 154 if (error) 155 return error; 156 idx = info.entry_number; 157 158 /* 159 * looks like we're getting the idx we returned 160 * in the set_thread_area() syscall 161 */ 162 if (idx != LINUX_GLIBC_TLS_SEL && idx != GUGS_SEL) { 163 DPRINTF(("resetting idx %d to GUGS_SEL", idx)); 164 idx = GUGS_SEL; 165 } 166 167 /* this doesnt happen in practice */ 168 if (idx == LINUX_GLIBC_TLS_SEL) { 169 /* we might copy out the entry_number as 3 */ 170 info.entry_number = GUGS_SEL; 171 error = copyout(&info, (void *)tf->tf_esi, sizeof(info)); 172 if (error) 173 return error; 174 } 175 176 a[0] = LINUX_LDT_entry_a(&info); 177 a[1] = LINUX_LDT_entry_b(&info); 178 179 (void)memcpy(&sd, &a, sizeof(a)); 180 KASSERT(ISMEMSDP((&sd))); 181 DPRINTF(("Segment created in clone with CLONE_SETTLS: lobase: %x, " 182 "hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, " 183 "xx: %i, def32: %i, gran: %i\n", sd.sd_lobase, 184 sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, sd.sd_type, sd.sd_dpl, 185 sd.sd_p, sd.sd_xx, sd.sd_def32, sd.sd_gran)); 186 187 (void)memcpy(&pcb2->pcb_gsd, &sd, sizeof(sd)); 188 tf2->tf_gs = GSEL(GUGS_SEL, SEL_UPL); 189 190 return 0; 191 } 192 193 194 int 195 linux_sys_set_thread_area(struct lwp *l, 196 const struct linux_sys_set_thread_area_args *uap, register_t *retval) 197 { 198 struct pcb *pcb = &l->l_addr->u_pcb; 199 struct linux_user_desc info; 200 struct segment_descriptor sd; 201 int error, idx, a[2]; 202 203 *retval = 0; 204 error = copyin(SCARG(uap, desc), &info, sizeof(info)); 205 if (error) 206 return error; 207 208 DPRINTF(("set thread area: %i, %x, %x, %i, %i, %i, %i, %i, %i\n", 209 info.entry_number, info.base_addr, info.limit, info.seg_32bit, 210 info.contents, info.read_exec_only, info.limit_in_pages, 211 info.seg_not_present, info.useable)); 212 213 idx = info.entry_number; 214 /* 215 * Semantics of linux version: every thread in the system has array of 216 * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This 217 * syscall loads one of the selected tls decriptors with a value and 218 * also loads GDT descriptors 6, 7 and 8 with the content of the 219 * per-thread descriptors. 220 * 221 * Semantics of fbsd version: I think we can ignore that linux has 3 222 * per-thread descriptors and use just the 1st one. The tls_array[] 223 * is used only in set/get-thread_area() syscalls and for loading the 224 * GDT descriptors. In fbsd we use just one GDT descriptor for TLS so 225 * we will load just one. 226 * 227 * XXX: this doesn't work when a user space process tries to use more 228 * than 1 TLS segment. Comment in the linux sources says wine might do 229 * this. 230 */ 231 232 /* 233 * we support just GLIBC TLS now 234 * we should let 3 proceed as well because we use this segment so 235 * if code does two subsequent calls it should succeed 236 */ 237 if (idx != LINUX_GLIBC_TLS_SEL && idx != -1 && idx != GUGS_SEL) 238 return EINVAL; 239 240 /* 241 * we have to copy out the GDT entry we use 242 * FreeBSD uses GDT entry #3 for storing %gs so load that 243 * 244 * XXX: what if a user space program doesn't check this value and tries 245 * to use 6, 7 or 8? 246 */ 247 idx = info.entry_number = GUGS_SEL; 248 error = copyout(&info, SCARG(uap, desc), sizeof(info)); 249 if (error) 250 return error; 251 252 if (LINUX_LDT_empty(&info)) { 253 a[0] = 0; 254 a[1] = 0; 255 } else { 256 a[0] = LINUX_LDT_entry_a(&info); 257 a[1] = LINUX_LDT_entry_b(&info); 258 } 259 260 (void)memcpy(&sd, &a, sizeof(a)); 261 KASSERT(ISMEMSDP((&sd))); 262 DPRINTF(("Segment created in set_thread_area: lobase: %x, hibase: %x, " 263 "lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, " 264 "def32: %i, gran: %i\n", sd.sd_lobase, sd.sd_hibase, sd.sd_lolimit, 265 sd.sd_hilimit, sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, 266 sd.sd_def32, sd.sd_gran)); 267 268 kpreempt_disable(); 269 (void)memcpy(&pcb->pcb_gsd, &sd, sizeof(sd)); 270 (void)memcpy(&curcpu()->ci_gdt[GUGS_SEL], &sd, sizeof(sd)); 271 load_gs(GSEL(GUGS_SEL, SEL_UPL)); 272 kpreempt_enable(); 273 return 0; 274 } 275 276 int 277 linux_sys_get_thread_area(struct lwp *l, 278 const struct linux_sys_get_thread_area_args *uap, register_t *retval) 279 { 280 struct pcb *pcb = &l->l_addr->u_pcb; 281 struct linux_user_desc info; 282 struct linux_desc_struct desc; 283 struct segment_descriptor sd; 284 int error, idx; 285 286 *retval = 0; 287 error = copyin(SCARG(uap, desc), &info, sizeof(info)); 288 if (error) 289 return error; 290 291 idx = info.entry_number; 292 /* XXX: I am not sure if we want 3 to be allowed too. */ 293 if (idx != LINUX_GLIBC_TLS_SEL && idx != GUGS_SEL) 294 return EINVAL; 295 296 idx = GUGS_SEL; 297 298 (void)memset(&info, 0, sizeof(info)); 299 (void)memcpy(&sd, pcb->pcb_gsd, sizeof(sd)); 300 (void)memcpy(&desc, &sd, sizeof(desc)); 301 302 info.entry_number = idx; 303 info.base_addr = LINUX_GET_BASE(&desc); 304 info.limit = LINUX_GET_LIMIT(&desc); 305 info.seg_32bit = LINUX_GET_32BIT(&desc); 306 info.contents = LINUX_GET_CONTENTS(&desc); 307 info.read_exec_only = !LINUX_GET_WRITABLE(&desc); 308 info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc); 309 info.seg_not_present = !LINUX_GET_PRESENT(&desc); 310 info.useable = LINUX_GET_USEABLE(&desc); 311 312 return copyout(&info, SCARG(uap, desc), sizeof(info)); 313 } 314 315 #endif 316