1 /* $NetBSD: linux_exec_machdep.c,v 1.11 2008/10/26 20:46:05 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2004 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christos Zoulas. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: linux_exec_machdep.c,v 1.11 2008/10/26 20:46:05 christos Exp $"); 34 35 #if defined(_KERNEL_OPT) 36 #include "opt_vm86.h" 37 #include "opt_user_ldt.h" 38 #endif 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/resource.h> 43 #include <sys/proc.h> 44 #include <sys/conf.h> 45 #include <sys/exec.h> 46 #include <sys/exec_elf.h> 47 #include <sys/vnode.h> 48 #include <sys/lwp.h> 49 50 #include <sys/cpu.h> 51 #include <machine/vmparam.h> 52 53 #include <uvm/uvm.h> 54 55 #include <sys/syscallargs.h> 56 57 #ifndef DEBUG_LINUX 58 #define DPRINTF(a) 59 #else 60 #define DPRINTF(a) uprintf a 61 #endif 62 63 #include <compat/linux/common/linux_types.h> 64 #include <compat/linux/common/linux_signal.h> 65 #include <compat/linux/common/linux_machdep.h> 66 #include <compat/linux/common/linux_util.h> 67 #include <compat/linux/common/linux_ioctl.h> 68 #include <compat/linux/common/linux_hdio.h> 69 #include <compat/linux/common/linux_exec.h> 70 #include <compat/linux/common/linux_errno.h> 71 #include <compat/linux/linux_syscallargs.h> 72 73 74 int 75 linux_exec_setup_stack(struct lwp *l, struct exec_package *epp) 76 { 77 u_long max_stack_size; 78 u_long access_linear_min, access_size; 79 u_long noaccess_linear_min, noaccess_size; 80 81 #ifndef USRSTACK32 82 #define USRSTACK32 (0x00000000ffffffffL&~PGOFSET) 83 #endif 84 85 if (epp->ep_flags & EXEC_32) { 86 epp->ep_minsaddr = USRSTACK32; 87 max_stack_size = MAXSSIZ; 88 } else { 89 epp->ep_minsaddr = USRSTACK; 90 max_stack_size = MAXSSIZ; 91 } 92 93 if (epp->ep_minsaddr > LINUX_USRSTACK) 94 epp->ep_minsaddr = LINUX_USRSTACK; 95 #ifdef DEBUG_LINUX 96 else { 97 /* 98 * Someone needs to make KERNBASE and TEXTADDR 99 * java versions < 1.4.2 need the stack to be 100 * at 0xC0000000 101 */ 102 uprintf("Cannot setup stack to 0xC0000000, " 103 "java will not work properly\n"); 104 } 105 #endif 106 epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr, 107 max_stack_size); 108 epp->ep_ssize = l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur; 109 110 /* 111 * set up commands for stack. note that this takes *two*, one to 112 * map the part of the stack which we can access, and one to map 113 * the part which we can't. 114 * 115 * arguably, it could be made into one, but that would require the 116 * addition of another mapping proc, which is unnecessary 117 */ 118 access_size = epp->ep_ssize; 119 access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size); 120 noaccess_size = max_stack_size - access_size; 121 noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr, 122 access_size), noaccess_size); 123 if (noaccess_size > 0) { 124 NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size, 125 noaccess_linear_min, NULLVP, 0, VM_PROT_NONE); 126 } 127 KASSERT(access_size > 0); 128 NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, access_size, 129 access_linear_min, NULLVP, 0, VM_PROT_READ | VM_PROT_WRITE); 130 131 return 0; 132 } 133 134 135 #ifdef LINUX_NPTL 136 static __inline void 137 load_gs(u_int sel) 138 { 139 __asm __volatile("movl %0,%%gs" : : "rm" (sel)); 140 } 141 142 143 int 144 linux_init_thread_area(struct lwp *l, struct lwp *l2) 145 { 146 struct trapframe *tf = l->l_md.md_regs, *tf2 = l2->l_md.md_regs; 147 struct pcb *pcb2 = &l2->l_addr->u_pcb; 148 struct linux_user_desc info; 149 struct segment_descriptor sd; 150 int error, idx, a[2]; 151 152 error = copyin((void *)tf->tf_esi, &info, sizeof(info)); 153 if (error) 154 return error; 155 idx = info.entry_number; 156 157 /* 158 * looks like we're getting the idx we returned 159 * in the set_thread_area() syscall 160 */ 161 if (idx != LINUX_GLIBC_TLS_SEL && idx != GUGS_SEL) { 162 DPRINTF(("resetting idx %d to GUGS_SEL", idx)); 163 idx = GUGS_SEL; 164 } 165 166 /* this doesnt happen in practice */ 167 if (idx == LINUX_GLIBC_TLS_SEL) { 168 /* we might copy out the entry_number as 3 */ 169 info.entry_number = GUGS_SEL; 170 error = copyout(&info, (void *)tf->tf_esi, sizeof(info)); 171 if (error) 172 return error; 173 } 174 175 a[0] = LINUX_LDT_entry_a(&info); 176 a[1] = LINUX_LDT_entry_b(&info); 177 178 (void)memcpy(&sd, &a, sizeof(a)); 179 KASSERT(ISMEMSDP((&sd))); 180 DPRINTF(("Segment created in clone with CLONE_SETTLS: lobase: %x, " 181 "hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, " 182 "xx: %i, def32: %i, gran: %i\n", sd.sd_lobase, 183 sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, sd.sd_type, sd.sd_dpl, 184 sd.sd_p, sd.sd_xx, sd.sd_def32, sd.sd_gran)); 185 186 (void)memcpy(&pcb2->pcb_gsd, &sd, sizeof(sd)); 187 tf2->tf_gs = GSEL(GUGS_SEL, SEL_UPL); 188 189 return 0; 190 } 191 192 193 int 194 linux_sys_set_thread_area(struct lwp *l, 195 const struct linux_sys_set_thread_area_args *uap, register_t *retval) 196 { 197 struct pcb *pcb = &l->l_addr->u_pcb; 198 struct linux_user_desc info; 199 struct segment_descriptor sd; 200 int error, idx, a[2]; 201 202 *retval = 0; 203 error = copyin(SCARG(uap, desc), &info, sizeof(info)); 204 if (error) 205 return error; 206 207 DPRINTF(("set thread area: %i, %x, %x, %i, %i, %i, %i, %i, %i\n", 208 info.entry_number, info.base_addr, info.limit, info.seg_32bit, 209 info.contents, info.read_exec_only, info.limit_in_pages, 210 info.seg_not_present, info.useable)); 211 212 idx = info.entry_number; 213 /* 214 * Semantics of linux version: every thread in the system has array of 215 * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This 216 * syscall loads one of the selected tls decriptors with a value and 217 * also loads GDT descriptors 6, 7 and 8 with the content of the 218 * per-thread descriptors. 219 * 220 * Semantics of fbsd version: I think we can ignore that linux has 3 221 * per-thread descriptors and use just the 1st one. The tls_array[] 222 * is used only in set/get-thread_area() syscalls and for loading the 223 * GDT descriptors. In fbsd we use just one GDT descriptor for TLS so 224 * we will load just one. 225 * 226 * XXX: this doesn't work when a user space process tries to use more 227 * than 1 TLS segment. Comment in the linux sources says wine might do 228 * this. 229 */ 230 231 /* 232 * we support just GLIBC TLS now 233 * we should let 3 proceed as well because we use this segment so 234 * if code does two subsequent calls it should succeed 235 */ 236 if (idx != LINUX_GLIBC_TLS_SEL && idx != -1 && idx != GUGS_SEL) 237 return EINVAL; 238 239 /* 240 * we have to copy out the GDT entry we use 241 * FreeBSD uses GDT entry #3 for storing %gs so load that 242 * 243 * XXX: what if a user space program doesn't check this value and tries 244 * to use 6, 7 or 8? 245 */ 246 idx = info.entry_number = GUGS_SEL; 247 error = copyout(&info, SCARG(uap, desc), sizeof(info)); 248 if (error) 249 return error; 250 251 if (LINUX_LDT_empty(&info)) { 252 a[0] = 0; 253 a[1] = 0; 254 } else { 255 a[0] = LINUX_LDT_entry_a(&info); 256 a[1] = LINUX_LDT_entry_b(&info); 257 } 258 259 (void)memcpy(&sd, &a, sizeof(a)); 260 KASSERT(ISMEMSDP((&sd))); 261 DPRINTF(("Segment created in set_thread_area: lobase: %x, hibase: %x, " 262 "lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, " 263 "def32: %i, gran: %i\n", sd.sd_lobase, sd.sd_hibase, sd.sd_lolimit, 264 sd.sd_hilimit, sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, 265 sd.sd_def32, sd.sd_gran)); 266 267 kpreempt_disable(); 268 (void)memcpy(&pcb->pcb_gsd, &sd, sizeof(sd)); 269 (void)memcpy(&curcpu()->ci_gdt[GUGS_SEL], &sd, sizeof(sd)); 270 load_gs(GSEL(GUGS_SEL, SEL_UPL)); 271 kpreempt_enable(); 272 return 0; 273 } 274 275 int 276 linux_sys_get_thread_area(struct lwp *l, 277 const struct linux_sys_get_thread_area_args *uap, register_t *retval) 278 { 279 struct pcb *pcb = &l->l_addr->u_pcb; 280 struct linux_user_desc info; 281 struct linux_desc_struct desc; 282 struct segment_descriptor sd; 283 int error, idx; 284 285 *retval = 0; 286 error = copyin(SCARG(uap, desc), &info, sizeof(info)); 287 if (error) 288 return error; 289 290 idx = info.entry_number; 291 /* XXX: I am not sure if we want 3 to be allowed too. */ 292 if (idx != LINUX_GLIBC_TLS_SEL && idx != GUGS_SEL) 293 return EINVAL; 294 295 idx = GUGS_SEL; 296 297 (void)memset(&info, 0, sizeof(info)); 298 (void)memcpy(&sd, pcb->pcb_gsd, sizeof(sd)); 299 (void)memcpy(&desc, &sd, sizeof(desc)); 300 301 info.entry_number = idx; 302 info.base_addr = LINUX_GET_BASE(&desc); 303 info.limit = LINUX_GET_LIMIT(&desc); 304 info.seg_32bit = LINUX_GET_32BIT(&desc); 305 info.contents = LINUX_GET_CONTENTS(&desc); 306 info.read_exec_only = !LINUX_GET_WRITABLE(&desc); 307 info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc); 308 info.seg_not_present = !LINUX_GET_PRESENT(&desc); 309 info.useable = LINUX_GET_USEABLE(&desc); 310 311 return copyout(&info, SCARG(uap, desc), sizeof(info)); 312 } 313 314 #endif 315