xref: /netbsd-src/sys/compat/linux/common/linux_exec.c (revision ace896fac114f559f7469472324fbe68bbe378e5)
1 /*	$NetBSD: linux_exec.c,v 1.25 1996/10/13 18:30:05 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1995 Frank van der Linden
5  * Copyright (c) 1994 Christos Zoulas
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * based on exec_aout.c, sunos_exec.c and svr4_exec.c
31  */
32 
33 #define	ELFSIZE		32				/* XXX should die */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/proc.h>
39 #include <sys/malloc.h>
40 #include <sys/namei.h>
41 #include <sys/vnode.h>
42 #include <sys/mount.h>
43 #include <sys/exec.h>
44 #include <sys/exec_elf.h>
45 
46 #include <sys/mman.h>
47 #include <sys/syscallargs.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/vm_map.h>
52 
53 #include <machine/cpu.h>
54 #include <machine/reg.h>
55 #include <machine/linux_machdep.h>
56 
57 #include <compat/linux/linux_types.h>
58 #include <compat/linux/linux_syscall.h>
59 #include <compat/linux/linux_signal.h>
60 #include <compat/linux/linux_syscallargs.h>
61 #include <compat/linux/linux_util.h>
62 #include <compat/linux/linux_exec.h>
63 
64 static void *linux_aout_copyargs __P((struct exec_package *,
65     struct ps_strings *, void *, void *));
66 static int linux_elf32_signature __P((struct proc *p, struct exec_package *,
67     Elf32_Ehdr *));
68 
69 #define	LINUX_AOUT_AUX_ARGSIZ	2
70 #define LINUX_ELF_AUX_ARGSIZ (sizeof(AuxInfo) * 8 / sizeof(char *))
71 
72 
73 const char linux_emul_path[] = "/emul/linux";
74 extern int linux_error[];
75 extern char linux_sigcode[], linux_esigcode[];
76 extern struct sysent linux_sysent[];
77 extern char *linux_syscallnames[];
78 
79 int exec_linux_aout_prep_zmagic __P((struct proc *, struct exec_package *));
80 int exec_linux_aout_prep_nmagic __P((struct proc *, struct exec_package *));
81 int exec_linux_aout_prep_omagic __P((struct proc *, struct exec_package *));
82 int exec_linux_aout_prep_qmagic __P((struct proc *, struct exec_package *));
83 
84 struct emul emul_linux_aout = {
85 	"linux",
86 	linux_error,
87 	linux_sendsig,
88 	LINUX_SYS_syscall,
89 	LINUX_SYS_MAXSYSCALL,
90 	linux_sysent,
91 	linux_syscallnames,
92 	LINUX_AOUT_AUX_ARGSIZ,
93 	linux_aout_copyargs,
94 	setregs,
95 	linux_sigcode,
96 	linux_esigcode,
97 };
98 
99 struct emul emul_linux_elf = {
100 	"linux",
101 	linux_error,
102 	linux_sendsig,
103 	LINUX_SYS_syscall,
104 	LINUX_SYS_MAXSYSCALL,
105 	linux_sysent,
106 	linux_syscallnames,
107 	LINUX_ELF_AUX_ARGSIZ,
108 	elf32_copyargs,
109 	setregs,
110 	linux_sigcode,
111 	linux_esigcode,
112 };
113 
114 
115 static void *
116 linux_aout_copyargs(pack, arginfo, stack, argp)
117 	struct exec_package *pack;
118 	struct ps_strings *arginfo;
119 	void *stack;
120 	void *argp;
121 {
122 	char **cpp = stack;
123 	char **stk = stack;
124 	char *dp, *sp;
125 	size_t len;
126 	void *nullp = NULL;
127 	int argc = arginfo->ps_nargvstr;
128 	int envc = arginfo->ps_nenvstr;
129 
130 	if (copyout(&argc, cpp++, sizeof(argc)))
131 		return NULL;
132 
133 	/* leave room for envp and argv */
134 	cpp += 2;
135 	if (copyout(&cpp, &stk[1], sizeof (cpp)))
136 		return NULL;
137 
138 	dp = (char *) (cpp + argc + envc + 2);
139 	sp = argp;
140 
141 	/* XXX don't copy them out, remap them! */
142 	arginfo->ps_argvstr = cpp; /* remember location of argv for later */
143 
144 	for (; --argc >= 0; sp += len, dp += len)
145 		if (copyout(&dp, cpp++, sizeof(dp)) ||
146 		    copyoutstr(sp, dp, ARG_MAX, &len))
147 			return NULL;
148 
149 	if (copyout(&nullp, cpp++, sizeof(nullp)))
150 		return NULL;
151 
152 	if (copyout(&cpp, &stk[2], sizeof (cpp)))
153 		return NULL;
154 
155 	arginfo->ps_envstr = cpp; /* remember location of envp for later */
156 
157 	for (; --envc >= 0; sp += len, dp += len)
158 		if (copyout(&dp, cpp++, sizeof(dp)) ||
159 		    copyoutstr(sp, dp, ARG_MAX, &len))
160 			return NULL;
161 
162 	if (copyout(&nullp, cpp++, sizeof(nullp)))
163 		return NULL;
164 
165 	return cpp;
166 }
167 
168 int
169 exec_linux_aout_makecmds(p, epp)
170 	struct proc *p;
171 	struct exec_package *epp;
172 {
173 	struct exec *linux_ep = epp->ep_hdr;
174 	int machtype, magic;
175 	int error = ENOEXEC;
176 
177 	magic = LINUX_N_MAGIC(linux_ep);
178 	machtype = LINUX_N_MACHTYPE(linux_ep);
179 
180 
181 	if (machtype != LINUX_MID_MACHINE)
182 		return (ENOEXEC);
183 
184 	switch (magic) {
185 	case QMAGIC:
186 		error = exec_linux_aout_prep_qmagic(p, epp);
187 		break;
188 	case ZMAGIC:
189 		error = exec_linux_aout_prep_zmagic(p, epp);
190 		break;
191 	case NMAGIC:
192 		error = exec_linux_aout_prep_nmagic(p, epp);
193 		break;
194 	case OMAGIC:
195 		error = exec_linux_aout_prep_omagic(p, epp);
196 		break;
197 	}
198 	if (error == 0)
199 		epp->ep_emul = &emul_linux_aout;
200 	return error;
201 }
202 
203 /*
204  * Since text starts at 0x400 in Linux ZMAGIC executables, and 0x400
205  * is very likely not page aligned on most architectures, it is treated
206  * as an NMAGIC here. XXX
207  */
208 
209 int
210 exec_linux_aout_prep_zmagic(p, epp)
211 	struct proc *p;
212 	struct exec_package *epp;
213 {
214 	struct exec *execp = epp->ep_hdr;
215 
216 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, ZMAGIC);
217 	epp->ep_tsize = execp->a_text;
218 	epp->ep_daddr = LINUX_N_DATADDR(*execp, ZMAGIC);
219 	epp->ep_dsize = execp->a_data + execp->a_bss;
220 	epp->ep_entry = execp->a_entry;
221 
222 	/* set up command for text segment */
223 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
224 	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, ZMAGIC),
225 	    VM_PROT_READ|VM_PROT_EXECUTE);
226 
227 	/* set up command for data segment */
228 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
229 	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, ZMAGIC),
230 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
231 
232 	/* set up command for bss segment */
233 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
234 	    epp->ep_daddr + execp->a_data, NULLVP, 0,
235 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
236 
237 	return exec_aout_setup_stack(p, epp);
238 }
239 
240 /*
241  * exec_aout_prep_nmagic(): Prepare Linux NMAGIC package.
242  * Not different from the normal stuff.
243  */
244 
245 int
246 exec_linux_aout_prep_nmagic(p, epp)
247 	struct proc *p;
248 	struct exec_package *epp;
249 {
250 	struct exec *execp = epp->ep_hdr;
251 	long bsize, baddr;
252 
253 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, NMAGIC);
254 	epp->ep_tsize = execp->a_text;
255 	epp->ep_daddr = LINUX_N_DATADDR(*execp, NMAGIC);
256 	epp->ep_dsize = execp->a_data + execp->a_bss;
257 	epp->ep_entry = execp->a_entry;
258 
259 	/* set up command for text segment */
260 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
261 	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, NMAGIC),
262 	    VM_PROT_READ|VM_PROT_EXECUTE);
263 
264 	/* set up command for data segment */
265 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
266 	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, NMAGIC),
267 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
268 
269 	/* set up command for bss segment */
270 	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
271 	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
272 	if (bsize > 0)
273 		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
274 		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
275 
276 	return exec_aout_setup_stack(p, epp);
277 }
278 
279 /*
280  * exec_aout_prep_omagic(): Prepare Linux OMAGIC package.
281  * Business as usual.
282  */
283 
284 int
285 exec_linux_aout_prep_omagic(p, epp)
286 	struct proc *p;
287 	struct exec_package *epp;
288 {
289 	struct exec *execp = epp->ep_hdr;
290 	long dsize, bsize, baddr;
291 
292 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, OMAGIC);
293 	epp->ep_tsize = execp->a_text;
294 	epp->ep_daddr = LINUX_N_DATADDR(*execp, OMAGIC);
295 	epp->ep_dsize = execp->a_data + execp->a_bss;
296 	epp->ep_entry = execp->a_entry;
297 
298 	/* set up command for text and data segments */
299 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn,
300 	    execp->a_text + execp->a_data, epp->ep_taddr, epp->ep_vp,
301 	    LINUX_N_TXTOFF(*execp, OMAGIC), VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
302 
303 	/* set up command for bss segment */
304 	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
305 	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
306 	if (bsize > 0)
307 		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
308 		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
309 
310 	/*
311 	 * Make sure (# of pages) mapped above equals (vm_tsize + vm_dsize);
312 	 * obreak(2) relies on this fact. Both `vm_tsize' and `vm_dsize' are
313 	 * computed (in execve(2)) by rounding *up* `ep_tsize' and `ep_dsize'
314 	 * respectively to page boundaries.
315 	 * Compensate `ep_dsize' for the amount of data covered by the last
316 	 * text page.
317 	 */
318 	dsize = epp->ep_dsize + execp->a_text - roundup(execp->a_text, NBPG);
319 	epp->ep_dsize = (dsize > 0) ? dsize : 0;
320 	return exec_aout_setup_stack(p, epp);
321 }
322 
323 int
324 exec_linux_aout_prep_qmagic(p, epp)
325 	struct proc *p;
326 	struct exec_package *epp;
327 {
328 	struct exec *execp = epp->ep_hdr;
329 
330 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, QMAGIC);
331 	epp->ep_tsize = execp->a_text;
332 	epp->ep_daddr = LINUX_N_DATADDR(*execp, QMAGIC);
333 	epp->ep_dsize = execp->a_data + execp->a_bss;
334 	epp->ep_entry = execp->a_entry;
335 
336 	/*
337 	 * check if vnode is in open for writing, because we want to
338 	 * demand-page out of it.  if it is, don't do it, for various
339 	 * reasons
340 	 */
341 	if ((execp->a_text != 0 || execp->a_data != 0) &&
342 	    epp->ep_vp->v_writecount != 0) {
343 #ifdef DIAGNOSTIC
344 		if (epp->ep_vp->v_flag & VTEXT)
345 			panic("exec: a VTEXT vnode has writecount != 0\n");
346 #endif
347 		return ETXTBSY;
348 	}
349 	epp->ep_vp->v_flag |= VTEXT;
350 
351 	/* set up command for text segment */
352 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_text,
353 	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, QMAGIC),
354 	    VM_PROT_READ|VM_PROT_EXECUTE);
355 
356 	/* set up command for data segment */
357 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_data,
358 	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, QMAGIC),
359 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
360 
361 	/* set up command for bss segment */
362 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
363 	    epp->ep_daddr + execp->a_data, NULLVP, 0,
364 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
365 
366 	return exec_aout_setup_stack(p, epp);
367 }
368 
369 /*
370  * Take advantage of the fact that all the linux binaries are compiled
371  * with gcc, and gcc sticks in the comment field a signature. Note that
372  * on SVR4 binaries, the gcc signature will follow the OS name signature,
373  * that will not be a problem. We don't bother to read in the string table,
374  * but we check all the progbits headers.
375  */
376 static int
377 linux_elf32_signature(p, epp, eh)
378 	struct proc *p;
379 	struct exec_package *epp;
380 	Elf32_Ehdr *eh;
381 {
382 	size_t shsize = sizeof(Elf32_Shdr) * eh->e_shnum;
383 	size_t i;
384 	static const char signature[] = "\0GCC: (GNU) ";
385 	char buf[sizeof(signature) - 1];
386 	Elf32_Shdr *sh;
387 	int error;
388 
389 	sh = (Elf32_Shdr *) malloc(shsize, M_TEMP, M_WAITOK);
390 
391 	if ((error = elf32_read_from(p, epp->ep_vp, eh->e_shoff,
392 	    (caddr_t) sh, shsize)) != 0)
393 		goto out;
394 
395 	for (i = 0; i < eh->e_shnum; i++) {
396 		Elf32_Shdr *s = &sh[i];
397 
398 		/*
399 		 * Identify candidates for the comment header;
400 		 * Header cannot have a load address, or flags and
401 		 * it must be large enough.
402 		 */
403 		if (s->sh_type != Elf_sht_progbits ||
404 		    s->sh_addr != 0 ||
405 		    s->sh_flags != 0 ||
406 		    s->sh_size < sizeof(signature) - 1)
407 			continue;
408 
409 		if ((error = elf32_read_from(p, epp->ep_vp, s->sh_offset,
410 		    (caddr_t) buf, sizeof(signature) - 1)) != 0)
411 			goto out;
412 
413 		/*
414 		 * error is 0, if the signatures match we are done.
415 		 */
416 		if (bcmp(buf, signature, sizeof(signature) - 1) == 0)
417 			goto out;
418 	}
419 	error = EFTYPE;
420 
421 out:
422 	free(sh, M_TEMP);
423 	return error;
424 }
425 
426 int
427 linux_elf32_probe(p, epp, eh, itp, pos)
428 	struct proc *p;
429 	struct exec_package *epp;
430 	Elf32_Ehdr *eh;
431 	char *itp;
432 	Elf32_Addr *pos;
433 {
434 	char *bp;
435 	int error;
436 	size_t len;
437 
438 	if ((error = linux_elf32_signature(p, epp, eh)) != 0)
439 		return error;
440 
441 	if (itp[0]) {
442 		if ((error = emul_find(p, NULL, linux_emul_path, itp, &bp, 0)))
443 			return error;
444 		if ((error = copystr(bp, itp, MAXPATHLEN, &len)))
445 			return error;
446 		free(bp, M_TEMP);
447 	}
448 	epp->ep_emul = &emul_linux_elf;
449 	*pos = ELF32_NO_ADDR;
450 	return 0;
451 }
452 
453 /*
454  * The Linux system call to load shared libraries, a.out version. The
455  * a.out shared libs are just files that are mapped onto a fixed
456  * address in the process' address space. The address is given in
457  * a_entry. Read in the header, set up some VM commands and run them.
458  *
459  * Yes, both text and data are mapped at once, so we're left with
460  * writeable text for the shared libs. The Linux crt0 seemed to break
461  * sometimes when data was mapped seperately. It munmapped a uselib()
462  * of ld.so by hand, which failed with shared text and data for ld.so
463  * Yuck.
464  *
465  * Because of the problem with ZMAGIC executables (text starts
466  * at 0x400 in the file, but needs to be mapped at 0), ZMAGIC
467  * shared libs are not handled very efficiently :-(
468  */
469 
470 int
471 linux_sys_uselib(p, v, retval)
472 	struct proc *p;
473 	void *v;
474 	register_t *retval;
475 {
476 	struct linux_sys_uselib_args /* {
477 		syscallarg(char *) path;
478 	} */ *uap = v;
479 	caddr_t sg;
480 	long bsize, dsize, tsize, taddr, baddr, daddr;
481 	struct nameidata ni;
482 	struct vnode *vp;
483 	struct exec hdr;
484 	struct exec_vmcmd_set vcset;
485 	int rem, i, magic, error;
486 
487 	sg = stackgap_init(p->p_emul);
488 	LINUX_CHECK_ALT_EXIST(p, &sg, SCARG(uap, path));
489 
490 	NDINIT(&ni, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
491 
492 	if ((error = namei(&ni)))
493 		return error;
494 
495 	vp = ni.ni_vp;
496 
497 	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t) &hdr, LINUX_AOUT_HDR_SIZE,
498 			     0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred,
499 			     &rem, p))) {
500 		vrele(vp);
501 		return error;
502 	}
503 
504 	if (rem != 0) {
505 		vrele(vp);
506 		return ENOEXEC;
507 	}
508 
509 	if (LINUX_N_MACHTYPE(&hdr) != LINUX_MID_MACHINE)
510 		return ENOEXEC;
511 
512 	magic = LINUX_N_MAGIC(&hdr);
513 	taddr = hdr.a_entry & (~(NBPG - 1));
514 	tsize = hdr.a_text;
515 	daddr = taddr + tsize;
516 	dsize = hdr.a_data + hdr.a_bss;
517 
518 	if ((hdr.a_text != 0 || hdr.a_data != 0) && vp->v_writecount != 0) {
519 		vrele(vp);
520                 return ETXTBSY;
521         }
522 	vp->v_flag |= VTEXT;
523 
524 	vcset.evs_cnt = 0;
525 	vcset.evs_used = 0;
526 
527 	NEW_VMCMD(&vcset,
528 		  magic == ZMAGIC ? vmcmd_map_readvn : vmcmd_map_pagedvn,
529 		  hdr.a_text + hdr.a_data, taddr,
530 		  vp, LINUX_N_TXTOFF(hdr, magic),
531 		  VM_PROT_READ|VM_PROT_EXECUTE|VM_PROT_WRITE);
532 
533 	baddr = roundup(daddr + hdr.a_data, NBPG);
534 	bsize = daddr + dsize - baddr;
535         if (bsize > 0) {
536                 NEW_VMCMD(&vcset, vmcmd_map_zero, bsize, baddr,
537                     NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
538 	}
539 
540 	for (i = 0; i < vcset.evs_used && !error; i++) {
541 		struct exec_vmcmd *vcp;
542 
543 		vcp = &vcset.evs_cmds[i];
544 		error = (*vcp->ev_proc)(p, vcp);
545 	}
546 
547 	kill_vmcmds(&vcset);
548 
549 	vrele(vp);
550 
551 	return error;
552 }
553 
554 /*
555  * Execve(2). Just check the alternate emulation path, and pass it on
556  * to the NetBSD execve().
557  */
558 int
559 linux_sys_execve(p, v, retval)
560 	struct proc *p;
561 	void *v;
562 	register_t *retval;
563 {
564 	struct linux_sys_execve_args /* {
565 		syscallarg(char *) path;
566 		syscallarg(char **) argv;
567 		syscallarg(char **) envp;
568 	} */ *uap = v;
569 	struct sys_execve_args ap;
570 	caddr_t sg;
571 
572 	sg = stackgap_init(p->p_emul);
573 	LINUX_CHECK_ALT_EXIST(p, &sg, SCARG(uap, path));
574 
575 	SCARG(&ap, path) = SCARG(uap, path);
576 	SCARG(&ap, argp) = SCARG(uap, argp);
577 	SCARG(&ap, envp) = SCARG(uap, envp);
578 
579 	return sys_execve(p, &ap, retval);
580 }
581