xref: /netbsd-src/sys/compat/linux/common/linux_exec.c (revision 81b108b45f75f89f1e3ffad9fb6f074e771c0935)
1 /*	$NetBSD: linux_exec.c,v 1.16 1996/09/03 03:12:28 mycroft Exp $	*/
2 
3 /*
4  * Copyright (c) 1995 Frank van der Linden
5  * Copyright (c) 1994 Christos Zoulas
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * based on exec_aout.c, sunos_exec.c and svr4_exec.c
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/proc.h>
37 #include <sys/malloc.h>
38 #include <sys/namei.h>
39 #include <sys/vnode.h>
40 #include <sys/mount.h>
41 #include <sys/exec_elf.h>
42 
43 #include <sys/mman.h>
44 #include <sys/syscallargs.h>
45 
46 #include <vm/vm.h>
47 #include <vm/vm_param.h>
48 #include <vm/vm_map.h>
49 
50 #include <machine/cpu.h>
51 #include <machine/reg.h>
52 #include <machine/exec.h>
53 #include <machine/linux_machdep.h>
54 
55 #include <compat/linux/linux_types.h>
56 #include <compat/linux/linux_syscall.h>
57 #include <compat/linux/linux_signal.h>
58 #include <compat/linux/linux_syscallargs.h>
59 #include <compat/linux/linux_util.h>
60 #include <compat/linux/linux_exec.h>
61 
62 static void *linux_aout_copyargs __P((struct exec_package *,
63     struct ps_strings *, void *, void *));
64 static int linux_elf_signature __P((struct proc *p, struct exec_package *,
65     Elf32_Ehdr *));
66 
67 #define	LINUX_AOUT_AUX_ARGSIZ	2
68 #define LINUX_ELF_AUX_ARGSIZ (sizeof(AuxInfo) * 8 / sizeof(char *))
69 
70 
71 const char linux_emul_path[] = "/emul/linux";
72 extern int linux_error[];
73 extern char linux_sigcode[], linux_esigcode[];
74 extern struct sysent linux_sysent[];
75 extern char *linux_syscallnames[];
76 
77 int exec_linux_aout_prep_zmagic __P((struct proc *, struct exec_package *));
78 int exec_linux_aout_prep_nmagic __P((struct proc *, struct exec_package *));
79 int exec_linux_aout_prep_omagic __P((struct proc *, struct exec_package *));
80 int exec_linux_aout_prep_qmagic __P((struct proc *, struct exec_package *));
81 
/*
 * Emulation description used for Linux a.out binaries.
 * exec_linux_aout_makecmds() stores the address of this structure in
 * ep_emul once an a.out header has been accepted.  The initializer is
 * positional; see the declaration of struct emul for the field each
 * entry fills (not visible in this file).
 */
82 struct emul emul_linux_aout = {
83 	"linux",
84 	linux_error,
85 	linux_sendsig,
86 	LINUX_SYS_syscall,
87 	LINUX_SYS_MAXSYSCALL,
88 	linux_sysent,
89 	linux_syscallnames,
90 	LINUX_AOUT_AUX_ARGSIZ,
91 	linux_aout_copyargs,
92 	setregs,
93 	linux_sigcode,
94 	linux_esigcode,
95 };
96 
/*
 * Emulation description used for Linux ELF binaries; linux_elf_probe()
 * stores its address in ep_emul.  It differs from emul_linux_aout only
 * in the auxiliary argument size (LINUX_ELF_AUX_ARGSIZ) and in the
 * argument copy routine (elf_copyargs).  The initializer is positional;
 * see the declaration of struct emul for the field each entry fills
 * (not visible in this file).
 */
97 struct emul emul_linux_elf = {
98 	"linux",
99 	linux_error,
100 	linux_sendsig,
101 	LINUX_SYS_syscall,
102 	LINUX_SYS_MAXSYSCALL,
103 	linux_sysent,
104 	linux_syscallnames,
105 	LINUX_ELF_AUX_ARGSIZ,
106 	elf_copyargs,
107 	setregs,
108 	linux_sigcode,
109 	linux_esigcode,
110 };
111 
112 
113 static void *
114 linux_aout_copyargs(pack, arginfo, stack, argp)
115 	struct exec_package *pack;
116 	struct ps_strings *arginfo;
117 	void *stack;
118 	void *argp;
119 {
120 	char **cpp = stack;
121 	char **stk = stack;
122 	char *dp, *sp;
123 	size_t len;
124 	void *nullp = NULL;
125 	int argc = arginfo->ps_nargvstr;
126 	int envc = arginfo->ps_nenvstr;
127 
128 	if (copyout(&argc, cpp++, sizeof(argc)))
129 		return NULL;
130 
131 	/* leave room for envp and argv */
132 	cpp += 2;
133 	if (copyout(&cpp, &stk[1], sizeof (cpp)))
134 		return NULL;
135 
136 	dp = (char *) (cpp + argc + envc + 2);
137 	sp = argp;
138 
139 	/* XXX don't copy them out, remap them! */
140 	arginfo->ps_argvstr = cpp; /* remember location of argv for later */
141 
142 	for (; --argc >= 0; sp += len, dp += len)
143 		if (copyout(&dp, cpp++, sizeof(dp)) ||
144 		    copyoutstr(sp, dp, ARG_MAX, &len))
145 			return NULL;
146 
147 	if (copyout(&nullp, cpp++, sizeof(nullp)))
148 		return NULL;
149 
150 	if (copyout(&cpp, &stk[2], sizeof (cpp)))
151 		return NULL;
152 
153 	arginfo->ps_envstr = cpp; /* remember location of envp for later */
154 
155 	for (; --envc >= 0; sp += len, dp += len)
156 		if (copyout(&dp, cpp++, sizeof(dp)) ||
157 		    copyoutstr(sp, dp, ARG_MAX, &len))
158 			return NULL;
159 
160 	if (copyout(&nullp, cpp++, sizeof(nullp)))
161 		return NULL;
162 
163 	return cpp;
164 }
165 
166 int
167 exec_linux_aout_makecmds(p, epp)
168 	struct proc *p;
169 	struct exec_package *epp;
170 {
171 	struct exec *linux_ep = epp->ep_hdr;
172 	int machtype, magic;
173 	int error = ENOEXEC;
174 
175 	magic = LINUX_N_MAGIC(linux_ep);
176 	machtype = LINUX_N_MACHTYPE(linux_ep);
177 
178 
179 	if (machtype != LINUX_MID_MACHINE)
180 		return (ENOEXEC);
181 
182 	switch (magic) {
183 	case QMAGIC:
184 		error = exec_linux_aout_prep_qmagic(p, epp);
185 		break;
186 	case ZMAGIC:
187 		error = exec_linux_aout_prep_zmagic(p, epp);
188 		break;
189 	case NMAGIC:
190 		error = exec_linux_aout_prep_nmagic(p, epp);
191 		break;
192 	case OMAGIC:
193 		error = exec_linux_aout_prep_omagic(p, epp);
194 		break;
195 	}
196 	if (error == 0)
197 		epp->ep_emul = &emul_linux_aout;
198 	return error;
199 }
200 
201 /*
202  * Since text starts at 0x400 in Linux ZMAGIC executables, and 0x400
203  * is very likely not page aligned on most architectures, it is treated
204  * as an NMAGIC here. XXX
205  */
206 
207 int
208 exec_linux_aout_prep_zmagic(p, epp)
209 	struct proc *p;
210 	struct exec_package *epp;
211 {
212 	struct exec *execp = epp->ep_hdr;
213 
214 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, ZMAGIC);
215 	epp->ep_tsize = execp->a_text;
216 	epp->ep_daddr = LINUX_N_DATADDR(*execp, ZMAGIC);
217 	epp->ep_dsize = execp->a_data + execp->a_bss;
218 	epp->ep_entry = execp->a_entry;
219 
220 	/* set up command for text segment */
221 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
222 	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, ZMAGIC),
223 	    VM_PROT_READ|VM_PROT_EXECUTE);
224 
225 	/* set up command for data segment */
226 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
227 	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, ZMAGIC),
228 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
229 
230 	/* set up command for bss segment */
231 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
232 	    epp->ep_daddr + execp->a_data, NULLVP, 0,
233 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
234 
235 	return exec_aout_setup_stack(p, epp);
236 }
237 
238 /*
239  * exec_aout_prep_nmagic(): Prepare Linux NMAGIC package.
240  * Not different from the normal stuff.
241  */
242 
243 int
244 exec_linux_aout_prep_nmagic(p, epp)
245 	struct proc *p;
246 	struct exec_package *epp;
247 {
248 	struct exec *execp = epp->ep_hdr;
249 	long bsize, baddr;
250 
251 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, NMAGIC);
252 	epp->ep_tsize = execp->a_text;
253 	epp->ep_daddr = LINUX_N_DATADDR(*execp, NMAGIC);
254 	epp->ep_dsize = execp->a_data + execp->a_bss;
255 	epp->ep_entry = execp->a_entry;
256 
257 	/* set up command for text segment */
258 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
259 	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, NMAGIC),
260 	    VM_PROT_READ|VM_PROT_EXECUTE);
261 
262 	/* set up command for data segment */
263 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
264 	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, NMAGIC),
265 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
266 
267 	/* set up command for bss segment */
268 	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
269 	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
270 	if (bsize > 0)
271 		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
272 		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
273 
274 	return exec_aout_setup_stack(p, epp);
275 }
276 
277 /*
278  * exec_aout_prep_omagic(): Prepare Linux OMAGIC package.
279  * Business as usual.
280  */
281 
282 int
283 exec_linux_aout_prep_omagic(p, epp)
284 	struct proc *p;
285 	struct exec_package *epp;
286 {
287 	struct exec *execp = epp->ep_hdr;
288 	long dsize, bsize, baddr;
289 
290 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, OMAGIC);
291 	epp->ep_tsize = execp->a_text;
292 	epp->ep_daddr = LINUX_N_DATADDR(*execp, OMAGIC);
293 	epp->ep_dsize = execp->a_data + execp->a_bss;
294 	epp->ep_entry = execp->a_entry;
295 
296 	/* set up command for text and data segments */
297 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn,
298 	    execp->a_text + execp->a_data, epp->ep_taddr, epp->ep_vp,
299 	    LINUX_N_TXTOFF(*execp, OMAGIC), VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
300 
301 	/* set up command for bss segment */
302 	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
303 	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
304 	if (bsize > 0)
305 		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
306 		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
307 
308 	/*
309 	 * Make sure (# of pages) mapped above equals (vm_tsize + vm_dsize);
310 	 * obreak(2) relies on this fact. Both `vm_tsize' and `vm_dsize' are
311 	 * computed (in execve(2)) by rounding *up* `ep_tsize' and `ep_dsize'
312 	 * respectively to page boundaries.
313 	 * Compensate `ep_dsize' for the amount of data covered by the last
314 	 * text page.
315 	 */
316 	dsize = epp->ep_dsize + execp->a_text - roundup(execp->a_text, NBPG);
317 	epp->ep_dsize = (dsize > 0) ? dsize : 0;
318 	return exec_aout_setup_stack(p, epp);
319 }
320 
321 int
322 exec_linux_aout_prep_qmagic(p, epp)
323 	struct proc *p;
324 	struct exec_package *epp;
325 {
326 	struct exec *execp = epp->ep_hdr;
327 
328 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, QMAGIC);
329 	epp->ep_tsize = execp->a_text;
330 	epp->ep_daddr = LINUX_N_DATADDR(*execp, QMAGIC);
331 	epp->ep_dsize = execp->a_data + execp->a_bss;
332 	epp->ep_entry = execp->a_entry;
333 
334 	/*
335 	 * check if vnode is in open for writing, because we want to
336 	 * demand-page out of it.  if it is, don't do it, for various
337 	 * reasons
338 	 */
339 	if ((execp->a_text != 0 || execp->a_data != 0) &&
340 	    epp->ep_vp->v_writecount != 0) {
341 #ifdef DIAGNOSTIC
342 		if (epp->ep_vp->v_flag & VTEXT)
343 			panic("exec: a VTEXT vnode has writecount != 0\n");
344 #endif
345 		return ETXTBSY;
346 	}
347 	epp->ep_vp->v_flag |= VTEXT;
348 
349 	/* set up command for text segment */
350 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_text,
351 	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, QMAGIC),
352 	    VM_PROT_READ|VM_PROT_EXECUTE);
353 
354 	/* set up command for data segment */
355 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_data,
356 	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, QMAGIC),
357 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
358 
359 	/* set up command for bss segment */
360 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
361 	    epp->ep_daddr + execp->a_data, NULLVP, 0,
362 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
363 
364 	return exec_aout_setup_stack(p, epp);
365 }
366 
367 /*
368  * Take advantage of the fact that all the linux binaries are compiled
369  * with gcc, and gcc sticks in the comment field a signature. Note that
370  * on SVR4 binaries, the gcc signature will follow the OS name signature,
371  * that will not be a problem. We don't bother to read in the string table,
372  * but we check all the progbits headers.
373  */
374 static int
375 linux_elf_signature(p, epp, eh)
376 	struct proc *p;
377 	struct exec_package *epp;
378 	Elf32_Ehdr *eh;
379 {
380 	size_t shsize = sizeof(Elf32_Shdr) * eh->e_shnum;
381 	size_t i;
382 	static const char signature[] = "\0GCC: (GNU) ";
383 	char buf[sizeof(signature) - 1];
384 	Elf32_Shdr *sh;
385 	int error;
386 
387 	sh = (Elf32_Shdr *) malloc(shsize, M_TEMP, M_WAITOK);
388 
389 	if ((error = elf_read_from(p, epp->ep_vp, eh->e_shoff,
390 	    (caddr_t) sh, shsize)) != 0)
391 		goto out;
392 
393 	for (i = 0; i < eh->e_shnum; i++) {
394 		Elf32_Shdr *s = &sh[i];
395 
396 		/*
397 		 * Identify candidates for the comment header;
398 		 * Header cannot have a load address, or flags and
399 		 * it must be large enough.
400 		 */
401 		if (s->sh_type != Elf32_sht_progbits ||
402 		    s->sh_addr != 0 ||
403 		    s->sh_flags != 0 ||
404 		    s->sh_size < sizeof(signature) - 1)
405 			continue;
406 
407 		if ((error = elf_read_from(p, epp->ep_vp, s->sh_offset,
408 		    (caddr_t) buf, sizeof(signature) - 1)) != 0)
409 			goto out;
410 
411 		/*
412 		 * error is 0, if the signatures match we are done.
413 		 */
414 		if (bcmp(buf, signature, sizeof(signature) - 1) == 0)
415 			goto out;
416 	}
417 	error = EFTYPE;
418 
419 out:
420 	free(sh, M_TEMP);
421 	return error;
422 }
423 
424 int
425 linux_elf_probe(p, epp, eh, itp, pos)
426 	struct proc *p;
427 	struct exec_package *epp;
428 	Elf32_Ehdr *eh;
429 	char *itp;
430 	u_long *pos;
431 {
432 	char *bp;
433 	int error;
434 	size_t len;
435 
436 	if ((error = linux_elf_signature(p, epp, eh)) != 0)
437 		return error;
438 
439 	if (itp[0]) {
440 		if ((error = emul_find(p, NULL, linux_emul_path, itp, &bp, 0)))
441 			return error;
442 		if ((error = copystr(bp, itp, MAXPATHLEN, &len)))
443 			return error;
444 		free(bp, M_TEMP);
445 	}
446 	epp->ep_emul = &emul_linux_elf;
447 	*pos = ELF32_NO_ADDR;
448 	return 0;
449 }
450 
451 /*
452  * The Linux system call to load shared libraries, a.out version. The
453  * a.out shared libs are just files that are mapped onto a fixed
454  * address in the process' address space. The address is given in
455  * a_entry. Read in the header, set up some VM commands and run them.
456  *
457  * Yes, both text and data are mapped at once, so we're left with
458  * writeable text for the shared libs. The Linux crt0 seemed to break
459  * sometimes when data was mapped seperately. It munmapped a uselib()
460  * of ld.so by hand, which failed with shared text and data for ld.so
461  * Yuck.
462  *
463  * Because of the problem with ZMAGIC executables (text starts
464  * at 0x400 in the file, but needs to be mapped at 0), ZMAGIC
465  * shared libs are not handled very efficiently :-(
466  */
467 
468 int
469 linux_sys_uselib(p, v, retval)
470 	struct proc *p;
471 	void *v;
472 	register_t *retval;
473 {
474 	struct linux_sys_uselib_args /* {
475 		syscallarg(char *) path;
476 	} */ *uap = v;
477 	caddr_t sg;
478 	long bsize, dsize, tsize, taddr, baddr, daddr;
479 	struct nameidata ni;
480 	struct vnode *vp;
481 	struct exec hdr;
482 	struct exec_vmcmd_set vcset;
483 	int rem, i, magic, error;
484 
485 	sg = stackgap_init(p->p_emul);
486 	LINUX_CHECK_ALT_EXIST(p, &sg, SCARG(uap, path));
487 
488 	NDINIT(&ni, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
489 
490 	if ((error = namei(&ni)))
491 		return error;
492 
493 	vp = ni.ni_vp;
494 
495 	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t) &hdr, LINUX_AOUT_HDR_SIZE,
496 			     0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred,
497 			     &rem, p))) {
498 		vrele(vp);
499 		return error;
500 	}
501 
502 	if (rem != 0) {
503 		vrele(vp);
504 		return ENOEXEC;
505 	}
506 
507 	if (LINUX_N_MACHTYPE(&hdr) != LINUX_MID_MACHINE)
508 		return ENOEXEC;
509 
510 	magic = LINUX_N_MAGIC(&hdr);
511 	taddr = hdr.a_entry & (~(NBPG - 1));
512 	tsize = hdr.a_text;
513 	daddr = taddr + tsize;
514 	dsize = hdr.a_data + hdr.a_bss;
515 
516 	if ((hdr.a_text != 0 || hdr.a_data != 0) && vp->v_writecount != 0) {
517 		vrele(vp);
518                 return ETXTBSY;
519         }
520 	vp->v_flag |= VTEXT;
521 
522 	vcset.evs_cnt = 0;
523 	vcset.evs_used = 0;
524 
525 	NEW_VMCMD(&vcset,
526 		  magic == ZMAGIC ? vmcmd_map_readvn : vmcmd_map_pagedvn,
527 		  hdr.a_text + hdr.a_data, taddr,
528 		  vp, LINUX_N_TXTOFF(hdr, magic),
529 		  VM_PROT_READ|VM_PROT_EXECUTE|VM_PROT_WRITE);
530 
531 	baddr = roundup(daddr + hdr.a_data, NBPG);
532 	bsize = daddr + dsize - baddr;
533         if (bsize > 0) {
534                 NEW_VMCMD(&vcset, vmcmd_map_zero, bsize, baddr,
535                     NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
536 	}
537 
538 	for (i = 0; i < vcset.evs_used && !error; i++) {
539 		struct exec_vmcmd *vcp;
540 
541 		vcp = &vcset.evs_cmds[i];
542 		error = (*vcp->ev_proc)(p, vcp);
543 	}
544 
545 	kill_vmcmds(&vcset);
546 
547 	vrele(vp);
548 
549 	return error;
550 }
551 
552 /*
553  * Execve(2). Just check the alternate emulation path, and pass it on
554  * to the NetBSD execve().
555  */
556 int
557 linux_sys_execve(p, v, retval)
558 	struct proc *p;
559 	void *v;
560 	register_t *retval;
561 {
562 	struct linux_sys_execve_args /* {
563 		syscallarg(char *) path;
564 		syscallarg(char **) argv;
565 		syscallarg(char **) envp;
566 	} */ *uap = v;
567 	struct sys_execve_args ap;
568 	caddr_t sg;
569 
570 	sg = stackgap_init(p->p_emul);
571 	LINUX_CHECK_ALT_EXIST(p, &sg, SCARG(uap, path));
572 
573 	SCARG(&ap, path) = SCARG(uap, path);
574 	SCARG(&ap, argp) = SCARG(uap, argp);
575 	SCARG(&ap, envp) = SCARG(uap, envp);
576 
577 	return sys_execve(p, &ap, retval);
578 }
579