xref: /netbsd-src/sys/compat/linux/common/linux_sched.c (revision b7ae68fde0d8ef1c03714e8bbb1ee7c6118ea93b)
1 /*	$NetBSD: linux_sched.c,v 1.35 2006/08/23 21:17:48 dogcow Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center; by Matthias Scheler.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Linux compatibility module. Try to deal with scheduler related syscalls.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.35 2006/08/23 21:17:48 dogcow Exp $");
46 
47 #include <sys/param.h>
48 #include <sys/mount.h>
49 #include <sys/proc.h>
50 #include <sys/systm.h>
51 #include <sys/sysctl.h>
52 #include <sys/malloc.h>
53 #include <sys/sa.h>
54 #include <sys/syscallargs.h>
55 #include <sys/wait.h>
56 #include <sys/kauth.h>
57 #include <sys/ptrace.h>
58 
59 #include <machine/cpu.h>
60 
61 #include <compat/linux/common/linux_types.h>
62 #include <compat/linux/common/linux_signal.h>
63 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
64 #include <compat/linux/common/linux_emuldata.h>
65 
66 #include <compat/linux/linux_syscallargs.h>
67 
68 #include <compat/linux/common/linux_sched.h>
69 
70 int
71 linux_sys_clone(l, v, retval)
72 	struct lwp *l;
73 	void *v;
74 	register_t *retval;
75 {
76 	struct linux_sys_clone_args /* {
77 		syscallarg(int) flags;
78 		syscallarg(void *) stack;
79 #ifdef LINUX_NPTL
80 		syscallarg(void *) parent_tidptr;
81 		syscallarg(void *) child_tidptr;
82 #endif
83 	} */ *uap = v;
84 	int flags, sig;
85 	int error;
86 #ifdef LINUX_NPTL
87 	struct linux_emuldata *led;
88 #endif
89 
90 	/*
91 	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
92 	 */
93 	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
94 		return (EINVAL);
95 
96 	/*
97 	 * Thread group implies shared signals. Shared signals
98 	 * imply shared VM. This matches what Linux kernel does.
99 	 */
100 	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
101 	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
102 		return (EINVAL);
103 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
104 	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
105 		return (EINVAL);
106 
107 	flags = 0;
108 
109 	if (SCARG(uap, flags) & LINUX_CLONE_VM)
110 		flags |= FORK_SHAREVM;
111 	if (SCARG(uap, flags) & LINUX_CLONE_FS)
112 		flags |= FORK_SHARECWD;
113 	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
114 		flags |= FORK_SHAREFILES;
115 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
116 		flags |= FORK_SHARESIGS;
117 	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
118 		flags |= FORK_PPWAIT;
119 
120 	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
121 	if (sig < 0 || sig >= LINUX__NSIG)
122 		return (EINVAL);
123 	sig = linux_to_native_signo[sig];
124 
125 #ifdef LINUX_NPTL
126 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
127 
128 	led->parent_tidptr = SCARG(uap, parent_tidptr);
129 	led->child_tidptr = SCARG(uap, child_tidptr);
130 	led->clone_flags = SCARG(uap, flags);
131 #endif /* LINUX_NPTL */
132 
133 	/*
134 	 * Note that Linux does not provide a portable way of specifying
135 	 * the stack area; the caller must know if the stack grows up
136 	 * or down.  So, we pass a stack size of 0, so that the code
137 	 * that makes this adjustment is a noop.
138 	 */
139 	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
140 	    NULL, NULL, retval, NULL)) != 0)
141 		return error;
142 
143 	return 0;
144 }
145 
146 int
147 linux_sys_sched_setparam(cl, v, retval)
148 	struct lwp *cl;
149 	void *v;
150 	register_t *retval;
151 {
152 	struct linux_sys_sched_setparam_args /* {
153 		syscallarg(linux_pid_t) pid;
154 		syscallarg(const struct linux_sched_param *) sp;
155 	} */ *uap = v;
156 	int error;
157 	struct linux_sched_param lp;
158 	struct proc *p;
159 
160 /*
161  * We only check for valid parameters and return afterwards.
162  */
163 
164 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
165 		return EINVAL;
166 
167 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
168 	if (error)
169 		return error;
170 
171 	if (SCARG(uap, pid) != 0) {
172 		kauth_cred_t pc = cl->l_cred;
173 
174 		if ((p = pfind(SCARG(uap, pid))) == NULL)
175 			return ESRCH;
176 		if (!(cl->l_proc == p ||
177 		      kauth_cred_geteuid(pc) == 0 ||
178 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
179 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
180 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
181 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
182 			return EPERM;
183 	}
184 
185 	return 0;
186 }
187 
188 int
189 linux_sys_sched_getparam(cl, v, retval)
190 	struct lwp *cl;
191 	void *v;
192 	register_t *retval;
193 {
194 	struct linux_sys_sched_getparam_args /* {
195 		syscallarg(linux_pid_t) pid;
196 		syscallarg(struct linux_sched_param *) sp;
197 	} */ *uap = v;
198 	struct proc *p;
199 	struct linux_sched_param lp;
200 
201 /*
202  * We only check for valid parameters and return a dummy priority afterwards.
203  */
204 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
205 		return EINVAL;
206 
207 	if (SCARG(uap, pid) != 0) {
208 		kauth_cred_t pc = cl->l_cred;
209 
210 		if ((p = pfind(SCARG(uap, pid))) == NULL)
211 			return ESRCH;
212 		if (!(cl->l_proc == p ||
213 		      kauth_cred_geteuid(pc) == 0 ||
214 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
215 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
216 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
217 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
218 			return EPERM;
219 	}
220 
221 	lp.sched_priority = 0;
222 	return copyout(&lp, SCARG(uap, sp), sizeof(lp));
223 }
224 
225 int
226 linux_sys_sched_setscheduler(cl, v, retval)
227 	struct lwp *cl;
228 	void *v;
229 	register_t *retval;
230 {
231 	struct linux_sys_sched_setscheduler_args /* {
232 		syscallarg(linux_pid_t) pid;
233 		syscallarg(int) policy;
234 		syscallarg(cont struct linux_sched_scheduler *) sp;
235 	} */ *uap = v;
236 	int error;
237 	struct linux_sched_param lp;
238 	struct proc *p;
239 
240 /*
241  * We only check for valid parameters and return afterwards.
242  */
243 
244 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
245 		return EINVAL;
246 
247 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
248 	if (error)
249 		return error;
250 
251 	if (SCARG(uap, pid) != 0) {
252 		kauth_cred_t pc = cl->l_cred;
253 
254 		if ((p = pfind(SCARG(uap, pid))) == NULL)
255 			return ESRCH;
256 		if (!(cl->l_proc == p ||
257 		      kauth_cred_geteuid(pc) == 0 ||
258 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
259 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
260 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
261 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
262 			return EPERM;
263 	}
264 
265 	return 0;
266 /*
267  * We can't emulate anything put the default scheduling policy.
268  */
269 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER || lp.sched_priority != 0)
270 		return EINVAL;
271 
272 	return 0;
273 }
274 
275 int
276 linux_sys_sched_getscheduler(cl, v, retval)
277 	struct lwp *cl;
278 	void *v;
279 	register_t *retval;
280 {
281 	struct linux_sys_sched_getscheduler_args /* {
282 		syscallarg(linux_pid_t) pid;
283 	} */ *uap = v;
284 	struct proc *p;
285 
286 	*retval = -1;
287 /*
288  * We only check for valid parameters and return afterwards.
289  */
290 
291 	if (SCARG(uap, pid) != 0) {
292 		kauth_cred_t pc = cl->l_cred;
293 
294 		if ((p = pfind(SCARG(uap, pid))) == NULL)
295 			return ESRCH;
296 		if (!(cl->l_proc == p ||
297 		      kauth_cred_geteuid(pc) == 0 ||
298 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
299 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
300 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
301 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
302 			return EPERM;
303 	}
304 
305 /*
306  * We can't emulate anything put the default scheduling policy.
307  */
308 	*retval = LINUX_SCHED_OTHER;
309 	return 0;
310 }
311 
312 int
313 linux_sys_sched_yield(cl, v, retval)
314 	struct lwp *cl;
315 	void *v;
316 	register_t *retval;
317 {
318 
319 	yield();
320 	return 0;
321 }
322 
323 int
324 linux_sys_sched_get_priority_max(cl, v, retval)
325 	struct lwp *cl;
326 	void *v;
327 	register_t *retval;
328 {
329 	struct linux_sys_sched_get_priority_max_args /* {
330 		syscallarg(int) policy;
331 	} */ *uap = v;
332 
333 /*
334  * We can't emulate anything put the default scheduling policy.
335  */
336 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
337 		*retval = -1;
338 		return EINVAL;
339 	}
340 
341 	*retval = 0;
342 	return 0;
343 }
344 
345 int
346 linux_sys_sched_get_priority_min(cl, v, retval)
347 	struct lwp *cl;
348 	void *v;
349 	register_t *retval;
350 {
351 	struct linux_sys_sched_get_priority_min_args /* {
352 		syscallarg(int) policy;
353 	} */ *uap = v;
354 
355 /*
356  * We can't emulate anything put the default scheduling policy.
357  */
358 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
359 		*retval = -1;
360 		return EINVAL;
361 	}
362 
363 	*retval = 0;
364 	return 0;
365 }
366 
367 #ifndef __m68k__
368 /* Present on everything but m68k */
369 int
370 linux_sys_exit_group(l, v, retval)
371 	struct lwp *l;
372 	void *v;
373 	register_t *retval;
374 {
375 #ifdef LINUX_NPTL
376 	struct linux_sys_exit_group_args /* {
377 		syscallarg(int) error_code;
378 	} */ *uap = v;
379 	struct proc *p = l->l_proc;
380 	struct linux_emuldata *led = p->p_emuldata;
381 	struct linux_emuldata *e;
382 
383 #ifdef DEBUG_LINUX
384 	printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__, led->s->refs);
385 #endif
386 	/*
387 	 * The calling thread is supposed to kill all threads
388 	 * in the same thread group (i.e. all threads created
389 	 * via clone(2) with CLONE_THREAD flag set).
390 	 *
391 	 * If there is only one thread, things are quite simple
392 	 */
393 	if (led->s->refs == 1)
394 		return sys_exit(l, v, retval);
395 
396 #ifdef DEBUG_LINUX
397 	printf("%s:%d\n", __func__, __LINE__);
398 #endif
399 
400 	led->s->flags |= LINUX_LES_INEXITGROUP;
401 	led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
402 
403 	/*
404 	 * Kill all threads in the group. The emulation exit hook takes
405 	 * care of hiding the zombies and reporting the exit code properly
406 	 */
407       	LIST_FOREACH(e, &led->s->threads, threads) {
408 		if (e->proc == p)
409 			continue;
410 
411 #ifdef DEBUG_LINUX
412 		printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
413 #endif
414 		psignal(e->proc, SIGKILL);
415 	}
416 
417 	/* Now, kill ourselves */
418 	psignal(p, SIGKILL);
419 	return 0;
420 #else /* LINUX_NPTL */
421 	return sys_exit(l, v, retval);
422 #endif /* LINUX_NPTL */
423 }
424 #endif /* !__m68k__ */
425 
426 #ifdef LINUX_NPTL
427 int
428 linux_sys_set_tid_address(l, v, retval)
429 	struct lwp *l;
430 	void *v;
431 	register_t *retval;
432 {
433 	struct linux_sys_set_tid_address_args /* {
434 		syscallarg(int *) tidptr;
435 	} */ *uap = v;
436 	struct linux_emuldata *led;
437 
438 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
439 	led->clear_tid = SCARG(uap, tid);
440 
441 	*retval = l->l_proc->p_pid;
442 
443 	return 0;
444 }
445 
446 /* ARGUSED1 */
447 int
448 linux_sys_gettid(l, v, retval)
449 	struct lwp *l;
450 	void *v;
451 	register_t *retval;
452 {
453 	/* The Linux kernel does it exactly that way */
454 	*retval = l->l_proc->p_pid;
455 	return 0;
456 }
457 
458 #ifdef LINUX_NPTL
459 /* ARGUSED1 */
460 int
461 linux_sys_getpid(l, v, retval)
462 	struct lwp *l;
463 	void *v;
464 	register_t *retval;
465 {
466 	struct linux_emuldata *led;
467 
468 	led = l->l_proc->p_emuldata;
469 
470 	/* The Linux kernel does it exactly that way */
471 	*retval = led->s->group_pid;
472 
473 	return 0;
474 }
475 
476 /* ARGUSED1 */
477 int
478 linux_sys_getppid(l, v, retval)
479 	struct lwp *l;
480 	void *v;
481 	register_t *retval;
482 {
483 	struct proc *p = l->l_proc;
484 	struct linux_emuldata *led = p->p_emuldata;
485 	struct proc *glp;
486 	struct proc *pp;
487 
488 	/* Find the thread group leader's parent */
489 	if ((glp = pfind(led->s->group_pid)) == NULL) {
490 		/* Maybe panic... */
491 		printf("linux_sys_getppid: missing group leader PID %d\n",
492 		    led->s->group_pid);
493 		return -1;
494 	}
495 	pp = glp->p_pptr;
496 
497 	/* If this is a Linux process too, return thread group PID */
498 	if (pp->p_emul == p->p_emul) {
499 		struct linux_emuldata *pled;
500 
501 		pled = pp->p_emuldata;
502 		*retval = pled->s->group_pid;
503 	} else {
504 		*retval = pp->p_pid;
505 	}
506 
507 	return 0;
508 }
509 #endif /* LINUX_NPTL */
510 
511 int
512 linux_sys_sched_getaffinity(l, v, retval)
513 	struct lwp *l;
514 	void *v;
515 	register_t *retval;
516 {
517 	struct linux_sys_sched_getaffinity_args /* {
518 		syscallarg(pid_t) pid;
519 		syscallarg(unsigned int) len;
520 		syscallarg(unsigned long *) mask;
521 	} */ *uap = v;
522 	int error;
523 	int ret;
524 	int ncpu;
525 	int name[2];
526 	size_t sz;
527 	char *data;
528 	int *retp;
529 
530 	if (SCARG(uap, mask) == NULL)
531 		return EINVAL;
532 
533 	if (SCARG(uap, len) < sizeof(int))
534 		return EINVAL;
535 
536 	if (pfind(SCARG(uap, pid)) == NULL)
537 		return ESRCH;
538 
539 	/*
540 	 * return the actual number of CPU, tag all of them as available
541 	 * The result is a mask, the first CPU being in the least significant
542 	 * bit.
543 	 */
544 	name[0] = CTL_HW;
545 	name[1] = HW_NCPU;
546 	sz = sizeof(ncpu);
547 
548 	if ((error = old_sysctl(&name[0], 2, &ncpu, &sz, NULL, 0, NULL)) != 0)
549 		return error;
550 
551 	ret = (1 << ncpu) - 1;
552 
553 	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
554 	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
555 	*retp = ret;
556 
557 	if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
558 		return error;
559 
560 	free(data, M_TEMP);
561 
562 	return 0;
563 
564 }
565 
566 int
567 linux_sys_sched_setaffinity(l, v, retval)
568 	struct lwp *l;
569 	void *v;
570 	register_t *retval;
571 {
572 	struct linux_sys_sched_setaffinity_args /* {
573 		syscallarg(pid_t) pid;
574 		syscallarg(unsigned int) len;
575 		syscallarg(unsigned long *) mask;
576 	} */ *uap = v;
577 
578 	if (pfind(SCARG(uap, pid)) == NULL)
579 		return ESRCH;
580 
581 	/* Let's ignore it */
582 #ifdef DEBUG_LINUX
583 	printf("linux_sys_sched_setaffinity\n");
584 #endif
585 	return 0;
586 };
587 #endif /* LINUX_NPTL */
588