xref: /netbsd-src/sys/compat/linux/common/linux_sched.c (revision d48f14661dda8638fee055ba15d35bdfb29b9fa8)
1 /*	$NetBSD: linux_sched.c,v 1.32 2006/06/26 07:42:00 manu Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center; by Matthias Scheler.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Linux compatibility module. Try to deal with scheduler related syscalls.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.32 2006/06/26 07:42:00 manu Exp $");
46 
47 #include <sys/param.h>
48 #include <sys/mount.h>
49 #include <sys/proc.h>
50 #include <sys/systm.h>
51 #include <sys/sysctl.h>
52 #include <sys/malloc.h>
53 #include <sys/sa.h>
54 #include <sys/syscallargs.h>
55 #include <sys/wait.h>
56 #include <sys/kauth.h>
57 
58 #include <machine/cpu.h>
59 
60 #include <compat/linux/common/linux_types.h>
61 #include <compat/linux/common/linux_signal.h>
62 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
63 #include <compat/linux/common/linux_emuldata.h>
64 
65 #include <compat/linux/linux_syscallargs.h>
66 
67 #include <compat/linux/common/linux_sched.h>
68 
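/*
 * clone(2) emulation.  For illustration only (the flag set and argument
 * order vary with the libc and architecture), an NPTL-style thread
 * library typically creates a thread with something like
 *	clone(fn, stack, CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|
 *	    CLONE_THREAD|CLONE_SETTLS|CLONE_PARENT_SETTID|
 *	    CLONE_CHILD_CLEARTID, arg, &ptid, tls, &ctid);
 * The handler below maps the sharing flags onto their fork1(9) FORK_*
 * counterparts and records the TID pointers in the emulation-private
 * data.
 */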
69 int
70 linux_sys_clone(l, v, retval)
71 	struct lwp *l;
72 	void *v;
73 	register_t *retval;
74 {
75 	struct linux_sys_clone_args /* {
76 		syscallarg(int) flags;
77 		syscallarg(void *) stack;
78 #ifdef LINUX_NPTL
79 		syscallarg(void *) parent_tidptr;
80 		syscallarg(void *) child_tidptr;
81 #endif
82 	} */ *uap = v;
83 	int flags, sig;
84 	int error;
85 #ifdef LINUX_NPTL
86 	struct linux_emuldata *led;
87 #endif
88 
89 	/*
90 	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
91 	 */
92 	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
93 		return (EINVAL);
94 
95 	/*
96 	 * Thread group implies shared signals. Shared signals
97 	 * imply shared VM. This matches what Linux kernel does.
98 	 * imply shared VM. This matches what the Linux kernel does.
99 	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
100 	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
101 		return (EINVAL);
102 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
103 	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
104 		return (EINVAL);
105 
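	/*
	 * Translate the Linux CLONE_* sharing flags into their native
	 * fork1(9) FORK_* counterparts.
	 */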
106 	flags = 0;
107 
108 	if (SCARG(uap, flags) & LINUX_CLONE_VM)
109 		flags |= FORK_SHAREVM;
110 	if (SCARG(uap, flags) & LINUX_CLONE_FS)
111 		flags |= FORK_SHARECWD;
112 	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
113 		flags |= FORK_SHAREFILES;
114 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
115 		flags |= FORK_SHARESIGS;
116 	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
117 		flags |= FORK_PPWAIT;
118 
119 	/* A thread should not raise SIGCHLD on termination */
120 	if (SCARG(uap, flags) & LINUX_CLONE_THREAD) {
121 		sig = 0;
122 	} else {
123 		sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
124 		if (sig < 0 || sig >= LINUX__NSIG)
125 			return (EINVAL);
126 		sig = linux_to_native_signo[sig];
127 	}
128 
129 #ifdef LINUX_NPTL
130 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
131 
132 	if (SCARG(uap, flags) & LINUX_CLONE_PARENT_SETTID) {
133 		if (SCARG(uap, parent_tidptr) == NULL) {
134 			printf("linux_sys_clone: NULL parent_tidptr\n");
135 			return EINVAL;
136 		}
137 
138 		if ((error = copyout(&l->l_proc->p_pid,
139 		    SCARG(uap, parent_tidptr),
140 		    sizeof(l->l_proc->p_pid))) != 0)
141 			return error;
142 	}
143 
144 	/* CLONE_CHILD_CLEARTID: TID cleared in the child on exit() */
145 	if (SCARG(uap, flags) & LINUX_CLONE_CHILD_CLEARTID)
146 		led->child_clear_tid = SCARG(uap, child_tidptr);
147 	else
148 		led->child_clear_tid = NULL;
149 
150 	/* CLONE_CHILD_SETTID: TID set in the child on clone() */
151 	if (SCARG(uap, flags) & LINUX_CLONE_CHILD_SETTID)
152 		led->child_set_tid = SCARG(uap, child_tidptr);
153 	else
154 		led->child_set_tid = NULL;
155 
156 	/* CLONE_SETTLS: new Thread Local Storage in the child */
157 	if (SCARG(uap, flags) & LINUX_CLONE_SETTLS)
158 		led->set_tls = linux_get_newtls(l);
159 	else
160 		led->set_tls = 0;
161 #endif /* LINUX_NPTL */
162 	/*
163 	 * Note that Linux does not provide a portable way of specifying
164 	 * the stack area; the caller must know whether the stack grows up
165 	 * or down.  We therefore pass a stack size of 0, so that the code
166 	 * that adjusts the stack pointer becomes a no-op.
167 	 */
168 	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
169 	    NULL, NULL, retval, NULL)) != 0)
170 		return error;
171 
172 	return 0;
173 }
174 
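/*
 * sched_setparam(2): validate the arguments and the caller's right to
 * affect the target process, but leave the native scheduler untouched.
 */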
175 int
176 linux_sys_sched_setparam(cl, v, retval)
177 	struct lwp *cl;
178 	void *v;
179 	register_t *retval;
180 {
181 	struct linux_sys_sched_setparam_args /* {
182 		syscallarg(linux_pid_t) pid;
183 		syscallarg(const struct linux_sched_param *) sp;
184 	} */ *uap = v;
185 	struct proc *cp = cl->l_proc;
186 	int error;
187 	struct linux_sched_param lp;
188 	struct proc *p;
189 
190 /*
191  * We only check the parameters for validity; nothing is actually changed.
192  */
193 
194 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
195 		return EINVAL;
196 
197 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
198 	if (error)
199 		return error;
200 
201 	if (SCARG(uap, pid) != 0) {
202 		kauth_cred_t pc = cp->p_cred;
203 
204 		if ((p = pfind(SCARG(uap, pid))) == NULL)
205 			return ESRCH;
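		/*
		 * Allow the call if the caller is the target itself, has
		 * superuser credentials, or shares a real or effective uid
		 * with the target (any of the four combinations).
		 */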
206 		if (!(cp == p ||
207 		      kauth_cred_geteuid(pc) == 0 ||
208 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
209 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
210 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
211 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
212 			return EPERM;
213 	}
214 
215 	return 0;
216 }
217 
218 int
219 linux_sys_sched_getparam(cl, v, retval)
220 	struct lwp *cl;
221 	void *v;
222 	register_t *retval;
223 {
224 	struct linux_sys_sched_getparam_args /* {
225 		syscallarg(linux_pid_t) pid;
226 		syscallarg(struct linux_sched_param *) sp;
227 	} */ *uap = v;
228 	struct proc *cp = cl->l_proc;
229 	struct proc *p;
230 	struct linux_sched_param lp;
231 
232 /*
233  * We only check for valid parameters and return a dummy priority afterwards.
234  */
235 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
236 		return EINVAL;
237 
238 	if (SCARG(uap, pid) != 0) {
239 		kauth_cred_t pc = cp->p_cred;
240 
241 		if ((p = pfind(SCARG(uap, pid))) == NULL)
242 			return ESRCH;
243 		if (!(cp == p ||
244 		      kauth_cred_geteuid(pc) == 0 ||
245 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
246 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
247 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
248 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
249 			return EPERM;
250 	}
251 
252 	lp.sched_priority = 0;
253 	return copyout(&lp, SCARG(uap, sp), sizeof(lp));
254 }
255 
256 int
257 linux_sys_sched_setscheduler(cl, v, retval)
258 	struct lwp *cl;
259 	void *v;
260 	register_t *retval;
261 {
262 	struct linux_sys_sched_setscheduler_args /* {
263 		syscallarg(linux_pid_t) pid;
264 		syscallarg(int) policy;
265 		syscallarg(const struct linux_sched_param *) sp;
266 	} */ *uap = v;
267 	struct proc *cp = cl->l_proc;
268 	int error;
269 	struct linux_sched_param lp;
270 	struct proc *p;
271 
272 /*
273  * We only check the parameters for validity; nothing is actually changed.
274  */
275 
276 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
277 		return EINVAL;
278 
279 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
280 	if (error)
281 		return error;
282 
283 	if (SCARG(uap, pid) != 0) {
284 		kauth_cred_t pc = cp->p_cred;
285 
286 		if ((p = pfind(SCARG(uap, pid))) == NULL)
287 			return ESRCH;
288 		if (!(cp == p ||
289 		      kauth_cred_geteuid(pc) == 0 ||
290 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
291 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
292 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
293 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
294 			return EPERM;
295 	}
296 
297 /*
298  * We can't emulate anything but the default scheduling policy.
299  */
300 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER || lp.sched_priority != 0)
301 		return EINVAL;
302 
303 	return 0;
304 }
305 
306 int
307 linux_sys_sched_getscheduler(cl, v, retval)
308 	struct lwp *cl;
309 	void *v;
310 	register_t *retval;
311 {
312 	struct linux_sys_sched_getscheduler_args /* {
313 		syscallarg(linux_pid_t) pid;
314 	} */ *uap = v;
315 	struct proc *cp = cl->l_proc;
316 	struct proc *p;
317 
318 	*retval = -1;
319 /*
320  * We only check for valid parameters and return afterwards.
321  */
322 
323 	if (SCARG(uap, pid) != 0) {
324 		kauth_cred_t pc = cp->p_cred;
325 
326 		if ((p = pfind(SCARG(uap, pid))) == NULL)
327 			return ESRCH;
328 		if (!(cp == p ||
329 		      kauth_cred_geteuid(pc) == 0 ||
330 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
331 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
332 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
333 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
334 			return EPERM;
335 	}
336 
337 /*
338  * We can't emulate anything but the default scheduling policy.
339  */
340 	*retval = LINUX_SCHED_OTHER;
341 	return 0;
342 }
343 
344 int
345 linux_sys_sched_yield(cl, v, retval)
346 	struct lwp *cl;
347 	void *v;
348 	register_t *retval;
349 {
350 
351 	yield();
352 	return 0;
353 }
354 
355 int
356 linux_sys_sched_get_priority_max(cl, v, retval)
357 	struct lwp *cl;
358 	void *v;
359 	register_t *retval;
360 {
361 	struct linux_sys_sched_get_priority_max_args /* {
362 		syscallarg(int) policy;
363 	} */ *uap = v;
364 
365 /*
366  * We can't emulate anything but the default scheduling policy.
367  */
368 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
369 		*retval = -1;
370 		return EINVAL;
371 	}
372 
373 	*retval = 0;
374 	return 0;
375 }
376 
377 int
378 linux_sys_sched_get_priority_min(cl, v, retval)
379 	struct lwp *cl;
380 	void *v;
381 	register_t *retval;
382 {
383 	struct linux_sys_sched_get_priority_min_args /* {
384 		syscallarg(int) policy;
385 	} */ *uap = v;
386 
387 /*
388  * We can't emulate anything but the default scheduling policy.
389  */
390 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
391 		*retval = -1;
392 		return EINVAL;
393 	}
394 
395 	*retval = 0;
396 	return 0;
397 }
398 
399 #ifndef __m68k__
400 /* Present on everything but m68k */
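/*
 * exit_group(2) terminates every thread in the calling thread group.
 * Each emulated thread is a separate NetBSD process linked on a shared
 * emuldata thread list, so with LINUX_NPTL we SIGKILL every sibling on
 * that list before exiting ourselves.
 */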
401 int
402 linux_sys_exit_group(l, v, retval)
403 	struct lwp *l;
404 	void *v;
405 	register_t *retval;
406 {
407 	struct linux_sys_exit_group_args /* {
408 		syscallarg(int) error_code;
409 	} */ *uap = v;
410 #ifdef LINUX_NPTL
411 	struct proc *p = l->l_proc;
412 	struct linux_emuldata *led = p->p_emuldata;
413 	struct linux_emuldata *e;
414 	struct lwp *sl;
415 	struct proc *sp;
416 	int s;
417 
418 	SCHED_LOCK(s);
419 	/*
420 	 * The calling thread is supposed to kill all threads
421 	 * in the same thread group (i.e. all threads created
422 	 * via clone(2) with CLONE_THREAD flag set).
423 	 */
424 	LIST_FOREACH(e, &led->s->threads, threads) {
425 		sp = e->proc;
426 
427 		if (sp == p)
428 			continue;
429 #ifdef DEBUG_LINUX
430 		printf("linux_sys_exit_group: kill PID %d\n", sp->p_pid);
431 #endif
432 		/* wake up any waiters */
433 		if (sp->p_sigctx.ps_sigwaited &&
434 		    sigismember(sp->p_sigctx.ps_sigwait, SIGKILL) &&
435 		    sp->p_stat != SSTOP) {
436 			sched_wakeup(&sp->p_sigctx.ps_sigwait);
437 		}
438 
439 		/* post SIGKILL */
440 		sigaddset(&sp->p_sigctx.ps_siglist, SIGKILL);
441 		sp->p_sigctx.ps_sigcheck = 1;
442 
443 		/* Unblock the process if sleeping or stopped */
444 		switch(sp->p_stat) {
445 		case SSTOP:
446 			sl = proc_unstop(sp);
447 			break;
448 		case SACTIVE:
449 			sl = proc_representative_lwp(sp);
450 			break;
451 		default:
452 			sl = NULL;
453 			break;
454 		}
455 
456 		if (sl == NULL) {
457 			printf("linux_sys_exit_group: no lwp for process %d\n",
458 			    sp->p_pid);
459 			continue;
460 		}
461 
462 		if (sl->l_priority > PUSER)
463 			sl->l_priority = PUSER;
464 
465 		switch(sl->l_stat) {
466 		case LSSUSPENDED:
467 			lwp_continue(sl);
468 			/* FALLTHROUGH */
469 		case LSSTOP:
470 		case LSSLEEP:
471 		case LSIDL:
472 			setrunnable(sl);
473 			/* FALLTHROUGH */
474 		default:
475 			break;
476 		}
477 	}
478 	SCHED_UNLOCK(s);
479 #endif /* LINUX_NPTL */
480 
481 	exit1(l, W_EXITCODE(SCARG(uap, error_code), 0));
482 	/* NOTREACHED */
483 	return 0;
484 }
485 #endif /* !__m68k__ */
486 
487 #ifdef LINUX_NPTL
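/*
 * set_tid_address(2): remember where the TID should be cleared when
 * this thread exits (NPTL relies on this for thread exit notification)
 * and return the caller's TID.
 */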
488 int
489 linux_sys_set_tid_address(l, v, retval)
490 	struct lwp *l;
491 	void *v;
492 	register_t *retval;
493 {
494 	struct linux_sys_set_tid_address_args /* {
495 		syscallarg(int *) tid;
496 	} */ *uap = v;
497 	struct linux_emuldata *led;
498 
499 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
500 	led->clear_tid = SCARG(uap, tid);
501 
502 	*retval = l->l_proc->p_pid;
503 
504 	return 0;
505 }
506 
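/*
 * gettid(2): Linux returns the calling thread's ID.  Every emulated
 * thread is its own NetBSD process here, so the PID doubles as the TID.
 */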
507 /* ARGSUSED1 */
508 int
509 linux_sys_gettid(l, v, retval)
510 	struct lwp *l;
511 	void *v;
512 	register_t *retval;
513 {
514 	/* This is exactly what the Linux kernel does */
515 	*retval = l->l_proc->p_pid;
516 	return 0;
517 }
518 
519 #ifdef LINUX_NPTL
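/*
 * getpid(2): with NPTL every thread of a process must observe the same
 * PID, so report the thread group PID kept in the shared emuldata
 * rather than this process' own p_pid.
 */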
520 /* ARGSUSED1 */
521 int
522 linux_sys_getpid(l, v, retval)
523 	struct lwp *l;
524 	void *v;
525 	register_t *retval;
526 {
527 	struct linux_emuldata *led;
528 
529 	led = l->l_proc->p_emuldata;
530 
531 	/* This is exactly what the Linux kernel does */
532 	*retval = led->s->group_pid;
533 
534 	return 0;
535 }
536 
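/*
 * getppid(2): Linux reports the parent of the whole thread group, so
 * find the group leader's parent and, if it is also a Linux process,
 * translate the answer into that parent's thread group PID.
 */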
537 /* ARGSUSED1 */
538 int
539 linux_sys_getppid(l, v, retval)
540 	struct lwp *l;
541 	void *v;
542 	register_t *retval;
543 {
544 	struct proc *p = l->l_proc;
545 	struct linux_emuldata *led = p->p_emuldata;
546 	struct proc *glp;
547 	struct proc *pp;
548 
549 	/* Find the thread group leader's parent */
550 	if ((glp = pfind(led->s->group_pid)) == NULL) {
551 		/* Maybe panic... */
552 		printf("linux_sys_getppid: missing group leader PID %d\n",
553 		    led->s->group_pid);
554 		return -1;
555 	}
556 	pp = glp->p_pptr;
557 
558 	/* If the parent is a Linux process too, return its thread group PID */
559 	if (pp->p_emul == p->p_emul) {
560 		struct linux_emuldata *pled;
561 
562 		pled = pp->p_emuldata;
563 		*retval = pled->s->group_pid;
564 	} else {
565 		*retval = pp->p_pid;
566 	}
567 
568 	return 0;
569 }
570 #endif /* LINUX_NPTL */
571 
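/*
 * sched_getaffinity(2): there is no per-process affinity to report, so
 * claim that the target may run on every CPU in the system.
 */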
572 int
573 linux_sys_sched_getaffinity(l, v, retval)
574 	struct lwp *l;
575 	void *v;
576 	register_t *retval;
577 {
578 	struct linux_sys_sched_getaffinity_args /* {
579 		syscallarg(pid_t) pid;
580 		syscallarg(unsigned int) len;
581 		syscallarg(unsigned long *) mask;
582 	} */ *uap = v;
583 	int error;
584 	int ret;
585 	int ncpu;
586 	int name[2];
587 	size_t sz;
588 	char *data;
589 	int *retp;
590 
591 	if (SCARG(uap, mask) == NULL)
592 		return EINVAL;
593 
594 	if (SCARG(uap, len) < sizeof(int))
595 		return EINVAL;
596 
597 	if (pfind(SCARG(uap, pid)) == NULL)
598 		return ESRCH;
599 
600 	/*
601 	 * Return the actual number of CPUs and tag all of them as
602 	 * available.  The result is a mask, with the first CPU in the
603 	 * least significant bit.
604 	 */
605 	name[0] = CTL_HW;
606 	name[1] = HW_NCPU;
607 	sz = sizeof(ncpu);
608 
609 	if ((error = old_sysctl(&name[0], 2, &ncpu, &sz, NULL, 0, NULL)) != 0)
610 		return error;
611 
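	/* All-CPUs-available mask, e.g. 0x3 when ncpu == 2. */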
612 	ret = (1 << ncpu) - 1;
613 
614 	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
615 	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
616 	*retp = ret;
617 
618 	error = copyout(data, SCARG(uap, mask), SCARG(uap, len));
619 	free(data, M_TEMP);	/* free even if the copyout failed */
620 	if (error != 0)
621 		return error;
622 
623 	return 0;
624 
625 }
626 
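/*
 * sched_setaffinity(2): validate the PID but otherwise accept and
 * ignore the request; the mask is not mapped onto anything native.
 */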
627 int
628 linux_sys_sched_setaffinity(l, v, retval)
629 	struct lwp *l;
630 	void *v;
631 	register_t *retval;
632 {
633 	struct linux_sys_sched_setaffinity_args /* {
634 		syscallarg(pid_t) pid;
635 		syscallarg(unsigned int) len;
636 		syscallarg(unsigned long *) mask;
637 	} */ *uap = v;
638 
639 	if (pfind(SCARG(uap, pid)) == NULL)
640 		return ESRCH;
641 
642 	/* Let's ignore it */
643 #ifdef DEBUG_LINUX
644 	printf("linux_sys_sched_setaffinity\n");
645 #endif
646 	return 0;
647 }
648 #endif /* LINUX_NPTL */
649