/*	$NetBSD: linux_sched.c,v 1.33 2006/07/23 22:06:09 ad Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center; by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with scheduler related syscalls.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.33 2006/07/23 22:06:09 ad Exp $");

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/wait.h>
#include <sys/kauth.h>

#include <machine/cpu.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
#include <compat/linux/common/linux_emuldata.h>

#include <compat/linux/linux_syscallargs.h>

#include <compat/linux/common/linux_sched.h>

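/*
 * clone(2): translate the Linux flag bits into fork1(9) flags, pick the
 * signal delivered to the parent on exit, and (for NPTL) record the
 * parent/child TID pointers and TLS requested by the caller in our
 * emulation data before creating the new process.
 *
 * As an illustration (nothing this code depends on), an NPTL
 * pthread_create() typically passes roughly
 *	CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
 *	CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID
 * the exact set depends on the libc version.
 */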
int
linux_sys_clone(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct linux_sys_clone_args /* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
#ifdef LINUX_NPTL
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) child_tidptr;
#endif
	} */ *uap = v;
	int flags, sig;
	int error;
#ifdef LINUX_NPTL
	struct linux_emuldata *led;
#endif

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return (EINVAL);

	/*
	 * Thread group implies shared signals. Shared signals
	 * imply shared VM. This matches what the Linux kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return (EINVAL);
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return (EINVAL);

	flags = 0;

	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

	/* A thread should not issue a SIGCHLD on termination */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD) {
		sig = 0;
	} else {
		sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
		if (sig < 0 || sig >= LINUX__NSIG)
			return (EINVAL);
		sig = linux_to_native_signo[sig];
	}

#ifdef LINUX_NPTL
	led = (struct linux_emuldata *)l->l_proc->p_emuldata;

	if (SCARG(uap, flags) & LINUX_CLONE_PARENT_SETTID) {
		if (SCARG(uap, parent_tidptr) == NULL) {
			printf("linux_sys_clone: NULL parent_tidptr\n");
			return EINVAL;
		}

		if ((error = copyout(&l->l_proc->p_pid,
		    SCARG(uap, parent_tidptr),
		    sizeof(l->l_proc->p_pid))) != 0)
			return error;
	}

	/* CLONE_CHILD_CLEARTID: the TID is cleared in the child on exit() */
	if (SCARG(uap, flags) & LINUX_CLONE_CHILD_CLEARTID)
		led->child_clear_tid = SCARG(uap, child_tidptr);
	else
		led->child_clear_tid = NULL;

	/* CLONE_CHILD_SETTID: the TID is set in the child on clone() */
	if (SCARG(uap, flags) & LINUX_CLONE_CHILD_SETTID)
		led->child_set_tid = SCARG(uap, child_tidptr);
	else
		led->child_set_tid = NULL;

	/* CLONE_SETTLS: new Thread Local Storage in the child */
	if (SCARG(uap, flags) & LINUX_CLONE_SETTLS)
		led->set_tls = linux_get_newtls(l);
	else
		led->set_tls = 0;
#endif /* LINUX_NPTL */
	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know if the stack grows up
	 * or down.  We therefore pass a stack size of 0, so that the code
	 * that makes this adjustment is a no-op.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    NULL, NULL, retval, NULL)) != 0)
		return error;

	return 0;
}

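/*
 * The sched_setparam/getparam/setscheduler/getscheduler entry points
 * below are not wired into the NetBSD scheduler.  They validate their
 * arguments, apply a Linux-style permission check (same process,
 * superuser, or any matching real/effective uid pair) and otherwise
 * pretend that every process runs under SCHED_OTHER at priority 0.
 */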
int
linux_sys_sched_setparam(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_setparam_args /* {
		syscallarg(linux_pid_t) pid;
		syscallarg(const struct linux_sched_param *) sp;
	} */ *uap = v;
	int error;
	struct linux_sched_param lp;
	struct proc *p;

/*
 * We only check for valid parameters and return afterwards.
 */

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		return error;

	if (SCARG(uap, pid) != 0) {
		kauth_cred_t pc = cl->l_cred;

		if ((p = pfind(SCARG(uap, pid))) == NULL)
			return ESRCH;
		if (!(cl->l_proc == p ||
		      kauth_cred_geteuid(pc) == 0 ||
		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
			return EPERM;
	}

	return 0;
}

int
linux_sys_sched_getparam(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_getparam_args /* {
		syscallarg(linux_pid_t) pid;
		syscallarg(struct linux_sched_param *) sp;
	} */ *uap = v;
	struct proc *p;
	struct linux_sched_param lp;

/*
 * We only check for valid parameters and return a dummy priority afterwards.
 */
	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
		return EINVAL;

	if (SCARG(uap, pid) != 0) {
		kauth_cred_t pc = cl->l_cred;

		if ((p = pfind(SCARG(uap, pid))) == NULL)
			return ESRCH;
		if (!(cl->l_proc == p ||
		      kauth_cred_geteuid(pc) == 0 ||
		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
			return EPERM;
	}

	lp.sched_priority = 0;
	return copyout(&lp, SCARG(uap, sp), sizeof(lp));
}

int
linux_sys_sched_setscheduler(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_setscheduler_args /* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */ *uap = v;
	int error;
	struct linux_sched_param lp;
	struct proc *p;

/*
 * We only check for valid parameters and return afterwards.
 */

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		return error;

	if (SCARG(uap, pid) != 0) {
		kauth_cred_t pc = cl->l_cred;

		if ((p = pfind(SCARG(uap, pid))) == NULL)
			return ESRCH;
		if (!(cl->l_proc == p ||
		      kauth_cred_geteuid(pc) == 0 ||
		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
			return EPERM;
	}

/*
 * We can't emulate anything but the default scheduling policy.
 */
	if (SCARG(uap, policy) != LINUX_SCHED_OTHER || lp.sched_priority != 0)
		return EINVAL;

	return 0;
}

int
linux_sys_sched_getscheduler(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_getscheduler_args /* {
		syscallarg(linux_pid_t) pid;
	} */ *uap = v;
	struct proc *p;

	*retval = -1;
/*
 * We only check for valid parameters and return afterwards.
 */

	if (SCARG(uap, pid) != 0) {
		kauth_cred_t pc = cl->l_cred;

		if ((p = pfind(SCARG(uap, pid))) == NULL)
			return ESRCH;
		if (!(cl->l_proc == p ||
		      kauth_cred_geteuid(pc) == 0 ||
		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
			return EPERM;
	}

/*
 * We can't emulate anything but the default scheduling policy.
 */
	*retval = LINUX_SCHED_OTHER;
	return 0;
}

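/*
 * sched_yield(2) maps directly onto the native yield().
 */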
int
linux_sys_sched_yield(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{

	yield();
	return 0;
}

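/*
 * Linux defines both sched_get_priority_max() and sched_get_priority_min()
 * to return 0 for SCHED_OTHER, the only policy we emulate, so the two
 * entry points below return 0 for that policy and EINVAL for anything else.
 */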
int
linux_sys_sched_get_priority_max(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_get_priority_max_args /* {
		syscallarg(int) policy;
	} */ *uap = v;

/*
 * We can't emulate anything but the default scheduling policy.
 */
	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
		*retval = -1;
		return EINVAL;
	}

	*retval = 0;
	return 0;
}

int
linux_sys_sched_get_priority_min(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_get_priority_min_args /* {
		syscallarg(int) policy;
	} */ *uap = v;

/*
 * We can't emulate anything but the default scheduling policy.
 */
	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
		*retval = -1;
		return EINVAL;
	}

	*retval = 0;
	return 0;
}

#ifndef __m68k__
/* Present on everything but m68k */
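/*
 * exit_group(2) terminates the whole thread group.  With NPTL support
 * built in, we walk the list of processes that share our emulation data
 * (i.e. everything created with CLONE_THREAD), post SIGKILL to each of
 * them and make them runnable so the signal is acted upon, then exit
 * the calling process itself.
 */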
int
linux_sys_exit_group(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct linux_sys_exit_group_args /* {
		syscallarg(int) error_code;
	} */ *uap = v;
#ifdef LINUX_NPTL
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct linux_emuldata *e;
	struct lwp *sl;
	struct proc *sp;
	int s;

	SCHED_LOCK(s);
	/*
	 * The calling thread is supposed to kill all threads
	 * in the same thread group (i.e. all threads created
	 * via clone(2) with the CLONE_THREAD flag set).
	 */
	LIST_FOREACH(e, &led->s->threads, threads) {
		sp = e->proc;

		if (sp == p)
			continue;
#ifdef DEBUG_LINUX
		printf("linux_sys_exit_group: kill PID %d\n", sp->p_pid);
#endif
		/* wake up any waiter */
		if (sp->p_sigctx.ps_sigwaited &&
		    sigismember(sp->p_sigctx.ps_sigwait, SIGKILL) &&
		    sp->p_stat != SSTOP) {
			sched_wakeup(&sp->p_sigctx.ps_sigwait);
		}

		/* post SIGKILL */
		sigaddset(&sp->p_sigctx.ps_siglist, SIGKILL);
		sp->p_sigctx.ps_sigcheck = 1;

		/* Unblock the process if it is sleeping or stopped */
		switch (sp->p_stat) {
		case SSTOP:
			sl = proc_unstop(sp);
			break;
		case SACTIVE:
			sl = proc_representative_lwp(sp);
			break;
		default:
			sl = NULL;
			break;
		}

		if (sl == NULL) {
			printf("linux_sys_exit_group: no lwp for process %d\n",
			    sp->p_pid);
			continue;
		}

		if (sl->l_priority > PUSER)
			sl->l_priority = PUSER;

		switch (sl->l_stat) {
		case LSSUSPENDED:
			lwp_continue(sl);
			/* FALLTHROUGH */
		case LSSTOP:
		case LSSLEEP:
		case LSIDL:
			setrunnable(sl);
			/* FALLTHROUGH */
		default:
			break;
		}
	}
	SCHED_UNLOCK(s);
#endif /* LINUX_NPTL */

	exit1(l, W_EXITCODE(SCARG(uap, error_code), 0));
	/* NOTREACHED */
	return 0;
}
#endif /* !__m68k__ */

#ifdef LINUX_NPTL
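/*
 * set_tid_address(2): record the user-space address that should be
 * cleared when this thread exits, and return the caller's TID.
 */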
int
linux_sys_set_tid_address(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct linux_sys_set_tid_address_args /* {
		syscallarg(int *) tid;
	} */ *uap = v;
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
	led->clear_tid = SCARG(uap, tid);

	*retval = l->l_proc->p_pid;

	return 0;
}

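/*
 * Under this emulation every Linux thread is a separate NetBSD process,
 * so the native PID doubles as the Linux TID.  getpid() and getppid(),
 * by contrast, must report the thread group leader's view, which is
 * looked up through the shared emulation data.
 */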
/* ARGSUSED1 */
int
linux_sys_gettid(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	/* The Linux kernel does it exactly this way */
	*retval = l->l_proc->p_pid;
	return 0;
}

#ifdef LINUX_NPTL
/* ARGSUSED1 */
int
linux_sys_getpid(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct linux_emuldata *led;

	led = l->l_proc->p_emuldata;

	/* The Linux kernel does it exactly this way */
	*retval = led->s->group_pid;

	return 0;
}

/* ARGSUSED1 */
int
linux_sys_getppid(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct proc *glp;
	struct proc *pp;

	/* Find the thread group leader's parent */
	if ((glp = pfind(led->s->group_pid)) == NULL) {
		/* Maybe panic... */
		printf("linux_sys_getppid: missing group leader PID %d\n",
		    led->s->group_pid);
		return ESRCH;
	}
	pp = glp->p_pptr;

	/* If the parent is a Linux process too, return its thread group PID */
	if (pp->p_emul == p->p_emul) {
		struct linux_emuldata *pled;

		pled = pp->p_emuldata;
		*retval = pled->s->group_pid;
	} else {
		*retval = pp->p_pid;
	}

	return 0;
}
#endif /* LINUX_NPTL */

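/*
 * sched_getaffinity(2): report every CPU as usable.  The number of CPUs
 * is obtained via the hw.ncpu sysctl and turned into a mask with one bit
 * per CPU (e.g. two CPUs yield 0x3), copied out in a zero-filled buffer
 * of the requested length.  sched_setaffinity(2) below accepts and
 * silently ignores whatever mask it is given.
 */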
int
linux_sys_sched_getaffinity(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_getaffinity_args /* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */ *uap = v;
	int error;
	int ret;
	int ncpu;
	int name[2];
	size_t sz;
	char *data;
	int *retp;

	if (SCARG(uap, mask) == NULL)
		return EINVAL;

	if (SCARG(uap, len) < sizeof(int))
		return EINVAL;

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/*
	 * Return the actual number of CPUs and flag all of them as
	 * available.  The result is a mask with the first CPU in the
	 * least significant bit.
	 */
	name[0] = CTL_HW;
	name[1] = HW_NCPU;
	sz = sizeof(ncpu);

	if ((error = old_sysctl(&name[0], 2, &ncpu, &sz, NULL, 0, NULL)) != 0)
		return error;

	ret = (1 << ncpu) - 1;

	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
	*retp = ret;

	error = copyout(data, SCARG(uap, mask), SCARG(uap, len));
	free(data, M_TEMP);

	return error;
}

int
linux_sys_sched_setaffinity(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_setaffinity_args /* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */ *uap = v;

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/* Silently ignore the requested mask */
#ifdef DEBUG_LINUX
	printf("linux_sys_sched_setaffinity\n");
#endif
	return 0;
}
#endif /* LINUX_NPTL */
645