xref: /netbsd-src/sys/compat/linux/common/linux_sched.c (revision b5677b36047b601b9addaaa494a58ceae82c2a6c)
1 /*	$NetBSD: linux_sched.c,v 1.58 2008/10/25 23:38:28 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center; by Matthias Scheler.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Linux compatibility module. Try to deal with scheduler related syscalls.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.58 2008/10/25 23:38:28 christos Exp $");
39 
40 #include <sys/param.h>
41 #include <sys/mount.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/sysctl.h>
45 #include <sys/malloc.h>
46 #include <sys/syscallargs.h>
47 #include <sys/wait.h>
48 #include <sys/kauth.h>
49 #include <sys/ptrace.h>
50 
51 #include <sys/cpu.h>
52 
53 #include <compat/linux/common/linux_types.h>
54 #include <compat/linux/common/linux_signal.h>
55 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
56 #include <compat/linux/common/linux_emuldata.h>
57 #include <compat/linux/common/linux_ipc.h>
58 #include <compat/linux/common/linux_sem.h>
59 #include <compat/linux/common/linux_exec.h>
60 
61 #include <compat/linux/linux_syscallargs.h>
62 
63 #include <compat/linux/common/linux_sched.h>
64 
65 int
66 linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
67 {
68 	/* {
69 		syscallarg(int) flags;
70 		syscallarg(void *) stack;
71 #ifdef LINUX_NPTL
72 		syscallarg(void *) parent_tidptr;
73 		syscallarg(void *) child_tidptr;
74 #endif
75 	} */
76 	int flags, sig;
77 	int error;
78 	struct proc *p;
79 #ifdef LINUX_NPTL
80 	struct linux_emuldata *led;
81 #endif
82 
83 	/*
84 	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
85 	 */
86 	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
87 		return (EINVAL);
88 
89 	/*
90 	 * Thread group implies shared signals. Shared signals
91 	 * imply shared VM. This matches what Linux kernel does.
92 	 */
93 	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
94 	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
95 		return (EINVAL);
96 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
97 	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
98 		return (EINVAL);
99 
100 	flags = 0;
101 
102 	if (SCARG(uap, flags) & LINUX_CLONE_VM)
103 		flags |= FORK_SHAREVM;
104 	if (SCARG(uap, flags) & LINUX_CLONE_FS)
105 		flags |= FORK_SHARECWD;
106 	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
107 		flags |= FORK_SHAREFILES;
108 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
109 		flags |= FORK_SHARESIGS;
110 	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
111 		flags |= FORK_PPWAIT;
112 
113 	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
114 	if (sig < 0 || sig >= LINUX__NSIG)
115 		return (EINVAL);
116 	sig = linux_to_native_signo[sig];
117 
118 #ifdef LINUX_NPTL
119 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
120 
121 	led->parent_tidptr = SCARG(uap, parent_tidptr);
122 	led->child_tidptr = SCARG(uap, child_tidptr);
123 	led->clone_flags = SCARG(uap, flags);
124 #endif /* LINUX_NPTL */
125 
126 	/*
127 	 * Note that Linux does not provide a portable way of specifying
128 	 * the stack area; the caller must know if the stack grows up
129 	 * or down.  So, we pass a stack size of 0, so that the code
130 	 * that makes this adjustment is a noop.
131 	 */
132 	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
133 	    NULL, NULL, retval, &p)) != 0)
134 		return error;
135 
136 #ifdef LINUX_NPTL
137 	if ((SCARG(uap, flags) & LINUX_CLONE_SETTLS) != 0)
138 		return linux_init_thread_area(l, LIST_FIRST(&p->p_lwps));
139 #endif /* LINUX_NPTL */
140 
141 	return 0;
142 }
143 
144 /*
145  * linux realtime priority
146  *
147  * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
148  *
149  * - SCHED_OTHER tasks don't have realtime priorities.
150  *   in particular, sched_param::sched_priority is always 0.
151  */
152 
153 #define	LINUX_SCHED_RTPRIO_MIN	1
154 #define	LINUX_SCHED_RTPRIO_MAX	99
155 
156 static int
157 sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
158     int *native_policy, struct sched_param *native_params)
159 {
160 
161 	switch (linux_policy) {
162 	case LINUX_SCHED_OTHER:
163 		if (native_policy != NULL) {
164 			*native_policy = SCHED_OTHER;
165 		}
166 		break;
167 
168 	case LINUX_SCHED_FIFO:
169 		if (native_policy != NULL) {
170 			*native_policy = SCHED_FIFO;
171 		}
172 		break;
173 
174 	case LINUX_SCHED_RR:
175 		if (native_policy != NULL) {
176 			*native_policy = SCHED_RR;
177 		}
178 		break;
179 
180 	default:
181 		return EINVAL;
182 	}
183 
184 	if (linux_params != NULL) {
185 		int prio = linux_params->sched_priority;
186 
187 		KASSERT(native_params != NULL);
188 
189 		if (linux_policy == LINUX_SCHED_OTHER) {
190 			if (prio != 0) {
191 				return EINVAL;
192 			}
193 			native_params->sched_priority = PRI_NONE; /* XXX */
194 		} else {
195 			if (prio < LINUX_SCHED_RTPRIO_MIN ||
196 			    prio > LINUX_SCHED_RTPRIO_MAX) {
197 				return EINVAL;
198 			}
199 			native_params->sched_priority =
200 			    (prio - LINUX_SCHED_RTPRIO_MIN)
201 			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
202 			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
203 			    + SCHED_PRI_MIN;
204 		}
205 	}
206 
207 	return 0;
208 }
209 
210 static int
211 sched_native2linux(int native_policy, struct sched_param *native_params,
212     int *linux_policy, struct linux_sched_param *linux_params)
213 {
214 
215 	switch (native_policy) {
216 	case SCHED_OTHER:
217 		if (linux_policy != NULL) {
218 			*linux_policy = LINUX_SCHED_OTHER;
219 		}
220 		break;
221 
222 	case SCHED_FIFO:
223 		if (linux_policy != NULL) {
224 			*linux_policy = LINUX_SCHED_FIFO;
225 		}
226 		break;
227 
228 	case SCHED_RR:
229 		if (linux_policy != NULL) {
230 			*linux_policy = LINUX_SCHED_RR;
231 		}
232 		break;
233 
234 	default:
235 		panic("%s: unknown policy %d\n", __func__, native_policy);
236 	}
237 
238 	if (native_params != NULL) {
239 		int prio = native_params->sched_priority;
240 
241 		KASSERT(prio >= SCHED_PRI_MIN);
242 		KASSERT(prio <= SCHED_PRI_MAX);
243 		KASSERT(linux_params != NULL);
244 
245 #ifdef DEBUG_LINUX
246 		printf("native2linux: native: policy %d, priority %d\n",
247 		    native_policy, prio);
248 #endif
249 
250 		if (native_policy == SCHED_OTHER) {
251 			linux_params->sched_priority = 0;
252 		} else {
253 			linux_params->sched_priority =
254 			    (prio - SCHED_PRI_MIN)
255 			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
256 			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
257 			    + LINUX_SCHED_RTPRIO_MIN;
258 		}
259 #ifdef DEBUG_LINUX
260 		printf("native2linux: linux: policy %d, priority %d\n",
261 		    -1, linux_params->sched_priority);
262 #endif
263 	}
264 
265 	return 0;
266 }
267 
268 int
269 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
270 {
271 	/* {
272 		syscallarg(linux_pid_t) pid;
273 		syscallarg(const struct linux_sched_param *) sp;
274 	} */
275 	int error, policy;
276 	struct linux_sched_param lp;
277 	struct sched_param sp;
278 
279 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
280 		error = EINVAL;
281 		goto out;
282 	}
283 
284 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
285 	if (error)
286 		goto out;
287 
288 	/* We need the current policy in Linux terms. */
289 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
290 	if (error)
291 		goto out;
292 	error = sched_native2linux(policy, NULL, &policy, NULL);
293 	if (error)
294 		goto out;
295 
296 	error = sched_linux2native(policy, &lp, &policy, &sp);
297 	if (error)
298 		goto out;
299 
300 	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
301 	if (error)
302 		goto out;
303 
304  out:
305 	return error;
306 }
307 
308 int
309 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
310 {
311 	/* {
312 		syscallarg(linux_pid_t) pid;
313 		syscallarg(struct linux_sched_param *) sp;
314 	} */
315 	struct linux_sched_param lp;
316 	struct sched_param sp;
317 	int error, policy;
318 
319 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
320 		error = EINVAL;
321 		goto out;
322 	}
323 
324 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
325 	if (error)
326 		goto out;
327 #ifdef DEBUG_LINUX
328 	printf("getparam: native: policy %d, priority %d\n",
329 	    policy, sp.sched_priority);
330 #endif
331 
332 	error = sched_native2linux(policy, &sp, NULL, &lp);
333 	if (error)
334 		goto out;
335 #ifdef DEBUG_LINUX
336 	printf("getparam: linux: policy %d, priority %d\n",
337 	    policy, lp.sched_priority);
338 #endif
339 
340 	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
341 	if (error)
342 		goto out;
343 
344  out:
345 	return error;
346 }
347 
348 int
349 linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
350 {
351 	/* {
352 		syscallarg(linux_pid_t) pid;
353 		syscallarg(int) policy;
354 		syscallarg(cont struct linux_sched_scheduler *) sp;
355 	} */
356 	int error, policy;
357 	struct linux_sched_param lp;
358 	struct sched_param sp;
359 
360 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
361 		error = EINVAL;
362 		goto out;
363 	}
364 
365 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
366 	if (error)
367 		goto out;
368 #ifdef DEBUG_LINUX
369 	printf("setscheduler: linux: policy %d, priority %d\n",
370 	    SCARG(uap, policy), lp.sched_priority);
371 #endif
372 
373 	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
374 	if (error)
375 		goto out;
376 #ifdef DEBUG_LINUX
377 	printf("setscheduler: native: policy %d, priority %d\n",
378 	    policy, sp.sched_priority);
379 #endif
380 
381 	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
382 	if (error)
383 		goto out;
384 
385  out:
386 	return error;
387 }
388 
389 int
390 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
391 {
392 	/* {
393 		syscallarg(linux_pid_t) pid;
394 	} */
395 	int error, policy;
396 
397 	*retval = -1;
398 
399 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
400 	if (error)
401 		goto out;
402 
403 	error = sched_native2linux(policy, NULL, &policy, NULL);
404 	if (error)
405 		goto out;
406 
407 	*retval = policy;
408 
409  out:
410 	return error;
411 }
412 
413 int
414 linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
415 {
416 
417 	yield();
418 	return 0;
419 }
420 
421 int
422 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
423 {
424 	/* {
425 		syscallarg(int) policy;
426 	} */
427 
428 	switch (SCARG(uap, policy)) {
429 	case LINUX_SCHED_OTHER:
430 		*retval = 0;
431 		break;
432 	case LINUX_SCHED_FIFO:
433 	case LINUX_SCHED_RR:
434 		*retval = LINUX_SCHED_RTPRIO_MAX;
435 		break;
436 	default:
437 		return EINVAL;
438 	}
439 
440 	return 0;
441 }
442 
443 int
444 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
445 {
446 	/* {
447 		syscallarg(int) policy;
448 	} */
449 
450 	switch (SCARG(uap, policy)) {
451 	case LINUX_SCHED_OTHER:
452 		*retval = 0;
453 		break;
454 	case LINUX_SCHED_FIFO:
455 	case LINUX_SCHED_RR:
456 		*retval = LINUX_SCHED_RTPRIO_MIN;
457 		break;
458 	default:
459 		return EINVAL;
460 	}
461 
462 	return 0;
463 }
464 
465 #ifndef __m68k__
466 /* Present on everything but m68k */
467 int
468 linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
469 {
470 #ifdef LINUX_NPTL
471 	/* {
472 		syscallarg(int) error_code;
473 	} */
474 	struct proc *p = l->l_proc;
475 	struct linux_emuldata *led = p->p_emuldata;
476 	struct linux_emuldata *e;
477 
478 	if (led->s->flags & LINUX_LES_USE_NPTL) {
479 
480 #ifdef DEBUG_LINUX
481 		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
482 		    led->s->refs);
483 #endif
484 
485 		/*
486 		 * The calling thread is supposed to kill all threads
487 		 * in the same thread group (i.e. all threads created
488 		 * via clone(2) with CLONE_THREAD flag set).
489 		 *
490 		 * If there is only one thread, things are quite simple
491 		 */
492 		if (led->s->refs == 1)
493 			return sys_exit(l, (const void *)uap, retval);
494 
495 #ifdef DEBUG_LINUX
496 		printf("%s:%d\n", __func__, __LINE__);
497 #endif
498 
499 		mutex_enter(proc_lock);
500 		led->s->flags |= LINUX_LES_INEXITGROUP;
501 		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
502 
503 		/*
504 		 * Kill all threads in the group. The emulation exit hook takes
505 		 * care of hiding the zombies and reporting the exit code
506 		 * properly.
507 		 */
508       		LIST_FOREACH(e, &led->s->threads, threads) {
509 			if (e->proc == p)
510 				continue;
511 
512 #ifdef DEBUG_LINUX
513 			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
514 #endif
515 			psignal(e->proc, SIGKILL);
516 		}
517 
518 		/* Now, kill ourselves */
519 		psignal(p, SIGKILL);
520 		mutex_exit(proc_lock);
521 
522 		return 0;
523 
524 	}
525 #endif /* LINUX_NPTL */
526 
527 	return sys_exit(l, (const void *)uap, retval);
528 }
529 #endif /* !__m68k__ */
530 
531 #ifdef LINUX_NPTL
532 int
533 linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
534 {
535 	/* {
536 		syscallarg(int *) tidptr;
537 	} */
538 	struct linux_emuldata *led;
539 
540 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
541 	led->clear_tid = SCARG(uap, tid);
542 
543 	led->s->flags |= LINUX_LES_USE_NPTL;
544 
545 	*retval = l->l_proc->p_pid;
546 
547 	return 0;
548 }
549 
550 /* ARGUSED1 */
551 int
552 linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
553 {
554 	/* The Linux kernel does it exactly that way */
555 	*retval = l->l_proc->p_pid;
556 	return 0;
557 }
558 
559 #ifdef LINUX_NPTL
560 /* ARGUSED1 */
561 int
562 linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
563 {
564 	struct linux_emuldata *led = l->l_proc->p_emuldata;
565 
566 	if (led->s->flags & LINUX_LES_USE_NPTL) {
567 		/* The Linux kernel does it exactly that way */
568 		*retval = led->s->group_pid;
569 	} else {
570 		*retval = l->l_proc->p_pid;
571 	}
572 
573 	return 0;
574 }
575 
576 /* ARGUSED1 */
577 int
578 linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
579 {
580 	struct proc *p = l->l_proc;
581 	struct linux_emuldata *led = p->p_emuldata;
582 	struct proc *glp;
583 	struct proc *pp;
584 
585 	mutex_enter(proc_lock);
586 	if (led->s->flags & LINUX_LES_USE_NPTL) {
587 
588 		/* Find the thread group leader's parent */
589 		if ((glp = p_find(led->s->group_pid, PFIND_LOCKED)) == NULL) {
590 			/* Maybe panic... */
591 			printf("linux_sys_getppid: missing group leader PID"
592 			    " %d\n", led->s->group_pid);
593 			mutex_exit(proc_lock);
594 			return -1;
595 		}
596 		pp = glp->p_pptr;
597 
598 		/* If this is a Linux process too, return thread group PID */
599 		if (pp->p_emul == p->p_emul) {
600 			struct linux_emuldata *pled;
601 
602 			pled = pp->p_emuldata;
603 			*retval = pled->s->group_pid;
604 		} else {
605 			*retval = pp->p_pid;
606 		}
607 
608 	} else {
609 		*retval = p->p_pptr->p_pid;
610 	}
611 	mutex_exit(proc_lock);
612 
613 	return 0;
614 }
615 #endif /* LINUX_NPTL */
616 
617 int
618 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
619 {
620 	/* {
621 		syscallarg(pid_t) pid;
622 		syscallarg(unsigned int) len;
623 		syscallarg(unsigned long *) mask;
624 	} */
625 	int error;
626 	int ret;
627 	char *data;
628 	int *retp;
629 
630 	if (SCARG(uap, mask) == NULL)
631 		return EINVAL;
632 
633 	if (SCARG(uap, len) < sizeof(int))
634 		return EINVAL;
635 
636 	if (pfind(SCARG(uap, pid)) == NULL)
637 		return ESRCH;
638 
639 	/*
640 	 * return the actual number of CPU, tag all of them as available
641 	 * The result is a mask, the first CPU being in the least significant
642 	 * bit.
643 	 */
644 	ret = (1 << ncpu) - 1;
645 	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
646 	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
647 	*retp = ret;
648 
649 	if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
650 		return error;
651 
652 	free(data, M_TEMP);
653 
654 	return 0;
655 
656 }
657 
658 int
659 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
660 {
661 	/* {
662 		syscallarg(pid_t) pid;
663 		syscallarg(unsigned int) len;
664 		syscallarg(unsigned long *) mask;
665 	} */
666 
667 	if (pfind(SCARG(uap, pid)) == NULL)
668 		return ESRCH;
669 
670 	/* Let's ignore it */
671 #ifdef DEBUG_LINUX
672 	printf("linux_sys_sched_setaffinity\n");
673 #endif
674 	return 0;
675 };
676 #endif /* LINUX_NPTL */
677