xref: /netbsd-src/sys/compat/linux/common/linux_sched.c (revision fff57c5525bbe431aee7bdb3983954f0627a42cb)
1 /*	$NetBSD: linux_sched.c,v 1.57 2008/05/07 15:18:35 njoly Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center; by Matthias Scheler.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Linux compatibility module. Try to deal with scheduler related syscalls.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.57 2008/05/07 15:18:35 njoly Exp $");
39 
40 #include <sys/param.h>
41 #include <sys/mount.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/sysctl.h>
45 #include <sys/malloc.h>
46 #include <sys/syscallargs.h>
47 #include <sys/wait.h>
48 #include <sys/kauth.h>
49 #include <sys/ptrace.h>
50 
51 #include <sys/cpu.h>
52 
53 #include <compat/linux/common/linux_types.h>
54 #include <compat/linux/common/linux_signal.h>
55 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
56 #include <compat/linux/common/linux_emuldata.h>
57 #include <compat/linux/common/linux_ipc.h>
58 #include <compat/linux/common/linux_sem.h>
59 
60 #include <compat/linux/linux_syscallargs.h>
61 
62 #include <compat/linux/common/linux_sched.h>
63 
64 int
65 linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
66 {
67 	/* {
68 		syscallarg(int) flags;
69 		syscallarg(void *) stack;
70 #ifdef LINUX_NPTL
71 		syscallarg(void *) parent_tidptr;
72 		syscallarg(void *) child_tidptr;
73 #endif
74 	} */
75 	int flags, sig;
76 	int error;
77 #ifdef LINUX_NPTL
78 	struct linux_emuldata *led;
79 #endif
80 
81 	/*
82 	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
83 	 */
84 	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
85 		return (EINVAL);
86 
87 	/*
88 	 * Thread group implies shared signals. Shared signals
89 	 * imply shared VM. This matches what Linux kernel does.
90 	 */
91 	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
92 	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
93 		return (EINVAL);
94 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
95 	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
96 		return (EINVAL);
97 
98 	flags = 0;
99 
100 	if (SCARG(uap, flags) & LINUX_CLONE_VM)
101 		flags |= FORK_SHAREVM;
102 	if (SCARG(uap, flags) & LINUX_CLONE_FS)
103 		flags |= FORK_SHARECWD;
104 	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
105 		flags |= FORK_SHAREFILES;
106 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
107 		flags |= FORK_SHARESIGS;
108 	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
109 		flags |= FORK_PPWAIT;
110 
111 	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
112 	if (sig < 0 || sig >= LINUX__NSIG)
113 		return (EINVAL);
114 	sig = linux_to_native_signo[sig];
115 
116 #ifdef LINUX_NPTL
117 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
118 
119 	led->parent_tidptr = SCARG(uap, parent_tidptr);
120 	led->child_tidptr = SCARG(uap, child_tidptr);
121 	led->clone_flags = SCARG(uap, flags);
122 #endif /* LINUX_NPTL */
123 
124 	/*
125 	 * Note that Linux does not provide a portable way of specifying
126 	 * the stack area; the caller must know if the stack grows up
127 	 * or down.  So, we pass a stack size of 0, so that the code
128 	 * that makes this adjustment is a noop.
129 	 */
130 	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
131 	    NULL, NULL, retval, NULL)) != 0)
132 		return error;
133 
134 	return 0;
135 }
136 
137 /*
138  * linux realtime priority
139  *
140  * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
141  *
142  * - SCHED_OTHER tasks don't have realtime priorities.
143  *   in particular, sched_param::sched_priority is always 0.
144  */
145 
146 #define	LINUX_SCHED_RTPRIO_MIN	1
147 #define	LINUX_SCHED_RTPRIO_MAX	99
148 
149 static int
150 sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
151     int *native_policy, struct sched_param *native_params)
152 {
153 
154 	switch (linux_policy) {
155 	case LINUX_SCHED_OTHER:
156 		if (native_policy != NULL) {
157 			*native_policy = SCHED_OTHER;
158 		}
159 		break;
160 
161 	case LINUX_SCHED_FIFO:
162 		if (native_policy != NULL) {
163 			*native_policy = SCHED_FIFO;
164 		}
165 		break;
166 
167 	case LINUX_SCHED_RR:
168 		if (native_policy != NULL) {
169 			*native_policy = SCHED_RR;
170 		}
171 		break;
172 
173 	default:
174 		return EINVAL;
175 	}
176 
177 	if (linux_params != NULL) {
178 		int prio = linux_params->sched_priority;
179 
180 		KASSERT(native_params != NULL);
181 
182 		if (linux_policy == LINUX_SCHED_OTHER) {
183 			if (prio != 0) {
184 				return EINVAL;
185 			}
186 			native_params->sched_priority = PRI_NONE; /* XXX */
187 		} else {
188 			if (prio < LINUX_SCHED_RTPRIO_MIN ||
189 			    prio > LINUX_SCHED_RTPRIO_MAX) {
190 				return EINVAL;
191 			}
192 			native_params->sched_priority =
193 			    (prio - LINUX_SCHED_RTPRIO_MIN)
194 			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
195 			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
196 			    + SCHED_PRI_MIN;
197 		}
198 	}
199 
200 	return 0;
201 }
202 
203 static int
204 sched_native2linux(int native_policy, struct sched_param *native_params,
205     int *linux_policy, struct linux_sched_param *linux_params)
206 {
207 
208 	switch (native_policy) {
209 	case SCHED_OTHER:
210 		if (linux_policy != NULL) {
211 			*linux_policy = LINUX_SCHED_OTHER;
212 		}
213 		break;
214 
215 	case SCHED_FIFO:
216 		if (linux_policy != NULL) {
217 			*linux_policy = LINUX_SCHED_FIFO;
218 		}
219 		break;
220 
221 	case SCHED_RR:
222 		if (linux_policy != NULL) {
223 			*linux_policy = LINUX_SCHED_RR;
224 		}
225 		break;
226 
227 	default:
228 		panic("%s: unknown policy %d\n", __func__, native_policy);
229 	}
230 
231 	if (native_params != NULL) {
232 		int prio = native_params->sched_priority;
233 
234 		KASSERT(prio >= SCHED_PRI_MIN);
235 		KASSERT(prio <= SCHED_PRI_MAX);
236 		KASSERT(linux_params != NULL);
237 
238 #ifdef DEBUG_LINUX
239 		printf("native2linux: native: policy %d, priority %d\n",
240 		    native_policy, prio);
241 #endif
242 
243 		if (native_policy == SCHED_OTHER) {
244 			linux_params->sched_priority = 0;
245 		} else {
246 			linux_params->sched_priority =
247 			    (prio - SCHED_PRI_MIN)
248 			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
249 			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
250 			    + LINUX_SCHED_RTPRIO_MIN;
251 		}
252 #ifdef DEBUG_LINUX
253 		printf("native2linux: linux: policy %d, priority %d\n",
254 		    -1, linux_params->sched_priority);
255 #endif
256 	}
257 
258 	return 0;
259 }
260 
261 int
262 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
263 {
264 	/* {
265 		syscallarg(linux_pid_t) pid;
266 		syscallarg(const struct linux_sched_param *) sp;
267 	} */
268 	int error, policy;
269 	struct linux_sched_param lp;
270 	struct sched_param sp;
271 
272 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
273 		error = EINVAL;
274 		goto out;
275 	}
276 
277 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
278 	if (error)
279 		goto out;
280 
281 	/* We need the current policy in Linux terms. */
282 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
283 	if (error)
284 		goto out;
285 	error = sched_native2linux(policy, NULL, &policy, NULL);
286 	if (error)
287 		goto out;
288 
289 	error = sched_linux2native(policy, &lp, &policy, &sp);
290 	if (error)
291 		goto out;
292 
293 	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
294 	if (error)
295 		goto out;
296 
297  out:
298 	return error;
299 }
300 
301 int
302 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
303 {
304 	/* {
305 		syscallarg(linux_pid_t) pid;
306 		syscallarg(struct linux_sched_param *) sp;
307 	} */
308 	struct linux_sched_param lp;
309 	struct sched_param sp;
310 	int error, policy;
311 
312 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
313 		error = EINVAL;
314 		goto out;
315 	}
316 
317 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
318 	if (error)
319 		goto out;
320 #ifdef DEBUG_LINUX
321 	printf("getparam: native: policy %d, priority %d\n",
322 	    policy, sp.sched_priority);
323 #endif
324 
325 	error = sched_native2linux(policy, &sp, NULL, &lp);
326 	if (error)
327 		goto out;
328 #ifdef DEBUG_LINUX
329 	printf("getparam: linux: policy %d, priority %d\n",
330 	    policy, lp.sched_priority);
331 #endif
332 
333 	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
334 	if (error)
335 		goto out;
336 
337  out:
338 	return error;
339 }
340 
341 int
342 linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
343 {
344 	/* {
345 		syscallarg(linux_pid_t) pid;
346 		syscallarg(int) policy;
347 		syscallarg(cont struct linux_sched_scheduler *) sp;
348 	} */
349 	int error, policy;
350 	struct linux_sched_param lp;
351 	struct sched_param sp;
352 
353 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
354 		error = EINVAL;
355 		goto out;
356 	}
357 
358 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
359 	if (error)
360 		goto out;
361 #ifdef DEBUG_LINUX
362 	printf("setscheduler: linux: policy %d, priority %d\n",
363 	    SCARG(uap, policy), lp.sched_priority);
364 #endif
365 
366 	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
367 	if (error)
368 		goto out;
369 #ifdef DEBUG_LINUX
370 	printf("setscheduler: native: policy %d, priority %d\n",
371 	    policy, sp.sched_priority);
372 #endif
373 
374 	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
375 	if (error)
376 		goto out;
377 
378  out:
379 	return error;
380 }
381 
382 int
383 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
384 {
385 	/* {
386 		syscallarg(linux_pid_t) pid;
387 	} */
388 	int error, policy;
389 
390 	*retval = -1;
391 
392 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
393 	if (error)
394 		goto out;
395 
396 	error = sched_native2linux(policy, NULL, &policy, NULL);
397 	if (error)
398 		goto out;
399 
400 	*retval = policy;
401 
402  out:
403 	return error;
404 }
405 
406 int
407 linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
408 {
409 
410 	yield();
411 	return 0;
412 }
413 
414 int
415 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
416 {
417 	/* {
418 		syscallarg(int) policy;
419 	} */
420 
421 	switch (SCARG(uap, policy)) {
422 	case LINUX_SCHED_OTHER:
423 		*retval = 0;
424 		break;
425 	case LINUX_SCHED_FIFO:
426 	case LINUX_SCHED_RR:
427 		*retval = LINUX_SCHED_RTPRIO_MAX;
428 		break;
429 	default:
430 		return EINVAL;
431 	}
432 
433 	return 0;
434 }
435 
436 int
437 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
438 {
439 	/* {
440 		syscallarg(int) policy;
441 	} */
442 
443 	switch (SCARG(uap, policy)) {
444 	case LINUX_SCHED_OTHER:
445 		*retval = 0;
446 		break;
447 	case LINUX_SCHED_FIFO:
448 	case LINUX_SCHED_RR:
449 		*retval = LINUX_SCHED_RTPRIO_MIN;
450 		break;
451 	default:
452 		return EINVAL;
453 	}
454 
455 	return 0;
456 }
457 
458 #ifndef __m68k__
459 /* Present on everything but m68k */
460 int
461 linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
462 {
463 #ifdef LINUX_NPTL
464 	/* {
465 		syscallarg(int) error_code;
466 	} */
467 	struct proc *p = l->l_proc;
468 	struct linux_emuldata *led = p->p_emuldata;
469 	struct linux_emuldata *e;
470 
471 	if (led->s->flags & LINUX_LES_USE_NPTL) {
472 
473 #ifdef DEBUG_LINUX
474 		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
475 		    led->s->refs);
476 #endif
477 
478 		/*
479 		 * The calling thread is supposed to kill all threads
480 		 * in the same thread group (i.e. all threads created
481 		 * via clone(2) with CLONE_THREAD flag set).
482 		 *
483 		 * If there is only one thread, things are quite simple
484 		 */
485 		if (led->s->refs == 1)
486 			return sys_exit(l, (const void *)uap, retval);
487 
488 #ifdef DEBUG_LINUX
489 		printf("%s:%d\n", __func__, __LINE__);
490 #endif
491 
492 		mutex_enter(proc_lock);
493 		led->s->flags |= LINUX_LES_INEXITGROUP;
494 		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
495 
496 		/*
497 		 * Kill all threads in the group. The emulation exit hook takes
498 		 * care of hiding the zombies and reporting the exit code
499 		 * properly.
500 		 */
501       		LIST_FOREACH(e, &led->s->threads, threads) {
502 			if (e->proc == p)
503 				continue;
504 
505 #ifdef DEBUG_LINUX
506 			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
507 #endif
508 			psignal(e->proc, SIGKILL);
509 		}
510 
511 		/* Now, kill ourselves */
512 		psignal(p, SIGKILL);
513 		mutex_exit(proc_lock);
514 
515 		return 0;
516 
517 	}
518 #endif /* LINUX_NPTL */
519 
520 	return sys_exit(l, (const void *)uap, retval);
521 }
522 #endif /* !__m68k__ */
523 
524 #ifdef LINUX_NPTL
525 int
526 linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
527 {
528 	/* {
529 		syscallarg(int *) tidptr;
530 	} */
531 	struct linux_emuldata *led;
532 
533 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
534 	led->clear_tid = SCARG(uap, tid);
535 
536 	led->s->flags |= LINUX_LES_USE_NPTL;
537 
538 	*retval = l->l_proc->p_pid;
539 
540 	return 0;
541 }
542 
543 /* ARGUSED1 */
544 int
545 linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
546 {
547 	/* The Linux kernel does it exactly that way */
548 	*retval = l->l_proc->p_pid;
549 	return 0;
550 }
551 
552 #ifdef LINUX_NPTL
553 /* ARGUSED1 */
554 int
555 linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
556 {
557 	struct linux_emuldata *led = l->l_proc->p_emuldata;
558 
559 	if (led->s->flags & LINUX_LES_USE_NPTL) {
560 		/* The Linux kernel does it exactly that way */
561 		*retval = led->s->group_pid;
562 	} else {
563 		*retval = l->l_proc->p_pid;
564 	}
565 
566 	return 0;
567 }
568 
569 /* ARGUSED1 */
570 int
571 linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
572 {
573 	struct proc *p = l->l_proc;
574 	struct linux_emuldata *led = p->p_emuldata;
575 	struct proc *glp;
576 	struct proc *pp;
577 
578 	mutex_enter(proc_lock);
579 	if (led->s->flags & LINUX_LES_USE_NPTL) {
580 
581 		/* Find the thread group leader's parent */
582 		if ((glp = p_find(led->s->group_pid, PFIND_LOCKED)) == NULL) {
583 			/* Maybe panic... */
584 			printf("linux_sys_getppid: missing group leader PID"
585 			    " %d\n", led->s->group_pid);
586 			mutex_exit(proc_lock);
587 			return -1;
588 		}
589 		pp = glp->p_pptr;
590 
591 		/* If this is a Linux process too, return thread group PID */
592 		if (pp->p_emul == p->p_emul) {
593 			struct linux_emuldata *pled;
594 
595 			pled = pp->p_emuldata;
596 			*retval = pled->s->group_pid;
597 		} else {
598 			*retval = pp->p_pid;
599 		}
600 
601 	} else {
602 		*retval = p->p_pptr->p_pid;
603 	}
604 	mutex_exit(proc_lock);
605 
606 	return 0;
607 }
608 #endif /* LINUX_NPTL */
609 
610 int
611 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
612 {
613 	/* {
614 		syscallarg(pid_t) pid;
615 		syscallarg(unsigned int) len;
616 		syscallarg(unsigned long *) mask;
617 	} */
618 	int error;
619 	int ret;
620 	char *data;
621 	int *retp;
622 
623 	if (SCARG(uap, mask) == NULL)
624 		return EINVAL;
625 
626 	if (SCARG(uap, len) < sizeof(int))
627 		return EINVAL;
628 
629 	if (pfind(SCARG(uap, pid)) == NULL)
630 		return ESRCH;
631 
632 	/*
633 	 * return the actual number of CPU, tag all of them as available
634 	 * The result is a mask, the first CPU being in the least significant
635 	 * bit.
636 	 */
637 	ret = (1 << ncpu) - 1;
638 	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
639 	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
640 	*retp = ret;
641 
642 	if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
643 		return error;
644 
645 	free(data, M_TEMP);
646 
647 	return 0;
648 
649 }
650 
651 int
652 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
653 {
654 	/* {
655 		syscallarg(pid_t) pid;
656 		syscallarg(unsigned int) len;
657 		syscallarg(unsigned long *) mask;
658 	} */
659 
660 	if (pfind(SCARG(uap, pid)) == NULL)
661 		return ESRCH;
662 
663 	/* Let's ignore it */
664 #ifdef DEBUG_LINUX
665 	printf("linux_sys_sched_setaffinity\n");
666 #endif
667 	return 0;
668 };
669 #endif /* LINUX_NPTL */
670