xref: /netbsd-src/sys/compat/linux/common/linux_sched.c (revision 7c3f385475147b6e1c4753f2bee961630e2dfc40)
1 /*	$NetBSD: linux_sched.c,v 1.50 2008/02/29 14:29:06 elad Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center; by Matthias Scheler.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Linux compatibility module. Try to deal with scheduler related syscalls.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.50 2008/02/29 14:29:06 elad Exp $");
46 
47 #include <sys/param.h>
48 #include <sys/mount.h>
49 #include <sys/proc.h>
50 #include <sys/systm.h>
51 #include <sys/sysctl.h>
52 #include <sys/malloc.h>
53 #include <sys/syscallargs.h>
54 #include <sys/wait.h>
55 #include <sys/kauth.h>
56 #include <sys/ptrace.h>
57 
58 #include <sys/cpu.h>
59 
60 #include <compat/linux/common/linux_types.h>
61 #include <compat/linux/common/linux_signal.h>
62 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
63 #include <compat/linux/common/linux_emuldata.h>
64 #include <compat/linux/common/linux_ipc.h>
65 #include <compat/linux/common/linux_sem.h>
66 
67 #include <compat/linux/linux_syscallargs.h>
68 
69 #include <compat/linux/common/linux_sched.h>
70 
71 int
72 linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
73 {
74 	/* {
75 		syscallarg(int) flags;
76 		syscallarg(void *) stack;
77 #ifdef LINUX_NPTL
78 		syscallarg(void *) parent_tidptr;
79 		syscallarg(void *) child_tidptr;
80 #endif
81 	} */
82 	int flags, sig;
83 	int error;
84 #ifdef LINUX_NPTL
85 	struct linux_emuldata *led;
86 #endif
87 
88 	/*
89 	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
90 	 */
91 	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
92 		return (EINVAL);
93 
94 	/*
95 	 * Thread group implies shared signals. Shared signals
96 	 * imply shared VM. This matches what Linux kernel does.
97 	 */
98 	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
99 	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
100 		return (EINVAL);
101 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
102 	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
103 		return (EINVAL);
104 
105 	flags = 0;
106 
107 	if (SCARG(uap, flags) & LINUX_CLONE_VM)
108 		flags |= FORK_SHAREVM;
109 	if (SCARG(uap, flags) & LINUX_CLONE_FS)
110 		flags |= FORK_SHARECWD;
111 	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
112 		flags |= FORK_SHAREFILES;
113 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
114 		flags |= FORK_SHARESIGS;
115 	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
116 		flags |= FORK_PPWAIT;
117 
118 	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
119 	if (sig < 0 || sig >= LINUX__NSIG)
120 		return (EINVAL);
121 	sig = linux_to_native_signo[sig];
122 
123 #ifdef LINUX_NPTL
124 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
125 
126 	led->parent_tidptr = SCARG(uap, parent_tidptr);
127 	led->child_tidptr = SCARG(uap, child_tidptr);
128 	led->clone_flags = SCARG(uap, flags);
129 #endif /* LINUX_NPTL */
130 
131 	/*
132 	 * Note that Linux does not provide a portable way of specifying
133 	 * the stack area; the caller must know if the stack grows up
134 	 * or down.  So, we pass a stack size of 0, so that the code
135 	 * that makes this adjustment is a noop.
136 	 */
137 	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
138 	    NULL, NULL, retval, NULL)) != 0)
139 		return error;
140 
141 	return 0;
142 }
143 
144 /*
145  * linux realtime priority
146  *
147  * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
148  *
149  * - SCHED_OTHER tasks don't have realtime priorities.
150  *   in particular, sched_param::sched_priority is always 0.
151  */
152 
153 #define	LINUX_SCHED_RTPRIO_MIN	1
154 #define	LINUX_SCHED_RTPRIO_MAX	99
155 
156 static int
157 sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
158     int *native_policy, struct sched_param *native_params)
159 {
160 
161 	switch (linux_policy) {
162 	case LINUX_SCHED_OTHER:
163 		if (native_policy != NULL) {
164 			*native_policy = SCHED_OTHER;
165 		}
166 		break;
167 
168 	case LINUX_SCHED_FIFO:
169 		if (native_policy != NULL) {
170 			*native_policy = SCHED_FIFO;
171 		}
172 		break;
173 
174 	case LINUX_SCHED_RR:
175 		if (native_policy != NULL) {
176 			*native_policy = SCHED_RR;
177 		}
178 		break;
179 
180 	default:
181 		return EINVAL;
182 	}
183 
184 	if (linux_params != NULL) {
185 		int prio = linux_params->sched_priority;
186 
187 		KASSERT(native_params != NULL);
188 
189 		if (linux_policy == LINUX_SCHED_OTHER) {
190 			if (prio != 0) {
191 				return EINVAL;
192 			}
193 			native_params->sched_priority = PRI_NONE; /* XXX */
194 		} else {
195 			if (prio < LINUX_SCHED_RTPRIO_MIN ||
196 			    prio > LINUX_SCHED_RTPRIO_MAX) {
197 				return EINVAL;
198 			}
199 			native_params->sched_priority =
200 			    (prio - LINUX_SCHED_RTPRIO_MIN)
201 			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
202 			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
203 			    + SCHED_PRI_MIN;
204 		}
205 	}
206 
207 	return 0;
208 }
209 
210 static int
211 sched_native2linux(int native_policy, struct sched_param *native_params,
212     int *linux_policy, struct linux_sched_param *linux_params)
213 {
214 
215 	switch (native_policy) {
216 	case SCHED_OTHER:
217 		if (linux_policy != NULL) {
218 			*linux_policy = LINUX_SCHED_OTHER;
219 		}
220 		break;
221 
222 	case SCHED_FIFO:
223 		if (linux_policy != NULL) {
224 			*linux_policy = LINUX_SCHED_FIFO;
225 		}
226 		break;
227 
228 	case SCHED_RR:
229 		if (linux_policy != NULL) {
230 			*linux_policy = LINUX_SCHED_RR;
231 		}
232 		break;
233 
234 	default:
235 		panic("%s: unknown policy %d\n", __func__, native_policy);
236 	}
237 
238 	if (native_params != NULL) {
239 		int prio = native_params->sched_priority;
240 
241 		KASSERT(prio >= SCHED_PRI_MIN);
242 		KASSERT(prio <= SCHED_PRI_MAX);
243 		KASSERT(linux_params != NULL);
244 
245 		if (native_policy == SCHED_OTHER) {
246 			linux_params->sched_priority = 0;
247 		} else {
248 			linux_params->sched_priority =
249 			    (prio - SCHED_PRI_MIN)
250 			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
251 			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
252 			    + LINUX_SCHED_RTPRIO_MIN;
253 		}
254 	}
255 
256 	return 0;
257 }
258 
259 int
260 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
261 {
262 	/* {
263 		syscallarg(linux_pid_t) pid;
264 		syscallarg(const struct linux_sched_param *) sp;
265 	} */
266 	int error, policy;
267 	struct linux_sched_param lp;
268 	struct sched_param sp;
269 
270 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
271 		error = EINVAL;
272 		goto out;
273 	}
274 
275 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
276 	if (error)
277 		goto out;
278 
279 	/* We need the current policy in Linux terms. */
280 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
281 	if (error)
282 		goto out;
283 	error = sched_native2linux(policy, NULL, &policy, NULL);
284 	if (error)
285 		goto out;
286 
287 	error = sched_linux2native(policy, &lp, &policy, &sp);
288 	if (error)
289 		goto out;
290 
291 	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
292 	if (error)
293 		goto out;
294 
295  out:
296 	return error;
297 }
298 
299 int
300 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
301 {
302 	/* {
303 		syscallarg(linux_pid_t) pid;
304 		syscallarg(struct linux_sched_param *) sp;
305 	} */
306 	struct linux_sched_param lp;
307 	struct sched_param sp;
308 	int error, policy;
309 
310 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
311 		error = EINVAL;
312 		goto out;
313 	}
314 
315 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
316 	if (error)
317 		goto out;
318 
319 	error = sched_native2linux(policy, &sp, NULL, &lp);
320 	if (error)
321 		goto out;
322 
323 	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
324 	if (error)
325 		goto out;
326 
327  out:
328 	return error;
329 }
330 
331 int
332 linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
333 {
334 	/* {
335 		syscallarg(linux_pid_t) pid;
336 		syscallarg(int) policy;
337 		syscallarg(cont struct linux_sched_scheduler *) sp;
338 	} */
339 	int error, policy;
340 	struct linux_sched_param lp;
341 	struct sched_param sp;
342 
343 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
344 		error = EINVAL;
345 		goto out;
346 	}
347 
348 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
349 	if (error)
350 		goto out;
351 
352 	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
353 	if (error)
354 		goto out;
355 
356 	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
357 	if (error)
358 		goto out;
359 
360  out:
361 	return error;
362 }
363 
364 int
365 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
366 {
367 	/* {
368 		syscallarg(linux_pid_t) pid;
369 	} */
370 	int error, policy;
371 
372 	*retval = -1;
373 
374 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
375 	if (error)
376 		goto out;
377 
378 	error = sched_native2linux(policy, NULL, &policy, NULL);
379 	if (error)
380 		goto out;
381 
382 	*retval = policy;
383 
384  out:
385 	return error;
386 }
387 
388 int
389 linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
390 {
391 
392 	yield();
393 	return 0;
394 }
395 
396 int
397 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
398 {
399 	/* {
400 		syscallarg(int) policy;
401 	} */
402 
403 /*
404  * We can't emulate anything put the default scheduling policy.
405  */
406 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
407 		*retval = -1;
408 		return EINVAL;
409 	}
410 
411 	*retval = 0;
412 	return 0;
413 }
414 
415 int
416 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
417 {
418 	/* {
419 		syscallarg(int) policy;
420 	} */
421 
422 /*
423  * We can't emulate anything put the default scheduling policy.
424  */
425 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
426 		*retval = -1;
427 		return EINVAL;
428 	}
429 
430 	*retval = 0;
431 	return 0;
432 }
433 
434 #ifndef __m68k__
435 /* Present on everything but m68k */
436 int
437 linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
438 {
439 #ifdef LINUX_NPTL
440 	/* {
441 		syscallarg(int) error_code;
442 	} */
443 	struct proc *p = l->l_proc;
444 	struct linux_emuldata *led = p->p_emuldata;
445 	struct linux_emuldata *e;
446 
447 	if (led->s->flags & LINUX_LES_USE_NPTL) {
448 
449 #ifdef DEBUG_LINUX
450 		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
451 		    led->s->refs);
452 #endif
453 
454 		/*
455 		 * The calling thread is supposed to kill all threads
456 		 * in the same thread group (i.e. all threads created
457 		 * via clone(2) with CLONE_THREAD flag set).
458 		 *
459 		 * If there is only one thread, things are quite simple
460 		 */
461 		if (led->s->refs == 1)
462 			return sys_exit(l, (const void *)uap, retval);
463 
464 #ifdef DEBUG_LINUX
465 		printf("%s:%d\n", __func__, __LINE__);
466 #endif
467 
468 		led->s->flags |= LINUX_LES_INEXITGROUP;
469 		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
470 
471 		/*
472 		 * Kill all threads in the group. The emulation exit hook takes
473 		 * care of hiding the zombies and reporting the exit code
474 		 * properly.
475 		 */
476 		mutex_enter(&proclist_mutex);
477       		LIST_FOREACH(e, &led->s->threads, threads) {
478 			if (e->proc == p)
479 				continue;
480 
481 #ifdef DEBUG_LINUX
482 			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
483 #endif
484 			psignal(e->proc, SIGKILL);
485 		}
486 
487 		/* Now, kill ourselves */
488 		psignal(p, SIGKILL);
489 		mutex_exit(&proclist_mutex);
490 
491 		return 0;
492 
493 	}
494 #endif /* LINUX_NPTL */
495 
496 	return sys_exit(l, (const void *)uap, retval);
497 }
498 #endif /* !__m68k__ */
499 
500 #ifdef LINUX_NPTL
501 int
502 linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
503 {
504 	/* {
505 		syscallarg(int *) tidptr;
506 	} */
507 	struct linux_emuldata *led;
508 
509 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
510 	led->clear_tid = SCARG(uap, tid);
511 
512 	led->s->flags |= LINUX_LES_USE_NPTL;
513 
514 	*retval = l->l_proc->p_pid;
515 
516 	return 0;
517 }
518 
519 /* ARGUSED1 */
520 int
521 linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
522 {
523 	/* The Linux kernel does it exactly that way */
524 	*retval = l->l_proc->p_pid;
525 	return 0;
526 }
527 
528 #ifdef LINUX_NPTL
529 /* ARGUSED1 */
530 int
531 linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
532 {
533 	struct linux_emuldata *led = l->l_proc->p_emuldata;
534 
535 	if (led->s->flags & LINUX_LES_USE_NPTL) {
536 		/* The Linux kernel does it exactly that way */
537 		*retval = led->s->group_pid;
538 	} else {
539 		*retval = l->l_proc->p_pid;
540 	}
541 
542 	return 0;
543 }
544 
545 /* ARGUSED1 */
546 int
547 linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
548 {
549 	struct proc *p = l->l_proc;
550 	struct linux_emuldata *led = p->p_emuldata;
551 	struct proc *glp;
552 	struct proc *pp;
553 
554 	if (led->s->flags & LINUX_LES_USE_NPTL) {
555 
556 		/* Find the thread group leader's parent */
557 		if ((glp = pfind(led->s->group_pid)) == NULL) {
558 			/* Maybe panic... */
559 			printf("linux_sys_getppid: missing group leader PID"
560 			    " %d\n", led->s->group_pid);
561 			return -1;
562 		}
563 		pp = glp->p_pptr;
564 
565 		/* If this is a Linux process too, return thread group PID */
566 		if (pp->p_emul == p->p_emul) {
567 			struct linux_emuldata *pled;
568 
569 			pled = pp->p_emuldata;
570 			*retval = pled->s->group_pid;
571 		} else {
572 			*retval = pp->p_pid;
573 		}
574 
575 	} else {
576 		*retval = p->p_pptr->p_pid;
577 	}
578 
579 	return 0;
580 }
581 #endif /* LINUX_NPTL */
582 
583 int
584 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
585 {
586 	/* {
587 		syscallarg(pid_t) pid;
588 		syscallarg(unsigned int) len;
589 		syscallarg(unsigned long *) mask;
590 	} */
591 	int error;
592 	int ret;
593 	char *data;
594 	int *retp;
595 
596 	if (SCARG(uap, mask) == NULL)
597 		return EINVAL;
598 
599 	if (SCARG(uap, len) < sizeof(int))
600 		return EINVAL;
601 
602 	if (pfind(SCARG(uap, pid)) == NULL)
603 		return ESRCH;
604 
605 	/*
606 	 * return the actual number of CPU, tag all of them as available
607 	 * The result is a mask, the first CPU being in the least significant
608 	 * bit.
609 	 */
610 	ret = (1 << ncpu) - 1;
611 	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
612 	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
613 	*retp = ret;
614 
615 	if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
616 		return error;
617 
618 	free(data, M_TEMP);
619 
620 	return 0;
621 
622 }
623 
624 int
625 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
626 {
627 	/* {
628 		syscallarg(pid_t) pid;
629 		syscallarg(unsigned int) len;
630 		syscallarg(unsigned long *) mask;
631 	} */
632 
633 	if (pfind(SCARG(uap, pid)) == NULL)
634 		return ESRCH;
635 
636 	/* Let's ignore it */
637 #ifdef DEBUG_LINUX
638 	printf("linux_sys_sched_setaffinity\n");
639 #endif
640 	return 0;
641 };
642 #endif /* LINUX_NPTL */
643