/*	$NetBSD: lwproc.c,v 1.58 2023/10/15 11:11:37 riastradh Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#define RUMP__CURLWP_PRIVATE

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lwproc.c,v 1.58 2023/10/15 11:11:37 riastradh Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/filedesc.h>
#include <sys/fstrans.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/ktrace.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/uidinfo.h>
#include <sys/psref.h>
#include <sys/syncobj.h>

#include <rump-sys/kern.h>

#include <rump/rumpuser.h>

#include "rump_curlwp.h"

struct lwp lwp0 = {
	.l_lid = 0,
	.l_proc = &proc0,
	.l_fd = &filedesc0,
};
struct lwplist alllwp = LIST_HEAD_INITIALIZER(alllwp);

u_int nprocs = 1;

struct emul *emul_default = &emul_netbsd;

void
lwp_unsleep(lwp_t *l, bool cleanup)
{

	KASSERT(mutex_owned(l->l_mutex));

	(*l->l_syncobj->sobj_unsleep)(l, cleanup);
}

/*
 * Look up a live LWP within the specified process.
 *
 * Must be called with p->p_lock held.
 */
struct lwp *
lwp_find(struct proc *p, lwpid_t id)
{
	struct lwp *l;

	KASSERT(mutex_owned(p->p_lock));

	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		if (l->l_lid == id)
			break;
	}

	/*
	 * No need to lock - all of these conditions will
	 * be visible with the process level mutex held.
	 */
	if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB))
		l = NULL;

	return l;
}
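
/*
 * Illustrative sketch (not part of the original file, hence #if 0):
 * how a caller might use lwp_find() under the required lock.  The
 * variable "mylwpid" is hypothetical; the result is only guaranteed
 * live while p->p_lock remains held.
 */
#if 0
	struct lwp *l2;

	mutex_enter(p->p_lock);
	l2 = lwp_find(p, mylwpid);
	if (l2 != NULL) {
		/* l2 is live (neither LSIDL nor LSZOMB) under p->p_lock */
	}
	mutex_exit(p->p_lock);
#endif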

void
rump_lwproc_init(void)
{

	lwproc_curlwpop(RUMPUSER_LWP_CREATE, &lwp0);
}

struct lwp *
rump_lwproc_curlwp_hypercall(void)
{

	return rumpuser_curlwp();
}

void
rump_lwproc_curlwp_set(struct lwp *l)
{

	KASSERT(curlwp == NULL);
	lwproc_curlwpop(RUMPUSER_LWP_SET, l);
}

void
rump_lwproc_curlwp_clear(struct lwp *l)
{

	KASSERT(l == curlwp);
	lwproc_curlwpop(RUMPUSER_LWP_CLEAR, l);
}

static void
lwproc_proc_free(struct proc *p)
{
	kauth_cred_t cred;
	struct proc *child;

	KASSERT(p->p_stat == SDYING || p->p_stat == SDEAD);

#ifdef KTRACE
	if (p->p_tracep) {
		mutex_enter(&ktrace_lock);
		ktrderef(p);
		mutex_exit(&ktrace_lock);
	}
#endif

	mutex_enter(&proc_lock);

	/* children go to init */
	while ((child = LIST_FIRST(&p->p_children)) != NULL) {
		LIST_REMOVE(child, p_sibling);
		child->p_pptr = initproc;
		child->p_ppid = 1;
		LIST_INSERT_HEAD(&initproc->p_children, child, p_sibling);
	}

	KASSERT(p->p_nlwps == 0);
	KASSERT(LIST_EMPTY(&p->p_lwps));

	LIST_REMOVE(p, p_list);
	LIST_REMOVE(p, p_sibling);
	proc_free_pid(p->p_pid);
	atomic_dec_uint(&nprocs);
	proc_leavepgrp(p); /* releases proc_lock */

	cred = p->p_cred;
	chgproccnt(kauth_cred_getuid(cred), -1);
	rump_proc_vfs_release(p);

	doexithooks(p);
	lim_free(p->p_limit);
	pstatsfree(p->p_stats);
	kauth_cred_free(p->p_cred);
	proc_finispecific(p);

	mutex_obj_free(p->p_lock);
	mutex_destroy(&p->p_stmutex);
	mutex_destroy(&p->p_auxlock);
	rw_destroy(&p->p_reflock);
	cv_destroy(&p->p_waitcv);
	cv_destroy(&p->p_lwpcv);

	/* non-local vmspaces are not shared */
	if (!RUMP_LOCALPROC_P(p)) {
		struct rump_spctl *ctl = (struct rump_spctl *)p->p_vmspace;
		KASSERT(p->p_vmspace->vm_refcnt == 1);
		kmem_free(ctl, sizeof(*ctl));
	}

	proc_free_mem(p);
}

/*
 * Allocate a new process.  Mostly mimic fork by copying the
 * properties of the parent.  However, there are some differences.
 *
 * Returns a pointer to the new process; the caller creates the
 * first lwp and switches to it separately.
 */
static struct proc *
lwproc_newproc(struct proc *parent, struct vmspace *vm, int flags)
{
	uid_t uid = kauth_cred_getuid(parent->p_cred);
	struct proc *p;

	/* maxproc not enforced */
	atomic_inc_uint(&nprocs);

	/* allocate process */
	p = proc_alloc();
	memset(&p->p_startzero, 0,
	    offsetof(struct proc, p_endzero)
	      - offsetof(struct proc, p_startzero));
	memcpy(&p->p_startcopy, &parent->p_startcopy,
	    offsetof(struct proc, p_endcopy)
	      - offsetof(struct proc, p_startcopy));

	/* some other garbage we need to zero */
	p->p_sigacts = NULL;
	p->p_aio = NULL;
	p->p_dtrace = NULL;
	p->p_mqueue_cnt = p->p_exitsig = 0;
	p->p_flag = p->p_sflag = p->p_slflag = p->p_lflag = p->p_stflag = 0;
	p->p_trace_enabled = 0;
	p->p_xsig = p->p_xexit = p->p_acflag = 0;
	p->p_stackbase = 0;

	p->p_stats = pstatscopy(parent->p_stats);

	p->p_vmspace = vm;
	p->p_emul = emul_default;
#ifdef __HAVE_SYSCALL_INTERN
	p->p_emul->e_syscall_intern(p);
#endif
	if (*parent->p_comm)
		strcpy(p->p_comm, parent->p_comm);
	else
		strcpy(p->p_comm, "rumproc");

	if ((flags & RUMP_RFCFDG) == 0)
		KASSERT(parent == curproc);
	if (flags & RUMP_RFFDG)
		p->p_fd = fd_copy();
	else if (flags & RUMP_RFCFDG)
		p->p_fd = fd_init(NULL);
	else
		fd_share(p);

	lim_addref(parent->p_limit);
	p->p_limit = parent->p_limit;

	LIST_INIT(&p->p_lwps);
	LIST_INIT(&p->p_children);

	p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	rw_init(&p->p_reflock);
	cv_init(&p->p_waitcv, "pwait");
	cv_init(&p->p_lwpcv, "plwp");

	p->p_pptr = parent;
	p->p_ppid = parent->p_pid;
	p->p_stat = SACTIVE;

	kauth_proc_fork(parent, p);

	/* initialize cwd in rump kernels with vfs */
	rump_proc_vfs_init(p);

	chgproccnt(uid, 1); /* not enforced */

	/* publish proc on the various proc lists */
	mutex_enter(&proc_lock);
	LIST_INSERT_HEAD(&allproc, p, p_list);
	LIST_INSERT_HEAD(&parent->p_children, p, p_sibling);
	LIST_INSERT_AFTER(parent, p, p_pglist);
	mutex_exit(&proc_lock);

	return p;
}

static void
lwproc_freelwp(struct lwp *l)
{
	struct proc *p;

	p = l->l_proc;
	mutex_enter(p->p_lock);

	KASSERT(l->l_flag & LW_WEXIT);
	KASSERT(l->l_refcnt == 0);

	LIST_REMOVE(l, l_sibling);
	KASSERT(p->p_nlwps >= 1);
	if (--p->p_nlwps == 0) {
		KASSERT(p != &proc0);
		p->p_stat = SDEAD;
	} else {
		chglwpcnt(kauth_cred_getuid(p->p_cred), -1);
	}
	cv_broadcast(&p->p_lwpcv); /* nobody sleeps on this in a rump kernel? */
	kauth_cred_free(l->l_cred);
	l->l_stat = LSIDL;
	mutex_exit(p->p_lock);

	mutex_enter(&proc_lock);
	proc_free_lwpid(p, l->l_lid);
	LIST_REMOVE(l, l_list);
	mutex_exit(&proc_lock);

	if (l->l_name)
		kmem_free(l->l_name, MAXCOMLEN);
	fstrans_lwp_dtor(l);
	lwp_finispecific(l);

	lwproc_curlwpop(RUMPUSER_LWP_DESTROY, l);
	kmem_free(l, sizeof(*l));

	if (p->p_stat == SDEAD)
		lwproc_proc_free(p);
}

extern kmutex_t unruntime_lock;

static struct lwp *
lwproc_makelwp(struct proc *p, bool doswitch, bool procmake)
{
	struct lwp *l = kmem_zalloc(sizeof(*l), KM_SLEEP);

	l->l_refcnt = 1;
	l->l_proc = p;
	l->l_stat = LSIDL;
	l->l_mutex = &unruntime_lock;

	proc_alloc_lwpid(p, l);

	mutex_enter(p->p_lock);
	/*
	 * Account the new lwp to the owner of the process.
	 * For some reason, NetBSD doesn't count the first lwp
	 * in a process as an lwp, so skip that.
	 */
	if (p->p_nlwps++) {
		chglwpcnt(kauth_cred_getuid(p->p_cred), 1);
	}

	KASSERT((p->p_sflag & PS_RUMP_LWPEXIT) == 0);
	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);

	l->l_fd = p->p_fd;
	l->l_cpu = &rump_bootcpu;
	l->l_target_cpu = &rump_bootcpu; /* Initial target CPU always same */
	l->l_stat = LSRUN;
	TAILQ_INIT(&l->l_ld_locks);
	mutex_exit(p->p_lock);

	l->l_cred = kauth_cred_hold(p->p_cred);
	lwp_initspecific(l);
	PSREF_DEBUG_INIT_LWP(l);

	lwproc_curlwpop(RUMPUSER_LWP_CREATE, l);
	if (doswitch) {
		rump_lwproc_switch(l);
	}

	/* filedesc already has refcount 1 when process is created */
	if (!procmake) {
		fd_hold(l);
	}

	mutex_enter(&proc_lock);
	LIST_INSERT_HEAD(&alllwp, l, l_list);
	mutex_exit(&proc_lock);

	return l;
}

struct lwp *
rump__lwproc_alloclwp(struct proc *p)
{
	bool newproc = false;

	if (p == NULL) {
		p = lwproc_newproc(&proc0, rump_vmspace_local, RUMP_RFCFDG);
		newproc = true;
	}

	return lwproc_makelwp(p, false, newproc);
}

int
rump_lwproc_newlwp(pid_t pid)
{
	struct proc *p;

	/*
	 * Note: lwproc_makelwp() allocates the lwp itself, so there
	 * is nothing to allocate (or to leak) here.
	 */
	mutex_enter(&proc_lock);
	p = proc_find_raw(pid);
	if (p == NULL) {
		mutex_exit(&proc_lock);
		return ESRCH;
	}
	mutex_enter(p->p_lock);
	if (p->p_sflag & PS_RUMP_LWPEXIT) {
		mutex_exit(p->p_lock);
		mutex_exit(&proc_lock);
		return EBUSY;
	}
	mutex_exit(p->p_lock);
	mutex_exit(&proc_lock);

	/* XXX what holds proc? */

	lwproc_makelwp(p, true, false);

	return 0;
}
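
/*
 * Illustrative sketch (not part of the original file, hence #if 0):
 * attaching an extra lwp to an existing process and implicitly
 * switching to it.  Using curproc's pid here is an assumed example.
 */
#if 0
	int error;

	error = rump_lwproc_newlwp(curproc->p_pid);
	if (error != 0) {
		/* ESRCH: no such pid; EBUSY: process is exiting */
		return error;
	}
#endif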

int
rump_lwproc_rfork_vmspace(struct vmspace *vm, int flags)
{
	struct proc *p;

	if (flags & ~(RUMP_RFFDG|RUMP_RFCFDG) ||
	    (~flags & (RUMP_RFFDG|RUMP_RFCFDG)) == 0)
		return EINVAL;

	p = lwproc_newproc(curproc, vm, flags);
	lwproc_makelwp(p, true, true);

	return 0;
}

int
rump_lwproc_rfork(int flags)
{

	return rump_lwproc_rfork_vmspace(rump_vmspace_local, flags);
}
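
/*
 * Illustrative sketch (not part of the original file, hence #if 0):
 * the flags select the fd table disposition of the new process.
 * Passing both RUMP_RFFDG and RUMP_RFCFDG is rejected with EINVAL
 * by rump_lwproc_rfork_vmspace() above.
 */
#if 0
	rump_lwproc_rfork(0);		/* share fd table with parent */
	rump_lwproc_rfork(RUMP_RFFDG);	/* copy parent's fd table */
	rump_lwproc_rfork(RUMP_RFCFDG);	/* start with a clean fd table */
#endif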

/*
 * Switch to a new process/thread.  Release the previous one if
 * it is deemed to be exiting.  This is considered a slow path for
 * rump kernel entry.
 */
void
rump_lwproc_switch(struct lwp *newlwp)
{
	struct lwp *l = curlwp;
	int nlocks;

	KASSERT(!(l->l_flag & LW_WEXIT) || newlwp);

	if (__predict_false(newlwp && (newlwp->l_pflag & LP_RUNNING)))
		panic("lwp %p (%d:%d) already running",
		    newlwp, newlwp->l_proc->p_pid, newlwp->l_lid);

	if (newlwp == NULL) {
		l->l_pflag &= ~LP_RUNNING;
		l->l_flag |= LW_RUMP_CLEAR;
		return;
	}

	/* fd_free() must be called from curlwp context.  talk about ugh */
	if (l->l_flag & LW_WEXIT) {
		fd_free();
	}

	KERNEL_UNLOCK_ALL(NULL, &nlocks);
	lwproc_curlwpop(RUMPUSER_LWP_CLEAR, l);

	newlwp->l_cpu = newlwp->l_target_cpu = l->l_cpu;
	newlwp->l_mutex = l->l_mutex;
	newlwp->l_pflag |= LP_RUNNING;

	lwproc_curlwpop(RUMPUSER_LWP_SET, newlwp);
	curcpu()->ci_curlwp = newlwp;
	KERNEL_LOCK(nlocks, NULL);

	/*
	 * Check if the thread should get a signal.  This is
	 * mostly to satisfy the "record" rump sigmodel.
	 */
	mutex_enter(newlwp->l_proc->p_lock);
	if (sigispending(newlwp, 0)) {
		newlwp->l_flag |= LW_PENDSIG;
	}
	mutex_exit(newlwp->l_proc->p_lock);

	l->l_mutex = &unruntime_lock;
	l->l_pflag &= ~LP_RUNNING;
	l->l_flag &= ~LW_PENDSIG;
	l->l_stat = LSRUN;
	l->l_ru.ru_nvcsw++;

	if (l->l_flag & LW_WEXIT) {
		l->l_stat = LSIDL;
		lwproc_freelwp(l);
	}
}
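
/*
 * Illustrative sketch (not part of the original file, hence #if 0):
 * a typical bounce into another lwp's context and back.  "other" is
 * a hypothetical lwp created earlier, e.g. with
 * rump__lwproc_alloclwp(); neither lwp is marked LW_WEXIT here, so
 * nothing is released by the switches themselves.
 */
#if 0
	struct lwp *self = rump_lwproc_curlwp();

	rump_lwproc_switch(other);	/* run as "other" */
	/* ... do work in the other process's context ... */
	rump_lwproc_switch(self);	/* back to the original lwp */
#endif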

/*
 * Mark the current thread to be released upon return from
 * kernel.
 */
void
rump_lwproc_releaselwp(void)
{
	struct lwp *l = curlwp;

	if (l->l_refcnt == 0 || l->l_flag & LW_WEXIT)
		panic("releasing non-pertinent lwp");

	rump__lwproc_lwprele();
	KASSERT(l->l_refcnt == 0 && (l->l_flag & LW_WEXIT));
}

/*
 * In-kernel routines used to add and remove references for the
 * current thread.  The main purpose is to make it possible for
 * implicit threads to persist over scheduling operations in
 * rump kernel drivers.  Note that we don't need p_lock in a
 * rump kernel, since we do refcounting only for curlwp.
 */
void
rump__lwproc_lwphold(void)
{
	struct lwp *l = curlwp;

	l->l_refcnt++;
	l->l_flag &= ~LW_WEXIT;
}

void
rump__lwproc_lwprele(void)
{
	struct lwp *l = curlwp;

	l->l_refcnt--;
	if (l->l_refcnt == 0)
		l->l_flag |= LW_WEXIT;
}
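
/*
 * Illustrative sketch (not part of the original file, hence #if 0):
 * keeping an implicit thread alive across a blocking operation in a
 * driver.  Calls must balance; the final rele drops the count to
 * zero and sets LW_WEXIT, so the lwp is reaped at the next
 * rump_lwproc_switch().
 */
#if 0
	rump__lwproc_lwphold();
	/* ... curlwp may now be scheduled away and back safely ... */
	rump__lwproc_lwprele();
#endif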

struct lwp *
rump_lwproc_curlwp(void)
{
	struct lwp *l = curlwp;

	if (l->l_flag & LW_WEXIT)
		return NULL;
	return l;
}

/* this interface is under construction (like the proverbial 90's web page) */
int rump_i_know_what_i_am_doing_with_sysents = 0;
void
rump_lwproc_sysent_usenative(void)
{

	if (!rump_i_know_what_i_am_doing_with_sysents)
		panic("don't use rump_lwproc_sysent_usenative()");
	curproc->p_emul = &emul_netbsd;
}

long
lwp_pctr(void)
{

	return curlwp->l_ru.ru_nvcsw;
}