/*	$NetBSD: kern_lwp.c,v 1.269 2023/12/20 21:03:50 andvar Exp $	*/

/*-
 * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020, 2023
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Nathan J. Williams, and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Overview
 *
 *	Lightweight processes (LWPs) are the basic unit or thread of
 *	execution within the kernel.  The core state of an LWP is described
 *	by "struct lwp", also known as lwp_t.
 *
 *	Each LWP is contained within a process (described by "struct proc").
 *	Every process contains at least one LWP, but may contain more.  The
 *	process describes attributes shared among all of its LWPs such as a
 *	private address space, global execution state (stopped, active,
 *	zombie, ...), signal disposition and so on.  On a multiprocessor
 *	machine, multiple LWPs may be executing concurrently in the kernel.
 *
 * Execution states
 *
 *	At any given time, an LWP has overall state that is described by
 *	lwp::l_stat.  The states are broken into two sets below.  The first
 *	set is guaranteed to represent the absolute, current state of the
 *	LWP:
 *
 *	LSONPROC
 *
 *		On processor: the LWP is executing on a CPU, either in the
 *		kernel or in user space.
 *
 *	LSRUN
 *
 *		Runnable: the LWP is parked on a run queue, and may soon be
 *		chosen to run by an idle processor, or by a processor that
 *		has been asked to preempt a currently running but lower
 *		priority LWP.
 *
 *	LSIDL
 *
 *		Idle: the LWP has been created but has not yet executed, or
 *		it has ceased executing a unit of work and is waiting to be
 *		started again.  This state exists so that the LWP can occupy
 *		a slot in the process & PID table, but without having to
 *		worry about being touched; lookups of the LWP by ID will
 *		fail while in this state.  The LWP will become visible for
 *		lookup once its state transitions further.  Some special
 *		kernel threads also (ab)use this state to indicate that they
 *		are idle (soft interrupts and idle LWPs).
 *
 *	LSSUSPENDED:
 *
 *		Suspended: the LWP has had its execution suspended by
 *		another LWP in the same process using the _lwp_suspend()
 *		system call.  User-level LWPs also enter the suspended
 *		state when the system is shutting down.
 *
 *	The second set represents a "statement of intent" on behalf of the
 *	LWP.  The LWP may in fact be executing on a processor, or may be
 *	sleeping or idle.  It is expected to take the necessary action to
 *	stop executing or become "running" again within a short timeframe.
 *	The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running.
 *	Importantly, it indicates that its state is tied to a CPU.
 *
 *	LSZOMB:
 *
 *		Dead or dying: the LWP has released most of its resources
 *		and is about to switch away into oblivion, or has already
 *		switched away.  When it switches away, its few remaining
 *		resources can be collected.
 *
 *	LSSLEEP:
 *
 *		Sleeping: the LWP has entered itself onto a sleep queue, and
 *		has switched away or will switch away shortly to allow other
 *		LWPs to run on the CPU.
 *
 *	LSSTOP:
 *
 *		Stopped: the LWP has been stopped as a result of a job
 *		control signal, or as a result of the ptrace() interface.
 *
 *		Stopped LWPs may run briefly within the kernel to handle
 *		signals that they receive, but will not return to user space
 *		until their process' state is changed away from stopped.
 *
 *		Single LWPs within a process cannot be set stopped
 *		selectively: all actions that can stop or continue LWPs
 *		occur at the process level.
 *
 * State transitions
 *
 *	Note that the LSSTOP state may only be set when returning to
 *	user space in userret(), or when sleeping interruptibly.  The
 *	LSSUSPENDED state may only be set in userret().  Before setting
 *	those states, we try to ensure that the LWPs will release all
 *	locks that they hold, and at a minimum try to ensure that the
 *	LWP can be set runnable again by a signal.
 *
 *	LWPs may transition states in the following ways:
 *
 *	 RUN -------> ONPROC		ONPROC -----> RUN
 *						    > SLEEP
 *						    > STOPPED
 *						    > SUSPENDED
 *						    > ZOMB
 *						    > IDL (special cases)
 *
 *	 STOPPED ---> RUN		SUSPENDED --> RUN
 *	            > SLEEP
 *
 *	 SLEEP -----> ONPROC		IDL --------> RUN
 *		    > RUN			    > SUSPENDED
 *		    > STOPPED			    > STOPPED
 *						    > ONPROC (special cases)
 *
 *	Some state transitions are only possible with kernel threads (e.g.
 *	ONPROC -> IDL) and happen under tightly controlled circumstances
 *	free of unwanted side effects.
 *
 * Migration
 *
 *	Migration of threads from one CPU to another may be performed
 *	internally by the scheduler via the sched_takecpu() or
 *	sched_catchlwp() functions.  The universal lwp_migrate() function
 *	should be used for any other cases.  Subsystems in the kernel
 *	must be aware that the CPU of an LWP may change while the LWP
 *	is not locked.
 *
 * Locking
 *
 *	The majority of fields in 'struct lwp' are covered by a single,
 *	general spin lock pointed to by lwp::l_mutex.  The locks covering
 *	each field are documented in sys/lwp.h.
 *
 *	State transitions must be made with the LWP's general lock held,
 *	and may cause the LWP's lock pointer to change.  Manipulation of
 *	the general lock is not performed directly, but through calls to
 *	lwp_lock(), lwp_unlock() and others.  It should be noted that the
 *	adaptive locks are not allowed to be released while the LWP's lock
 *	is being held (unlike for other spin-locks).
 *
 *	States and their associated locks:
 *
 *	LSIDL, LSONPROC, LSZOMB, LSSUSPENDED:
 *
 *		Always covered by spc_lwplock, which protects LWPs not
 *		associated with any other sync object.  This is a per-CPU
 *		lock and matches lwp::l_cpu.
 *
 *	LSRUN:
 *
 *		Always covered by spc_mutex, which protects the run queues.
 *		This is a per-CPU lock and matches lwp::l_cpu.
 *
 *	LSSLEEP:
 *
 *		Covered by a lock associated with the sleep queue (sometimes
 *		a turnstile sleep queue) that the LWP resides on.  This can
 *		be spc_lwplock for SOBJ_SLEEPQ_NULL (an "untracked" sleep).
 *
 *	LSSTOP:
 *
 *		If the LWP was previously sleeping (l_wchan != NULL), then
 *		l_mutex references the sleep queue lock.  If the LWP was
 *		runnable or on the CPU when halted, or has been removed from
 *		the sleep queue since halted, then the lock is spc_lwplock.
 *
 *	The lock order is as follows:
 *
 *		sleepq -> turnstile -> spc_lwplock -> spc_mutex
 *
 *	Each process has a scheduler state lock (proc::p_lock), and a
 *	number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and
 *	so on.  When an LWP is to be entered into or removed from one of the
 *	following states, p_lock must be held and the process wide counters
 *	adjusted:
 *
 *		LSIDL, LSZOMB, LSSTOP, LSSUSPENDED
 *
 *	(But not always for kernel threads.  There are some special cases
 *	as mentioned above: soft interrupts, and the idle loops.)
 *
 *	Note that an LWP is considered running or likely to run soon if in
 *	one of the following states.  This affects the value of p_nrlwps:
 *
 *		LSRUN, LSONPROC, LSSLEEP
 *
 *	p_lock does not need to be held when transitioning among these
 *	three states, hence p_lock is rarely taken for state transitions.
 */
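
/*
 * Illustrative sketch only (not compiled): the canonical pattern for
 * examining an LWP's execution state under the general lock described
 * above.  The helper name is hypothetical.
 *
 *	static bool
 *	lwp_is_onproc(struct lwp *l)
 *	{
 *		bool onproc;
 *
 *		lwp_lock(l);
 *		onproc = (l->l_stat == LSONPROC);
 *		lwp_unlock(l);
 *		return onproc;
 *	}
 */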

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.269 2023/12/20 21:03:50 andvar Exp $");

#include "opt_ddb.h"
#include "opt_lockdebug.h"
#include "opt_dtrace.h"

#define _LWP_API_PRIVATE

#include <sys/param.h>

#include <sys/atomic.h>
#include <sys/cprng.h>
#include <sys/cpu.h>
#include <sys/dtrace_bsd.h>
#include <sys/filedesc.h>
#include <sys/fstrans.h>
#include <sys/futex.h>
#include <sys/intr.h>
#include <sys/kauth.h>
#include <sys/kcov.h>
#include <sys/kmem.h>
#include <sys/lockdebug.h>
#include <sys/lwpctl.h>
#include <sys/msan.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/pset.h>
#include <sys/psref.h>
#include <sys/ptrace.h>
#include <sys/sdt.h>
#include <sys/sleepq.h>
#include <sys/syncobj.h>
#include <sys/syscall_stats.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/uidinfo.h>
#include <sys/xcall.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>

static pool_cache_t	lwp_cache	__read_mostly;
struct lwplist		alllwp		__cacheline_aligned;

static int		lwp_ctor(void *, void *, int);
static void		lwp_dtor(void *, void *);

/* DTrace proc provider probes */
SDT_PROVIDER_DEFINE(proc);

SDT_PROBE_DEFINE1(proc, kernel, , lwp__create, "struct lwp *");
SDT_PROBE_DEFINE1(proc, kernel, , lwp__start, "struct lwp *");
SDT_PROBE_DEFINE1(proc, kernel, , lwp__exit, "struct lwp *");

struct turnstile turnstile0 __cacheline_aligned;
struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = {
#ifdef LWP0_CPU_INFO
	.l_cpu = LWP0_CPU_INFO,
#endif
#ifdef LWP0_MD_INITIALIZER
	.l_md = LWP0_MD_INITIALIZER,
#endif
	.l_proc = &proc0,
	.l_lid = 0,		/* we own proc0's slot in the pid table */
	.l_flag = LW_SYSTEM,
	.l_stat = LSONPROC,
	.l_ts = &turnstile0,
	.l_syncobj = &sched_syncobj,
	.l_refcnt = 0,
	.l_priority = PRI_USER + NPRI_USER - 1,
	.l_inheritedprio = -1,
	.l_class = SCHED_OTHER,
	.l_psid = PS_NONE,
	.l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders),
	.l_name = __UNCONST("swapper"),
	.l_fd = &filedesc0,
};

static int
lwp_maxlwp(void)
{
	/* Assume 1 LWP per 1MiB. */
	uint64_t lwps_per = ctob(physmem) / (1024 * 1024);

	return MAX(MIN(MAXMAXLWP, lwps_per), MAXLWP);
}
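
/*
 * Worked example (illustrative): on a machine with 4 GiB of RAM,
 * ctob(physmem) / (1024 * 1024) yields 4096, and the returned default
 * is that value clamped to the range [MAXLWP, MAXMAXLWP].
 */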

static int sysctl_kern_maxlwp(SYSCTLFN_PROTO);

/*
 * sysctl helper routine for kern.maxlwp.  Ensures that the new
 * values are not too low or too high.
 */
static int
sysctl_kern_maxlwp(SYSCTLFN_ARGS)
{
	int error, nmaxlwp;
	struct sysctlnode node;

	nmaxlwp = maxlwp;
	node = *rnode;
	node.sysctl_data = &nmaxlwp;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (nmaxlwp < 0 || nmaxlwp >= MAXMAXLWP)
		return EINVAL;
	if (nmaxlwp > lwp_maxlwp())
		return EINVAL;
	maxlwp = nmaxlwp;

	return 0;
}
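
/*
 * Usage example (illustrative; values shown are hypothetical): the
 * limit can be inspected and tuned from userland, subject to the
 * bounds enforced above.
 *
 *	$ sysctl kern.maxlwp
 *	kern.maxlwp = 2048
 *	# sysctl -w kern.maxlwp=4096
 */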

static void
sysctl_kern_lwp_setup(void)
{
	sysctl_createv(NULL, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "maxlwp",
		       SYSCTL_DESCR("Maximum number of simultaneous threads"),
		       sysctl_kern_maxlwp, 0, NULL, 0,
		       CTL_KERN, CTL_CREATE, CTL_EOL);
}

void
lwpinit(void)
{

	LIST_INIT(&alllwp);
	lwpinit_specificdata();
	/*
	 * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu()
	 * calls will exit before the memory of LWPs is returned to the pool,
	 * where the KVA of an LWP structure might be freed and re-used for
	 * other purposes.  Kernel preemption is disabled around mutex_oncpu()
	 * and rw_oncpu() callers, therefore a regular passive serialization
	 * barrier will do the job.
	 */
	lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0,
	    PR_PSERIALIZE, "lwppl", NULL, IPL_NONE, lwp_ctor, lwp_dtor, NULL);

	maxlwp = lwp_maxlwp();
	sysctl_kern_lwp_setup();
}

void
lwp0_init(void)
{
	struct lwp *l = &lwp0;

	KASSERT((void *)uvm_lwp_getuarea(l) != NULL);

	LIST_INSERT_HEAD(&alllwp, l, l_list);

	callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE);
	callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l);
	cv_init(&l->l_sigcv, "sigwait");
	cv_init(&l->l_waitcv, "vfork");

	l->l_cred = kauth_cred_hold(proc0.p_cred);

	kdtrace_thread_ctor(NULL, l);
	lwp_initspecific(l);

	SYSCALL_TIME_LWP_INIT(l);
}

/*
 * Initialize the non-zeroed portion of an lwp_t.
 */
static int
lwp_ctor(void *arg, void *obj, int flags)
{
	lwp_t *l = obj;

	l->l_stat = LSIDL;
	l->l_cpu = curcpu();
	l->l_mutex = l->l_cpu->ci_schedstate.spc_lwplock;
	l->l_ts = kmem_alloc(sizeof(*l->l_ts), flags == PR_WAITOK ?
	    KM_SLEEP : KM_NOSLEEP);

	if (l->l_ts == NULL) {
		return ENOMEM;
	} else {
		turnstile_ctor(l->l_ts);
		return 0;
	}
}

static void
lwp_dtor(void *arg, void *obj)
{
	lwp_t *l = obj;

	/*
	 * The value of l->l_cpu must still be valid at this point.
	 */
	KASSERT(l->l_cpu != NULL);

	/*
	 * We can't return turnstile0 to the pool (it didn't come from it),
	 * so if it comes up just drop it quietly and move on.
	 */
	if (l->l_ts != &turnstile0)
		kmem_free(l->l_ts, sizeof(*l->l_ts));
}
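
/*
 * Illustrative sketch of the cache contract (mirrors how lwp_cache is
 * used later in this file): lwp_ctor() runs only when a fresh object
 * enters the cache, so a recycled lwp_t keeps its turnstile and LSIDL
 * state across a put/get cycle.
 *
 *	lwp_t *l = pool_cache_get(lwp_cache, PR_WAITOK);
 *	KASSERT(l->l_stat == LSIDL);
 *	KASSERT(l->l_ts != NULL);
 *	...
 *	pool_cache_put(lwp_cache, l);
 */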

/*
 * Set an LWP suspended.
 *
 * Must be called with p_lock held, and the LWP locked.  Will unlock the
 * LWP before return.
 */
int
lwp_suspend(struct lwp *curl, struct lwp *t)
{
	int error;

	KASSERT(mutex_owned(t->l_proc->p_lock));
	KASSERT(lwp_locked(t, NULL));

	KASSERT(curl != t || curl->l_stat == LSONPROC);

	/*
	 * If the current LWP has been told to exit, we must not suspend anyone
	 * else or deadlock could occur.  We won't return to userspace.
	 */
	if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) {
		lwp_unlock(t);
		return (EDEADLK);
	}

	if ((t->l_flag & LW_DBGSUSPEND) != 0) {
		lwp_unlock(t);
		return 0;
	}

	error = 0;

	switch (t->l_stat) {
	case LSRUN:
	case LSONPROC:
		t->l_flag |= LW_WSUSPEND;
		lwp_need_userret(t);
		lwp_unlock(t);
		break;

	case LSSLEEP:
		t->l_flag |= LW_WSUSPEND;
		lwp_need_userret(t);

		/*
		 * Kick the LWP and try to get it to the kernel boundary
		 * so that it will release any locks that it holds.
		 * setrunnable() will release the lock.
		 */
		if ((t->l_flag & LW_SINTR) != 0)
			setrunnable(t);
		else
			lwp_unlock(t);
		break;

	case LSSUSPENDED:
		lwp_unlock(t);
		break;

	case LSSTOP:
		t->l_flag |= LW_WSUSPEND;
		lwp_need_userret(t);
		setrunnable(t);
		break;

	case LSIDL:
	case LSZOMB:
		error = EINTR; /* It's what Solaris does..... */
		lwp_unlock(t);
		break;
	}

	return (error);
}
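
/*
 * Illustrative caller sketch: the target must be locked and p_lock
 * held on entry; lwp_suspend() consumes the LWP lock, and the caller
 * drops p_lock afterwards.
 *
 *	mutex_enter(t->l_proc->p_lock);
 *	lwp_lock(t);
 *	error = lwp_suspend(curlwp, t);		(unlocks t)
 *	mutex_exit(t->l_proc->p_lock);
 */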

/*
 * Restart a suspended LWP.
 *
 * Must be called with p_lock held, and the LWP locked.  Will unlock the
 * LWP before return.
 */
void
lwp_continue(struct lwp *l)
{

	KASSERT(mutex_owned(l->l_proc->p_lock));
	KASSERT(lwp_locked(l, NULL));

	/* If rebooting or not suspended, then just bail out. */
	if ((l->l_flag & LW_WREBOOT) != 0) {
		lwp_unlock(l);
		return;
	}

	l->l_flag &= ~LW_WSUSPEND;

	if (l->l_stat != LSSUSPENDED || (l->l_flag & LW_DBGSUSPEND) != 0) {
		lwp_unlock(l);
		return;
	}

	/* setrunnable() will release the lock. */
	setrunnable(l);
}

/*
 * Restart a stopped LWP.
 *
 * Must be called with p_lock held, and the LWP NOT locked.  Will unlock the
 * LWP before return.
 */
void
lwp_unstop(struct lwp *l)
{
	struct proc *p = l->l_proc;

	KASSERT(mutex_owned(&proc_lock));
	KASSERT(mutex_owned(p->p_lock));

	lwp_lock(l);

	KASSERT((l->l_flag & LW_DBGSUSPEND) == 0);

	/* If not stopped, then just bail out. */
	if (l->l_stat != LSSTOP) {
		lwp_unlock(l);
		return;
	}

	p->p_stat = SACTIVE;
	p->p_sflag &= ~PS_STOPPING;

	if (!p->p_waited)
		p->p_pptr->p_nstopchild--;

	if (l->l_wchan == NULL) {
		/* setrunnable() will release the lock. */
		setrunnable(l);
	} else if (p->p_xsig && (l->l_flag & LW_SINTR) != 0) {
		/* setrunnable() so we can receive the signal */
		setrunnable(l);
	} else {
		l->l_stat = LSSLEEP;
		p->p_nrlwps++;
		lwp_unlock(l);
	}
}

/*
 * Wait for an LWP within the current process to exit.  If 'lid' is
 * non-zero, we are waiting for a specific LWP.
 *
 * Must be called with p->p_lock held.
 */
int
lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting)
{
	const lwpid_t curlid = l->l_lid;
	proc_t *p = l->l_proc;
	lwp_t *l2, *next;
	int error;

	KASSERT(mutex_owned(p->p_lock));

	p->p_nlwpwait++;
	l->l_waitingfor = lid;

	for (;;) {
		int nfound;

		/*
		 * Avoid a race between exit1() and sigexit(): if the
		 * process is dumping core, then we need to bail out: call
		 * into lwp_userret() where we will be suspended until the
		 * deed is done.
		 */
		if ((p->p_sflag & PS_WCORE) != 0) {
			mutex_exit(p->p_lock);
			lwp_userret(l);
			KASSERT(false);
		}

		/*
		 * First off, drain any detached LWP that is waiting to be
		 * reaped.
		 */
		if ((l2 = p->p_zomblwp) != NULL) {
			p->p_zomblwp = NULL;
			lwp_free(l2, false, false);	/* releases proc mutex */
			mutex_enter(p->p_lock);
			continue;
		}

		/*
		 * Now look for an LWP to collect.  If the whole process is
		 * exiting, count detached LWPs as eligible to be collected,
		 * but don't drain them here.
		 */
		nfound = 0;
		error = 0;

		/*
		 * If given a specific LID, go via pid_table and make sure
		 * it's not detached.
		 */
		if (lid != 0) {
			l2 = proc_find_lwp(p, lid);
			if (l2 == NULL) {
				error = ESRCH;
				break;
			}
			KASSERT(l2->l_lid == lid);
			if ((l2->l_prflag & LPR_DETACHED) != 0) {
				error = EINVAL;
				break;
			}
		} else {
			l2 = LIST_FIRST(&p->p_lwps);
		}
		for (; l2 != NULL; l2 = next) {
			next = (lid != 0 ? NULL : LIST_NEXT(l2, l_sibling));

			/*
			 * If a specific wait and the target is waiting on
			 * us, then avoid deadlock.  This also traps LWPs
			 * that try to wait on themselves.
			 *
			 * Note that this does not handle more complicated
			 * cycles, like: t1 -> t2 -> t3 -> t1.  The process
			 * can still be killed so it is not a major problem.
			 */
			if (l2->l_lid == lid && l2->l_waitingfor == curlid) {
				error = EDEADLK;
				break;
			}
			if (l2 == l)
				continue;
			if ((l2->l_prflag & LPR_DETACHED) != 0) {
				nfound += exiting;
				continue;
			}
			if (lid != 0) {
				/*
				 * Mark this LWP as the first waiter, if there
				 * is no other.
				 */
				if (l2->l_waiter == 0)
					l2->l_waiter = curlid;
			} else if (l2->l_waiter != 0) {
				/*
				 * It already has a waiter - so don't
				 * collect it.  If the waiter doesn't
				 * grab it we'll get another chance
				 * later.
				 */
				nfound++;
				continue;
			}
			nfound++;

			/* No need to lock the LWP in order to see LSZOMB. */
			if (l2->l_stat != LSZOMB)
				continue;

			/*
			 * We're no longer waiting.  Reset the "first waiter"
			 * pointer on the target, in case it was us.
			 */
			l->l_waitingfor = 0;
			l2->l_waiter = 0;
			p->p_nlwpwait--;
			if (departed)
				*departed = l2->l_lid;
			sched_lwp_collect(l2);

			/* lwp_free() releases the proc lock. */
			lwp_free(l2, false, false);
			mutex_enter(p->p_lock);
			return 0;
		}

		if (error != 0)
			break;
		if (nfound == 0) {
			error = ESRCH;
			break;
		}

		/*
		 * Note: since the lock will be dropped, need to restart on
		 * wakeup to run all LWPs again, e.g. there may be new LWPs.
		 */
		if (exiting) {
			KASSERT(p->p_nlwps > 1);
			error = cv_timedwait(&p->p_lwpcv, p->p_lock, 1);
			break;
		}

		/*
		 * Break out if all LWPs are in _lwp_wait().  There are
		 * other ways to hang the process with _lwp_wait(), but the
		 * sleep is interruptible so there is little point in
		 * checking for them.
		 */
		if (p->p_nlwpwait == p->p_nlwps) {
			error = EDEADLK;
			break;
		}

		/*
		 * Sit around and wait for something to happen.  We'll be
		 * awoken if any of the conditions examined change: if an
		 * LWP exits, is collected, or is detached.
		 */
		if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0)
			break;
	}

	/*
	 * We didn't find any LWPs to collect, we may have received a
	 * signal, or some other condition has caused us to bail out.
	 *
	 * If waiting on a specific LWP, clear the waiters marker: some
	 * other LWP may want it.  Then, kick all the remaining waiters
	 * so that they can re-check for zombies and for deadlock.
	 */
	if (lid != 0) {
		l2 = proc_find_lwp(p, lid);
		KASSERT(l2 == NULL || l2->l_lid == lid);

		if (l2 != NULL && l2->l_waiter == curlid)
			l2->l_waiter = 0;
	}
	p->p_nlwpwait--;
	l->l_waitingfor = 0;
	cv_broadcast(&p->p_lwpcv);

	return error;
}
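
/*
 * Illustrative caller sketch (cf. sys__lwp_wait()): p_lock is held
 * across the call and is still held on return.
 *
 *	mutex_enter(p->p_lock);
 *	error = lwp_wait(l, lid, &departed, false);
 *	mutex_exit(p->p_lock);
 */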

/*
 * Create a new LWP within process 'p2', using LWP 'l1' as a template.
 * The new LWP is created in state LSIDL and must be set running,
 * suspended, or stopped by the caller.
 */
int
lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
    void *stack, size_t stacksize, void (*func)(void *), void *arg,
    lwp_t **rnewlwpp, int sclass, const sigset_t *sigmask,
    const stack_t *sigstk)
{
	struct lwp *l2;

	KASSERT(l1 == curlwp || l1->l_proc == &proc0);

	/*
	 * Enforce limits, excluding the first lwp and kthreads.  We must
	 * use the process credentials here when adjusting the limit, as
	 * they are what's tied to the accounting entity.  However for
	 * authorizing the action, we'll use the LWP's credentials.
	 */
	mutex_enter(p2->p_lock);
	if (p2->p_nlwps != 0 && p2 != &proc0) {
		uid_t uid = kauth_cred_getuid(p2->p_cred);
		int count = chglwpcnt(uid, 1);
		if (__predict_false(count >
		    p2->p_rlimit[RLIMIT_NTHR].rlim_cur)) {
			if (kauth_authorize_process(l1->l_cred,
			    KAUTH_PROCESS_RLIMIT, p2,
			    KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
			    &p2->p_rlimit[RLIMIT_NTHR], KAUTH_ARG(RLIMIT_NTHR))
			    != 0) {
				(void)chglwpcnt(uid, -1);
				mutex_exit(p2->p_lock);
				return EAGAIN;
			}
		}
	}

	/*
	 * First off, reap any detached LWP waiting to be collected.
	 * We can re-use its LWP structure and turnstile.
	 */
	if ((l2 = p2->p_zomblwp) != NULL) {
		p2->p_zomblwp = NULL;
		lwp_free(l2, true, false);
		/* p2 now unlocked by lwp_free() */
		KASSERT(l2->l_ts != NULL);
		KASSERT(l2->l_inheritedprio == -1);
		KASSERT(SLIST_EMPTY(&l2->l_pi_lenders));
		memset(&l2->l_startzero, 0, sizeof(*l2) -
		    offsetof(lwp_t, l_startzero));
	} else {
		mutex_exit(p2->p_lock);
		l2 = pool_cache_get(lwp_cache, PR_WAITOK);
		memset(&l2->l_startzero, 0, sizeof(*l2) -
		    offsetof(lwp_t, l_startzero));
		SLIST_INIT(&l2->l_pi_lenders);
	}

	/*
	 * Because of lockless lookup via pid_table, the LWP can be locked
	 * and inspected briefly even after it's freed, so a few fields are
	 * kept stable.
	 */
	KASSERT(l2->l_stat == LSIDL);
	KASSERT(l2->l_cpu != NULL);
	KASSERT(l2->l_ts != NULL);
	KASSERT(l2->l_mutex == l2->l_cpu->ci_schedstate.spc_lwplock);

	l2->l_proc = p2;
	l2->l_refcnt = 0;
	l2->l_class = sclass;

	/*
	 * Allocate a process ID for this LWP.  We need to do this now
	 * while we can still unwind if it fails.  Because we're marked
	 * as LSIDL, no lookups by the ID will succeed.
	 *
	 * N.B. this will always succeed for the first LWP in a process,
	 * because proc_alloc_lwpid() will usurp the slot.  Also note
	 * that l2->l_proc MUST be valid so that lookups of the proc
	 * will succeed, even if the LWP itself is not visible.
	 */
	if (__predict_false(proc_alloc_lwpid(p2, l2) == -1)) {
		pool_cache_put(lwp_cache, l2);
		return EAGAIN;
	}

	/*
	 * If vfork(), we want the LWP to run fast and on the same CPU
	 * as its parent, so that it can reuse the VM context and cache
	 * footprint on the local CPU.
	 */
	l2->l_boostpri = ((flags & LWP_VFORK) ? PRI_KERNEL : PRI_USER);
	l2->l_priority = l1->l_priority;
	l2->l_inheritedprio = -1;
	l2->l_protectprio = -1;
	l2->l_auxprio = -1;
	l2->l_flag = 0;
	l2->l_pflag = LP_MPSAFE;
	TAILQ_INIT(&l2->l_ld_locks);
	l2->l_psrefs = 0;
	kmsan_lwp_alloc(l2);

	/*
	 * For vfork, borrow parent's lwpctl context if it exists.
	 * This also causes us to return via lwp_userret.
	 */
	if (flags & LWP_VFORK && l1->l_lwpctl) {
		l2->l_lwpctl = l1->l_lwpctl;
		l2->l_flag |= LW_LWPCTL;
	}

	/*
	 * If not the first LWP in the process, grab a reference to the
	 * descriptor table.
	 */
	l2->l_fd = p2->p_fd;
	if (p2->p_nlwps != 0) {
		KASSERT(l1->l_proc == p2);
		fd_hold(l2);
	} else {
		KASSERT(l1->l_proc != p2);
	}

	if (p2->p_flag & PK_SYSTEM) {
		/* Mark it as a system LWP. */
		l2->l_flag |= LW_SYSTEM;
	}

	kdtrace_thread_ctor(NULL, l2);
	lwp_initspecific(l2);
	sched_lwp_fork(l1, l2);
	callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE);
	callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2);
	cv_init(&l2->l_sigcv, "sigwait");
	cv_init(&l2->l_waitcv, "vfork");
	l2->l_syncobj = &sched_syncobj;
	PSREF_DEBUG_INIT_LWP(l2);

	if (rnewlwpp != NULL)
		*rnewlwpp = l2;

	/*
	 * PCU state needs to be saved before calling uvm_lwp_fork() so that
	 * the MD cpu_lwp_fork() can copy the saved state to the new LWP.
	 */
	pcu_save_all(l1);
#if PCU_UNIT_COUNT > 0
	l2->l_pcu_valid = l1->l_pcu_valid;
#endif

	uvm_lwp_setuarea(l2, uaddr);
	uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2);

	mutex_enter(p2->p_lock);
	l2->l_cred = kauth_cred_hold(p2->p_cred);
	if ((flags & LWP_DETACHED) != 0) {
		l2->l_prflag = LPR_DETACHED;
		p2->p_ndlwps++;
	} else
		l2->l_prflag = 0;

	if (l1->l_proc == p2) {
		/*
		 * These flags are set while p_lock is held.  Copy with
		 * p_lock held too, so the LWP doesn't sneak into the
		 * process without them being set.
		 */
		l2->l_flag |= (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE));
	} else {
		/* fork(): pending core/exit doesn't apply to child. */
		l2->l_flag |= (l1->l_flag & LW_WREBOOT);
	}

	l2->l_sigstk = *sigstk;
	l2->l_sigmask = *sigmask;
	TAILQ_INIT(&l2->l_sigpend.sp_info);
	sigemptyset(&l2->l_sigpend.sp_set);
	LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling);
	p2->p_nlwps++;
	p2->p_nrlwps++;

	KASSERT(l2->l_affinity == NULL);

	/* Inherit the affinity mask. */
	if (l1->l_affinity) {
		/*
		 * Note that we hold the state lock while inheriting
		 * the affinity to avoid race with sched_setaffinity().
		 */
		lwp_lock(l1);
		if (l1->l_affinity) {
			kcpuset_use(l1->l_affinity);
			l2->l_affinity = l1->l_affinity;
		}
		lwp_unlock(l1);
	}

	/* Ensure a trip through lwp_userret() if needed. */
	if ((l2->l_flag & LW_USERRET) != 0) {
		lwp_need_userret(l2);
	}

	/* This marks the end of the "must be atomic" section. */
	mutex_exit(p2->p_lock);

	SDT_PROBE(proc, kernel, , lwp__create, l2, 0, 0, 0, 0);

	mutex_enter(&proc_lock);
	LIST_INSERT_HEAD(&alllwp, l2, l_list);
	/* Inherit a processor-set */
	l2->l_psid = l1->l_psid;
	mutex_exit(&proc_lock);

	SYSCALL_TIME_LWP_INIT(l2);

	if (p2->p_emul->e_lwp_fork)
		(*p2->p_emul->e_lwp_fork)(l1, l2);

	return (0);
}

/*
 * Set a new LWP running.  If the process is stopping, then the LWP is
 * created stopped.
 */
void
lwp_start(lwp_t *l, int flags)
{
	proc_t *p = l->l_proc;

	mutex_enter(p->p_lock);
	lwp_lock(l);
	KASSERT(l->l_stat == LSIDL);
	if ((flags & LWP_SUSPENDED) != 0) {
		/* It'll suspend itself in lwp_userret(). */
		l->l_flag |= LW_WSUSPEND;
		lwp_need_userret(l);
	}
	if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) {
		KASSERT(l->l_wchan == NULL);
		l->l_stat = LSSTOP;
		p->p_nrlwps--;
		lwp_unlock(l);
	} else {
		setrunnable(l);
		/* LWP now unlocked */
	}
	mutex_exit(p->p_lock);
}
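
/*
 * Illustrative pairing sketch (arguments abbreviated; error handling
 * omitted): a new user thread is typically created in LSIDL via
 * lwp_create() and then made runnable with lwp_start().
 *
 *	error = lwp_create(curlwp, p, uaddr, 0, NULL, 0, func, arg,
 *	    &l2, SCHED_OTHER, &sigmask, &sigstk);
 *	if (error == 0)
 *		lwp_start(l2, 0);
 */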
102111ba4e18Sad 
102211ba4e18Sad /*
1023f0301095Syamt  * Called by MD code when a new LWP begins execution.  Must be called
1024f0301095Syamt  * with the previous LWP locked (so at splsched), or if there is no
1025f0301095Syamt  * previous LWP, at splsched.
1026f0301095Syamt  */
1027f0301095Syamt void
lwp_startup(struct lwp * prev,struct lwp * new_lwp)1028a35d1a8cSmatt lwp_startup(struct lwp *prev, struct lwp *new_lwp)
1029f0301095Syamt {
103082002773Sad 	kmutex_t *lock;
10312ddceed1Sad 
1032a35d1a8cSmatt 	KASSERTMSG(new_lwp == curlwp, "l %p curlwp %p prevlwp %p", new_lwp, curlwp, prev);
10334c7ba244Sad 	KASSERT(kpreempt_disabled());
10342ddceed1Sad 	KASSERT(prev != NULL);
103582002773Sad 	KASSERT((prev->l_pflag & LP_RUNNING) != 0);
10362ddceed1Sad 	KASSERT(curcpu()->ci_mtx_count == -2);
10372ddceed1Sad 
103882002773Sad 	/*
1039b2097dd8Sriastradh 	 * Immediately mark the previous LWP as no longer running and
1040b2097dd8Sriastradh 	 * unlock (to keep lock wait times short as possible).  If a
1041b2097dd8Sriastradh 	 * zombie, don't touch after clearing LP_RUNNING as it could be
1042b2097dd8Sriastradh 	 * reaped by another CPU.  Use atomic_store_release to ensure
1043b2097dd8Sriastradh 	 * this -- matches atomic_load_acquire in lwp_free.
104482002773Sad 	 */
104582002773Sad 	lock = prev->l_mutex;
104682002773Sad 	if (__predict_false(prev->l_stat == LSZOMB)) {
1047b2097dd8Sriastradh 		atomic_store_release(&prev->l_pflag,
1048b2097dd8Sriastradh 		    prev->l_pflag & ~LP_RUNNING);
1049b2097dd8Sriastradh 	} else {
105082002773Sad 		prev->l_pflag &= ~LP_RUNNING;
1051b2097dd8Sriastradh 	}
105282002773Sad 	mutex_spin_exit(lock);
10532ddceed1Sad 
10542ddceed1Sad 	/* Correct spin mutex count after mi_switch(). */
10552ddceed1Sad 	curcpu()->ci_mtx_count = 0;
10562ddceed1Sad 
10572ddceed1Sad 	/* Install new VM context. */
10582ddceed1Sad 	if (__predict_true(new_lwp->l_proc->p_vmspace)) {
1059a35d1a8cSmatt 		pmap_activate(new_lwp);
10602ddceed1Sad 	}
10612ddceed1Sad 
10622ddceed1Sad 	/* We remain at IPL_SCHED from mi_switch() - reset it. */
1063dea60533Sskrll 	spl0();
106444968cbaSchristos 
1065f0301095Syamt 	LOCKDEBUG_BARRIER(NULL, 0);
10662ddceed1Sad 	SDT_PROBE(proc, kernel, , lwp__start, new_lwp, 0, 0, 0, 0);
10672ddceed1Sad 
10682ddceed1Sad 	/* For kthreads, acquire kernel lock if not MPSAFE. */
10692ddceed1Sad 	if (__predict_false((new_lwp->l_pflag & LP_MPSAFE) == 0)) {
1070a35d1a8cSmatt 		KERNEL_LOCK(1, new_lwp);
1071f0301095Syamt 	}
107288ab7da9Sad }
1073f0301095Syamt 
1074f0301095Syamt /*
107588ab7da9Sad  * Exit an LWP.
10763ce4840cSad  *
10773ce4840cSad  * *** WARNING *** This can be called with (l != curlwp) in error paths.
1078e0d8d366Sthorpej  */
1079e0d8d366Sthorpej void
lwp_exit(struct lwp * l)1080e0d8d366Sthorpej lwp_exit(struct lwp *l)
1081e0d8d366Sthorpej {
1082e0d8d366Sthorpej 	struct proc *p = l->l_proc;
1083b07ec3fcSad 	struct lwp *l2;
108488ab7da9Sad 	bool current;
108588ab7da9Sad 
108688ab7da9Sad 	current = (l == curlwp);
1087e0d8d366Sthorpej 
10880dec6ba3Sriastradh 	KASSERT(current || l->l_stat == LSIDL);
10890dec6ba3Sriastradh 	KASSERT(current || l->l_target_cpu == NULL);
10903cb7a24bSad 	KASSERT(p == curproc);
1091b23b73b9Smanu 
1092db70f181Schristos 	SDT_PROBE(proc, kernel, , lwp__exit, l, 0, 0, 0, 0);
10936a9056a9Sdarran 
109420f33b02Sad 	/* Verify that we hold no locks; for DIAGNOSTIC check kernel_lock. */
10952ddceed1Sad 	LOCKDEBUG_BARRIER(NULL, 0);
109620f33b02Sad 	KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked");
1097b07ec3fcSad 
1098b07ec3fcSad 	/*
1099b07ec3fcSad 	 * If we are the last live LWP in a process, we need to exit the
1100b07ec3fcSad 	 * entire process.  We do so with an exit status of zero, because
1101b07ec3fcSad 	 * it's a "controlled" exit, and because that's what Solaris does.
1102b07ec3fcSad 	 *
1103b07ec3fcSad 	 * We are not quite a zombie yet, but for accounting purposes we
1104b07ec3fcSad 	 * must increment the count of zombies here.
110512e8bb91Sthorpej 	 *
110612e8bb91Sthorpej 	 * Note: the last LWP's specificdata will be deleted here.
1107e0d8d366Sthorpej 	 */
1108284c2b9aSad 	mutex_enter(p->p_lock);
1109b07ec3fcSad 	if (p->p_nlwps - p->p_nzlwps == 1) {
111088ab7da9Sad 		KASSERT(current == true);
11112411d236Smatt 		KASSERT(p != &proc0);
11125c35dbcdSchristos 		exit1(l, 0, 0);
1113089abdadSjdolecek 		/* NOTREACHED */
1114e0d8d366Sthorpej 	}
1115b07ec3fcSad 	p->p_nzlwps++;
111698a9cebbSthorpej 
111798a9cebbSthorpej 	/*
111898a9cebbSthorpej 	 * Perform any required thread cleanup.  Do this early so
111915689570Sthorpej 	 * anyone wanting to look us up with lwp_getref_lwpid() will
112015689570Sthorpej 	 * fail to find us before we become a zombie.
112198a9cebbSthorpej 	 *
112298a9cebbSthorpej 	 * N.B. this will unlock p->p_lock on our behalf.
112398a9cebbSthorpej 	 */
112498a9cebbSthorpej 	lwp_thread_cleanup(l);
1125b07ec3fcSad 
1126b07ec3fcSad 	if (p->p_emul->e_lwp_exit)
1127b07ec3fcSad 		(*p->p_emul->e_lwp_exit)(l);
1128e0d8d366Sthorpej 
11293cb7a24bSad 	/* Drop filedesc reference. */
11303cb7a24bSad 	fd_free();
11313cb7a24bSad 
113272421a19Shannken 	/* Release fstrans private data. */
113372421a19Shannken 	fstrans_lwp_dtor(l);
113472421a19Shannken 
113512e8bb91Sthorpej 	/* Delete the specificdata while it's still safe to sleep. */
11362c4f731dSpooka 	lwp_finispecific(l);
113712e8bb91Sthorpej 
1138b07ec3fcSad 	/*
1139b07ec3fcSad 	 * Release our cached credentials.
1140b07ec3fcSad 	 */
11412b79369cSad 	kauth_cred_free(l->l_cred);
1142513227e9Sad 	callout_destroy(&l->l_timeout_ch);
114388ab7da9Sad 
114488ab7da9Sad 	/*
1145d1fa1f15Skamil 	 * If traced, report LWP exit event to the debugger.
1146d1fa1f15Skamil 	 *
1147b07ec3fcSad 	 * Remove the LWP from the global list.
114833fa5ccbSchs 	 * Free its LID from the PID namespace if needed.
1149b07ec3fcSad 	 */
11500eaaa024Sad 	mutex_enter(&proc_lock);
1151d1fa1f15Skamil 
1152ac37cdceSkamil 	if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_EXIT)) ==
1153d1fa1f15Skamil 	    (PSL_TRACED|PSL_TRACELWP_EXIT)) {
1154d1fa1f15Skamil 		mutex_enter(p->p_lock);
1155bcb2d047Skamil 		if (ISSET(p->p_sflag, PS_WEXIT)) {
1156bcb2d047Skamil 			mutex_exit(p->p_lock);
1157bcb2d047Skamil 			/*
1158bcb2d047Skamil 			 * We are exiting, bail out without informing parent
1159bcb2d047Skamil 			 * about a terminating LWP as it would deadlock.
1160bcb2d047Skamil 			 */
1161bcb2d047Skamil 		} else {
11625e4bbc49Skamil 			eventswitch(TRAP_LWP, PTRACE_LWP_EXIT, l->l_lid);
11630eaaa024Sad 			mutex_enter(&proc_lock);
1164d1fa1f15Skamil 		}
1165bcb2d047Skamil 	}
1166d1fa1f15Skamil 
1167b07ec3fcSad 	LIST_REMOVE(l, l_list);
11680eaaa024Sad 	mutex_exit(&proc_lock);
1169b07ec3fcSad 
1170b07ec3fcSad 	/*
1171b07ec3fcSad 	 * Get rid of all references to the LWP that others (e.g. procfs)
1172b07ec3fcSad 	 * may have, and mark the LWP as a zombie.  If the LWP is detached,
1173b07ec3fcSad 	 * mark it waiting for collection in the proc structure.  Note that
1174*bcfabd50Sandvar 	 * before we can do that, we need to free any other dead, detached
1175b07ec3fcSad 	 * LWP waiting to meet its maker.
11766a317a61Sad 	 *
11776a317a61Sad 	 * All conditions need to be observed upon under the same hold of
11786a317a61Sad 	 * p_lock, because if the lock is dropped any of them can change.
1179b07ec3fcSad 	 */
1180284c2b9aSad 	mutex_enter(p->p_lock);
11816a317a61Sad 	for (;;) {
118298a9cebbSthorpej 		if (lwp_drainrefs(l))
11836a317a61Sad 			continue;
1184b07ec3fcSad 		if ((l->l_prflag & LPR_DETACHED) != 0) {
11856a317a61Sad 			if ((l2 = p->p_zomblwp) != NULL) {
1186b07ec3fcSad 				p->p_zomblwp = NULL;
11876a317a61Sad 				lwp_free(l2, false, false);
11886a317a61Sad 				/* proc now unlocked */
1189284c2b9aSad 				mutex_enter(p->p_lock);
11906a317a61Sad 				continue;
1191b07ec3fcSad 			}
1192b07ec3fcSad 			p->p_zomblwp = l;
1193b07ec3fcSad 		}
11946a317a61Sad 		break;
11956a317a61Sad 	}
1196b07ec3fcSad 
1197b07ec3fcSad 	/*
1198b07ec3fcSad 	 * If we find a pending signal for the process and we have been
119933fa5ccbSchs 	 * asked to check for signals, then we lose: arrange to have
1200b07ec3fcSad 	 * all other LWPs in the process check for signals.
1201b07ec3fcSad 	 */
1202934634a1Spavel 	if ((l->l_flag & LW_PENDSIG) != 0 &&
1203b07ec3fcSad 	    firstsig(&p->p_sigpend.sp_set) != 0) {
1204b07ec3fcSad 		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
1205b07ec3fcSad 			lwp_lock(l2);
1206e57dd2baSad 			signotify(l2);
1207b07ec3fcSad 			lwp_unlock(l2);
1208b07ec3fcSad 		}
1209b07ec3fcSad 	}
1210b07ec3fcSad 
1211f3c47d39Smatt 	/*
1212f3c47d39Smatt 	 * Release any PCU resources before becoming a zombie.
1213f3c47d39Smatt 	 */
1214f3c47d39Smatt 	pcu_discard_all(l);
1215f3c47d39Smatt 
1216b07ec3fcSad 	lwp_lock(l);
1217b07ec3fcSad 	l->l_stat = LSZOMB;
1218501dd321Srmind 	if (l->l_name != NULL) {
121979aa087aSad 		strcpy(l->l_name, "(zombie)");
12204f1720c3Srmind 	}
1221b07ec3fcSad 	lwp_unlock(l);
1222b07ec3fcSad 	p->p_nrlwps--;
1223b668a9a0Sad 	if (l->l_lwpctl != NULL)
1224b668a9a0Sad 		l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED;
1225284c2b9aSad 	mutex_exit(p->p_lock);
12263d1cabfdSad 	cv_broadcast(&p->p_lwpcv);
1227b07ec3fcSad 
1228b07ec3fcSad 	/*
1229b07ec3fcSad 	 * We can no longer block.  At this point, lwp_free() may already
1230b07ec3fcSad 	 * be gunning for us.  On a multi-CPU system, we may be off p_lwps.
1231b07ec3fcSad 	 *
1232b07ec3fcSad 	 * Free MD LWP resources.
1233b07ec3fcSad 	 */
1234089abdadSjdolecek 	cpu_lwp_free(l, 0);
123588ab7da9Sad 
123688ab7da9Sad 	if (current) {
12372ddceed1Sad 		/* Switch away into oblivion. */
12382ddceed1Sad 		lwp_lock(l);
12392ddceed1Sad 		spc_lock(l->l_cpu);
12402ddceed1Sad 		mi_switch(l);
12412ddceed1Sad 		panic("lwp_exit");
1242e0d8d366Sthorpej 	}
124388ab7da9Sad }
1244e0d8d366Sthorpej 
1245b07ec3fcSad /*
1246b07ec3fcSad  * Free a dead LWP's remaining resources.
1247b07ec3fcSad  *
1248b07ec3fcSad  * XXXLWP limits.
1249b07ec3fcSad  */
1250b07ec3fcSad void
lwp_free(struct lwp * l,bool recycle,bool last)1251fed17936Sad lwp_free(struct lwp *l, bool recycle, bool last)
1252b07ec3fcSad {
1253b07ec3fcSad 	struct proc *p = l->l_proc;
1254be04ac48Sad 	struct rusage *ru;
1255b07ec3fcSad 	ksiginfoq_t kq;
1256b07ec3fcSad 
125721832401Syamt 	KASSERT(l != curlwp);
1258b6608b64Syamt 	KASSERT(last || mutex_owned(p->p_lock));
125921832401Syamt 
12606cce1f9fSchristos 	/*
12616cce1f9fSchristos 	 * We use the process credentials instead of the lwp credentials here
12626cce1f9fSchristos 	 * because the lwp credentials maybe cached (just after a setuid call)
12636cce1f9fSchristos 	 * and we don't want pay for syncing, since the lwp is going away
12646cce1f9fSchristos 	 * anyway
12656cce1f9fSchristos 	 */
126604610895Schristos 	if (p != &proc0 && p->p_nlwps != 1)
12676cce1f9fSchristos 		(void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1);
12682ddceed1Sad 
1269b07ec3fcSad 	/*
127020180cb1Sad 	 * In the unlikely event that the LWP is still on the CPU,
127120180cb1Sad 	 * then spin until it has switched away.
1272b2097dd8Sriastradh 	 *
1273b2097dd8Sriastradh 	 * atomic_load_acquire matches atomic_store_release in
1274b2097dd8Sriastradh 	 * lwp_startup and mi_switch.
127520180cb1Sad 	 */
1276b2097dd8Sriastradh 	while (__predict_false((atomic_load_acquire(&l->l_pflag) & LP_RUNNING)
1277b2097dd8Sriastradh 		!= 0)) {
127820180cb1Sad 		SPINLOCK_BACKOFF_HOOK;
127920180cb1Sad 	}
128020180cb1Sad 
128120180cb1Sad 	/*
128220180cb1Sad 	 * Now that the LWP's known off the CPU, reset its state back to
128320180cb1Sad 	 * LSIDL, which defeats anything that might have gotten a hold on
128420180cb1Sad 	 * the LWP via pid_table before the ID was freed.  It's important
128520180cb1Sad 	 * to do this with both the LWP locked and p_lock held.
128620180cb1Sad 	 *
128720180cb1Sad 	 * Also reset the CPU and lock pointer back to curcpu(), since the
128820180cb1Sad 	 * LWP will in all likelihood be cached with the current CPU in
128920180cb1Sad 	 * lwp_cache when we free it, and later be allocated from there
129020180cb1Sad 	 * again (avoiding incidental lock contention).
129120180cb1Sad 	 */
129220180cb1Sad 	lwp_lock(l);
129320180cb1Sad 	l->l_stat = LSIDL;
129420180cb1Sad 	l->l_cpu = curcpu();
129520180cb1Sad 	lwp_unlock_to(l, l->l_cpu->ci_schedstate.spc_lwplock);
129620180cb1Sad 
129720180cb1Sad 	/*
1298d1c42b4fSad 	 * If this was not the last LWP in the process, then adjust counters
1299d1c42b4fSad 	 * and unlock.  This is done differently for the last LWP in exit1().
1300b07ec3fcSad 	 */
1301b07ec3fcSad 	if (!last) {
1302b07ec3fcSad 		/*
1303b07ec3fcSad 		 * Add the LWP's run time to the process' base value.
1304b07ec3fcSad 	 * This needs to coincide with coming off p_lwps.
1305b07ec3fcSad 		 */
1306949e16d9Syamt 		bintime_add(&p->p_rtime, &l->l_rtime);
1307f0301095Syamt 		p->p_pctcpu += l->l_pctcpu;
1308be04ac48Sad 		ru = &p->p_stats->p_ru;
1309be04ac48Sad 		ruadd(ru, &l->l_ru);
1310b07ec3fcSad 		LIST_REMOVE(l, l_sibling);
1311b07ec3fcSad 		p->p_nlwps--;
1312b07ec3fcSad 		p->p_nzlwps--;
1313b07ec3fcSad 		if ((l->l_prflag & LPR_DETACHED) != 0)
1314b07ec3fcSad 			p->p_ndlwps--;
13153d1cabfdSad 		mutex_exit(p->p_lock);
1316fed17936Sad 
1317fed17936Sad 		/*
1318fed17936Sad 		 * Have any LWPs sleeping in lwp_wait() recheck for
1319fed17936Sad 		 * deadlock.
1320fed17936Sad 		 */
1321fed17936Sad 		cv_broadcast(&p->p_lwpcv);
1322b07ec3fcSad 
132320180cb1Sad 		/* Free the LWP ID. */
13240eaaa024Sad 		mutex_enter(&proc_lock);
132520180cb1Sad 		proc_free_lwpid(p, l->l_lid);
13260eaaa024Sad 		mutex_exit(&proc_lock);
1327b07ec3fcSad 	}
1328b07ec3fcSad 
1329b07ec3fcSad 	/*
1330b07ec3fcSad 	 * Destroy the LWP's remaining signal information.
1331b07ec3fcSad 	 */
1332b07ec3fcSad 	ksiginfo_queue_init(&kq);
1333b07ec3fcSad 	sigclear(&l->l_sigpend, NULL, &kq);
1334b07ec3fcSad 	ksiginfo_queue_drain(&kq);
1335b07ec3fcSad 	cv_destroy(&l->l_sigcv);
13366d7c7959Srmind 	cv_destroy(&l->l_waitcv);
1337b07ec3fcSad 
1338b07ec3fcSad 	/*
1339501dd321Srmind 	 * Free lwpctl structure and affinity.
1340501dd321Srmind 	 */
1341501dd321Srmind 	if (l->l_lwpctl) {
1342501dd321Srmind 		lwp_ctl_free(l);
1343501dd321Srmind 	}
1344501dd321Srmind 	if (l->l_affinity) {
1345501dd321Srmind 		kcpuset_unuse(l->l_affinity, NULL);
1346501dd321Srmind 		l->l_affinity = NULL;
1347501dd321Srmind 	}
1348501dd321Srmind 
1349501dd321Srmind 	/*
135020180cb1Sad 	 * Free remaining data structures and the LWP itself unless the
135120180cb1Sad 	 * caller wants to recycle.
1352b07ec3fcSad 	 */
135379aa087aSad 	if (l->l_name != NULL)
135479aa087aSad 		kmem_free(l->l_name, MAXCOMLEN);
135533963b14Srmind 
135610c5b023Smaxv 	kmsan_lwp_free(l);
1357b3036422Smaxv 	kcov_lwp_free(l);
1358b07ec3fcSad 	cpu_lwp_free2(l);
1359b07ec3fcSad 	uvm_lwp_exit(l);
136040cf6f36Srmind 
1361e781af39Syamt 	KASSERT(SLIST_EMPTY(&l->l_pi_lenders));
1362d831186dSad 	KASSERT(l->l_inheritedprio == -1);
136318bf160fSmatt 	KASSERT(l->l_blcnt == 0);
13641bc28ea1Sdarran 	kdtrace_thread_dtor(NULL, l);
1365b07ec3fcSad 	if (!recycle)
1366ea3f10f7Sad 		pool_cache_put(lwp_cache, l);
1367e0d8d366Sthorpej }
1368e0d8d366Sthorpej 
1369e0d8d366Sthorpej /*
13705c71a4d4Srmind  * Migrate the LWP to another CPU.  Unlocks the LWP.
13715c71a4d4Srmind  */
13725c71a4d4Srmind void
137529170d38Srmind lwp_migrate(lwp_t *l, struct cpu_info *tci)
13745c71a4d4Srmind {
137529170d38Srmind 	struct schedstate_percpu *tspc;
137630dfdb28Srmind 	int lstat = l->l_stat;
137730dfdb28Srmind 
13785c71a4d4Srmind 	KASSERT(lwp_locked(l, NULL));
137929170d38Srmind 	KASSERT(tci != NULL);
13805c71a4d4Srmind 
138130dfdb28Srmind 	/* If LWP is still on the CPU, it must be handled like LSONPROC */
138282002773Sad 	if ((l->l_pflag & LP_RUNNING) != 0) {
138330dfdb28Srmind 		lstat = LSONPROC;
138430dfdb28Srmind 	}
138530dfdb28Srmind 
138629170d38Srmind 	/*
138729170d38Srmind 	 * The destination CPU could have changed while a previous
138829170d38Srmind 	 * migration was still unfinished.
138929170d38Srmind 	 */
139030dfdb28Srmind 	if (l->l_target_cpu != NULL) {
139129170d38Srmind 		l->l_target_cpu = tci;
13925c71a4d4Srmind 		lwp_unlock(l);
13935c71a4d4Srmind 		return;
13945c71a4d4Srmind 	}
13955c71a4d4Srmind 
139629170d38Srmind 	/* Nothing to do if trying to migrate to the same CPU */
139729170d38Srmind 	if (l->l_cpu == tci) {
139829170d38Srmind 		lwp_unlock(l);
139929170d38Srmind 		return;
140029170d38Srmind 	}
140129170d38Srmind 
140229170d38Srmind 	KASSERT(l->l_target_cpu == NULL);
140329170d38Srmind 	tspc = &tci->ci_schedstate;
140430dfdb28Srmind 	switch (lstat) {
14055c71a4d4Srmind 	case LSRUN:
140629170d38Srmind 		l->l_target_cpu = tci;
140740cf6f36Srmind 		break;
14085c71a4d4Srmind 	case LSSLEEP:
140929170d38Srmind 		l->l_cpu = tci;
14105c71a4d4Srmind 		break;
141111ba4e18Sad 	case LSIDL:
14125c71a4d4Srmind 	case LSSTOP:
14135c71a4d4Srmind 	case LSSUSPENDED:
141429170d38Srmind 		l->l_cpu = tci;
141529170d38Srmind 		if (l->l_wchan == NULL) {
141629170d38Srmind 			lwp_unlock_to(l, tspc->spc_lwplock);
141729170d38Srmind 			return;
14185c71a4d4Srmind 		}
141929170d38Srmind 		break;
14205c71a4d4Srmind 	case LSONPROC:
142129170d38Srmind 		l->l_target_cpu = tci;
142229170d38Srmind 		spc_lock(l->l_cpu);
142311ba4e18Sad 		sched_resched_cpu(l->l_cpu, PRI_USER_RT, true);
142411ba4e18Sad 		/* spc now unlocked */
14255c71a4d4Srmind 		break;
14265c71a4d4Srmind 	}
14275c71a4d4Srmind 	lwp_unlock(l);
14285c71a4d4Srmind }
14295c71a4d4Srmind 
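/*
 * Usage sketch (illustrative only, not code from this file): callers
 * lock the LWP and let lwp_migrate() release the lock; "tci" is assumed
 * to be a valid cpu_info pointer chosen by the caller.
 *
 *	lwp_lock(l);
 *	lwp_migrate(l, tci);		(returns with l unlocked)
 */
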
143059150873Sthorpej #define	lwp_find_exclude(l)					\
143159150873Sthorpej 	((l)->l_stat == LSIDL || (l)->l_stat == LSZOMB)
143259150873Sthorpej 
14335c71a4d4Srmind /*
14349850c055Srmind  * Find the LWP in the process.  Arguments may be zero, in which case
14359850c055Srmind  * the calling process and the first LWP in the list will be used.
1436284c2b9aSad  * On success, returns with the proc locked.
143759150873Sthorpej  *
143859150873Sthorpej  * => pid == 0 -> look in curproc.
143959150873Sthorpej  * => pid == -1 -> match any proc.
144059150873Sthorpej  * => otherwise look up the proc.
144159150873Sthorpej  *
144259150873Sthorpej  * => lid == 0 -> first LWP in the proc
144359150873Sthorpej  * => otherwise specific LWP
14445c71a4d4Srmind  */
14455c71a4d4Srmind struct lwp *
14465c71a4d4Srmind lwp_find2(pid_t pid, lwpid_t lid)
14475c71a4d4Srmind {
14485c71a4d4Srmind 	proc_t *p;
14495c71a4d4Srmind 	lwp_t *l;
14505c71a4d4Srmind 
145159150873Sthorpej 	/* First LWP of specified proc. */
145259150873Sthorpej 	if (lid == 0) {
145359150873Sthorpej 		switch (pid) {
145459150873Sthorpej 		case -1:
145559150873Sthorpej 			/* No lookup keys. */
145659150873Sthorpej 			return NULL;
145759150873Sthorpej 		case 0:
145859150873Sthorpej 			p = curproc;
145959150873Sthorpej 			mutex_enter(p->p_lock);
146059150873Sthorpej 			break;
146159150873Sthorpej 		default:
14620eaaa024Sad 			mutex_enter(&proc_lock);
14633c507045Srmind 			p = proc_find(pid);
146459150873Sthorpej 			if (__predict_false(p == NULL)) {
14650eaaa024Sad 				mutex_exit(&proc_lock);
14663c507045Srmind 				return NULL;
14679850c055Srmind 			}
14683c507045Srmind 			mutex_enter(p->p_lock);
14690eaaa024Sad 			mutex_exit(&proc_lock);
147059150873Sthorpej 			break;
14713c507045Srmind 		}
147259150873Sthorpej 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
147359150873Sthorpej 			if (__predict_true(!lwp_find_exclude(l)))
147459150873Sthorpej 				break;
14753c507045Srmind 		}
147659150873Sthorpej 		goto out;
147759150873Sthorpej 	}
147859150873Sthorpej 
147959150873Sthorpej 	l = proc_find_lwp_acquire_proc(lid, &p);
148059150873Sthorpej 	if (l == NULL)
148159150873Sthorpej 		return NULL;
148259150873Sthorpej 	KASSERT(p != NULL);
148359150873Sthorpej 	KASSERT(mutex_owned(p->p_lock));
148459150873Sthorpej 
148559150873Sthorpej 	if (__predict_false(lwp_find_exclude(l))) {
148659150873Sthorpej 		l = NULL;
148759150873Sthorpej 		goto out;
148859150873Sthorpej 	}
148959150873Sthorpej 
149059150873Sthorpej 	/* Apply proc filter, if applicable. */
149159150873Sthorpej 	switch (pid) {
149259150873Sthorpej 	case -1:
149359150873Sthorpej 		/* Match anything. */
149459150873Sthorpej 		break;
149559150873Sthorpej 	case 0:
149659150873Sthorpej 		if (p != curproc)
149759150873Sthorpej 			l = NULL;
149859150873Sthorpej 		break;
149959150873Sthorpej 	default:
150059150873Sthorpej 		if (p->p_pid != pid)
150159150873Sthorpej 			l = NULL;
150259150873Sthorpej 		break;
150359150873Sthorpej 	}
150459150873Sthorpej 
150559150873Sthorpej  out:
150659150873Sthorpej 	if (__predict_false(l == NULL)) {
1507284c2b9aSad 		mutex_exit(p->p_lock);
1508284c2b9aSad 	}
15095c71a4d4Srmind 	return l;
15105c71a4d4Srmind }
15115c71a4d4Srmind 
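/*
 * Usage sketch (illustrative): look up LWP "lid" in process "pid"; on
 * success the caller is responsible for dropping p_lock when done.
 *
 *	struct lwp *t = lwp_find2(pid, lid);
 *	if (t != NULL) {
 *		... inspect t with t->l_proc->p_lock held ...
 *		mutex_exit(t->l_proc->p_lock);
 *	}
 */
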
15125c71a4d4Srmind /*
1513ea845191Syamt  * Look up a live LWP within the specified process.
1514b07ec3fcSad  *
1515d1c42b4fSad  * Must be called with p->p_lock held (as it looks at the radix tree,
1516d1c42b4fSad  * and also wants to exclude idle and zombie LWPs).
1517b07ec3fcSad  */
1518b07ec3fcSad struct lwp *
151933fa5ccbSchs lwp_find(struct proc *p, lwpid_t id)
1520b07ec3fcSad {
1521b07ec3fcSad 	struct lwp *l;
1522b07ec3fcSad 
1523284c2b9aSad 	KASSERT(mutex_owned(p->p_lock));
1524b07ec3fcSad 
152515689570Sthorpej 	l = proc_find_lwp(p, id);
1526d1c42b4fSad 	KASSERT(l == NULL || l->l_lid == id);
1527b07ec3fcSad 
1528b07ec3fcSad 	/*
1529b07ec3fcSad 	 * No need to lock - all of these conditions will
1530b07ec3fcSad 	 * be visible with the process level mutex held.
1531b07ec3fcSad 	 */
153259150873Sthorpej 	if (__predict_false(l != NULL && lwp_find_exclude(l)))
1533b07ec3fcSad 		l = NULL;
1534b07ec3fcSad 
1535b07ec3fcSad 	return l;
1536b07ec3fcSad }
1537b07ec3fcSad 
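/*
 * Usage sketch (illustrative): unlike lwp_find2(), the caller brackets
 * lwp_find() with its own p_lock acquisition.
 *
 *	mutex_enter(p->p_lock);
 *	if ((t = lwp_find(p, lid)) != NULL) {
 *		... t cannot become a zombie while p_lock is held ...
 *	}
 *	mutex_exit(p->p_lock);
 */
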
1538b07ec3fcSad /*
1539b07ec3fcSad  * Verify that an LWP is locked, and optionally verify that the lock matches
1540b07ec3fcSad  * one we specify.
1541b07ec3fcSad  */
1542b07ec3fcSad int
1543b07ec3fcSad lwp_locked(struct lwp *l, kmutex_t *mtx)
1544b07ec3fcSad {
1545b07ec3fcSad 	kmutex_t *cur = l->l_mutex;
1546b07ec3fcSad 
1547b07ec3fcSad 	return mutex_owned(cur) && (mtx == cur || mtx == NULL);
1548b07ec3fcSad }
1549b07ec3fcSad 
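/*
 * This is used almost exclusively in assertions, as elsewhere in this
 * file, for example:
 *
 *	KASSERT(lwp_locked(l, NULL));		(any lock will do)
 *	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock));
 */
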
1550b07ec3fcSad /*
1551b07ec3fcSad  * Lend a new mutex to an LWP.  The old mutex must be held.
1552b07ec3fcSad  */
15530e70dcbeSad kmutex_t *
1554a35d1a8cSmatt lwp_setlock(struct lwp *l, kmutex_t *mtx)
1555b07ec3fcSad {
15560e70dcbeSad 	kmutex_t *oldmtx = l->l_mutex;
1557b07ec3fcSad 
15580e70dcbeSad 	KASSERT(mutex_owned(oldmtx));
1559b07ec3fcSad 
156076e07a94Sriastradh 	atomic_store_release(&l->l_mutex, mtx);
15610e70dcbeSad 	return oldmtx;
1562b07ec3fcSad }
1563b07ec3fcSad 
1564b07ec3fcSad /*
1565b07ec3fcSad  * Lend a new mutex to an LWP, and release the old mutex.  The old mutex
1566b07ec3fcSad  * must be held.
1567b07ec3fcSad  */
1568b07ec3fcSad void
1569a35d1a8cSmatt lwp_unlock_to(struct lwp *l, kmutex_t *mtx)
1570b07ec3fcSad {
1571b07ec3fcSad 	kmutex_t *old;
1572b07ec3fcSad 
157311a35aedSrmind 	KASSERT(lwp_locked(l, NULL));
1574b07ec3fcSad 
1575b07ec3fcSad 	old = l->l_mutex;
157676e07a94Sriastradh 	atomic_store_release(&l->l_mutex, mtx);
1577b07ec3fcSad 	mutex_spin_exit(old);
1578b07ec3fcSad }
1579b07ec3fcSad 
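/*
 * Sketch contrasting the two lending routines (illustrative): both
 * switch l->l_mutex to the new mutex, but lwp_setlock() hands the old
 * mutex back still held, while lwp_unlock_to() releases it.
 *
 *	kmutex_t *old = lwp_setlock(l, new);	(old still held)
 *	...
 *	mutex_spin_exit(old);
 *
 *	lwp_unlock_to(l, new);			(old already released)
 */
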
1580e781af39Syamt int
1581e781af39Syamt lwp_trylock(struct lwp *l)
1582e781af39Syamt {
1583e781af39Syamt 	kmutex_t *old;
1584e781af39Syamt 
1585e781af39Syamt 	for (;;) {
158676e07a94Sriastradh 		if (!mutex_tryenter(old = atomic_load_consume(&l->l_mutex)))
1587e781af39Syamt 			return 0;
158876e07a94Sriastradh 		if (__predict_true(atomic_load_relaxed(&l->l_mutex) == old))
1589e781af39Syamt 			return 1;
1590e781af39Syamt 		mutex_spin_exit(old);
1591e781af39Syamt 	}
1592e781af39Syamt }
1593e781af39Syamt 
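/*
 * Usage sketch (illustrative): as with mutex_tryenter(), callers of
 * lwp_trylock() must be prepared for failure.
 *
 *	if (lwp_trylock(l)) {
 *		... l is now locked ...
 *		lwp_unlock(l);
 *	}
 */
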
159440cf6f36Srmind void
15950e70dcbeSad lwp_unsleep(lwp_t *l, bool unlock)
1596c42a4d14Sad {
1597c42a4d14Sad 
1598c42a4d14Sad 	KASSERT(mutex_owned(l->l_mutex));
15990e70dcbeSad 	(*l->l_syncobj->sobj_unsleep)(l, unlock);
1600c42a4d14Sad }
1601c42a4d14Sad 
1602b07ec3fcSad /*
16036ed72b5fSad  * Lock an LWP.
16046ed72b5fSad  */
16056ed72b5fSad void
16066ed72b5fSad lwp_lock(lwp_t *l)
16076ed72b5fSad {
16086ed72b5fSad 	kmutex_t *old = atomic_load_consume(&l->l_mutex);
16096ed72b5fSad 
16106ed72b5fSad 	/*
16116ed72b5fSad 	 * Note: mutex_spin_enter() will have posted a read barrier.
16126ed72b5fSad 	 * Re-test l->l_mutex.  If it has changed, we need to try again.
16136ed72b5fSad 	 */
16146ed72b5fSad 	mutex_spin_enter(old);
16156ed72b5fSad 	while (__predict_false(atomic_load_relaxed(&l->l_mutex) != old)) {
16166ed72b5fSad 		mutex_spin_exit(old);
16176ed72b5fSad 		old = atomic_load_consume(&l->l_mutex);
16186ed72b5fSad 		mutex_spin_enter(old);
16196ed72b5fSad 	}
16206ed72b5fSad }
16216ed72b5fSad 
16226ed72b5fSad /*
16236ed72b5fSad  * Unlock an LWP.
16246ed72b5fSad  */
16256ed72b5fSad void
16266ed72b5fSad lwp_unlock(lwp_t *l)
16276ed72b5fSad {
16286ed72b5fSad 
16296ed72b5fSad 	mutex_spin_exit(l->l_mutex);
16306ed72b5fSad }
16316ed72b5fSad 
16326ed72b5fSad void
16336ed72b5fSad lwp_changepri(lwp_t *l, pri_t pri)
16346ed72b5fSad {
16356ed72b5fSad 
16366ed72b5fSad 	KASSERT(mutex_owned(l->l_mutex));
16376ed72b5fSad 
16386ed72b5fSad 	if (l->l_priority == pri)
16396ed72b5fSad 		return;
16406ed72b5fSad 
16416ed72b5fSad 	(*l->l_syncobj->sobj_changepri)(l, pri);
16426ed72b5fSad 	KASSERT(l->l_priority == pri);
16436ed72b5fSad }
16446ed72b5fSad 
16456ed72b5fSad void
16466ed72b5fSad lwp_lendpri(lwp_t *l, pri_t pri)
16476ed72b5fSad {
16486ed72b5fSad 	KASSERT(mutex_owned(l->l_mutex));
16496ed72b5fSad 
16506ed72b5fSad 	(*l->l_syncobj->sobj_lendpri)(l, pri);
16516ed72b5fSad 	KASSERT(l->l_inheritedprio == pri);
16526ed72b5fSad }
16536ed72b5fSad 
16546ed72b5fSad pri_t
16556ed72b5fSad lwp_eprio(lwp_t *l)
16566ed72b5fSad {
16576ed72b5fSad 	pri_t pri = l->l_priority;
16586ed72b5fSad 
16596ed72b5fSad 	KASSERT(mutex_owned(l->l_mutex));
16606ed72b5fSad 
16616ed72b5fSad 	/*
16626ed72b5fSad 	 * Timeshared/user LWPs get a temporary priority boost for blocking
16636ed72b5fSad 	 * in kernel.  This is key to good interactive response on a loaded
16646ed72b5fSad 	 * system: without it, things will seem very sluggish to the user.
16656ed72b5fSad 	 *
16666ed72b5fSad 	 * The function of the boost is to get the LWP onto a CPU and
16676ed72b5fSad 	 * running quickly.  Once that happens the LWP loses the priority
16686ed72b5fSad 	 * boost and could be preempted very quickly by another LWP but that
1669*bcfabd50Sandvar 	 * won't happen often enough to be an annoyance.
16706ed72b5fSad 	 */
1671cbc1d2c4Sad 	if (pri <= MAXPRI_USER && l->l_boostpri > MAXPRI_USER)
1672cbc1d2c4Sad 		pri = (pri >> 1) + l->l_boostpri;
16736ed72b5fSad 
16746ed72b5fSad 	return MAX(l->l_auxprio, pri);
16756ed72b5fSad }
16766ed72b5fSad 
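/*
 * Worked example of the boost calculation above (hypothetical numbers;
 * the real constants are priority parameters defined elsewhere): if
 * MAXPRI_USER were 63, an LWP with l_priority 40 and l_boostpri 72
 * would yield (40 >> 1) + 72 = 92, and the final value is still
 * subject to MAX(l_auxprio, pri) for inherited priority.
 */
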
16776ed72b5fSad /*
1678934634a1Spavel  * Handle exceptions for mi_userret().  Called if any flag in LW_USERRET
16796ed72b5fSad  * is set or a preemption is required.
1680b07ec3fcSad  */
1681b07ec3fcSad void
1682b07ec3fcSad lwp_userret(struct lwp *l)
1683b07ec3fcSad {
1684b07ec3fcSad 	struct proc *p;
16856ed72b5fSad 	int sig, f;
1686b07ec3fcSad 
168729170d38Srmind 	KASSERT(l == curlwp);
168829170d38Srmind 	KASSERT(l->l_stat == LSONPROC);
1689b07ec3fcSad 	p = l->l_proc;
1690b07ec3fcSad 
16916ed72b5fSad 	for (;;) {
1692b07ec3fcSad 		/*
16936ed72b5fSad 		 * This is the main location that user preemptions are
16946ed72b5fSad 		 * processed.
1695b07ec3fcSad 		 */
16966ed72b5fSad 		preempt_point();
16976ed72b5fSad 
16986ed72b5fSad 		/*
16996ed72b5fSad 		 * It is safe to do this unlocked and without raised SPL,
17006ed72b5fSad 		 * since whenever a flag of interest is added to l_flag the
17016ed72b5fSad 		 * LWP will take an AST and come down this path again.  If a
17026ed72b5fSad 		 * remote CPU posts the AST, it will be done with an IPI
17036ed72b5fSad 		 * (strongly synchronising).
17046ed72b5fSad 		 */
17056ed72b5fSad 		if ((f = atomic_load_relaxed(&l->l_flag) & LW_USERRET) == 0) {
17066ed72b5fSad 			return;
17076ed72b5fSad 		}
17086ed72b5fSad 
1709b07ec3fcSad 		/*
171068fa5843Sad 		 * Start out with the correct credentials.
171168fa5843Sad 		 */
171268fa5843Sad 		if ((f & LW_CACHECRED) != 0) {
171368fa5843Sad 			kauth_cred_t oc = l->l_cred;
171468fa5843Sad 			mutex_enter(p->p_lock);
171568fa5843Sad 			l->l_cred = kauth_cred_hold(p->p_cred);
171668fa5843Sad 			lwp_lock(l);
171768fa5843Sad 			l->l_flag &= ~LW_CACHECRED;
171868fa5843Sad 			lwp_unlock(l);
171968fa5843Sad 			mutex_exit(p->p_lock);
172068fa5843Sad 			kauth_cred_free(oc);
172168fa5843Sad 		}
172268fa5843Sad 
172368fa5843Sad 		/*
1724b07ec3fcSad 		 * Process pending signals first, unless the process
1725681b77ebSad 		 * is dumping core or exiting, in which case we will instead
17265c0e3318Srmind 		 * enter the LW_WSUSPEND case below.
1727b07ec3fcSad 		 */
17286ed72b5fSad 		if ((f & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == LW_PENDSIG) {
1729284c2b9aSad 			mutex_enter(p->p_lock);
1730b07ec3fcSad 			while ((sig = issignal(l)) != 0)
1731b07ec3fcSad 				postsig(sig);
1732284c2b9aSad 			mutex_exit(p->p_lock);
17336ed72b5fSad 			continue;
1734b07ec3fcSad 		}
1735b07ec3fcSad 
1736b07ec3fcSad 		/*
1737b07ec3fcSad 		 * Core-dump or suspend pending.
1738b07ec3fcSad 		 *
17395ca5a72bSmatt 		 * In case of core dump, suspend ourselves, so that the kernel
17405ca5a72bSmatt 		 * stack and therefore the userland registers saved in the
17415ca5a72bSmatt 		 * trapframe are around for coredump() to write them out.
17425ca5a72bSmatt 		 * We also need to save any PCU resources that we have so that
17445ca5a72bSmatt 	 * they are accessible for coredump().  We issue a wakeup on
17445ca5a72bSmatt 		 * p->p_lwpcv so that sigexit() will write the core file out
17455ca5a72bSmatt 		 * once all other LWPs are suspended.
1746b07ec3fcSad 		 */
17476ed72b5fSad 		if ((f & LW_WSUSPEND) != 0) {
17485ca5a72bSmatt 			pcu_save_all(l);
1749284c2b9aSad 			mutex_enter(p->p_lock);
1750b07ec3fcSad 			p->p_nrlwps--;
1751b07ec3fcSad 			lwp_lock(l);
1752b07ec3fcSad 			l->l_stat = LSSUSPENDED;
17533cef7381Sad 			lwp_unlock(l);
1754284c2b9aSad 			mutex_exit(p->p_lock);
17553d1cabfdSad 			cv_broadcast(&p->p_lwpcv);
17563cef7381Sad 			lwp_lock(l);
17574477d28dSad 			spc_lock(l->l_cpu);
1758f0301095Syamt 			mi_switch(l);
17596ed72b5fSad 			continue;
1760b07ec3fcSad 		}
1761b07ec3fcSad 
17626ed72b5fSad 		/*
17636ed72b5fSad 		 * Process is exiting.  The core dump and signal cases must
17646ed72b5fSad 		 * be handled first.
17656ed72b5fSad 		 */
17666ed72b5fSad 		if ((f & LW_WEXIT) != 0) {
1767b07ec3fcSad 			lwp_exit(l);
1768b07ec3fcSad 			KASSERT(0);
1769b07ec3fcSad 			/* NOTREACHED */
1770b07ec3fcSad 		}
1771e820d9feSpooka 
17726ed72b5fSad 		/*
17736ed72b5fSad 		 * Update lwpctl processor (for vfork child_return).
17746ed72b5fSad 		 */
17756ed72b5fSad 		if ((f & LW_LWPCTL) != 0) {
1776e820d9feSpooka 			lwp_lock(l);
1777e820d9feSpooka 			KASSERT(kpreempt_disabled());
1778e820d9feSpooka 			l->l_lwpctl->lc_curcpu = (int)cpu_index(l->l_cpu);
1779e820d9feSpooka 			l->l_lwpctl->lc_pctr++;
1780e820d9feSpooka 			l->l_flag &= ~LW_LWPCTL;
1781e820d9feSpooka 			lwp_unlock(l);
17826ed72b5fSad 			continue;
1783e820d9feSpooka 		}
1784b07ec3fcSad 	}
1785b07ec3fcSad }
1786b07ec3fcSad 
1787b07ec3fcSad /*
1788b07ec3fcSad  * Force an LWP to enter the kernel, to take a trip through lwp_userret().
1789b07ec3fcSad  */
1790b07ec3fcSad void
1791b07ec3fcSad lwp_need_userret(struct lwp *l)
1792b07ec3fcSad {
1793e57dd2baSad 
1794e57dd2baSad 	KASSERT(!cpu_intr_p());
1795725adb2aSad 	KASSERT(lwp_locked(l, NULL) || l->l_stat == LSIDL);
1796b07ec3fcSad 
1797b07ec3fcSad 	/*
1798e57dd2baSad 	 * If the LWP is in any state other than LSONPROC, we know that it
1799e57dd2baSad 	 * is executing in-kernel and will hit userret() on the way out.
1800e57dd2baSad 	 *
1801e57dd2baSad 	 * If the LWP is curlwp, then we know we'll be back out to userspace
1802e57dd2baSad 	 * soon (can't be called from a hardware interrupt here).
1803e57dd2baSad 	 *
1804e57dd2baSad 	 * Otherwise, we can't be sure what the LWP is doing, so first make
1805e57dd2baSad 	 * sure the update to l_flag will be globally visible, and then
1806e57dd2baSad 	 * force the LWP to take a trip through trap() where it will do
1807e57dd2baSad 	 * userret().
1808b07ec3fcSad 	 */
1809e57dd2baSad 	if (l->l_stat == LSONPROC && l != curlwp) {
1810e2aaefb8Sad 		membar_producer();
1811b07ec3fcSad 		cpu_signotify(l);
1812b07ec3fcSad 	}
1813e57dd2baSad }
1814b07ec3fcSad 
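/*
 * Usage sketch (illustrative): the usual pattern is to set one of the
 * LW_USERRET flags with the LWP locked and then nudge the LWP.
 *
 *	lwp_lock(l);
 *	l->l_flag |= LW_WEXIT;		(or another LW_USERRET flag)
 *	lwp_need_userret(l);
 *	lwp_unlock(l);
 */
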
1815b07ec3fcSad /*
1816b07ec3fcSad  * Add one reference to an LWP.  This will prevent the LWP from
1817b07ec3fcSad  * exiting, thus keeping the lwp structure and PCB around to inspect.
1818b07ec3fcSad  */
1819b07ec3fcSad void
1820b07ec3fcSad lwp_addref(struct lwp *l)
1821b07ec3fcSad {
1822284c2b9aSad 	KASSERT(mutex_owned(l->l_proc->p_lock));
182359150873Sthorpej 	KASSERT(l->l_stat != LSZOMB);
182459150873Sthorpej 	l->l_refcnt++;
1825b07ec3fcSad }
1826b07ec3fcSad 
1827b07ec3fcSad /*
1828b07ec3fcSad  * Remove one reference to an LWP.  If this is the last reference,
1829b07ec3fcSad  * then we must finalize the LWP's death.
1830b07ec3fcSad  */
1831b07ec3fcSad void
1832b07ec3fcSad lwp_delref(struct lwp *l)
1833b07ec3fcSad {
1834b07ec3fcSad 	struct proc *p = l->l_proc;
1835b07ec3fcSad 
1836284c2b9aSad 	mutex_enter(p->p_lock);
1837ca843a73Schristos 	lwp_delref2(l);
1838ca843a73Schristos 	mutex_exit(p->p_lock);
1839ca843a73Schristos }
1840ca843a73Schristos 
1841ca843a73Schristos /*
1842ca843a73Schristos  * Remove one reference to an LWP.  If this is the last reference,
1843ca843a73Schristos  * then we must finalize the LWP's death.  The proc mutex is held
1844ca843a73Schristos  * on entry.
1845ca843a73Schristos  */
1846ca843a73Schristos void
1847ca843a73Schristos lwp_delref2(struct lwp *l)
1848ca843a73Schristos {
1849ca843a73Schristos 	struct proc *p = l->l_proc;
1850ca843a73Schristos 
1851ca843a73Schristos 	KASSERT(mutex_owned(p->p_lock));
18520a0689eeSad 	KASSERT(l->l_stat != LSZOMB);
185359150873Sthorpej 	KASSERT(l->l_refcnt > 0);
18546a317a61Sad 
185559150873Sthorpej 	if (--l->l_refcnt == 0)
1856d18c6ca4Sad 		cv_broadcast(&p->p_lwpcv);
1857b07ec3fcSad }
1858b07ec3fcSad 
1859b07ec3fcSad /*
186098a9cebbSthorpej  * Drain all references to the current LWP.  Returns true if
186198a9cebbSthorpej  * we blocked.
1862b07ec3fcSad  */
186398a9cebbSthorpej bool
1864b07ec3fcSad lwp_drainrefs(struct lwp *l)
1865b07ec3fcSad {
1866b07ec3fcSad 	struct proc *p = l->l_proc;
186798a9cebbSthorpej 	bool rv = false;
1868b07ec3fcSad 
1869284c2b9aSad 	KASSERT(mutex_owned(p->p_lock));
1870b07ec3fcSad 
187198a9cebbSthorpej 	l->l_prflag |= LPR_DRAINING;
187298a9cebbSthorpej 
187359150873Sthorpej 	while (l->l_refcnt > 0) {
187498a9cebbSthorpej 		rv = true;
1875284c2b9aSad 		cv_wait(&p->p_lwpcv, p->p_lock);
18762b79369cSad 	}
187798a9cebbSthorpej 	return rv;
187898a9cebbSthorpej }
187904e486d9Sthorpej 
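/*
 * Usage sketch for the reference counting above (illustrative): a
 * caller that wants to inspect an LWP without holding p_lock for the
 * whole time takes a reference first.
 *
 *	mutex_enter(p->p_lock);
 *	lwp_addref(l);
 *	mutex_exit(p->p_lock);
 *	... l cannot be freed while the reference is held ...
 *	lwp_delref(l);
 */
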
188004e486d9Sthorpej /*
18813c323631Sad  * Return true if the specified LWP is 'alive'.  Only p->p_lock need
18823c323631Sad  * be held.
18833c323631Sad  */
18843c323631Sad bool
18853c323631Sad lwp_alive(lwp_t *l)
18863c323631Sad {
18873c323631Sad 
18883c323631Sad 	KASSERT(mutex_owned(l->l_proc->p_lock));
18893c323631Sad 
18903c323631Sad 	switch (l->l_stat) {
18913c323631Sad 	case LSSLEEP:
18923c323631Sad 	case LSRUN:
18933c323631Sad 	case LSONPROC:
18943c323631Sad 	case LSSTOP:
18953c323631Sad 	case LSSUSPENDED:
18963c323631Sad 		return true;
18973c323631Sad 	default:
18983c323631Sad 		return false;
18993c323631Sad 	}
19003c323631Sad }
19013c323631Sad 
19023c323631Sad /*
19033c323631Sad  * Return first live LWP in the process.
19043c323631Sad  */
19053c323631Sad lwp_t *
19063c323631Sad lwp_find_first(proc_t *p)
19073c323631Sad {
19083c323631Sad 	lwp_t *l;
19093c323631Sad 
19103c323631Sad 	KASSERT(mutex_owned(p->p_lock));
19113c323631Sad 
19123c323631Sad 	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
19133c323631Sad 		if (lwp_alive(l)) {
19143c323631Sad 			return l;
19153c323631Sad 		}
19163c323631Sad 	}
19173c323631Sad 
19183c323631Sad 	return NULL;
19193c323631Sad }
19203c323631Sad 
19213c323631Sad /*
1922b668a9a0Sad  * Allocate a new lwpctl structure for a user LWP.
1923b668a9a0Sad  */
1924b668a9a0Sad int
1925b668a9a0Sad lwp_ctl_alloc(vaddr_t *uaddr)
1926b668a9a0Sad {
1927b668a9a0Sad 	lcproc_t *lp;
1928b668a9a0Sad 	u_int bit, i, offset;
1929b668a9a0Sad 	struct uvm_object *uao;
1930b668a9a0Sad 	int error;
1931b668a9a0Sad 	lcpage_t *lcp;
1932b668a9a0Sad 	proc_t *p;
1933b668a9a0Sad 	lwp_t *l;
1934b668a9a0Sad 
1935b668a9a0Sad 	l = curlwp;
1936b668a9a0Sad 	p = l->l_proc;
1937b668a9a0Sad 
1938e820d9feSpooka 	/* don't allow a vforked process to create lwp ctls */
1939e820d9feSpooka 	if (p->p_lflag & PL_PPWAIT)
1940e820d9feSpooka 		return EBUSY;
1941e820d9feSpooka 
1942e2aaefb8Sad 	if (l->l_lcpage != NULL) {
1943e2aaefb8Sad 		lcp = l->l_lcpage;
1944e2aaefb8Sad 		*uaddr = lcp->lcp_uaddr + (vaddr_t)l->l_lwpctl - lcp->lcp_kaddr;
1945cb925a94Snjoly 		return 0;
1946e2aaefb8Sad 	}
1947b668a9a0Sad 
1948b668a9a0Sad 	/* First time around, allocate header structure for the process. */
1949b668a9a0Sad 	if ((lp = p->p_lwpctl) == NULL) {
1950b668a9a0Sad 		lp = kmem_alloc(sizeof(*lp), KM_SLEEP);
1951b668a9a0Sad 		mutex_init(&lp->lp_lock, MUTEX_DEFAULT, IPL_NONE);
1952b668a9a0Sad 		lp->lp_uao = NULL;
1953b668a9a0Sad 		TAILQ_INIT(&lp->lp_pages);
1954284c2b9aSad 		mutex_enter(p->p_lock);
1955b668a9a0Sad 		if (p->p_lwpctl == NULL) {
1956b668a9a0Sad 			p->p_lwpctl = lp;
1957284c2b9aSad 			mutex_exit(p->p_lock);
1958b668a9a0Sad 		} else {
1959284c2b9aSad 			mutex_exit(p->p_lock);
1960b668a9a0Sad 			mutex_destroy(&lp->lp_lock);
1961b668a9a0Sad 			kmem_free(lp, sizeof(*lp));
1962b668a9a0Sad 			lp = p->p_lwpctl;
1963b668a9a0Sad 		}
1964b668a9a0Sad 	}
1965b668a9a0Sad 
1966b668a9a0Sad  	/*
1967b668a9a0Sad  	 * Set up an anonymous memory region to hold the shared pages.
1968b668a9a0Sad  	 * Map them into the process' address space.  The user vmspace
1969b668a9a0Sad  	 * gets the first reference on the UAO.
1970b668a9a0Sad  	 */
1971b668a9a0Sad 	mutex_enter(&lp->lp_lock);
1972b668a9a0Sad 	if (lp->lp_uao == NULL) {
1973b668a9a0Sad 		lp->lp_uao = uao_create(LWPCTL_UAREA_SZ, 0);
1974b668a9a0Sad 		lp->lp_cur = 0;
1975b668a9a0Sad 		lp->lp_max = LWPCTL_UAREA_SZ;
1976b668a9a0Sad 		lp->lp_uva = p->p_emul->e_vm_default_addr(p,
197776713fa8Smartin 		     (vaddr_t)p->p_vmspace->vm_daddr, LWPCTL_UAREA_SZ,
197876713fa8Smartin 		     p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
1979b668a9a0Sad 		error = uvm_map(&p->p_vmspace->vm_map, &lp->lp_uva,
1980b668a9a0Sad 		    LWPCTL_UAREA_SZ, lp->lp_uao, 0, 0, UVM_MAPFLAG(UVM_PROT_RW,
1981b668a9a0Sad 		    UVM_PROT_RW, UVM_INH_NONE, UVM_ADV_NORMAL, 0));
1982b668a9a0Sad 		if (error != 0) {
1983b668a9a0Sad 			uao_detach(lp->lp_uao);
1984b668a9a0Sad 			lp->lp_uao = NULL;
1985b668a9a0Sad 			mutex_exit(&lp->lp_lock);
1986b668a9a0Sad 			return error;
1987b668a9a0Sad 		}
1988b668a9a0Sad 	}
1989b668a9a0Sad 
1990b668a9a0Sad 	/* Get a free block and allocate for this LWP. */
1991b668a9a0Sad 	TAILQ_FOREACH(lcp, &lp->lp_pages, lcp_chain) {
1992b668a9a0Sad 		if (lcp->lcp_nfree != 0)
1993b668a9a0Sad 			break;
1994b668a9a0Sad 	}
1995b668a9a0Sad 	if (lcp == NULL) {
1996b668a9a0Sad 		/* Nothing available - try to set up a free page. */
1997b668a9a0Sad 		if (lp->lp_cur == lp->lp_max) {
1998b668a9a0Sad 			mutex_exit(&lp->lp_lock);
1999b668a9a0Sad 			return ENOMEM;
2000b668a9a0Sad 		}
2001b668a9a0Sad 		lcp = kmem_alloc(LWPCTL_LCPAGE_SZ, KM_SLEEP);
2002fd34ea77Schs 
2003b668a9a0Sad 		/*
2004b668a9a0Sad 		 * Wire the next page down in kernel space.  Since this
2005b668a9a0Sad 		 * is a new mapping, we must add a reference.
2006b668a9a0Sad 		 */
2007b668a9a0Sad 		uao = lp->lp_uao;
2008b668a9a0Sad 		(*uao->pgops->pgo_reference)(uao);
200925b10dbbSad 		lcp->lcp_kaddr = vm_map_min(kernel_map);
2010b668a9a0Sad 		error = uvm_map(kernel_map, &lcp->lcp_kaddr, PAGE_SIZE,
2011b668a9a0Sad 		    uao, lp->lp_cur, PAGE_SIZE,
2012b668a9a0Sad 		    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
2013b668a9a0Sad 		    UVM_INH_NONE, UVM_ADV_RANDOM, 0));
2014b668a9a0Sad 		if (error != 0) {
2015b668a9a0Sad 			mutex_exit(&lp->lp_lock);
2016b668a9a0Sad 			kmem_free(lcp, LWPCTL_LCPAGE_SZ);
2017b668a9a0Sad 			(*uao->pgops->pgo_detach)(uao);
2018b668a9a0Sad 			return error;
2019b668a9a0Sad 		}
20202e8a5beeSyamt 		error = uvm_map_pageable(kernel_map, lcp->lcp_kaddr,
20212e8a5beeSyamt 		    lcp->lcp_kaddr + PAGE_SIZE, FALSE, 0);
20222e8a5beeSyamt 		if (error != 0) {
20232e8a5beeSyamt 			mutex_exit(&lp->lp_lock);
20242e8a5beeSyamt 			uvm_unmap(kernel_map, lcp->lcp_kaddr,
20252e8a5beeSyamt 			    lcp->lcp_kaddr + PAGE_SIZE);
20262e8a5beeSyamt 			kmem_free(lcp, LWPCTL_LCPAGE_SZ);
20272e8a5beeSyamt 			return error;
20282e8a5beeSyamt 		}
2029b668a9a0Sad 		/* Prepare the page descriptor and link into the list. */
2030b668a9a0Sad 		lcp->lcp_uaddr = lp->lp_uva + lp->lp_cur;
2031b668a9a0Sad 		lp->lp_cur += PAGE_SIZE;
2032b668a9a0Sad 		lcp->lcp_nfree = LWPCTL_PER_PAGE;
2033b668a9a0Sad 		lcp->lcp_rotor = 0;
2034b668a9a0Sad 		memset(lcp->lcp_bitmap, 0xff, LWPCTL_BITMAP_SZ);
2035b668a9a0Sad 		TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain);
2036b668a9a0Sad 	}
2037b668a9a0Sad 	for (i = lcp->lcp_rotor; lcp->lcp_bitmap[i] == 0;) {
2038b668a9a0Sad 		if (++i >= LWPCTL_BITMAP_ENTRIES)
2039b668a9a0Sad 			i = 0;
2040b668a9a0Sad 	}
2041b668a9a0Sad 	bit = ffs(lcp->lcp_bitmap[i]) - 1;
204285b6812cSkamil 	lcp->lcp_bitmap[i] ^= (1U << bit);
2043b668a9a0Sad 	lcp->lcp_rotor = i;
2044b668a9a0Sad 	lcp->lcp_nfree--;
2045b668a9a0Sad 	l->l_lcpage = lcp;
2046b668a9a0Sad 	offset = (i << 5) + bit;
2047b668a9a0Sad 	l->l_lwpctl = (lwpctl_t *)lcp->lcp_kaddr + offset;
2048b668a9a0Sad 	*uaddr = lcp->lcp_uaddr + offset * sizeof(lwpctl_t);
2049b668a9a0Sad 	mutex_exit(&lp->lp_lock);
2050b668a9a0Sad 
20514c7ba244Sad 	KPREEMPT_DISABLE(l);
2052c8304d84Sskrll 	l->l_lwpctl->lc_curcpu = (int)cpu_index(curcpu());
20534c7ba244Sad 	KPREEMPT_ENABLE(l);
2054b668a9a0Sad 
2055b668a9a0Sad 	return 0;
2056b668a9a0Sad }
2057b668a9a0Sad 
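/*
 * Userland reaches this via the _lwp_ctl(2) system call (sketch only;
 * assumes the standard libc wrapper and the LWPCTL_FEATURE_CURCPU flag
 * from <sys/lwpctl.h>).
 *
 *	struct lwpctl *lc;
 *	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &lc) == 0)
 *		printf("running on cpu %d\n", lc->lc_curcpu);
 */
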
2058b668a9a0Sad /*
2059b668a9a0Sad  * Free an lwpctl structure back to the per-process list.
2060b668a9a0Sad  */
2061b668a9a0Sad void
2062b668a9a0Sad lwp_ctl_free(lwp_t *l)
2063b668a9a0Sad {
2064e820d9feSpooka 	struct proc *p = l->l_proc;
2065b668a9a0Sad 	lcproc_t *lp;
2066b668a9a0Sad 	lcpage_t *lcp;
2067b668a9a0Sad 	u_int map, offset;
2068b668a9a0Sad 
2069e820d9feSpooka 	/* don't free a lwp context we borrowed for vfork */
2070e820d9feSpooka 	if (p->p_lflag & PL_PPWAIT) {
2071e820d9feSpooka 		l->l_lwpctl = NULL;
2072e820d9feSpooka 		return;
2073e820d9feSpooka 	}
2074e820d9feSpooka 
2075e820d9feSpooka 	lp = p->p_lwpctl;
2076b668a9a0Sad 	KASSERT(lp != NULL);
2077b668a9a0Sad 
2078b668a9a0Sad 	lcp = l->l_lcpage;
2079b668a9a0Sad 	offset = (u_int)((lwpctl_t *)l->l_lwpctl - (lwpctl_t *)lcp->lcp_kaddr);
2080b668a9a0Sad 	KASSERT(offset < LWPCTL_PER_PAGE);
2081b668a9a0Sad 
2082b668a9a0Sad 	mutex_enter(&lp->lp_lock);
2083b668a9a0Sad 	lcp->lcp_nfree++;
2084b668a9a0Sad 	map = offset >> 5;
20851d52842dSkamil 	lcp->lcp_bitmap[map] |= (1U << (offset & 31));
2086b668a9a0Sad 	if (lcp->lcp_bitmap[lcp->lcp_rotor] == 0)
2087b668a9a0Sad 		lcp->lcp_rotor = map;
2088b668a9a0Sad 	if (TAILQ_FIRST(&lp->lp_pages)->lcp_nfree == 0) {
2089b668a9a0Sad 		TAILQ_REMOVE(&lp->lp_pages, lcp, lcp_chain);
2090b668a9a0Sad 		TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain);
2091b668a9a0Sad 	}
2092b668a9a0Sad 	mutex_exit(&lp->lp_lock);
2093b668a9a0Sad }
2094b668a9a0Sad 
2095b668a9a0Sad /*
2096b668a9a0Sad  * Process is exiting; tear down lwpctl state.  This can only be safely
2097b668a9a0Sad  * called by the last LWP in the process.
2098b668a9a0Sad  */
2099b668a9a0Sad void
2100b668a9a0Sad lwp_ctl_exit(void)
2101b668a9a0Sad {
2102b668a9a0Sad 	lcpage_t *lcp, *next;
2103b668a9a0Sad 	lcproc_t *lp;
2104b668a9a0Sad 	proc_t *p;
2105b668a9a0Sad 	lwp_t *l;
2106b668a9a0Sad 
2107b668a9a0Sad 	l = curlwp;
2108b668a9a0Sad 	l->l_lwpctl = NULL;
210932b8f98eSad 	l->l_lcpage = NULL;
2110b668a9a0Sad 	p = l->l_proc;
2111b668a9a0Sad 	lp = p->p_lwpctl;
2112b668a9a0Sad 
2113b668a9a0Sad 	KASSERT(lp != NULL);
2114b668a9a0Sad 	KASSERT(p->p_nlwps == 1);
2115b668a9a0Sad 
2116b668a9a0Sad 	for (lcp = TAILQ_FIRST(&lp->lp_pages); lcp != NULL; lcp = next) {
2117b668a9a0Sad 		next = TAILQ_NEXT(lcp, lcp_chain);
2118b668a9a0Sad 		uvm_unmap(kernel_map, lcp->lcp_kaddr,
2119b668a9a0Sad 		    lcp->lcp_kaddr + PAGE_SIZE);
2120b668a9a0Sad 		kmem_free(lcp, LWPCTL_LCPAGE_SZ);
2121b668a9a0Sad 	}
2122b668a9a0Sad 
2123b668a9a0Sad 	if (lp->lp_uao != NULL) {
2124b668a9a0Sad 		uvm_unmap(&p->p_vmspace->vm_map, lp->lp_uva,
2125b668a9a0Sad 		    lp->lp_uva + LWPCTL_UAREA_SZ);
2126b668a9a0Sad 	}
2127b668a9a0Sad 
2128b668a9a0Sad 	mutex_destroy(&lp->lp_lock);
2129b668a9a0Sad 	kmem_free(lp, sizeof(*lp));
2130b668a9a0Sad 	p->p_lwpctl = NULL;
2131b668a9a0Sad }
21320c382013Syamt 
2133f0545a5eSad /*
2134f0545a5eSad  * Return the current LWP's "preemption counter".  Used to detect
2135f0545a5eSad  * preemption across operations that can tolerate preemption without
2136f0545a5eSad  * crashing, but which may generate incorrect results if preempted.
2137f103f77aSriastradh  *
2138f103f77aSriastradh  * We do arithmetic in unsigned long to avoid undefined behaviour in
2139f103f77aSriastradh  * the event of arithmetic overflow on LP32, and issue __insn_barrier()
2140f103f77aSriastradh  * on both sides so this can safely be used to detect changes to the
2141f103f77aSriastradh  * preemption counter in loops around other memory accesses even in the
2142f103f77aSriastradh  * event of whole-program optimization (e.g., gcc -flto).
2143f0545a5eSad  */
2144a355028fSad long
2145f0545a5eSad lwp_pctr(void)
2146f0545a5eSad {
2147f103f77aSriastradh 	unsigned long pctr;
2148f0545a5eSad 
2149f103f77aSriastradh 	__insn_barrier();
2150f103f77aSriastradh 	pctr = curlwp->l_ru.ru_nvcsw;
2151f103f77aSriastradh 	pctr += curlwp->l_ru.ru_nivcsw;
2152f103f77aSriastradh 	__insn_barrier();
2153f103f77aSriastradh 	return pctr;
2154f0545a5eSad }
2155f0545a5eSad 
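/*
 * Usage sketch (illustrative): retry an operation if this LWP was
 * preempted or slept part way through.
 *
 *	long pctr;
 *	do {
 *		pctr = lwp_pctr();
 *		... operation that must not be interrupted by a
 *		    context switch ...
 *	} while (pctr != lwp_pctr());
 */
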
215633fa5ccbSchs /*
215733fa5ccbSchs  * Set an LWP's private data pointer.
215833fa5ccbSchs  */
215933fa5ccbSchs int
216033fa5ccbSchs lwp_setprivate(struct lwp *l, void *ptr)
216133fa5ccbSchs {
216233fa5ccbSchs 	int error = 0;
216333fa5ccbSchs 
216433fa5ccbSchs 	l->l_private = ptr;
216533fa5ccbSchs #ifdef __HAVE_CPU_LWP_SETPRIVATE
216633fa5ccbSchs 	error = cpu_lwp_setprivate(l, ptr);
216733fa5ccbSchs #endif
216833fa5ccbSchs 	return error;
216933fa5ccbSchs }
217033fa5ccbSchs 
217198a9cebbSthorpej /*
217298a9cebbSthorpej  * Perform any thread-related cleanup on LWP exit.
217398a9cebbSthorpej  * N.B. l->l_proc->p_lock must be HELD on entry but will
217498a9cebbSthorpej  * be released before returning!
217598a9cebbSthorpej  */
217698a9cebbSthorpej void
217798a9cebbSthorpej lwp_thread_cleanup(struct lwp *l)
217898a9cebbSthorpej {
217998a9cebbSthorpej 
218098a9cebbSthorpej 	KASSERT(mutex_owned(l->l_proc->p_lock));
218198a9cebbSthorpej 	mutex_exit(l->l_proc->p_lock);
2182276ef223Sthorpej 
2183276ef223Sthorpej 	/*
2184276ef223Sthorpej 	 * If the LWP has robust futexes, release them all
2185276ef223Sthorpej 	 * now.
2186276ef223Sthorpej 	 */
2187276ef223Sthorpej 	if (__predict_false(l->l_robust_head != 0)) {
2188978ef622Sthorpej 		futex_release_all_lwp(l);
2189276ef223Sthorpej 	}
219098a9cebbSthorpej }
219198a9cebbSthorpej 
21920c382013Syamt #if defined(DDB)
21937146b2f6Srmind #include <machine/pcb.h>
21947146b2f6Srmind 
21950c382013Syamt void
21960c382013Syamt lwp_whatis(uintptr_t addr, void (*pr)(const char *, ...))
21970c382013Syamt {
21980c382013Syamt 	lwp_t *l;
21990c382013Syamt 
22000c382013Syamt 	LIST_FOREACH(l, &alllwp, l_list) {
22010c382013Syamt 		uintptr_t stack = (uintptr_t)KSTACK_LOWEST_ADDR(l);
22020c382013Syamt 
22030c382013Syamt 		if (addr < stack || stack + KSTACK_SIZE <= addr) {
22040c382013Syamt 			continue;
22050c382013Syamt 		}
22060c382013Syamt 		(*pr)("%p is %p+%zu, LWP %p's stack\n",
22070c382013Syamt 		    (void *)addr, (void *)stack,
22080c382013Syamt 		    (size_t)(addr - stack), l);
22090c382013Syamt 	}
22100c382013Syamt }
22110c382013Syamt #endif /* defined(DDB) */
2212