xref: /spdk/lib/thread/thread.c (revision 7219bd1a704333219622442510837560780574dd)
1488570ebSJim Harris /*   SPDX-License-Identifier: BSD-3-Clause
2a6dbe372Spaul luse  *   Copyright (C) 2016 Intel Corporation.
302d75f62SBen Walker  *   All rights reserved.
498eca6faSAlexey Marchuk  *   Copyright (c) 2022, 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
502d75f62SBen Walker  */
602d75f62SBen Walker 
702d75f62SBen Walker #include "spdk/stdinc.h"
802d75f62SBen Walker 
908d36a55SBen Walker #include "spdk/env.h"
10907efcd7SKozlowski Mateusz #include "spdk/likely.h"
1108d36a55SBen Walker #include "spdk/queue.h"
12ec571793SBen Walker #include "spdk/string.h"
13a83f91c2SBen Walker #include "spdk/thread.h"
14462eb754SKonrad Sztyber #include "spdk/trace.h"
1508d36a55SBen Walker #include "spdk/util.h"
164bf6e4bbSLiu Xiaodong #include "spdk/fd_group.h"
17ec571793SBen Walker 
184e8e97c8STomasz Zawadzki #include "spdk/log.h"
19605e530aSBen Walker #include "spdk_internal/thread.h"
2070f3606bSJohn Levon #include "spdk_internal/usdt.h"
215fc0475cSJiewei Ke #include "thread_internal.h"
2202d75f62SBen Walker 
23c37e776eSKrzysztof Karas #include "spdk_internal/trace_defs.h"
24c37e776eSKrzysztof Karas 
254bf6e4bbSLiu Xiaodong #ifdef __linux__
264bf6e4bbSLiu Xiaodong #include <sys/timerfd.h>
274bf6e4bbSLiu Xiaodong #include <sys/eventfd.h>
28531258aaSMike Gerdts #endif
29531258aaSMike Gerdts 
3001452b1bSDuncan Bellamy #ifdef SPDK_HAVE_EXECINFO_H
31531258aaSMike Gerdts #include <execinfo.h>
324bf6e4bbSLiu Xiaodong #endif
334bf6e4bbSLiu Xiaodong 
34d761ddbfSBen Walker #define SPDK_MSG_BATCH_SIZE		8
353d1995c3SShuhei Matsumoto #define SPDK_MAX_DEVICE_NAME_LEN	256
36a12aae4cSShuhei Matsumoto #define SPDK_THREAD_EXIT_TIMEOUT_SEC	5
375fdb2b76SShuhei Matsumoto #define SPDK_MAX_POLLER_NAME_LEN	256
3854bc83dcSShuhei Matsumoto #define SPDK_MAX_THREAD_NAME_LEN	256
395fdb2b76SShuhei Matsumoto 
40090b8af1SJim Harris static struct spdk_thread *g_app_thread;
41090b8af1SJim Harris 
4201dca5edSBen Walker struct spdk_interrupt {
4301dca5edSBen Walker 	int			efd;
44969b360dSKonrad Sztyber 	struct spdk_fd_group	*fgrp;
4501dca5edSBen Walker 	struct spdk_thread	*thread;
46f3c1b59aSBen Walker 	spdk_interrupt_fn	fn;
47f3c1b59aSBen Walker 	void			*arg;
4801dca5edSBen Walker 	char			name[SPDK_MAX_POLLER_NAME_LEN + 1];
4901dca5edSBen Walker };
5001dca5edSBen Walker 
515fdb2b76SShuhei Matsumoto enum spdk_poller_state {
525fdb2b76SShuhei Matsumoto 	/* The poller is registered with a thread but not currently executing its fn. */
535fdb2b76SShuhei Matsumoto 	SPDK_POLLER_STATE_WAITING,
545fdb2b76SShuhei Matsumoto 
555fdb2b76SShuhei Matsumoto 	/* The poller is currently running its fn. */
565fdb2b76SShuhei Matsumoto 	SPDK_POLLER_STATE_RUNNING,
575fdb2b76SShuhei Matsumoto 
585fdb2b76SShuhei Matsumoto 	/* The poller was unregistered during the execution of its fn. */
595fdb2b76SShuhei Matsumoto 	SPDK_POLLER_STATE_UNREGISTERED,
605fdb2b76SShuhei Matsumoto 
615fdb2b76SShuhei Matsumoto 	/* The poller is in the process of being paused.  It will be paused
625fdb2b76SShuhei Matsumoto 	 * during the next time it's supposed to be executed.
635fdb2b76SShuhei Matsumoto 	 */
645fdb2b76SShuhei Matsumoto 	SPDK_POLLER_STATE_PAUSING,
655fdb2b76SShuhei Matsumoto 
665fdb2b76SShuhei Matsumoto 	/* The poller is registered but currently paused.  It's on the
675fdb2b76SShuhei Matsumoto 	 * paused_pollers list.
685fdb2b76SShuhei Matsumoto 	 */
695fdb2b76SShuhei Matsumoto 	SPDK_POLLER_STATE_PAUSED,
705fdb2b76SShuhei Matsumoto };
715fdb2b76SShuhei Matsumoto 
725fdb2b76SShuhei Matsumoto struct spdk_poller {
735fdb2b76SShuhei Matsumoto 	TAILQ_ENTRY(spdk_poller)	tailq;
744e9adb3bSShuhei Matsumoto 	RB_ENTRY(spdk_poller)		node;
755fdb2b76SShuhei Matsumoto 
765fdb2b76SShuhei Matsumoto 	/* Current state of the poller; should only be accessed from the poller's thread. */
775fdb2b76SShuhei Matsumoto 	enum spdk_poller_state		state;
785fdb2b76SShuhei Matsumoto 
795fdb2b76SShuhei Matsumoto 	uint64_t			period_ticks;
805fdb2b76SShuhei Matsumoto 	uint64_t			next_run_tick;
815fdb2b76SShuhei Matsumoto 	uint64_t			run_count;
825fdb2b76SShuhei Matsumoto 	uint64_t			busy_count;
8329c0e0dcSMichael Piszczek 	uint64_t			id;
845fdb2b76SShuhei Matsumoto 	spdk_poller_fn			fn;
855fdb2b76SShuhei Matsumoto 	void				*arg;
865fdb2b76SShuhei Matsumoto 	struct spdk_thread		*thread;
8701dca5edSBen Walker 	struct spdk_interrupt		*intr;
885fdb2b76SShuhei Matsumoto 	spdk_poller_set_interrupt_mode_cb set_intr_cb_fn;
895fdb2b76SShuhei Matsumoto 	void				*set_intr_cb_arg;
905fdb2b76SShuhei Matsumoto 
915fdb2b76SShuhei Matsumoto 	char				name[SPDK_MAX_POLLER_NAME_LEN + 1];
925fdb2b76SShuhei Matsumoto };
93d761ddbfSBen Walker 
9454bc83dcSShuhei Matsumoto enum spdk_thread_state {
95cc6920a4SJosh Soref 	/* The thread is processing poller and message by spdk_thread_poll(). */
9654bc83dcSShuhei Matsumoto 	SPDK_THREAD_STATE_RUNNING,
9754bc83dcSShuhei Matsumoto 
9854bc83dcSShuhei Matsumoto 	/* The thread is in the process of termination. It reaps unregistering
9954bc83dcSShuhei Matsumoto 	 * poller are releasing I/O channel.
10054bc83dcSShuhei Matsumoto 	 */
10154bc83dcSShuhei Matsumoto 	SPDK_THREAD_STATE_EXITING,
10254bc83dcSShuhei Matsumoto 
10354bc83dcSShuhei Matsumoto 	/* The thread is exited. It is ready to call spdk_thread_destroy(). */
10454bc83dcSShuhei Matsumoto 	SPDK_THREAD_STATE_EXITED,
10554bc83dcSShuhei Matsumoto };
10654bc83dcSShuhei Matsumoto 
10798eca6faSAlexey Marchuk struct spdk_thread_post_poller_handler {
10898eca6faSAlexey Marchuk 	spdk_post_poller_fn fn;
10998eca6faSAlexey Marchuk 	void *fn_arg;
11098eca6faSAlexey Marchuk };
11198eca6faSAlexey Marchuk 
11298eca6faSAlexey Marchuk #define SPDK_THREAD_MAX_POST_POLLER_HANDLERS (4)
11398eca6faSAlexey Marchuk 
11454bc83dcSShuhei Matsumoto struct spdk_thread {
11554bc83dcSShuhei Matsumoto 	uint64_t			tsc_last;
11654bc83dcSShuhei Matsumoto 	struct spdk_thread_stats	stats;
11754bc83dcSShuhei Matsumoto 	/*
11854bc83dcSShuhei Matsumoto 	 * Contains pollers actively running on this thread.  Pollers
11954bc83dcSShuhei Matsumoto 	 *  are run round-robin. The thread takes one poller from the head
12054bc83dcSShuhei Matsumoto 	 *  of the ring, executes it, then puts it back at the tail of
12154bc83dcSShuhei Matsumoto 	 *  the ring.
12254bc83dcSShuhei Matsumoto 	 */
12354bc83dcSShuhei Matsumoto 	TAILQ_HEAD(active_pollers_head, spdk_poller)	active_pollers;
12454bc83dcSShuhei Matsumoto 	/**
12554bc83dcSShuhei Matsumoto 	 * Contains pollers running on this thread with a periodic timer.
12654bc83dcSShuhei Matsumoto 	 */
1274e9adb3bSShuhei Matsumoto 	RB_HEAD(timed_pollers_tree, spdk_poller)	timed_pollers;
1284748ebefSShuhei Matsumoto 	struct spdk_poller				*first_timed_poller;
12954bc83dcSShuhei Matsumoto 	/*
13054bc83dcSShuhei Matsumoto 	 * Contains paused pollers.  Pollers on this queue are waiting until
13154bc83dcSShuhei Matsumoto 	 * they are resumed (in which case they're put onto the active/timer
13254bc83dcSShuhei Matsumoto 	 * queues) or unregistered.
13354bc83dcSShuhei Matsumoto 	 */
13454bc83dcSShuhei Matsumoto 	TAILQ_HEAD(paused_pollers_head, spdk_poller)	paused_pollers;
13598eca6faSAlexey Marchuk 	struct spdk_thread_post_poller_handler		pp_handlers[SPDK_THREAD_MAX_POST_POLLER_HANDLERS];
13654bc83dcSShuhei Matsumoto 	struct spdk_ring		*messages;
13798eca6faSAlexey Marchuk 	uint8_t				num_pp_handlers;
13854bc83dcSShuhei Matsumoto 	int				msg_fd;
13954bc83dcSShuhei Matsumoto 	SLIST_HEAD(, spdk_msg)		msg_cache;
14054bc83dcSShuhei Matsumoto 	size_t				msg_cache_count;
14154bc83dcSShuhei Matsumoto 	spdk_msg_fn			critical_msg;
14254bc83dcSShuhei Matsumoto 	uint64_t			id;
14329c0e0dcSMichael Piszczek 	uint64_t			next_poller_id;
14454bc83dcSShuhei Matsumoto 	enum spdk_thread_state		state;
14554bc83dcSShuhei Matsumoto 	int				pending_unregister_count;
1466f4e0c95SShuhei Matsumoto 	uint32_t			for_each_count;
14754bc83dcSShuhei Matsumoto 
148df559ab6SJiewei Ke 	RB_HEAD(io_channel_tree, spdk_io_channel)	io_channels;
14954bc83dcSShuhei Matsumoto 	TAILQ_ENTRY(spdk_thread)			tailq;
15054bc83dcSShuhei Matsumoto 
15154bc83dcSShuhei Matsumoto 	char				name[SPDK_MAX_THREAD_NAME_LEN + 1];
15254bc83dcSShuhei Matsumoto 	struct spdk_cpuset		cpumask;
15354bc83dcSShuhei Matsumoto 	uint64_t			exit_timeout_tsc;
15454bc83dcSShuhei Matsumoto 
155cd2bcf10SMike Gerdts 	int32_t				lock_count;
156cd2bcf10SMike Gerdts 
1573f4b2c67SYifan Bian 	/* spdk_thread is bound to current CPU core. */
1583f4b2c67SYifan Bian 	bool				is_bound;
1593f4b2c67SYifan Bian 
16054bc83dcSShuhei Matsumoto 	/* Indicates whether this spdk_thread currently runs in interrupt. */
16154bc83dcSShuhei Matsumoto 	bool				in_interrupt;
162b1906912SLiu Xiaodong 	bool				poller_unregistered;
16354bc83dcSShuhei Matsumoto 	struct spdk_fd_group		*fgrp;
16454bc83dcSShuhei Matsumoto 
165fa6aec49SAnisa Su 	uint16_t			trace_id;
166fa6aec49SAnisa Su 
167fa6aec49SAnisa Su 	uint8_t				reserved[6];
168fa6aec49SAnisa Su 
16954bc83dcSShuhei Matsumoto 	/* User context allocated at the end */
17054bc83dcSShuhei Matsumoto 	uint8_t				ctx[0];
17154bc83dcSShuhei Matsumoto };
17254bc83dcSShuhei Matsumoto 
173fa6aec49SAnisa Su /*
174fa6aec49SAnisa Su  * Assert that spdk_thread struct is 8 byte aligned to ensure
175fa6aec49SAnisa Su  * the user ctx is also 8-byte aligned.
176fa6aec49SAnisa Su  */
177fa6aec49SAnisa Su SPDK_STATIC_ASSERT((sizeof(struct spdk_thread)) % 8 == 0, "Incorrect size");
178fa6aec49SAnisa Su 
17902d75f62SBen Walker static pthread_mutex_t g_devlist_mutex = PTHREAD_MUTEX_INITIALIZER;
18002d75f62SBen Walker 
181aaa9a278SBen Walker static spdk_new_thread_fn g_new_thread_fn = NULL;
182d82d6901SShuhei Matsumoto static spdk_thread_op_fn g_thread_op_fn = NULL;
183d82d6901SShuhei Matsumoto static spdk_thread_op_supported_fn g_thread_op_supported_fn;
18498104314SBen Walker static size_t g_ctx_sz = 0;
185515733caSShuhei Matsumoto /* Monotonic increasing ID is set to each created thread beginning at 1. Once the
186515733caSShuhei Matsumoto  * ID exceeds UINT64_MAX, further thread creation is not allowed and restarting
187515733caSShuhei Matsumoto  * SPDK application is required.
188515733caSShuhei Matsumoto  */
189515733caSShuhei Matsumoto static uint64_t g_thread_id = 1;
190aaa9a278SBen Walker 
191cd2bcf10SMike Gerdts enum spin_error {
192cd2bcf10SMike Gerdts 	SPIN_ERR_NONE,
193cd2bcf10SMike Gerdts 	/* Trying to use an SPDK lock while not on an SPDK thread */
194cd2bcf10SMike Gerdts 	SPIN_ERR_NOT_SPDK_THREAD,
195cd2bcf10SMike Gerdts 	/* Trying to lock a lock already held by this SPDK thread */
196cd2bcf10SMike Gerdts 	SPIN_ERR_DEADLOCK,
197cd2bcf10SMike Gerdts 	/* Trying to unlock a lock not held by this SPDK thread */
198cd2bcf10SMike Gerdts 	SPIN_ERR_WRONG_THREAD,
199cd2bcf10SMike Gerdts 	/* pthread_spin_*() returned an error */
200cd2bcf10SMike Gerdts 	SPIN_ERR_PTHREAD,
201cd2bcf10SMike Gerdts 	/* Trying to destroy a lock that is held */
202cd2bcf10SMike Gerdts 	SPIN_ERR_LOCK_HELD,
203cd2bcf10SMike Gerdts 	/* lock_count is invalid */
204cd2bcf10SMike Gerdts 	SPIN_ERR_LOCK_COUNT,
205cd2bcf10SMike Gerdts 	/*
206cd2bcf10SMike Gerdts 	 * An spdk_thread may migrate to another pthread. A spinlock held across migration leads to
207cd2bcf10SMike Gerdts 	 * undefined behavior. A spinlock held when an SPDK thread goes off CPU would lead to
208cd2bcf10SMike Gerdts 	 * deadlock when another SPDK thread on the same pthread tries to take that lock.
209cd2bcf10SMike Gerdts 	 */
210cd2bcf10SMike Gerdts 	SPIN_ERR_HOLD_DURING_SWITCH,
211c9f3613fSMike Gerdts 	/* Trying to use a lock that was destroyed (but not re-initialized) */
212c9f3613fSMike Gerdts 	SPIN_ERR_DESTROYED,
213c9f3613fSMike Gerdts 	/* Trying to use a lock that is not initialized */
214c9f3613fSMike Gerdts 	SPIN_ERR_NOT_INITIALIZED,
215c9f3613fSMike Gerdts 
2160f73e766SMike Gerdts 	/* Must be last, not an actual error code */
2170f73e766SMike Gerdts 	SPIN_ERR_LAST
218cd2bcf10SMike Gerdts };
219cd2bcf10SMike Gerdts 
220cd2bcf10SMike Gerdts static const char *spin_error_strings[] = {
221cd2bcf10SMike Gerdts 	[SPIN_ERR_NONE]			= "No error",
222cd2bcf10SMike Gerdts 	[SPIN_ERR_NOT_SPDK_THREAD]	= "Not an SPDK thread",
223cd2bcf10SMike Gerdts 	[SPIN_ERR_DEADLOCK]		= "Deadlock detected",
224cd2bcf10SMike Gerdts 	[SPIN_ERR_WRONG_THREAD]		= "Unlock on wrong SPDK thread",
225cd2bcf10SMike Gerdts 	[SPIN_ERR_PTHREAD]		= "Error from pthread_spinlock",
226cd2bcf10SMike Gerdts 	[SPIN_ERR_LOCK_HELD]		= "Destroying a held spinlock",
227cd2bcf10SMike Gerdts 	[SPIN_ERR_LOCK_COUNT]		= "Lock count is invalid",
228cd2bcf10SMike Gerdts 	[SPIN_ERR_HOLD_DURING_SWITCH]	= "Lock(s) held while SPDK thread going off CPU",
229c9f3613fSMike Gerdts 	[SPIN_ERR_DESTROYED]		= "Lock has been destroyed",
230c9f3613fSMike Gerdts 	[SPIN_ERR_NOT_INITIALIZED]	= "Lock has not been initialized",
231cd2bcf10SMike Gerdts };
232cd2bcf10SMike Gerdts 
233cd2bcf10SMike Gerdts #define SPIN_ERROR_STRING(err) (err < 0 || err >= SPDK_COUNTOF(spin_error_strings)) \
234cd2bcf10SMike Gerdts 				? "Unknown error" : spin_error_strings[err]
235cd2bcf10SMike Gerdts 
236cd2bcf10SMike Gerdts static void
237cd2bcf10SMike Gerdts __posix_abort(enum spin_error err)
238cd2bcf10SMike Gerdts {
239cd2bcf10SMike Gerdts 	abort();
240cd2bcf10SMike Gerdts }
241cd2bcf10SMike Gerdts 
242cd2bcf10SMike Gerdts typedef void (*spin_abort)(enum spin_error err);
243cd2bcf10SMike Gerdts spin_abort g_spin_abort_fn = __posix_abort;
244cd2bcf10SMike Gerdts 
2453d9395c6SMike Gerdts #define SPIN_ASSERT_IMPL(cond, err, extra_log, ret) \
246cd2bcf10SMike Gerdts 	do { \
247cd2bcf10SMike Gerdts 		if (spdk_unlikely(!(cond))) { \
248cd2bcf10SMike Gerdts 			SPDK_ERRLOG("unrecoverable spinlock error %d: %s (%s)\n", err, \
249cd2bcf10SMike Gerdts 				    SPIN_ERROR_STRING(err), #cond); \
2503d9395c6SMike Gerdts 			extra_log; \
251cd2bcf10SMike Gerdts 			g_spin_abort_fn(err); \
252cd2bcf10SMike Gerdts 			ret; \
253cd2bcf10SMike Gerdts 		} \
254cd2bcf10SMike Gerdts 	} while (0)
2553d9395c6SMike Gerdts #define SPIN_ASSERT_LOG_STACKS(cond, err, lock) \
2563d9395c6SMike Gerdts 	SPIN_ASSERT_IMPL(cond, err, sspin_stacks_print(sspin), return)
2573d9395c6SMike Gerdts #define SPIN_ASSERT_RETURN(cond, err, ret)	SPIN_ASSERT_IMPL(cond, err, , return ret)
2583d9395c6SMike Gerdts #define SPIN_ASSERT(cond, err)			SPIN_ASSERT_IMPL(cond, err, ,)
259cd2bcf10SMike Gerdts 
26002d75f62SBen Walker struct io_device {
26102d75f62SBen Walker 	void				*io_device;
2623d1995c3SShuhei Matsumoto 	char				name[SPDK_MAX_DEVICE_NAME_LEN + 1];
26302d75f62SBen Walker 	spdk_io_channel_create_cb	create_cb;
26402d75f62SBen Walker 	spdk_io_channel_destroy_cb	destroy_cb;
26502d75f62SBen Walker 	spdk_io_device_unregister_cb	unregister_cb;
26602d75f62SBen Walker 	struct spdk_thread		*unregister_thread;
26702d75f62SBen Walker 	uint32_t			ctx_size;
26802d75f62SBen Walker 	uint32_t			for_each_count;
26949c6afbfSJiewei Ke 	RB_ENTRY(io_device)		node;
27002d75f62SBen Walker 
27102d75f62SBen Walker 	uint32_t			refcnt;
27202d75f62SBen Walker 
273d33497d3SJim Harris 	bool				pending_unregister;
27402d75f62SBen Walker 	bool				unregistered;
27502d75f62SBen Walker };
27602d75f62SBen Walker 
27749c6afbfSJiewei Ke static RB_HEAD(io_device_tree, io_device) g_io_devices = RB_INITIALIZER(g_io_devices);
27849c6afbfSJiewei Ke 
27949c6afbfSJiewei Ke static int
28049c6afbfSJiewei Ke io_device_cmp(struct io_device *dev1, struct io_device *dev2)
28149c6afbfSJiewei Ke {
28249c6afbfSJiewei Ke 	return (dev1->io_device < dev2->io_device ? -1 : dev1->io_device > dev2->io_device);
28349c6afbfSJiewei Ke }
28449c6afbfSJiewei Ke 
28549c6afbfSJiewei Ke RB_GENERATE_STATIC(io_device_tree, io_device, node, io_device_cmp);
28602d75f62SBen Walker 
287df559ab6SJiewei Ke static int
288df559ab6SJiewei Ke io_channel_cmp(struct spdk_io_channel *ch1, struct spdk_io_channel *ch2)
289df559ab6SJiewei Ke {
290df559ab6SJiewei Ke 	return (ch1->dev < ch2->dev ? -1 : ch1->dev > ch2->dev);
291df559ab6SJiewei Ke }
292df559ab6SJiewei Ke 
293df559ab6SJiewei Ke RB_GENERATE_STATIC(io_channel_tree, spdk_io_channel, node, io_channel_cmp);
294df559ab6SJiewei Ke 
295d761ddbfSBen Walker struct spdk_msg {
2967b940538SBen Walker 	spdk_msg_fn		fn;
297d761ddbfSBen Walker 	void			*arg;
2982446c5c6SBen Walker 
2992446c5c6SBen Walker 	SLIST_ENTRY(spdk_msg)	link;
300d761ddbfSBen Walker };
301d761ddbfSBen Walker 
302d761ddbfSBen Walker static struct spdk_mempool *g_spdk_msg_mempool = NULL;
303d761ddbfSBen Walker 
30402d75f62SBen Walker static TAILQ_HEAD(, spdk_thread) g_threads = TAILQ_HEAD_INITIALIZER(g_threads);
305e30535feSJim Harris static uint32_t g_thread_count = 0;
30602d75f62SBen Walker 
307605e530aSBen Walker static __thread struct spdk_thread *tls_thread = NULL;
308605e530aSBen Walker 
3090eae0106SJim Harris static void
3100eae0106SJim Harris thread_trace(void)
311462eb754SKonrad Sztyber {
3121f4a57b4SKonrad Sztyber 	struct spdk_trace_tpoint_opts opts[] = {
3131f4a57b4SKonrad Sztyber 		{
3141f4a57b4SKonrad Sztyber 			"THREAD_IOCH_GET", TRACE_THREAD_IOCH_GET,
31526d44a12SJim Harris 			OWNER_TYPE_NONE, OBJECT_NONE, 0,
3161f4a57b4SKonrad Sztyber 			{{ "refcnt", SPDK_TRACE_ARG_TYPE_INT, 4 }}
3171f4a57b4SKonrad Sztyber 		},
3181f4a57b4SKonrad Sztyber 		{
3191f4a57b4SKonrad Sztyber 			"THREAD_IOCH_PUT", TRACE_THREAD_IOCH_PUT,
32026d44a12SJim Harris 			OWNER_TYPE_NONE, OBJECT_NONE, 0,
3211f4a57b4SKonrad Sztyber 			{{ "refcnt", SPDK_TRACE_ARG_TYPE_INT, 4 }}
3221f4a57b4SKonrad Sztyber 		}
3231f4a57b4SKonrad Sztyber 	};
3241f4a57b4SKonrad Sztyber 
325fa6aec49SAnisa Su 	spdk_trace_register_owner_type(OWNER_TYPE_THREAD, 't');
3261f4a57b4SKonrad Sztyber 	spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
327462eb754SKonrad Sztyber }
3280eae0106SJim Harris SPDK_TRACE_REGISTER_FN(thread_trace, "thread", TRACE_GROUP_THREAD)
329462eb754SKonrad Sztyber 
3304e9adb3bSShuhei Matsumoto /*
3314e9adb3bSShuhei Matsumoto  * If this compare function returns zero when two next_run_ticks are equal,
3324e9adb3bSShuhei Matsumoto  * the macro RB_INSERT() returns a pointer to the element with the same
3334e9adb3bSShuhei Matsumoto  * next_run_tick.
3344e9adb3bSShuhei Matsumoto  *
3354e9adb3bSShuhei Matsumoto  * Fortunately, the macro RB_REMOVE() takes not a key but a pointer to the element
3364e9adb3bSShuhei Matsumoto  * to remove as a parameter.
3374e9adb3bSShuhei Matsumoto  *
3384e9adb3bSShuhei Matsumoto  * Hence we allow RB_INSERT() to insert elements with the same keys on the right
3394e9adb3bSShuhei Matsumoto  * side by returning 1 when two next_run_ticks are equal.
3404e9adb3bSShuhei Matsumoto  */
3414e9adb3bSShuhei Matsumoto static inline int
3424e9adb3bSShuhei Matsumoto timed_poller_compare(struct spdk_poller *poller1, struct spdk_poller *poller2)
3434e9adb3bSShuhei Matsumoto {
3444e9adb3bSShuhei Matsumoto 	if (poller1->next_run_tick < poller2->next_run_tick) {
3454e9adb3bSShuhei Matsumoto 		return -1;
3464e9adb3bSShuhei Matsumoto 	} else {
3474e9adb3bSShuhei Matsumoto 		return 1;
3484e9adb3bSShuhei Matsumoto 	}
3494e9adb3bSShuhei Matsumoto }
3504e9adb3bSShuhei Matsumoto 
3514e9adb3bSShuhei Matsumoto RB_GENERATE_STATIC(timed_pollers_tree, spdk_poller, node, timed_poller_compare);
3524e9adb3bSShuhei Matsumoto 
353605e530aSBen Walker static inline struct spdk_thread *
35402d75f62SBen Walker _get_thread(void)
35502d75f62SBen Walker {
356605e530aSBen Walker 	return tls_thread;
35702d75f62SBen Walker }
35802d75f62SBen Walker 
359d82d6901SShuhei Matsumoto static int
360a71cd521SAlexis Lescouet _thread_lib_init(size_t ctx_sz, size_t msg_mempool_sz)
361603a6de8SBen Walker {
362d761ddbfSBen Walker 	char mempool_name[SPDK_MAX_MEMZONE_NAME_LEN];
363d761ddbfSBen Walker 
36498104314SBen Walker 	g_ctx_sz = ctx_sz;
36598104314SBen Walker 
366d761ddbfSBen Walker 	snprintf(mempool_name, sizeof(mempool_name), "msgpool_%d", getpid());
367a71cd521SAlexis Lescouet 	g_spdk_msg_mempool = spdk_mempool_create(mempool_name, msg_mempool_sz,
368d761ddbfSBen Walker 			     sizeof(struct spdk_msg),
3692446c5c6SBen Walker 			     0, /* No cache. We do our own. */
370186b109dSJim Harris 			     SPDK_ENV_NUMA_ID_ANY);
371d761ddbfSBen Walker 
372a71cd521SAlexis Lescouet 	SPDK_DEBUGLOG(thread, "spdk_msg_mempool was created with size: %zu\n",
373a71cd521SAlexis Lescouet 		      msg_mempool_sz);
374a71cd521SAlexis Lescouet 
375d761ddbfSBen Walker 	if (!g_spdk_msg_mempool) {
3764237d2d8Ssunshihao520 		SPDK_ERRLOG("spdk_msg_mempool creation failed\n");
377c9eb502aSyidong0635 		return -ENOMEM;
378d761ddbfSBen Walker 	}
379d761ddbfSBen Walker 
380603a6de8SBen Walker 	return 0;
381603a6de8SBen Walker }
382603a6de8SBen Walker 
3834bf6e4bbSLiu Xiaodong static void thread_interrupt_destroy(struct spdk_thread *thread);
3844bf6e4bbSLiu Xiaodong static int thread_interrupt_create(struct spdk_thread *thread);
3854bf6e4bbSLiu Xiaodong 
386dcb8ba7bSBen Walker static void
387dcb8ba7bSBen Walker _free_thread(struct spdk_thread *thread)
388dcb8ba7bSBen Walker {
389dcb8ba7bSBen Walker 	struct spdk_io_channel *ch;
390dcb8ba7bSBen Walker 	struct spdk_msg *msg;
391dcb8ba7bSBen Walker 	struct spdk_poller *poller, *ptmp;
392dcb8ba7bSBen Walker 
393df559ab6SJiewei Ke 	RB_FOREACH(ch, io_channel_tree, &thread->io_channels) {
394dcb8ba7bSBen Walker 		SPDK_ERRLOG("thread %s still has channel for io_device %s\n",
395dcb8ba7bSBen Walker 			    thread->name, ch->dev->name);
396dcb8ba7bSBen Walker 	}
397dcb8ba7bSBen Walker 
398dcb8ba7bSBen Walker 	TAILQ_FOREACH_SAFE(poller, &thread->active_pollers, tailq, ptmp) {
3999ec598d1SKonrad Sztyber 		if (poller->state != SPDK_POLLER_STATE_UNREGISTERED) {
400686401ebSsunshihao520 			SPDK_WARNLOG("active_poller %s still registered at thread exit\n",
401b992bb4eSShuhei Matsumoto 				     poller->name);
402dcb8ba7bSBen Walker 		}
403dcb8ba7bSBen Walker 		TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
404dcb8ba7bSBen Walker 		free(poller);
405dcb8ba7bSBen Walker 	}
406dcb8ba7bSBen Walker 
4074e9adb3bSShuhei Matsumoto 	RB_FOREACH_SAFE(poller, timed_pollers_tree, &thread->timed_pollers, ptmp) {
4089ec598d1SKonrad Sztyber 		if (poller->state != SPDK_POLLER_STATE_UNREGISTERED) {
409686401ebSsunshihao520 			SPDK_WARNLOG("timed_poller %s still registered at thread exit\n",
410b992bb4eSShuhei Matsumoto 				     poller->name);
411dcb8ba7bSBen Walker 		}
4124e9adb3bSShuhei Matsumoto 		RB_REMOVE(timed_pollers_tree, &thread->timed_pollers, poller);
413dcb8ba7bSBen Walker 		free(poller);
414dcb8ba7bSBen Walker 	}
415dcb8ba7bSBen Walker 
4169ec598d1SKonrad Sztyber 	TAILQ_FOREACH_SAFE(poller, &thread->paused_pollers, tailq, ptmp) {
417686401ebSsunshihao520 		SPDK_WARNLOG("paused_poller %s still registered at thread exit\n", poller->name);
4189ec598d1SKonrad Sztyber 		TAILQ_REMOVE(&thread->paused_pollers, poller, tailq);
4199ec598d1SKonrad Sztyber 		free(poller);
4209ec598d1SKonrad Sztyber 	}
4219ec598d1SKonrad Sztyber 
422dcb8ba7bSBen Walker 	pthread_mutex_lock(&g_devlist_mutex);
423dcb8ba7bSBen Walker 	assert(g_thread_count > 0);
424dcb8ba7bSBen Walker 	g_thread_count--;
425dcb8ba7bSBen Walker 	TAILQ_REMOVE(&g_threads, thread, tailq);
426dcb8ba7bSBen Walker 	pthread_mutex_unlock(&g_devlist_mutex);
427dcb8ba7bSBen Walker 
428dcb8ba7bSBen Walker 	msg = SLIST_FIRST(&thread->msg_cache);
429dcb8ba7bSBen Walker 	while (msg != NULL) {
430dcb8ba7bSBen Walker 		SLIST_REMOVE_HEAD(&thread->msg_cache, link);
431dcb8ba7bSBen Walker 
432dcb8ba7bSBen Walker 		assert(thread->msg_cache_count > 0);
433dcb8ba7bSBen Walker 		thread->msg_cache_count--;
434dcb8ba7bSBen Walker 		spdk_mempool_put(g_spdk_msg_mempool, msg);
435dcb8ba7bSBen Walker 
436dcb8ba7bSBen Walker 		msg = SLIST_FIRST(&thread->msg_cache);
437dcb8ba7bSBen Walker 	}
438dcb8ba7bSBen Walker 
439dcb8ba7bSBen Walker 	assert(thread->msg_cache_count == 0);
440dcb8ba7bSBen Walker 
44153429c7fSLiu Xiaodong 	if (spdk_interrupt_mode_is_enabled()) {
4424bf6e4bbSLiu Xiaodong 		thread_interrupt_destroy(thread);
4434bf6e4bbSLiu Xiaodong 	}
4444bf6e4bbSLiu Xiaodong 
445dcb8ba7bSBen Walker 	spdk_ring_free(thread->messages);
446dcb8ba7bSBen Walker 	free(thread);
447dcb8ba7bSBen Walker }
448dcb8ba7bSBen Walker 
449db18916fSJim Harris int
450db18916fSJim Harris spdk_thread_lib_init(spdk_new_thread_fn new_thread_fn, size_t ctx_sz)
451db18916fSJim Harris {
452db18916fSJim Harris 	assert(g_new_thread_fn == NULL);
453db18916fSJim Harris 	assert(g_thread_op_fn == NULL);
454db18916fSJim Harris 
455db18916fSJim Harris 	if (new_thread_fn == NULL) {
456db18916fSJim Harris 		SPDK_INFOLOG(thread, "new_thread_fn was not specified at spdk_thread_lib_init\n");
457db18916fSJim Harris 	} else {
458db18916fSJim Harris 		g_new_thread_fn = new_thread_fn;
459db18916fSJim Harris 	}
460db18916fSJim Harris 
461db18916fSJim Harris 	return _thread_lib_init(ctx_sz, SPDK_DEFAULT_MSG_MEMPOOL_SIZE);
462db18916fSJim Harris }
463db18916fSJim Harris 
464db18916fSJim Harris int
465db18916fSJim Harris spdk_thread_lib_init_ext(spdk_thread_op_fn thread_op_fn,
466db18916fSJim Harris 			 spdk_thread_op_supported_fn thread_op_supported_fn,
467db18916fSJim Harris 			 size_t ctx_sz, size_t msg_mempool_sz)
468db18916fSJim Harris {
469db18916fSJim Harris 	assert(g_new_thread_fn == NULL);
470db18916fSJim Harris 	assert(g_thread_op_fn == NULL);
471db18916fSJim Harris 	assert(g_thread_op_supported_fn == NULL);
472db18916fSJim Harris 
473db18916fSJim Harris 	if ((thread_op_fn != NULL) != (thread_op_supported_fn != NULL)) {
474db18916fSJim Harris 		SPDK_ERRLOG("Both must be defined or undefined together.\n");
475db18916fSJim Harris 		return -EINVAL;
476db18916fSJim Harris 	}
477db18916fSJim Harris 
478db18916fSJim Harris 	if (thread_op_fn == NULL && thread_op_supported_fn == NULL) {
479db18916fSJim Harris 		SPDK_INFOLOG(thread, "thread_op_fn and thread_op_supported_fn were not specified\n");
480db18916fSJim Harris 	} else {
481db18916fSJim Harris 		g_thread_op_fn = thread_op_fn;
482db18916fSJim Harris 		g_thread_op_supported_fn = thread_op_supported_fn;
483db18916fSJim Harris 	}
484db18916fSJim Harris 
485db18916fSJim Harris 	return _thread_lib_init(ctx_sz, msg_mempool_sz);
486db18916fSJim Harris }
487db18916fSJim Harris 
488db18916fSJim Harris void
489db18916fSJim Harris spdk_thread_lib_fini(void)
490db18916fSJim Harris {
491db18916fSJim Harris 	struct io_device *dev;
492db18916fSJim Harris 
493db18916fSJim Harris 	RB_FOREACH(dev, io_device_tree, &g_io_devices) {
494db18916fSJim Harris 		SPDK_ERRLOG("io_device %s not unregistered\n", dev->name);
495db18916fSJim Harris 	}
496db18916fSJim Harris 
497db18916fSJim Harris 	g_new_thread_fn = NULL;
498db18916fSJim Harris 	g_thread_op_fn = NULL;
499db18916fSJim Harris 	g_thread_op_supported_fn = NULL;
500db18916fSJim Harris 	g_ctx_sz = 0;
501090b8af1SJim Harris 	if (g_app_thread != NULL) {
502090b8af1SJim Harris 		_free_thread(g_app_thread);
503090b8af1SJim Harris 		g_app_thread = NULL;
504090b8af1SJim Harris 	}
505090b8af1SJim Harris 
506090b8af1SJim Harris 	if (g_spdk_msg_mempool) {
507090b8af1SJim Harris 		spdk_mempool_free(g_spdk_msg_mempool);
508090b8af1SJim Harris 		g_spdk_msg_mempool = NULL;
509090b8af1SJim Harris 	}
510db18916fSJim Harris }
511db18916fSJim Harris 
51202d75f62SBen Walker struct spdk_thread *
5139644491dSKefu Chai spdk_thread_create(const char *name, const struct spdk_cpuset *cpumask)
51402d75f62SBen Walker {
515090b8af1SJim Harris 	struct spdk_thread *thread, *null_thread;
516338475bdSAlex Michon 	size_t size = SPDK_ALIGN_CEIL(sizeof(*thread) + g_ctx_sz, SPDK_CACHE_LINE_SIZE);
5172446c5c6SBen Walker 	struct spdk_msg *msgs[SPDK_MSG_MEMPOOL_CACHE_SIZE];
518d82d6901SShuhei Matsumoto 	int rc = 0, i;
51902d75f62SBen Walker 
520338475bdSAlex Michon 	/* Since this spdk_thread object will be used by another core, ensure that it won't share a
521338475bdSAlex Michon 	 * cache line with any other object allocated on this core */
522338475bdSAlex Michon 	rc = posix_memalign((void **)&thread, SPDK_CACHE_LINE_SIZE, size);
523338475bdSAlex Michon 	if (rc != 0) {
52402d75f62SBen Walker 		SPDK_ERRLOG("Unable to allocate memory for thread\n");
52502d75f62SBen Walker 		return NULL;
52602d75f62SBen Walker 	}
527338475bdSAlex Michon 	memset(thread, 0, size);
52802d75f62SBen Walker 
5295d0b5e2cSBen Walker 	if (cpumask) {
530752fa1caSShuhei Matsumoto 		spdk_cpuset_copy(&thread->cpumask, cpumask);
5315d0b5e2cSBen Walker 	} else {
532752fa1caSShuhei Matsumoto 		spdk_cpuset_negate(&thread->cpumask);
5335d0b5e2cSBen Walker 	}
5345d0b5e2cSBen Walker 
535df559ab6SJiewei Ke 	RB_INIT(&thread->io_channels);
53608d36a55SBen Walker 	TAILQ_INIT(&thread->active_pollers);
5374e9adb3bSShuhei Matsumoto 	RB_INIT(&thread->timed_pollers);
5389ec598d1SKonrad Sztyber 	TAILQ_INIT(&thread->paused_pollers);
5392446c5c6SBen Walker 	SLIST_INIT(&thread->msg_cache);
5402446c5c6SBen Walker 	thread->msg_cache_count = 0;
54108d36a55SBen Walker 
54215d36310SBen Walker 	thread->tsc_last = spdk_get_ticks();
54315d36310SBen Walker 
54429c0e0dcSMichael Piszczek 	/* Monotonic increasing ID is set to each created poller beginning at 1. Once the
54529c0e0dcSMichael Piszczek 	 * ID exceeds UINT64_MAX a warning message is logged
54629c0e0dcSMichael Piszczek 	 */
54729c0e0dcSMichael Piszczek 	thread->next_poller_id = 1;
54829c0e0dcSMichael Piszczek 
549186b109dSJim Harris 	thread->messages = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 65536, SPDK_ENV_NUMA_ID_ANY);
550d761ddbfSBen Walker 	if (!thread->messages) {
551d761ddbfSBen Walker 		SPDK_ERRLOG("Unable to allocate memory for message ring\n");
552d761ddbfSBen Walker 		free(thread);
553d761ddbfSBen Walker 		return NULL;
554d761ddbfSBen Walker 	}
555d761ddbfSBen Walker 
5562446c5c6SBen Walker 	/* Fill the local message pool cache. */
5572446c5c6SBen Walker 	rc = spdk_mempool_get_bulk(g_spdk_msg_mempool, (void **)msgs, SPDK_MSG_MEMPOOL_CACHE_SIZE);
5582446c5c6SBen Walker 	if (rc == 0) {
5592446c5c6SBen Walker 		/* If we can't populate the cache it's ok. The cache will get filled
5602446c5c6SBen Walker 		 * up organically as messages are passed to the thread. */
5612446c5c6SBen Walker 		for (i = 0; i < SPDK_MSG_MEMPOOL_CACHE_SIZE; i++) {
5622446c5c6SBen Walker 			SLIST_INSERT_HEAD(&thread->msg_cache, msgs[i], link);
5632446c5c6SBen Walker 			thread->msg_cache_count++;
5642446c5c6SBen Walker 		}
5652446c5c6SBen Walker 	}
5662446c5c6SBen Walker 
56702d75f62SBen Walker 	if (name) {
56809013306SShuhei Matsumoto 		snprintf(thread->name, sizeof(thread->name), "%s", name);
569ec571793SBen Walker 	} else {
57009013306SShuhei Matsumoto 		snprintf(thread->name, sizeof(thread->name), "%p", thread);
57102d75f62SBen Walker 	}
57202d75f62SBen Walker 
573fa6aec49SAnisa Su 	thread->trace_id = spdk_trace_register_owner(OWNER_TYPE_THREAD, thread->name);
574fa6aec49SAnisa Su 
575978725c0SBen Walker 	pthread_mutex_lock(&g_devlist_mutex);
576515733caSShuhei Matsumoto 	if (g_thread_id == 0) {
577515733caSShuhei Matsumoto 		SPDK_ERRLOG("Thread ID rolled over. Further thread creation is not allowed.\n");
578515733caSShuhei Matsumoto 		pthread_mutex_unlock(&g_devlist_mutex);
579515733caSShuhei Matsumoto 		_free_thread(thread);
580515733caSShuhei Matsumoto 		return NULL;
581515733caSShuhei Matsumoto 	}
582515733caSShuhei Matsumoto 	thread->id = g_thread_id++;
583978725c0SBen Walker 	TAILQ_INSERT_TAIL(&g_threads, thread, tailq);
584978725c0SBen Walker 	g_thread_count++;
58502d75f62SBen Walker 	pthread_mutex_unlock(&g_devlist_mutex);
58602d75f62SBen Walker 
5872172c432STomasz Zawadzki 	SPDK_DEBUGLOG(thread, "Allocating new thread (%" PRIu64 ", %s)\n",
588515733caSShuhei Matsumoto 		      thread->id, thread->name);
589515733caSShuhei Matsumoto 
5904bf6e4bbSLiu Xiaodong 	if (spdk_interrupt_mode_is_enabled()) {
59153429c7fSLiu Xiaodong 		thread->in_interrupt = true;
5924bf6e4bbSLiu Xiaodong 		rc = thread_interrupt_create(thread);
5934bf6e4bbSLiu Xiaodong 		if (rc != 0) {
5944bf6e4bbSLiu Xiaodong 			_free_thread(thread);
5954bf6e4bbSLiu Xiaodong 			return NULL;
5964bf6e4bbSLiu Xiaodong 		}
5974bf6e4bbSLiu Xiaodong 	}
5984bf6e4bbSLiu Xiaodong 
599aaa9a278SBen Walker 	if (g_new_thread_fn) {
600835d21a2SBen Walker 		rc = g_new_thread_fn(thread);
601d82d6901SShuhei Matsumoto 	} else if (g_thread_op_supported_fn && g_thread_op_supported_fn(SPDK_THREAD_OP_NEW)) {
602d82d6901SShuhei Matsumoto 		rc = g_thread_op_fn(thread, SPDK_THREAD_OP_NEW);
603d82d6901SShuhei Matsumoto 	}
604d82d6901SShuhei Matsumoto 
605835d21a2SBen Walker 	if (rc != 0) {
606dcb8ba7bSBen Walker 		_free_thread(thread);
607835d21a2SBen Walker 		return NULL;
608835d21a2SBen Walker 	}
609aaa9a278SBen Walker 
6106397735bSShuhei Matsumoto 	thread->state = SPDK_THREAD_STATE_RUNNING;
6116397735bSShuhei Matsumoto 
612090b8af1SJim Harris 	/* If this is the first thread, save it as the app thread.  Use an atomic
613090b8af1SJim Harris 	 * compare + exchange to guard against crazy users who might try to
614090b8af1SJim Harris 	 * call spdk_thread_create() simultaneously on multiple threads.
615090b8af1SJim Harris 	 */
616090b8af1SJim Harris 	null_thread = NULL;
617090b8af1SJim Harris 	__atomic_compare_exchange_n(&g_app_thread, &null_thread, thread, false,
618090b8af1SJim Harris 				    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
619090b8af1SJim Harris 
62002d75f62SBen Walker 	return thread;
62102d75f62SBen Walker }
62202d75f62SBen Walker 
623090b8af1SJim Harris struct spdk_thread *
624090b8af1SJim Harris spdk_thread_get_app_thread(void)
625090b8af1SJim Harris {
626090b8af1SJim Harris 	return g_app_thread;
627090b8af1SJim Harris }
628090b8af1SJim Harris 
6291b1967bdSJim Harris bool
6301b1967bdSJim Harris spdk_thread_is_app_thread(struct spdk_thread *thread)
6311b1967bdSJim Harris {
6321b1967bdSJim Harris 	if (thread == NULL) {
6331b1967bdSJim Harris 		thread = _get_thread();
6341b1967bdSJim Harris 	}
6351b1967bdSJim Harris 
6361b1967bdSJim Harris 	return g_app_thread == thread;
6371b1967bdSJim Harris }
6381b1967bdSJim Harris 
63902d75f62SBen Walker void
6403f4b2c67SYifan Bian spdk_thread_bind(struct spdk_thread *thread, bool bind)
6413f4b2c67SYifan Bian {
6423f4b2c67SYifan Bian 	thread->is_bound = bind;
6433f4b2c67SYifan Bian }
6443f4b2c67SYifan Bian 
6453f4b2c67SYifan Bian bool
6463f4b2c67SYifan Bian spdk_thread_is_bound(struct spdk_thread *thread)
6473f4b2c67SYifan Bian {
6483f4b2c67SYifan Bian 	return thread->is_bound;
6493f4b2c67SYifan Bian }
6503f4b2c67SYifan Bian 
6513f4b2c67SYifan Bian void
652605e530aSBen Walker spdk_set_thread(struct spdk_thread *thread)
653605e530aSBen Walker {
654605e530aSBen Walker 	tls_thread = thread;
655605e530aSBen Walker }
656605e530aSBen Walker 
657e9aec674SShuhei Matsumoto static void
6580af754f0SSeth Howell thread_exit(struct spdk_thread *thread, uint64_t now)
65902d75f62SBen Walker {
660648d6cd5SShuhei Matsumoto 	struct spdk_poller *poller;
661e038e096SShuhei Matsumoto 	struct spdk_io_channel *ch;
662648d6cd5SShuhei Matsumoto 
663a12aae4cSShuhei Matsumoto 	if (now >= thread->exit_timeout_tsc) {
664a12aae4cSShuhei Matsumoto 		SPDK_ERRLOG("thread %s got timeout, and move it to the exited state forcefully\n",
665a12aae4cSShuhei Matsumoto 			    thread->name);
666a12aae4cSShuhei Matsumoto 		goto exited;
667a12aae4cSShuhei Matsumoto 	}
668a12aae4cSShuhei Matsumoto 
66985d70c03SJim Harris 	if (spdk_ring_count(thread->messages) > 0) {
67085d70c03SJim Harris 		SPDK_INFOLOG(thread, "thread %s still has messages\n", thread->name);
67185d70c03SJim Harris 		return;
67285d70c03SJim Harris 	}
67385d70c03SJim Harris 
6746f4e0c95SShuhei Matsumoto 	if (thread->for_each_count > 0) {
6756f4e0c95SShuhei Matsumoto 		SPDK_INFOLOG(thread, "thread %s is still executing %u for_each_channels/threads\n",
6766f4e0c95SShuhei Matsumoto 			     thread->name, thread->for_each_count);
6776f4e0c95SShuhei Matsumoto 		return;
6786f4e0c95SShuhei Matsumoto 	}
6796f4e0c95SShuhei Matsumoto 
680648d6cd5SShuhei Matsumoto 	TAILQ_FOREACH(poller, &thread->active_pollers, tailq) {
681648d6cd5SShuhei Matsumoto 		if (poller->state != SPDK_POLLER_STATE_UNREGISTERED) {
6822172c432STomasz Zawadzki 			SPDK_INFOLOG(thread,
683e9aec674SShuhei Matsumoto 				     "thread %s still has active poller %s\n",
684b992bb4eSShuhei Matsumoto 				     thread->name, poller->name);
685e9aec674SShuhei Matsumoto 			return;
686648d6cd5SShuhei Matsumoto 		}
687648d6cd5SShuhei Matsumoto 	}
688648d6cd5SShuhei Matsumoto 
6894e9adb3bSShuhei Matsumoto 	RB_FOREACH(poller, timed_pollers_tree, &thread->timed_pollers) {
690648d6cd5SShuhei Matsumoto 		if (poller->state != SPDK_POLLER_STATE_UNREGISTERED) {
6912172c432STomasz Zawadzki 			SPDK_INFOLOG(thread,
692e9aec674SShuhei Matsumoto 				     "thread %s still has active timed poller %s\n",
693b992bb4eSShuhei Matsumoto 				     thread->name, poller->name);
694e9aec674SShuhei Matsumoto 			return;
695648d6cd5SShuhei Matsumoto 		}
696648d6cd5SShuhei Matsumoto 	}
697648d6cd5SShuhei Matsumoto 
698648d6cd5SShuhei Matsumoto 	TAILQ_FOREACH(poller, &thread->paused_pollers, tailq) {
6992172c432STomasz Zawadzki 		SPDK_INFOLOG(thread,
700e9aec674SShuhei Matsumoto 			     "thread %s still has paused poller %s\n",
701b992bb4eSShuhei Matsumoto 			     thread->name, poller->name);
702e9aec674SShuhei Matsumoto 		return;
703648d6cd5SShuhei Matsumoto 	}
704648d6cd5SShuhei Matsumoto 
705df559ab6SJiewei Ke 	RB_FOREACH(ch, io_channel_tree, &thread->io_channels) {
7062172c432STomasz Zawadzki 		SPDK_INFOLOG(thread,
707e9aec674SShuhei Matsumoto 			     "thread %s still has channel for io_device %s\n",
708e038e096SShuhei Matsumoto 			     thread->name, ch->dev->name);
709e9aec674SShuhei Matsumoto 		return;
710e038e096SShuhei Matsumoto 	}
711e038e096SShuhei Matsumoto 
7126fdc71ecSShuhei Matsumoto 	if (thread->pending_unregister_count > 0) {
7136fdc71ecSShuhei Matsumoto 		SPDK_INFOLOG(thread,
7146fdc71ecSShuhei Matsumoto 			     "thread %s is still unregistering io_devices\n",
7156fdc71ecSShuhei Matsumoto 			     thread->name);
7166fdc71ecSShuhei Matsumoto 		return;
7176fdc71ecSShuhei Matsumoto 	}
7186fdc71ecSShuhei Matsumoto 
719a12aae4cSShuhei Matsumoto exited:
7206397735bSShuhei Matsumoto 	thread->state = SPDK_THREAD_STATE_EXITED;
72189c1e5bfSApokleos 	if (spdk_unlikely(thread->in_interrupt)) {
72289c1e5bfSApokleos 		g_thread_op_fn(thread, SPDK_THREAD_OP_RESCHED);
72389c1e5bfSApokleos 	}
724e036215fSBen Walker }
725e036215fSBen Walker 
726059073c4SBen Walker static void _thread_exit(void *ctx);
727059073c4SBen Walker 
728719343c9SShuhei Matsumoto int
729719343c9SShuhei Matsumoto spdk_thread_exit(struct spdk_thread *thread)
730719343c9SShuhei Matsumoto {
7312172c432STomasz Zawadzki 	SPDK_DEBUGLOG(thread, "Exit thread %s\n", thread->name);
732719343c9SShuhei Matsumoto 
733719343c9SShuhei Matsumoto 	assert(tls_thread == thread);
734719343c9SShuhei Matsumoto 
7356397735bSShuhei Matsumoto 	if (thread->state >= SPDK_THREAD_STATE_EXITING) {
7362172c432STomasz Zawadzki 		SPDK_INFOLOG(thread,
7376397735bSShuhei Matsumoto 			     "thread %s is already exiting\n",
738719343c9SShuhei Matsumoto 			     thread->name);
739719343c9SShuhei Matsumoto 		return 0;
740719343c9SShuhei Matsumoto 	}
741719343c9SShuhei Matsumoto 
742a12aae4cSShuhei Matsumoto 	thread->exit_timeout_tsc = spdk_get_ticks() + (spdk_get_ticks_hz() *
743a12aae4cSShuhei Matsumoto 				   SPDK_THREAD_EXIT_TIMEOUT_SEC);
744e9aec674SShuhei Matsumoto 	thread->state = SPDK_THREAD_STATE_EXITING;
745059073c4SBen Walker 
746059073c4SBen Walker 	if (spdk_interrupt_mode_is_enabled()) {
747059073c4SBen Walker 		spdk_thread_send_msg(thread, _thread_exit, thread);
748059073c4SBen Walker 	}
749059073c4SBen Walker 
750e9aec674SShuhei Matsumoto 	return 0;
751719343c9SShuhei Matsumoto }
752719343c9SShuhei Matsumoto 
753644e6420SShuhei Matsumoto bool
7548203e68eSJim Harris spdk_thread_is_running(struct spdk_thread *thread)
7558203e68eSJim Harris {
7568203e68eSJim Harris 	return thread->state == SPDK_THREAD_STATE_RUNNING;
7578203e68eSJim Harris }
7588203e68eSJim Harris 
7598203e68eSJim Harris bool
760644e6420SShuhei Matsumoto spdk_thread_is_exited(struct spdk_thread *thread)
761644e6420SShuhei Matsumoto {
7626397735bSShuhei Matsumoto 	return thread->state == SPDK_THREAD_STATE_EXITED;
763644e6420SShuhei Matsumoto }
764644e6420SShuhei Matsumoto 
765e036215fSBen Walker void
766e036215fSBen Walker spdk_thread_destroy(struct spdk_thread *thread)
767e036215fSBen Walker {
768090b8af1SJim Harris 	assert(thread != NULL);
7692172c432STomasz Zawadzki 	SPDK_DEBUGLOG(thread, "Destroy thread %s\n", thread->name);
770e036215fSBen Walker 
7716397735bSShuhei Matsumoto 	assert(thread->state == SPDK_THREAD_STATE_EXITED);
772ec571793SBen Walker 
7731068e22dSBen Walker 	if (tls_thread == thread) {
7741068e22dSBen Walker 		tls_thread = NULL;
7751068e22dSBen Walker 	}
7761068e22dSBen Walker 
777090b8af1SJim Harris 	/* To be safe, do not free the app thread until spdk_thread_lib_fini(). */
778090b8af1SJim Harris 	if (thread != g_app_thread) {
779dcb8ba7bSBen Walker 		_free_thread(thread);
78002d75f62SBen Walker 	}
781090b8af1SJim Harris }
78202d75f62SBen Walker 
78398104314SBen Walker void *
78498104314SBen Walker spdk_thread_get_ctx(struct spdk_thread *thread)
78598104314SBen Walker {
78698104314SBen Walker 	if (g_ctx_sz > 0) {
78798104314SBen Walker 		return thread->ctx;
78898104314SBen Walker 	}
78998104314SBen Walker 
79098104314SBen Walker 	return NULL;
79198104314SBen Walker }
79298104314SBen Walker 
79357706d18SDarek Stojaczyk struct spdk_cpuset *
79457706d18SDarek Stojaczyk spdk_thread_get_cpumask(struct spdk_thread *thread)
79557706d18SDarek Stojaczyk {
796752fa1caSShuhei Matsumoto 	return &thread->cpumask;
79757706d18SDarek Stojaczyk }
79857706d18SDarek Stojaczyk 
799713aafbcSShuhei Matsumoto int
800713aafbcSShuhei Matsumoto spdk_thread_set_cpumask(struct spdk_cpuset *cpumask)
801713aafbcSShuhei Matsumoto {
802713aafbcSShuhei Matsumoto 	struct spdk_thread *thread;
803713aafbcSShuhei Matsumoto 
804713aafbcSShuhei Matsumoto 	if (!g_thread_op_supported_fn || !g_thread_op_supported_fn(SPDK_THREAD_OP_RESCHED)) {
805713aafbcSShuhei Matsumoto 		SPDK_ERRLOG("Framework does not support reschedule operation.\n");
806713aafbcSShuhei Matsumoto 		assert(false);
807713aafbcSShuhei Matsumoto 		return -ENOTSUP;
808713aafbcSShuhei Matsumoto 	}
809713aafbcSShuhei Matsumoto 
810713aafbcSShuhei Matsumoto 	thread = spdk_get_thread();
811713aafbcSShuhei Matsumoto 	if (!thread) {
812713aafbcSShuhei Matsumoto 		SPDK_ERRLOG("Called from non-SPDK thread\n");
813713aafbcSShuhei Matsumoto 		assert(false);
814713aafbcSShuhei Matsumoto 		return -EINVAL;
815713aafbcSShuhei Matsumoto 	}
816713aafbcSShuhei Matsumoto 
817713aafbcSShuhei Matsumoto 	spdk_cpuset_copy(&thread->cpumask, cpumask);
818713aafbcSShuhei Matsumoto 
819713aafbcSShuhei Matsumoto 	/* Invoke framework's reschedule operation. If this function is called multiple times
820713aafbcSShuhei Matsumoto 	 * in a single spdk_thread_poll() context, the last cpumask will be used in the
821713aafbcSShuhei Matsumoto 	 * reschedule operation.
822713aafbcSShuhei Matsumoto 	 */
823713aafbcSShuhei Matsumoto 	g_thread_op_fn(thread, SPDK_THREAD_OP_RESCHED);
824713aafbcSShuhei Matsumoto 
825713aafbcSShuhei Matsumoto 	return 0;
826713aafbcSShuhei Matsumoto }
827713aafbcSShuhei Matsumoto 
8284e7bb83eSBen Walker struct spdk_thread *
8294e7bb83eSBen Walker spdk_thread_get_from_ctx(void *ctx)
8304e7bb83eSBen Walker {
8314e7bb83eSBen Walker 	if (ctx == NULL) {
8324e7bb83eSBen Walker 		assert(false);
8334e7bb83eSBen Walker 		return NULL;
8344e7bb83eSBen Walker 	}
8354e7bb83eSBen Walker 
8364e7bb83eSBen Walker 	assert(g_ctx_sz > 0);
8374e7bb83eSBen Walker 
8384e7bb83eSBen Walker 	return SPDK_CONTAINEROF(ctx, struct spdk_thread, ctx);
8394e7bb83eSBen Walker }
8404e7bb83eSBen Walker 
841d761ddbfSBen Walker static inline uint32_t
8420af754f0SSeth Howell msg_queue_run_batch(struct spdk_thread *thread, uint32_t max_msgs)
843d761ddbfSBen Walker {
844d761ddbfSBen Walker 	unsigned count, i;
845d761ddbfSBen Walker 	void *messages[SPDK_MSG_BATCH_SIZE];
8464bf6e4bbSLiu Xiaodong 	uint64_t notify = 1;
847a3c3c0b5SLiu Xiaodong 	int rc;
848d761ddbfSBen Walker 
849d761ddbfSBen Walker #ifdef DEBUG
850d761ddbfSBen Walker 	/*
851d761ddbfSBen Walker 	 * spdk_ring_dequeue() fills messages and returns how many entries it wrote,
852d761ddbfSBen Walker 	 * so we will never actually read uninitialized data from events, but just to be sure
853d761ddbfSBen Walker 	 * (and to silence a static analyzer false positive), initialize the array to NULL pointers.
854d761ddbfSBen Walker 	 */
855d761ddbfSBen Walker 	memset(messages, 0, sizeof(messages));
856d761ddbfSBen Walker #endif
857d761ddbfSBen Walker 
858d761ddbfSBen Walker 	if (max_msgs > 0) {
859d761ddbfSBen Walker 		max_msgs = spdk_min(max_msgs, SPDK_MSG_BATCH_SIZE);
860d761ddbfSBen Walker 	} else {
861d761ddbfSBen Walker 		max_msgs = SPDK_MSG_BATCH_SIZE;
862d761ddbfSBen Walker 	}
863d761ddbfSBen Walker 
864d761ddbfSBen Walker 	count = spdk_ring_dequeue(thread->messages, messages, max_msgs);
86553429c7fSLiu Xiaodong 	if (spdk_unlikely(thread->in_interrupt) &&
86653429c7fSLiu Xiaodong 	    spdk_ring_count(thread->messages) != 0) {
867a3c3c0b5SLiu Xiaodong 		rc = write(thread->msg_fd, &notify, sizeof(notify));
868a3c3c0b5SLiu Xiaodong 		if (rc < 0) {
869a3c3c0b5SLiu Xiaodong 			SPDK_ERRLOG("failed to notify msg_queue: %s.\n", spdk_strerror(errno));
870a3c3c0b5SLiu Xiaodong 		}
8714bf6e4bbSLiu Xiaodong 	}
872d761ddbfSBen Walker 	if (count == 0) {
873d761ddbfSBen Walker 		return 0;
874d761ddbfSBen Walker 	}
875d761ddbfSBen Walker 
876d761ddbfSBen Walker 	for (i = 0; i < count; i++) {
877d761ddbfSBen Walker 		struct spdk_msg *msg = messages[i];
878d761ddbfSBen Walker 
879d761ddbfSBen Walker 		assert(msg != NULL);
88070f3606bSJohn Levon 
88170f3606bSJohn Levon 		SPDK_DTRACE_PROBE2(msg_exec, msg->fn, msg->arg);
88270f3606bSJohn Levon 
883d761ddbfSBen Walker 		msg->fn(msg->arg);
884d761ddbfSBen Walker 
885cd2bcf10SMike Gerdts 		SPIN_ASSERT(thread->lock_count == 0, SPIN_ERR_HOLD_DURING_SWITCH);
886cd2bcf10SMike Gerdts 
8872446c5c6SBen Walker 		if (thread->msg_cache_count < SPDK_MSG_MEMPOOL_CACHE_SIZE) {
8882446c5c6SBen Walker 			/* Insert the messages at the head. We want to re-use the hot
8892446c5c6SBen Walker 			 * ones. */
8902446c5c6SBen Walker 			SLIST_INSERT_HEAD(&thread->msg_cache, msg, link);
8912446c5c6SBen Walker 			thread->msg_cache_count++;
8922446c5c6SBen Walker 		} else {
8932446c5c6SBen Walker 			spdk_mempool_put(g_spdk_msg_mempool, msg);
8942446c5c6SBen Walker 		}
8952446c5c6SBen Walker 	}
896d761ddbfSBen Walker 
897d761ddbfSBen Walker 	return count;
898d761ddbfSBen Walker }
899d761ddbfSBen Walker 
90008d36a55SBen Walker static void
9010af754f0SSeth Howell poller_insert_timer(struct spdk_thread *thread, struct spdk_poller *poller, uint64_t now)
90208d36a55SBen Walker {
9034e9adb3bSShuhei Matsumoto 	struct spdk_poller *tmp __attribute__((unused));
90408d36a55SBen Walker 
90508d36a55SBen Walker 	poller->next_run_tick = now + poller->period_ticks;
90608d36a55SBen Walker 
90708d36a55SBen Walker 	/*
9084e9adb3bSShuhei Matsumoto 	 * Insert poller in the thread's timed_pollers tree by next scheduled run time
9094e9adb3bSShuhei Matsumoto 	 * as its key.
91008d36a55SBen Walker 	 */
9114e9adb3bSShuhei Matsumoto 	tmp = RB_INSERT(timed_pollers_tree, &thread->timed_pollers, poller);
9124e9adb3bSShuhei Matsumoto 	assert(tmp == NULL);
91308d36a55SBen Walker 
9144e9adb3bSShuhei Matsumoto 	/* Update the cache only if it is empty or the inserted poller is earlier than it.
9154e9adb3bSShuhei Matsumoto 	 * RB_MIN() is not necessary here because all pollers, which has exactly the same
9164e9adb3bSShuhei Matsumoto 	 * next_run_tick as the existing poller, are inserted on the right side.
9174e9adb3bSShuhei Matsumoto 	 */
9184e9adb3bSShuhei Matsumoto 	if (thread->first_timed_poller == NULL ||
9194e9adb3bSShuhei Matsumoto 	    poller->next_run_tick < thread->first_timed_poller->next_run_tick) {
9204748ebefSShuhei Matsumoto 		thread->first_timed_poller = poller;
92108d36a55SBen Walker 	}
9224e9adb3bSShuhei Matsumoto }
92308d36a55SBen Walker 
924f5313b55SShuhei Matsumoto static inline void
925f5313b55SShuhei Matsumoto poller_remove_timer(struct spdk_thread *thread, struct spdk_poller *poller)
926f5313b55SShuhei Matsumoto {
9274e9adb3bSShuhei Matsumoto 	struct spdk_poller *tmp __attribute__((unused));
9284748ebefSShuhei Matsumoto 
9294e9adb3bSShuhei Matsumoto 	tmp = RB_REMOVE(timed_pollers_tree, &thread->timed_pollers, poller);
9304e9adb3bSShuhei Matsumoto 	assert(tmp != NULL);
9314e9adb3bSShuhei Matsumoto 
9324e9adb3bSShuhei Matsumoto 	/* This function is not used in any case that is performance critical.
9334e9adb3bSShuhei Matsumoto 	 * Update the cache simply by RB_MIN() if it needs to be changed.
9344e9adb3bSShuhei Matsumoto 	 */
9354748ebefSShuhei Matsumoto 	if (thread->first_timed_poller == poller) {
9364e9adb3bSShuhei Matsumoto 		thread->first_timed_poller = RB_MIN(timed_pollers_tree, &thread->timed_pollers);
9374748ebefSShuhei Matsumoto 	}
938f5313b55SShuhei Matsumoto }
939f5313b55SShuhei Matsumoto 
9409ec598d1SKonrad Sztyber static void
9410af754f0SSeth Howell thread_insert_poller(struct spdk_thread *thread, struct spdk_poller *poller)
9429ec598d1SKonrad Sztyber {
9439ec598d1SKonrad Sztyber 	if (poller->period_ticks) {
9440af754f0SSeth Howell 		poller_insert_timer(thread, poller, spdk_get_ticks());
9459ec598d1SKonrad Sztyber 	} else {
9469ec598d1SKonrad Sztyber 		TAILQ_INSERT_TAIL(&thread->active_pollers, poller, tailq);
9479ec598d1SKonrad Sztyber 	}
9489ec598d1SKonrad Sztyber }
9499ec598d1SKonrad Sztyber 
95082ef7574SShuhei Matsumoto static inline void
9510af754f0SSeth Howell thread_update_stats(struct spdk_thread *thread, uint64_t end,
9522139be15SShuhei Matsumoto 		    uint64_t start, int rc)
95382ef7574SShuhei Matsumoto {
95482ef7574SShuhei Matsumoto 	if (rc == 0) {
95582ef7574SShuhei Matsumoto 		/* Poller status idle */
9562139be15SShuhei Matsumoto 		thread->stats.idle_tsc += end - start;
95782ef7574SShuhei Matsumoto 	} else if (rc > 0) {
95882ef7574SShuhei Matsumoto 		/* Poller status busy */
9592139be15SShuhei Matsumoto 		thread->stats.busy_tsc += end - start;
96082ef7574SShuhei Matsumoto 	}
9612139be15SShuhei Matsumoto 	/* Store end time to use it as start time of the next spdk_thread_poll(). */
9622139be15SShuhei Matsumoto 	thread->tsc_last = end;
96382ef7574SShuhei Matsumoto }
96482ef7574SShuhei Matsumoto 
96554215a1eSShuhei Matsumoto static inline int
96654215a1eSShuhei Matsumoto thread_execute_poller(struct spdk_thread *thread, struct spdk_poller *poller)
96754215a1eSShuhei Matsumoto {
96854215a1eSShuhei Matsumoto 	int rc;
96954215a1eSShuhei Matsumoto 
9703f45ed24SShuhei Matsumoto 	switch (poller->state) {
9713f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_UNREGISTERED:
97254215a1eSShuhei Matsumoto 		TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
97354215a1eSShuhei Matsumoto 		free(poller);
97454215a1eSShuhei Matsumoto 		return 0;
9753f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSING:
97654215a1eSShuhei Matsumoto 		TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
97754215a1eSShuhei Matsumoto 		TAILQ_INSERT_TAIL(&thread->paused_pollers, poller, tailq);
97854215a1eSShuhei Matsumoto 		poller->state = SPDK_POLLER_STATE_PAUSED;
97954215a1eSShuhei Matsumoto 		return 0;
9803f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_WAITING:
9813f45ed24SShuhei Matsumoto 		break;
9823f45ed24SShuhei Matsumoto 	default:
9833f45ed24SShuhei Matsumoto 		assert(false);
9843f45ed24SShuhei Matsumoto 		break;
98554215a1eSShuhei Matsumoto 	}
98654215a1eSShuhei Matsumoto 
98754215a1eSShuhei Matsumoto 	poller->state = SPDK_POLLER_STATE_RUNNING;
98854215a1eSShuhei Matsumoto 	rc = poller->fn(poller->arg);
98954215a1eSShuhei Matsumoto 
990cd2bcf10SMike Gerdts 	SPIN_ASSERT(thread->lock_count == 0, SPIN_ERR_HOLD_DURING_SWITCH);
991cd2bcf10SMike Gerdts 
99254215a1eSShuhei Matsumoto 	poller->run_count++;
99354215a1eSShuhei Matsumoto 	if (rc > 0) {
99454215a1eSShuhei Matsumoto 		poller->busy_count++;
99554215a1eSShuhei Matsumoto 	}
99654215a1eSShuhei Matsumoto 
99754215a1eSShuhei Matsumoto #ifdef DEBUG
99854215a1eSShuhei Matsumoto 	if (rc == -1) {
99954215a1eSShuhei Matsumoto 		SPDK_DEBUGLOG(thread, "Poller %s returned -1\n", poller->name);
100054215a1eSShuhei Matsumoto 	}
100154215a1eSShuhei Matsumoto #endif
100254215a1eSShuhei Matsumoto 
10033f45ed24SShuhei Matsumoto 	switch (poller->state) {
10043f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_UNREGISTERED:
100554215a1eSShuhei Matsumoto 		TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
100654215a1eSShuhei Matsumoto 		free(poller);
10073f45ed24SShuhei Matsumoto 		break;
10083ca15e33SShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSING:
10093ca15e33SShuhei Matsumoto 		TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
10103ca15e33SShuhei Matsumoto 		TAILQ_INSERT_TAIL(&thread->paused_pollers, poller, tailq);
10113ca15e33SShuhei Matsumoto 		poller->state = SPDK_POLLER_STATE_PAUSED;
10123ca15e33SShuhei Matsumoto 		break;
10133f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSED:
10143ca15e33SShuhei Matsumoto 	case SPDK_POLLER_STATE_WAITING:
10153f45ed24SShuhei Matsumoto 		break;
10163f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_RUNNING:
101754215a1eSShuhei Matsumoto 		poller->state = SPDK_POLLER_STATE_WAITING;
10183f45ed24SShuhei Matsumoto 		break;
10193f45ed24SShuhei Matsumoto 	default:
10203f45ed24SShuhei Matsumoto 		assert(false);
10213f45ed24SShuhei Matsumoto 		break;
102254215a1eSShuhei Matsumoto 	}
102354215a1eSShuhei Matsumoto 
102454215a1eSShuhei Matsumoto 	return rc;
102554215a1eSShuhei Matsumoto }
102654215a1eSShuhei Matsumoto 
102754215a1eSShuhei Matsumoto static inline int
102854215a1eSShuhei Matsumoto thread_execute_timed_poller(struct spdk_thread *thread, struct spdk_poller *poller,
102954215a1eSShuhei Matsumoto 			    uint64_t now)
103054215a1eSShuhei Matsumoto {
103154215a1eSShuhei Matsumoto 	int rc;
103254215a1eSShuhei Matsumoto 
10333f45ed24SShuhei Matsumoto 	switch (poller->state) {
10343f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_UNREGISTERED:
103554215a1eSShuhei Matsumoto 		free(poller);
103654215a1eSShuhei Matsumoto 		return 0;
10373f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSING:
103854215a1eSShuhei Matsumoto 		TAILQ_INSERT_TAIL(&thread->paused_pollers, poller, tailq);
103954215a1eSShuhei Matsumoto 		poller->state = SPDK_POLLER_STATE_PAUSED;
104054215a1eSShuhei Matsumoto 		return 0;
10413f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_WAITING:
10423f45ed24SShuhei Matsumoto 		break;
10433f45ed24SShuhei Matsumoto 	default:
10443f45ed24SShuhei Matsumoto 		assert(false);
10453f45ed24SShuhei Matsumoto 		break;
104654215a1eSShuhei Matsumoto 	}
104754215a1eSShuhei Matsumoto 
104854215a1eSShuhei Matsumoto 	poller->state = SPDK_POLLER_STATE_RUNNING;
104954215a1eSShuhei Matsumoto 	rc = poller->fn(poller->arg);
105054215a1eSShuhei Matsumoto 
1051cd2bcf10SMike Gerdts 	SPIN_ASSERT(thread->lock_count == 0, SPIN_ERR_HOLD_DURING_SWITCH);
1052cd2bcf10SMike Gerdts 
105354215a1eSShuhei Matsumoto 	poller->run_count++;
105454215a1eSShuhei Matsumoto 	if (rc > 0) {
105554215a1eSShuhei Matsumoto 		poller->busy_count++;
105654215a1eSShuhei Matsumoto 	}
105754215a1eSShuhei Matsumoto 
105854215a1eSShuhei Matsumoto #ifdef DEBUG
105954215a1eSShuhei Matsumoto 	if (rc == -1) {
106054215a1eSShuhei Matsumoto 		SPDK_DEBUGLOG(thread, "Timed poller %s returned -1\n", poller->name);
106154215a1eSShuhei Matsumoto 	}
106254215a1eSShuhei Matsumoto #endif
106354215a1eSShuhei Matsumoto 
10643f45ed24SShuhei Matsumoto 	switch (poller->state) {
10653f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_UNREGISTERED:
106654215a1eSShuhei Matsumoto 		free(poller);
10673f45ed24SShuhei Matsumoto 		break;
10683ca15e33SShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSING:
10693ca15e33SShuhei Matsumoto 		TAILQ_INSERT_TAIL(&thread->paused_pollers, poller, tailq);
10703ca15e33SShuhei Matsumoto 		poller->state = SPDK_POLLER_STATE_PAUSED;
10713ca15e33SShuhei Matsumoto 		break;
10723f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSED:
10733f45ed24SShuhei Matsumoto 		break;
10743f45ed24SShuhei Matsumoto 	case SPDK_POLLER_STATE_RUNNING:
107554215a1eSShuhei Matsumoto 		poller->state = SPDK_POLLER_STATE_WAITING;
10763ca15e33SShuhei Matsumoto 	/* fallthrough */
10773ca15e33SShuhei Matsumoto 	case SPDK_POLLER_STATE_WAITING:
107854215a1eSShuhei Matsumoto 		poller_insert_timer(thread, poller, now);
10793f45ed24SShuhei Matsumoto 		break;
10803f45ed24SShuhei Matsumoto 	default:
10813f45ed24SShuhei Matsumoto 		assert(false);
10823f45ed24SShuhei Matsumoto 		break;
108354215a1eSShuhei Matsumoto 	}
108454215a1eSShuhei Matsumoto 
108554215a1eSShuhei Matsumoto 	return rc;
108654215a1eSShuhei Matsumoto }
108754215a1eSShuhei Matsumoto 
108898eca6faSAlexey Marchuk static inline void
108998eca6faSAlexey Marchuk thread_run_pp_handlers(struct spdk_thread *thread)
109098eca6faSAlexey Marchuk {
109198eca6faSAlexey Marchuk 	uint8_t i, count = thread->num_pp_handlers;
109298eca6faSAlexey Marchuk 
109398eca6faSAlexey Marchuk 	/* Set to max value to prevent new handlers registration within the callback */
109498eca6faSAlexey Marchuk 	thread->num_pp_handlers = SPDK_THREAD_MAX_POST_POLLER_HANDLERS;
109598eca6faSAlexey Marchuk 
109698eca6faSAlexey Marchuk 	for (i = 0; i < count; i++) {
109798eca6faSAlexey Marchuk 		thread->pp_handlers[i].fn(thread->pp_handlers[i].fn_arg);
109898eca6faSAlexey Marchuk 		thread->pp_handlers[i].fn = NULL;
109998eca6faSAlexey Marchuk 	}
110098eca6faSAlexey Marchuk 
110198eca6faSAlexey Marchuk 	thread->num_pp_handlers = 0;
110298eca6faSAlexey Marchuk }
110398eca6faSAlexey Marchuk 
1104a4a8080fSShuhei Matsumoto static int
11050af754f0SSeth Howell thread_poll(struct spdk_thread *thread, uint32_t max_msgs, uint64_t now)
110608d36a55SBen Walker {
1107d761ddbfSBen Walker 	uint32_t msg_count;
1108836356f2SBen Walker 	struct spdk_poller *poller, *tmp;
1109907efcd7SKozlowski Mateusz 	spdk_msg_fn critical_msg;
111008d36a55SBen Walker 	int rc = 0;
111108d36a55SBen Walker 
1112aa6b6f08SBen Walker 	thread->tsc_last = now;
1113aa6b6f08SBen Walker 
1114907efcd7SKozlowski Mateusz 	critical_msg = thread->critical_msg;
1115907efcd7SKozlowski Mateusz 	if (spdk_unlikely(critical_msg != NULL)) {
1116907efcd7SKozlowski Mateusz 		critical_msg(NULL);
1117907efcd7SKozlowski Mateusz 		thread->critical_msg = NULL;
1118b8db1af4SLiu Xiaodong 		rc = 1;
1119907efcd7SKozlowski Mateusz 	}
1120907efcd7SKozlowski Mateusz 
11210af754f0SSeth Howell 	msg_count = msg_queue_run_batch(thread, max_msgs);
1122d761ddbfSBen Walker 	if (msg_count) {
1123d761ddbfSBen Walker 		rc = 1;
1124d761ddbfSBen Walker 	}
1125d761ddbfSBen Walker 
1126836356f2SBen Walker 	TAILQ_FOREACH_REVERSE_SAFE(poller, &thread->active_pollers,
1127836356f2SBen Walker 				   active_pollers_head, tailq, tmp) {
1128d761ddbfSBen Walker 		int poller_rc;
1129d761ddbfSBen Walker 
113054215a1eSShuhei Matsumoto 		poller_rc = thread_execute_poller(thread, poller);
1131d761ddbfSBen Walker 		if (poller_rc > rc) {
1132d761ddbfSBen Walker 			rc = poller_rc;
1133d761ddbfSBen Walker 		}
113498eca6faSAlexey Marchuk 		if (thread->num_pp_handlers) {
113598eca6faSAlexey Marchuk 			thread_run_pp_handlers(thread);
113698eca6faSAlexey Marchuk 		}
113708d36a55SBen Walker 	}
113808d36a55SBen Walker 
11394748ebefSShuhei Matsumoto 	poller = thread->first_timed_poller;
1140c204c3d7SShuhei Matsumoto 	while (poller != NULL) {
114108d36a55SBen Walker 		int timer_rc = 0;
114208d36a55SBen Walker 
114384ec9989SShuhei Matsumoto 		if (now < poller->next_run_tick) {
114484ec9989SShuhei Matsumoto 			break;
114584ec9989SShuhei Matsumoto 		}
114684ec9989SShuhei Matsumoto 
11474e9adb3bSShuhei Matsumoto 		tmp = RB_NEXT(timed_pollers_tree, &thread->timed_pollers, poller);
11484e9adb3bSShuhei Matsumoto 		RB_REMOVE(timed_pollers_tree, &thread->timed_pollers, poller);
11494eb96aebSShuhei Matsumoto 
11504eb96aebSShuhei Matsumoto 		/* Update the cache to the next timed poller in the list
11514eb96aebSShuhei Matsumoto 		 * only if the current poller is still the closest, otherwise,
11524eb96aebSShuhei Matsumoto 		 * do nothing because the cache has been already updated.
11534eb96aebSShuhei Matsumoto 		 */
11544eb96aebSShuhei Matsumoto 		if (thread->first_timed_poller == poller) {
11554eb96aebSShuhei Matsumoto 			thread->first_timed_poller = tmp;
11564eb96aebSShuhei Matsumoto 		}
1157c204c3d7SShuhei Matsumoto 
115854215a1eSShuhei Matsumoto 		timer_rc = thread_execute_timed_poller(thread, poller, now);
115908d36a55SBen Walker 		if (timer_rc > rc) {
116008d36a55SBen Walker 			rc = timer_rc;
116108d36a55SBen Walker 		}
1162c204c3d7SShuhei Matsumoto 
1163c204c3d7SShuhei Matsumoto 		poller = tmp;
116408d36a55SBen Walker 	}
116508d36a55SBen Walker 
1166a4a8080fSShuhei Matsumoto 	return rc;
1167a4a8080fSShuhei Matsumoto }
1168a4a8080fSShuhei Matsumoto 
11695ecfb221SBen Walker static void
11705ecfb221SBen Walker _thread_remove_pollers(void *ctx)
11715ecfb221SBen Walker {
11725ecfb221SBen Walker 	struct spdk_thread *thread = ctx;
11735ecfb221SBen Walker 	struct spdk_poller *poller, *tmp;
11745ecfb221SBen Walker 
11755ecfb221SBen Walker 	TAILQ_FOREACH_REVERSE_SAFE(poller, &thread->active_pollers,
11765ecfb221SBen Walker 				   active_pollers_head, tailq, tmp) {
11775ecfb221SBen Walker 		if (poller->state == SPDK_POLLER_STATE_UNREGISTERED) {
11785ecfb221SBen Walker 			TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
11795ecfb221SBen Walker 			free(poller);
11805ecfb221SBen Walker 		}
11815ecfb221SBen Walker 	}
11825ecfb221SBen Walker 
11835ecfb221SBen Walker 	RB_FOREACH_SAFE(poller, timed_pollers_tree, &thread->timed_pollers, tmp) {
11845ecfb221SBen Walker 		if (poller->state == SPDK_POLLER_STATE_UNREGISTERED) {
11855ecfb221SBen Walker 			poller_remove_timer(thread, poller);
11865ecfb221SBen Walker 			free(poller);
11875ecfb221SBen Walker 		}
11885ecfb221SBen Walker 	}
11895ecfb221SBen Walker 
11905ecfb221SBen Walker 	thread->poller_unregistered = false;
11915ecfb221SBen Walker }
11925ecfb221SBen Walker 
1193059073c4SBen Walker static void
1194059073c4SBen Walker _thread_exit(void *ctx)
1195059073c4SBen Walker {
1196059073c4SBen Walker 	struct spdk_thread *thread = ctx;
1197059073c4SBen Walker 
1198059073c4SBen Walker 	assert(thread->state == SPDK_THREAD_STATE_EXITING);
1199059073c4SBen Walker 
1200059073c4SBen Walker 	thread_exit(thread, spdk_get_ticks());
1201403c0ec1SArtur Paszkiewicz 
1202403c0ec1SArtur Paszkiewicz 	if (thread->state != SPDK_THREAD_STATE_EXITED) {
1203403c0ec1SArtur Paszkiewicz 		spdk_thread_send_msg(thread, _thread_exit, thread);
1204403c0ec1SArtur Paszkiewicz 	}
1205059073c4SBen Walker }
1206059073c4SBen Walker 
1207a4a8080fSShuhei Matsumoto int
1208a4a8080fSShuhei Matsumoto spdk_thread_poll(struct spdk_thread *thread, uint32_t max_msgs, uint64_t now)
1209a4a8080fSShuhei Matsumoto {
1210a4a8080fSShuhei Matsumoto 	struct spdk_thread *orig_thread;
1211a4a8080fSShuhei Matsumoto 	int rc;
1212a4a8080fSShuhei Matsumoto 
1213a4a8080fSShuhei Matsumoto 	orig_thread = _get_thread();
1214a4a8080fSShuhei Matsumoto 	tls_thread = thread;
1215a4a8080fSShuhei Matsumoto 
1216a4a8080fSShuhei Matsumoto 	if (now == 0) {
1217a4a8080fSShuhei Matsumoto 		now = spdk_get_ticks();
1218a4a8080fSShuhei Matsumoto 	}
1219a4a8080fSShuhei Matsumoto 
122053429c7fSLiu Xiaodong 	if (spdk_likely(!thread->in_interrupt)) {
12210af754f0SSeth Howell 		rc = thread_poll(thread, max_msgs, now);
12222d52c6a1SLiu Xiaodong 		if (spdk_unlikely(thread->in_interrupt)) {
12232d52c6a1SLiu Xiaodong 			/* The thread transitioned to interrupt mode during the above poll.
12242d52c6a1SLiu Xiaodong 			 * Poll it one more time in case that during the transition time
12252d52c6a1SLiu Xiaodong 			 * there is msg received without notification.
12262d52c6a1SLiu Xiaodong 			 */
12272d52c6a1SLiu Xiaodong 			rc = thread_poll(thread, max_msgs, now);
12282d52c6a1SLiu Xiaodong 		}
1229a4a8080fSShuhei Matsumoto 
1230e9aec674SShuhei Matsumoto 		if (spdk_unlikely(thread->state == SPDK_THREAD_STATE_EXITING)) {
12310af754f0SSeth Howell 			thread_exit(thread, now);
1232e9aec674SShuhei Matsumoto 		}
1233059073c4SBen Walker 	} else {
1234059073c4SBen Walker 		/* Non-block wait on thread's fd_group */
1235059073c4SBen Walker 		rc = spdk_fd_group_wait(thread->fgrp, 0);
1236059073c4SBen Walker 	}
1237e9aec674SShuhei Matsumoto 
12380af754f0SSeth Howell 	thread_update_stats(thread, spdk_get_ticks(), now, rc);
123915d36310SBen Walker 
124013586cd8SBen Walker 	tls_thread = orig_thread;
1241605e530aSBen Walker 
124208d36a55SBen Walker 	return rc;
124308d36a55SBen Walker }
124408d36a55SBen Walker 
124508d36a55SBen Walker uint64_t
124608d36a55SBen Walker spdk_thread_next_poller_expiration(struct spdk_thread *thread)
124708d36a55SBen Walker {
124808d36a55SBen Walker 	struct spdk_poller *poller;
124908d36a55SBen Walker 
12504748ebefSShuhei Matsumoto 	poller = thread->first_timed_poller;
125108d36a55SBen Walker 	if (poller) {
125208d36a55SBen Walker 		return poller->next_run_tick;
125308d36a55SBen Walker 	}
125408d36a55SBen Walker 
125508d36a55SBen Walker 	return 0;
125608d36a55SBen Walker }
125708d36a55SBen Walker 
125822c1a00fSKonrad Sztyber int
125922c1a00fSKonrad Sztyber spdk_thread_has_active_pollers(struct spdk_thread *thread)
126022c1a00fSKonrad Sztyber {
126122c1a00fSKonrad Sztyber 	return !TAILQ_EMPTY(&thread->active_pollers);
126222c1a00fSKonrad Sztyber }
126322c1a00fSKonrad Sztyber 
12649ec598d1SKonrad Sztyber static bool
12650af754f0SSeth Howell thread_has_unpaused_pollers(struct spdk_thread *thread)
12667173e9bdSTomasz Zawadzki {
12677173e9bdSTomasz Zawadzki 	if (TAILQ_EMPTY(&thread->active_pollers) &&
12684e9adb3bSShuhei Matsumoto 	    RB_EMPTY(&thread->timed_pollers)) {
12697173e9bdSTomasz Zawadzki 		return false;
12707173e9bdSTomasz Zawadzki 	}
12717173e9bdSTomasz Zawadzki 
12727173e9bdSTomasz Zawadzki 	return true;
12737173e9bdSTomasz Zawadzki }
12747173e9bdSTomasz Zawadzki 
1275154eb339STomasz Zawadzki bool
12769ec598d1SKonrad Sztyber spdk_thread_has_pollers(struct spdk_thread *thread)
12779ec598d1SKonrad Sztyber {
12780af754f0SSeth Howell 	if (!thread_has_unpaused_pollers(thread) &&
12799ec598d1SKonrad Sztyber 	    TAILQ_EMPTY(&thread->paused_pollers)) {
12809ec598d1SKonrad Sztyber 		return false;
12819ec598d1SKonrad Sztyber 	}
12829ec598d1SKonrad Sztyber 
12839ec598d1SKonrad Sztyber 	return true;
12849ec598d1SKonrad Sztyber }
12859ec598d1SKonrad Sztyber 
12869ec598d1SKonrad Sztyber bool
1287154eb339STomasz Zawadzki spdk_thread_is_idle(struct spdk_thread *thread)
1288154eb339STomasz Zawadzki {
1289154eb339STomasz Zawadzki 	if (spdk_ring_count(thread->messages) ||
12900af754f0SSeth Howell 	    thread_has_unpaused_pollers(thread) ||
1291907efcd7SKozlowski Mateusz 	    thread->critical_msg != NULL) {
1292154eb339STomasz Zawadzki 		return false;
1293154eb339STomasz Zawadzki 	}
1294154eb339STomasz Zawadzki 
1295154eb339STomasz Zawadzki 	return true;
1296154eb339STomasz Zawadzki }
1297154eb339STomasz Zawadzki 
1298e30535feSJim Harris uint32_t
1299e30535feSJim Harris spdk_thread_get_count(void)
1300e30535feSJim Harris {
1301e30535feSJim Harris 	/*
1302e30535feSJim Harris 	 * Return cached value of the current thread count.  We could acquire the
1303e30535feSJim Harris 	 *  lock and iterate through the TAILQ of threads to count them, but that
1304e30535feSJim Harris 	 *  count could still be invalidated after we release the lock.
1305e30535feSJim Harris 	 */
1306e30535feSJim Harris 	return g_thread_count;
1307e30535feSJim Harris }
1308e30535feSJim Harris 
130902d75f62SBen Walker struct spdk_thread *
131002d75f62SBen Walker spdk_get_thread(void)
131102d75f62SBen Walker {
131262deaa83SJesse Grodman 	return _get_thread();
131302d75f62SBen Walker }
131402d75f62SBen Walker 
131502d75f62SBen Walker const char *
131602d75f62SBen Walker spdk_thread_get_name(const struct spdk_thread *thread)
131702d75f62SBen Walker {
131802d75f62SBen Walker 	return thread->name;
131902d75f62SBen Walker }
132002d75f62SBen Walker 
1321515733caSShuhei Matsumoto uint64_t
1322515733caSShuhei Matsumoto spdk_thread_get_id(const struct spdk_thread *thread)
1323515733caSShuhei Matsumoto {
1324515733caSShuhei Matsumoto 	return thread->id;
1325515733caSShuhei Matsumoto }
1326515733caSShuhei Matsumoto 
1327ee813409SShuhei Matsumoto struct spdk_thread *
1328ee813409SShuhei Matsumoto spdk_thread_get_by_id(uint64_t id)
1329ee813409SShuhei Matsumoto {
1330ee813409SShuhei Matsumoto 	struct spdk_thread *thread;
1331ee813409SShuhei Matsumoto 
133233e1f4b0Ssunshihao520 	if (id == 0 || id >= g_thread_id) {
133333e1f4b0Ssunshihao520 		SPDK_ERRLOG("invalid thread id: %" PRIu64 ".\n", id);
133433e1f4b0Ssunshihao520 		return NULL;
133533e1f4b0Ssunshihao520 	}
1336ee813409SShuhei Matsumoto 	pthread_mutex_lock(&g_devlist_mutex);
1337ee813409SShuhei Matsumoto 	TAILQ_FOREACH(thread, &g_threads, tailq) {
1338ee813409SShuhei Matsumoto 		if (thread->id == id) {
133933e1f4b0Ssunshihao520 			break;
134033e1f4b0Ssunshihao520 		}
134133e1f4b0Ssunshihao520 	}
1342ee813409SShuhei Matsumoto 	pthread_mutex_unlock(&g_devlist_mutex);
1343ee813409SShuhei Matsumoto 	return thread;
1344ee813409SShuhei Matsumoto }
1345ee813409SShuhei Matsumoto 
134615d36310SBen Walker int
134715d36310SBen Walker spdk_thread_get_stats(struct spdk_thread_stats *stats)
134815d36310SBen Walker {
134915d36310SBen Walker 	struct spdk_thread *thread;
135015d36310SBen Walker 
135115d36310SBen Walker 	thread = _get_thread();
135215d36310SBen Walker 	if (!thread) {
135315d36310SBen Walker 		SPDK_ERRLOG("No thread allocated\n");
135415d36310SBen Walker 		return -EINVAL;
135515d36310SBen Walker 	}
135615d36310SBen Walker 
135715d36310SBen Walker 	if (stats == NULL) {
135815d36310SBen Walker 		return -EINVAL;
135915d36310SBen Walker 	}
136015d36310SBen Walker 
136115d36310SBen Walker 	*stats = thread->stats;
136215d36310SBen Walker 
136315d36310SBen Walker 	return 0;
136415d36310SBen Walker }
136515d36310SBen Walker 
13662139be15SShuhei Matsumoto uint64_t
13672139be15SShuhei Matsumoto spdk_thread_get_last_tsc(struct spdk_thread *thread)
13682139be15SShuhei Matsumoto {
1369c99e1d60SBen Walker 	if (thread == NULL) {
1370c99e1d60SBen Walker 		thread = _get_thread();
1371c99e1d60SBen Walker 	}
1372c99e1d60SBen Walker 
13732139be15SShuhei Matsumoto 	return thread->tsc_last;
13742139be15SShuhei Matsumoto }
13752139be15SShuhei Matsumoto 
13768c4c8c39SLiu Xiaodong static inline int
13778c4c8c39SLiu Xiaodong thread_send_msg_notification(const struct spdk_thread *target_thread)
13788c4c8c39SLiu Xiaodong {
13798c4c8c39SLiu Xiaodong 	uint64_t notify = 1;
13808c4c8c39SLiu Xiaodong 	int rc;
13818c4c8c39SLiu Xiaodong 
138253429c7fSLiu Xiaodong 	/* Not necessary to do notification if interrupt facility is not enabled */
138353429c7fSLiu Xiaodong 	if (spdk_likely(!spdk_interrupt_mode_is_enabled())) {
138453429c7fSLiu Xiaodong 		return 0;
138553429c7fSLiu Xiaodong 	}
138653429c7fSLiu Xiaodong 
13872d52c6a1SLiu Xiaodong 	/* When each spdk_thread can switch between poll and interrupt mode dynamically,
13882d52c6a1SLiu Xiaodong 	 * after sending thread msg, it is necessary to check whether target thread runs in
13892d52c6a1SLiu Xiaodong 	 * interrupt mode and then decide whether do event notification.
13902d52c6a1SLiu Xiaodong 	 */
139153429c7fSLiu Xiaodong 	if (spdk_unlikely(target_thread->in_interrupt)) {
13928c4c8c39SLiu Xiaodong 		rc = write(target_thread->msg_fd, &notify, sizeof(notify));
13938c4c8c39SLiu Xiaodong 		if (rc < 0) {
13948c4c8c39SLiu Xiaodong 			SPDK_ERRLOG("failed to notify msg_queue: %s.\n", spdk_strerror(errno));
13958c4c8c39SLiu Xiaodong 			return -EIO;
13968c4c8c39SLiu Xiaodong 		}
13978c4c8c39SLiu Xiaodong 	}
13988c4c8c39SLiu Xiaodong 
13998c4c8c39SLiu Xiaodong 	return 0;
14008c4c8c39SLiu Xiaodong }
14018c4c8c39SLiu Xiaodong 
14024036f95bSJim Harris int
14037b940538SBen Walker spdk_thread_send_msg(const struct spdk_thread *thread, spdk_msg_fn fn, void *ctx)
140402d75f62SBen Walker {
14052446c5c6SBen Walker 	struct spdk_thread *local_thread;
1406d761ddbfSBen Walker 	struct spdk_msg *msg;
1407d761ddbfSBen Walker 	int rc;
1408d761ddbfSBen Walker 
14094036f95bSJim Harris 	assert(thread != NULL);
141002d75f62SBen Walker 
14116397735bSShuhei Matsumoto 	if (spdk_unlikely(thread->state == SPDK_THREAD_STATE_EXITED)) {
1412d7393e2eSShuhei Matsumoto 		SPDK_ERRLOG("Thread %s is marked as exited.\n", thread->name);
1413d7393e2eSShuhei Matsumoto 		return -EIO;
1414d7393e2eSShuhei Matsumoto 	}
1415d7393e2eSShuhei Matsumoto 
14162446c5c6SBen Walker 	local_thread = _get_thread();
14172446c5c6SBen Walker 
14182446c5c6SBen Walker 	msg = NULL;
14192446c5c6SBen Walker 	if (local_thread != NULL) {
14202446c5c6SBen Walker 		if (local_thread->msg_cache_count > 0) {
14212446c5c6SBen Walker 			msg = SLIST_FIRST(&local_thread->msg_cache);
14222446c5c6SBen Walker 			assert(msg != NULL);
14232446c5c6SBen Walker 			SLIST_REMOVE_HEAD(&local_thread->msg_cache, link);
14242446c5c6SBen Walker 			local_thread->msg_cache_count--;
14252446c5c6SBen Walker 		}
14262446c5c6SBen Walker 	}
14272446c5c6SBen Walker 
14282446c5c6SBen Walker 	if (msg == NULL) {
1429d761ddbfSBen Walker 		msg = spdk_mempool_get(g_spdk_msg_mempool);
1430d761ddbfSBen Walker 		if (!msg) {
14314036f95bSJim Harris 			SPDK_ERRLOG("msg could not be allocated\n");
14324036f95bSJim Harris 			return -ENOMEM;
1433d761ddbfSBen Walker 		}
14342446c5c6SBen Walker 	}
1435d761ddbfSBen Walker 
1436d761ddbfSBen Walker 	msg->fn = fn;
1437d761ddbfSBen Walker 	msg->arg = ctx;
1438d761ddbfSBen Walker 
14391554a344SShuhei Matsumoto 	rc = spdk_ring_enqueue(thread->messages, (void **)&msg, 1, NULL);
1440d761ddbfSBen Walker 	if (rc != 1) {
14414036f95bSJim Harris 		SPDK_ERRLOG("msg could not be enqueued\n");
1442d761ddbfSBen Walker 		spdk_mempool_put(g_spdk_msg_mempool, msg);
14434036f95bSJim Harris 		return -EIO;
1444d761ddbfSBen Walker 	}
14454036f95bSJim Harris 
14468c4c8c39SLiu Xiaodong 	return thread_send_msg_notification(thread);
1447d761ddbfSBen Walker }
144802d75f62SBen Walker 
1449907efcd7SKozlowski Mateusz int
1450907efcd7SKozlowski Mateusz spdk_thread_send_critical_msg(struct spdk_thread *thread, spdk_msg_fn fn)
1451907efcd7SKozlowski Mateusz {
1452907efcd7SKozlowski Mateusz 	spdk_msg_fn expected = NULL;
1453907efcd7SKozlowski Mateusz 
14548c4c8c39SLiu Xiaodong 	if (!__atomic_compare_exchange_n(&thread->critical_msg, &expected, fn, false, __ATOMIC_SEQ_CST,
1455907efcd7SKozlowski Mateusz 					 __ATOMIC_SEQ_CST)) {
1456a3c3c0b5SLiu Xiaodong 		return -EIO;
1457a3c3c0b5SLiu Xiaodong 	}
14584bf6e4bbSLiu Xiaodong 
14598c4c8c39SLiu Xiaodong 	return thread_send_msg_notification(thread);
1460907efcd7SKozlowski Mateusz }
1461907efcd7SKozlowski Mateusz 
14624bf6e4bbSLiu Xiaodong #ifdef __linux__
14634bf6e4bbSLiu Xiaodong static int
14644bf6e4bbSLiu Xiaodong interrupt_timerfd_process(void *arg)
14654bf6e4bbSLiu Xiaodong {
14664bf6e4bbSLiu Xiaodong 	struct spdk_poller *poller = arg;
14674bf6e4bbSLiu Xiaodong 	uint64_t exp;
14684bf6e4bbSLiu Xiaodong 	int rc;
14694bf6e4bbSLiu Xiaodong 
14704bf6e4bbSLiu Xiaodong 	/* clear the level of interval timer */
147101dca5edSBen Walker 	rc = read(poller->intr->efd, &exp, sizeof(exp));
14724bf6e4bbSLiu Xiaodong 	if (rc < 0) {
14734bf6e4bbSLiu Xiaodong 		if (rc == -EAGAIN) {
14744bf6e4bbSLiu Xiaodong 			return 0;
14754bf6e4bbSLiu Xiaodong 		}
14764bf6e4bbSLiu Xiaodong 
14774bf6e4bbSLiu Xiaodong 		return rc;
14784bf6e4bbSLiu Xiaodong 	}
14794bf6e4bbSLiu Xiaodong 
148070f3606bSJohn Levon 	SPDK_DTRACE_PROBE2(timerfd_exec, poller->fn, poller->arg);
148170f3606bSJohn Levon 
14824bf6e4bbSLiu Xiaodong 	return poller->fn(poller->arg);
14834bf6e4bbSLiu Xiaodong }
14844bf6e4bbSLiu Xiaodong 
14854bf6e4bbSLiu Xiaodong static int
148639527e93SLiu Xiaodong period_poller_interrupt_init(struct spdk_poller *poller)
14874bf6e4bbSLiu Xiaodong {
14884bf6e4bbSLiu Xiaodong 	int timerfd;
148939527e93SLiu Xiaodong 
1490c74421c1SLiu Xiaodong 	SPDK_DEBUGLOG(thread, "timerfd init for periodic poller %s\n", poller->name);
149139527e93SLiu Xiaodong 	timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
14924bf6e4bbSLiu Xiaodong 	if (timerfd < 0) {
149339527e93SLiu Xiaodong 		return -errno;
14944bf6e4bbSLiu Xiaodong 	}
14954bf6e4bbSLiu Xiaodong 
149601dca5edSBen Walker 	poller->intr = spdk_interrupt_register(timerfd, interrupt_timerfd_process, poller, poller->name);
149701dca5edSBen Walker 	if (poller->intr == NULL) {
14984bf6e4bbSLiu Xiaodong 		close(timerfd);
149901dca5edSBen Walker 		return -1;
15004bf6e4bbSLiu Xiaodong 	}
15014bf6e4bbSLiu Xiaodong 
1502c7cf48ddSLiu Xiaodong 	return 0;
1503c7cf48ddSLiu Xiaodong }
150439527e93SLiu Xiaodong 
1505c7cf48ddSLiu Xiaodong static void
1506c7cf48ddSLiu Xiaodong period_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
1507c7cf48ddSLiu Xiaodong {
150801dca5edSBen Walker 	int timerfd;
1509c7cf48ddSLiu Xiaodong 	uint64_t now_tick = spdk_get_ticks();
1510c7cf48ddSLiu Xiaodong 	uint64_t ticks = spdk_get_ticks_hz();
1511c7cf48ddSLiu Xiaodong 	int ret;
1512c7cf48ddSLiu Xiaodong 	struct itimerspec new_tv = {};
1513c7cf48ddSLiu Xiaodong 	struct itimerspec old_tv = {};
1514c7cf48ddSLiu Xiaodong 
151501dca5edSBen Walker 	assert(poller->intr != NULL);
1516c7cf48ddSLiu Xiaodong 	assert(poller->period_ticks != 0);
151701dca5edSBen Walker 
151801dca5edSBen Walker 	timerfd = poller->intr->efd;
151901dca5edSBen Walker 
1520c7cf48ddSLiu Xiaodong 	assert(timerfd >= 0);
1521c7cf48ddSLiu Xiaodong 
1522c7cf48ddSLiu Xiaodong 	SPDK_DEBUGLOG(thread, "timerfd set poller %s into %s mode\n", poller->name,
1523c7cf48ddSLiu Xiaodong 		      interrupt_mode ? "interrupt" : "poll");
1524c7cf48ddSLiu Xiaodong 
1525c7cf48ddSLiu Xiaodong 	if (interrupt_mode) {
1526c7cf48ddSLiu Xiaodong 		/* Set repeated timer expiration */
152739527e93SLiu Xiaodong 		new_tv.it_interval.tv_sec = poller->period_ticks / ticks;
152839527e93SLiu Xiaodong 		new_tv.it_interval.tv_nsec = poller->period_ticks % ticks * SPDK_SEC_TO_NSEC / ticks;
152939527e93SLiu Xiaodong 
1530c7cf48ddSLiu Xiaodong 		/* Update next timer expiration */
153139527e93SLiu Xiaodong 		if (poller->next_run_tick == 0) {
153239527e93SLiu Xiaodong 			poller->next_run_tick = now_tick + poller->period_ticks;
153339527e93SLiu Xiaodong 		} else if (poller->next_run_tick < now_tick) {
153439527e93SLiu Xiaodong 			poller->next_run_tick = now_tick;
15354bf6e4bbSLiu Xiaodong 		}
15364bf6e4bbSLiu Xiaodong 
153739527e93SLiu Xiaodong 		new_tv.it_value.tv_sec = (poller->next_run_tick - now_tick) / ticks;
153839527e93SLiu Xiaodong 		new_tv.it_value.tv_nsec = (poller->next_run_tick - now_tick) % ticks * SPDK_SEC_TO_NSEC / ticks;
153939527e93SLiu Xiaodong 
154039527e93SLiu Xiaodong 		ret = timerfd_settime(timerfd, 0, &new_tv, NULL);
154139527e93SLiu Xiaodong 		if (ret < 0) {
154239527e93SLiu Xiaodong 			SPDK_ERRLOG("Failed to arm timerfd: error(%d)\n", errno);
1543c7cf48ddSLiu Xiaodong 			assert(false);
1544c7cf48ddSLiu Xiaodong 		}
1545c7cf48ddSLiu Xiaodong 	} else {
1546c7cf48ddSLiu Xiaodong 		/* Disarm the timer */
1547c7cf48ddSLiu Xiaodong 		ret = timerfd_settime(timerfd, 0, &new_tv, &old_tv);
1548c7cf48ddSLiu Xiaodong 		if (ret < 0) {
1549c7cf48ddSLiu Xiaodong 			/* timerfd_settime's failure indicates that the timerfd is in error */
1550c7cf48ddSLiu Xiaodong 			SPDK_ERRLOG("Failed to disarm timerfd: error(%d)\n", errno);
1551c7cf48ddSLiu Xiaodong 			assert(false);
155239527e93SLiu Xiaodong 		}
155339527e93SLiu Xiaodong 
1554c7cf48ddSLiu Xiaodong 		/* In order to reuse poller_insert_timer, fix now_tick, so next_run_tick would be
1555c7cf48ddSLiu Xiaodong 		 * now_tick + ticks * old_tv.it_value.tv_sec + (ticks * old_tv.it_value.tv_nsec) / SPDK_SEC_TO_NSEC
1556c7cf48ddSLiu Xiaodong 		 */
1557c7cf48ddSLiu Xiaodong 		now_tick = now_tick - poller->period_ticks + ticks * old_tv.it_value.tv_sec + \
1558c7cf48ddSLiu Xiaodong 			   (ticks * old_tv.it_value.tv_nsec) / SPDK_SEC_TO_NSEC;
1559f5313b55SShuhei Matsumoto 		poller_remove_timer(poller->thread, poller);
1560c7cf48ddSLiu Xiaodong 		poller_insert_timer(poller->thread, poller, now_tick);
1561c7cf48ddSLiu Xiaodong 	}
156239527e93SLiu Xiaodong }
156339527e93SLiu Xiaodong 
156439527e93SLiu Xiaodong static void
1565924d4bf3SLiu Xiaodong poller_interrupt_fini(struct spdk_poller *poller)
156639527e93SLiu Xiaodong {
156701dca5edSBen Walker 	int fd;
156801dca5edSBen Walker 
1569924d4bf3SLiu Xiaodong 	SPDK_DEBUGLOG(thread, "interrupt fini for poller %s\n", poller->name);
157001dca5edSBen Walker 	assert(poller->intr != NULL);
157101dca5edSBen Walker 	fd = poller->intr->efd;
157201dca5edSBen Walker 	spdk_interrupt_unregister(&poller->intr);
157301dca5edSBen Walker 	close(fd);
157439527e93SLiu Xiaodong }
157539527e93SLiu Xiaodong 
1576924d4bf3SLiu Xiaodong static int
1577924d4bf3SLiu Xiaodong busy_poller_interrupt_init(struct spdk_poller *poller)
1578924d4bf3SLiu Xiaodong {
1579924d4bf3SLiu Xiaodong 	int busy_efd;
1580924d4bf3SLiu Xiaodong 
1581924d4bf3SLiu Xiaodong 	SPDK_DEBUGLOG(thread, "busy_efd init for busy poller %s\n", poller->name);
1582924d4bf3SLiu Xiaodong 	busy_efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1583924d4bf3SLiu Xiaodong 	if (busy_efd < 0) {
1584924d4bf3SLiu Xiaodong 		SPDK_ERRLOG("Failed to create eventfd for Poller(%s).\n", poller->name);
1585924d4bf3SLiu Xiaodong 		return -errno;
1586924d4bf3SLiu Xiaodong 	}
1587924d4bf3SLiu Xiaodong 
158801dca5edSBen Walker 	poller->intr = spdk_interrupt_register(busy_efd, poller->fn, poller->arg, poller->name);
158901dca5edSBen Walker 	if (poller->intr == NULL) {
1590924d4bf3SLiu Xiaodong 		close(busy_efd);
159101dca5edSBen Walker 		return -1;
1592924d4bf3SLiu Xiaodong 	}
1593924d4bf3SLiu Xiaodong 
1594924d4bf3SLiu Xiaodong 	return 0;
1595924d4bf3SLiu Xiaodong }
1596924d4bf3SLiu Xiaodong 
1597924d4bf3SLiu Xiaodong static void
1598924d4bf3SLiu Xiaodong busy_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
1599924d4bf3SLiu Xiaodong {
160001dca5edSBen Walker 	int busy_efd = poller->intr->efd;
1601924d4bf3SLiu Xiaodong 	uint64_t notify = 1;
1602b9563518SJim Harris 	int rc __attribute__((unused));
1603924d4bf3SLiu Xiaodong 
1604924d4bf3SLiu Xiaodong 	assert(busy_efd >= 0);
1605924d4bf3SLiu Xiaodong 
1606924d4bf3SLiu Xiaodong 	if (interrupt_mode) {
1607924d4bf3SLiu Xiaodong 		/* Write without read on eventfd will get it repeatedly triggered. */
1608924d4bf3SLiu Xiaodong 		if (write(busy_efd, &notify, sizeof(notify)) < 0) {
1609924d4bf3SLiu Xiaodong 			SPDK_ERRLOG("Failed to set busy wait for Poller(%s).\n", poller->name);
1610924d4bf3SLiu Xiaodong 		}
1611924d4bf3SLiu Xiaodong 	} else {
1612924d4bf3SLiu Xiaodong 		/* Read on eventfd will clear its level triggering. */
1613b9563518SJim Harris 		rc = read(busy_efd, &notify, sizeof(notify));
1614924d4bf3SLiu Xiaodong 	}
1615924d4bf3SLiu Xiaodong }
1616924d4bf3SLiu Xiaodong 
161739527e93SLiu Xiaodong #else
161839527e93SLiu Xiaodong 
161939527e93SLiu Xiaodong static int
162039527e93SLiu Xiaodong period_poller_interrupt_init(struct spdk_poller *poller)
162139527e93SLiu Xiaodong {
162239527e93SLiu Xiaodong 	return -ENOTSUP;
162339527e93SLiu Xiaodong }
162439527e93SLiu Xiaodong 
162539527e93SLiu Xiaodong static void
1626c7cf48ddSLiu Xiaodong period_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
1627c7cf48ddSLiu Xiaodong {
1628c7cf48ddSLiu Xiaodong }
1629c7cf48ddSLiu Xiaodong 
1630c7cf48ddSLiu Xiaodong static void
1631924d4bf3SLiu Xiaodong poller_interrupt_fini(struct spdk_poller *poller)
163239527e93SLiu Xiaodong {
163339527e93SLiu Xiaodong }
1634924d4bf3SLiu Xiaodong 
1635924d4bf3SLiu Xiaodong static int
1636924d4bf3SLiu Xiaodong busy_poller_interrupt_init(struct spdk_poller *poller)
1637924d4bf3SLiu Xiaodong {
1638924d4bf3SLiu Xiaodong 	return -ENOTSUP;
1639924d4bf3SLiu Xiaodong }
1640924d4bf3SLiu Xiaodong 
1641924d4bf3SLiu Xiaodong static void
1642924d4bf3SLiu Xiaodong busy_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
1643924d4bf3SLiu Xiaodong {
1644924d4bf3SLiu Xiaodong }
1645924d4bf3SLiu Xiaodong 
164639527e93SLiu Xiaodong #endif
164739527e93SLiu Xiaodong 
1648c7cf48ddSLiu Xiaodong void
1649c7cf48ddSLiu Xiaodong spdk_poller_register_interrupt(struct spdk_poller *poller,
1650c7cf48ddSLiu Xiaodong 			       spdk_poller_set_interrupt_mode_cb cb_fn,
1651c7cf48ddSLiu Xiaodong 			       void *cb_arg)
1652c7cf48ddSLiu Xiaodong {
1653c7cf48ddSLiu Xiaodong 	assert(poller != NULL);
1654c7cf48ddSLiu Xiaodong 	assert(spdk_get_thread() == poller->thread);
1655c7cf48ddSLiu Xiaodong 
1656c7cf48ddSLiu Xiaodong 	if (!spdk_interrupt_mode_is_enabled()) {
1657c7cf48ddSLiu Xiaodong 		return;
1658c7cf48ddSLiu Xiaodong 	}
1659c7cf48ddSLiu Xiaodong 
166001dca5edSBen Walker 	/* If this poller already had an interrupt, clean the old one up. */
166101dca5edSBen Walker 	if (poller->intr != NULL) {
1662924d4bf3SLiu Xiaodong 		poller_interrupt_fini(poller);
1663924d4bf3SLiu Xiaodong 	}
1664924d4bf3SLiu Xiaodong 
1665c7cf48ddSLiu Xiaodong 	poller->set_intr_cb_fn = cb_fn;
1666c7cf48ddSLiu Xiaodong 	poller->set_intr_cb_arg = cb_arg;
1667c7cf48ddSLiu Xiaodong 
1668c7cf48ddSLiu Xiaodong 	/* Set poller into interrupt mode if thread is in interrupt. */
16695c782a70SJim Harris 	if (poller->thread->in_interrupt && poller->set_intr_cb_fn) {
1670c7cf48ddSLiu Xiaodong 		poller->set_intr_cb_fn(poller, poller->set_intr_cb_arg, true);
1671c7cf48ddSLiu Xiaodong 	}
1672c7cf48ddSLiu Xiaodong }
1673c7cf48ddSLiu Xiaodong 
1674aca41b43SShuhei Matsumoto static uint64_t
1675aca41b43SShuhei Matsumoto convert_us_to_ticks(uint64_t us)
1676aca41b43SShuhei Matsumoto {
1677aca41b43SShuhei Matsumoto 	uint64_t quotient, remainder, ticks;
1678aca41b43SShuhei Matsumoto 
1679aca41b43SShuhei Matsumoto 	if (us) {
1680aca41b43SShuhei Matsumoto 		quotient = us / SPDK_SEC_TO_USEC;
1681aca41b43SShuhei Matsumoto 		remainder = us % SPDK_SEC_TO_USEC;
1682aca41b43SShuhei Matsumoto 		ticks = spdk_get_ticks_hz();
1683aca41b43SShuhei Matsumoto 
1684aca41b43SShuhei Matsumoto 		return ticks * quotient + (ticks * remainder) / SPDK_SEC_TO_USEC;
1685aca41b43SShuhei Matsumoto 	} else {
1686aca41b43SShuhei Matsumoto 		return 0;
1687aca41b43SShuhei Matsumoto 	}
1688aca41b43SShuhei Matsumoto }
1689aca41b43SShuhei Matsumoto 
1690b992bb4eSShuhei Matsumoto static struct spdk_poller *
16910af754f0SSeth Howell poller_register(spdk_poller_fn fn,
169202d75f62SBen Walker 		void *arg,
1693b992bb4eSShuhei Matsumoto 		uint64_t period_microseconds,
1694b992bb4eSShuhei Matsumoto 		const char *name)
169502d75f62SBen Walker {
169602d75f62SBen Walker 	struct spdk_thread *thread;
169702d75f62SBen Walker 	struct spdk_poller *poller;
169802d75f62SBen Walker 
169902d75f62SBen Walker 	thread = spdk_get_thread();
170002d75f62SBen Walker 	if (!thread) {
17013181857bSEd rodriguez 		assert(false);
17023181857bSEd rodriguez 		return NULL;
170302d75f62SBen Walker 	}
170402d75f62SBen Walker 
17056397735bSShuhei Matsumoto 	if (spdk_unlikely(thread->state == SPDK_THREAD_STATE_EXITED)) {
17061e98e820SShuhei Matsumoto 		SPDK_ERRLOG("thread %s is marked as exited\n", thread->name);
17071e98e820SShuhei Matsumoto 		return NULL;
17081e98e820SShuhei Matsumoto 	}
17091e98e820SShuhei Matsumoto 
171008d36a55SBen Walker 	poller = calloc(1, sizeof(*poller));
171108d36a55SBen Walker 	if (poller == NULL) {
171208d36a55SBen Walker 		SPDK_ERRLOG("Poller memory allocation failed\n");
17133181857bSEd rodriguez 		return NULL;
171402d75f62SBen Walker 	}
171502d75f62SBen Walker 
1716b992bb4eSShuhei Matsumoto 	if (name) {
1717b992bb4eSShuhei Matsumoto 		snprintf(poller->name, sizeof(poller->name), "%s", name);
1718b992bb4eSShuhei Matsumoto 	} else {
1719b992bb4eSShuhei Matsumoto 		snprintf(poller->name, sizeof(poller->name), "%p", fn);
1720b992bb4eSShuhei Matsumoto 	}
1721b992bb4eSShuhei Matsumoto 
172208d36a55SBen Walker 	poller->state = SPDK_POLLER_STATE_WAITING;
172308d36a55SBen Walker 	poller->fn = fn;
172408d36a55SBen Walker 	poller->arg = arg;
17256146c678SShuhei Matsumoto 	poller->thread = thread;
172601dca5edSBen Walker 	poller->intr = NULL;
172729c0e0dcSMichael Piszczek 	if (thread->next_poller_id == 0) {
172829c0e0dcSMichael Piszczek 		SPDK_WARNLOG("Poller ID rolled over. Poller ID is duplicated.\n");
172929c0e0dcSMichael Piszczek 		thread->next_poller_id = 1;
173029c0e0dcSMichael Piszczek 	}
173129c0e0dcSMichael Piszczek 	poller->id = thread->next_poller_id++;
173208d36a55SBen Walker 
1733aca41b43SShuhei Matsumoto 	poller->period_ticks = convert_us_to_ticks(period_microseconds);
173408d36a55SBen Walker 
1735924d4bf3SLiu Xiaodong 	if (spdk_interrupt_mode_is_enabled()) {
17364bf6e4bbSLiu Xiaodong 		int rc;
17374bf6e4bbSLiu Xiaodong 
1738924d4bf3SLiu Xiaodong 		if (period_microseconds) {
173939527e93SLiu Xiaodong 			rc = period_poller_interrupt_init(poller);
17404bf6e4bbSLiu Xiaodong 			if (rc < 0) {
1741924d4bf3SLiu Xiaodong 				SPDK_ERRLOG("Failed to register interruptfd for periodic poller: %s\n", spdk_strerror(-rc));
17424bf6e4bbSLiu Xiaodong 				free(poller);
17434bf6e4bbSLiu Xiaodong 				return NULL;
17444bf6e4bbSLiu Xiaodong 			}
1745c7cf48ddSLiu Xiaodong 
174601dca5edSBen Walker 			poller->set_intr_cb_fn = period_poller_set_interrupt_mode;
174701dca5edSBen Walker 			poller->set_intr_cb_arg = NULL;
174801dca5edSBen Walker 
1749924d4bf3SLiu Xiaodong 		} else {
1750924d4bf3SLiu Xiaodong 			/* If the poller doesn't have a period, create interruptfd that's always
1751cc6920a4SJosh Soref 			 * busy automatically when running in interrupt mode.
1752924d4bf3SLiu Xiaodong 			 */
1753924d4bf3SLiu Xiaodong 			rc = busy_poller_interrupt_init(poller);
1754924d4bf3SLiu Xiaodong 			if (rc > 0) {
1755924d4bf3SLiu Xiaodong 				SPDK_ERRLOG("Failed to register interruptfd for busy poller: %s\n", spdk_strerror(-rc));
1756924d4bf3SLiu Xiaodong 				free(poller);
1757924d4bf3SLiu Xiaodong 				return NULL;
1758924d4bf3SLiu Xiaodong 			}
1759924d4bf3SLiu Xiaodong 
176001dca5edSBen Walker 			poller->set_intr_cb_fn = busy_poller_set_interrupt_mode;
176101dca5edSBen Walker 			poller->set_intr_cb_arg = NULL;
176201dca5edSBen Walker 		}
176301dca5edSBen Walker 
176401dca5edSBen Walker 		/* Set poller into interrupt mode if thread is in interrupt. */
176501dca5edSBen Walker 		if (poller->thread->in_interrupt) {
176601dca5edSBen Walker 			poller->set_intr_cb_fn(poller, poller->set_intr_cb_arg, true);
1767924d4bf3SLiu Xiaodong 		}
17684bf6e4bbSLiu Xiaodong 	}
17694bf6e4bbSLiu Xiaodong 
17700af754f0SSeth Howell 	thread_insert_poller(thread, poller);
177102d75f62SBen Walker 
177202d75f62SBen Walker 	return poller;
177302d75f62SBen Walker }
177402d75f62SBen Walker 
1775b992bb4eSShuhei Matsumoto struct spdk_poller *
1776b992bb4eSShuhei Matsumoto spdk_poller_register(spdk_poller_fn fn,
1777b992bb4eSShuhei Matsumoto 		     void *arg,
1778b992bb4eSShuhei Matsumoto 		     uint64_t period_microseconds)
1779b992bb4eSShuhei Matsumoto {
17800af754f0SSeth Howell 	return poller_register(fn, arg, period_microseconds, NULL);
1781b992bb4eSShuhei Matsumoto }
1782b992bb4eSShuhei Matsumoto 
1783b992bb4eSShuhei Matsumoto struct spdk_poller *
1784b992bb4eSShuhei Matsumoto spdk_poller_register_named(spdk_poller_fn fn,
1785b992bb4eSShuhei Matsumoto 			   void *arg,
1786b992bb4eSShuhei Matsumoto 			   uint64_t period_microseconds,
1787b992bb4eSShuhei Matsumoto 			   const char *name)
1788b992bb4eSShuhei Matsumoto {
17890af754f0SSeth Howell 	return poller_register(fn, arg, period_microseconds, name);
1790b992bb4eSShuhei Matsumoto }
1791b992bb4eSShuhei Matsumoto 
1792d4255740SJohn Levon static void
1793d4255740SJohn Levon wrong_thread(const char *func, const char *name, struct spdk_thread *thread,
1794d4255740SJohn Levon 	     struct spdk_thread *curthread)
1795d4255740SJohn Levon {
179679ba049aSMike Gerdts 	if (thread == NULL) {
179779ba049aSMike Gerdts 		SPDK_ERRLOG("%s(%s) called with NULL thread\n", func, name);
179879ba049aSMike Gerdts 		abort();
179979ba049aSMike Gerdts 	}
18002a51824aSNick Connolly 	SPDK_ERRLOG("%s(%s) called from wrong thread %s:%" PRIu64 " (should be "
18012a51824aSNick Connolly 		    "%s:%" PRIu64 ")\n", func, name, curthread->name, curthread->id,
1802d4255740SJohn Levon 		    thread->name, thread->id);
1803d4255740SJohn Levon 	assert(false);
1804d4255740SJohn Levon }
1805d4255740SJohn Levon 
180602d75f62SBen Walker void
180702d75f62SBen Walker spdk_poller_unregister(struct spdk_poller **ppoller)
180802d75f62SBen Walker {
180902d75f62SBen Walker 	struct spdk_thread *thread;
181002d75f62SBen Walker 	struct spdk_poller *poller;
181102d75f62SBen Walker 
181202d75f62SBen Walker 	poller = *ppoller;
181302d75f62SBen Walker 	if (poller == NULL) {
181402d75f62SBen Walker 		return;
181502d75f62SBen Walker 	}
181602d75f62SBen Walker 
181702d75f62SBen Walker 	*ppoller = NULL;
181802d75f62SBen Walker 
181902d75f62SBen Walker 	thread = spdk_get_thread();
182008d36a55SBen Walker 	if (!thread) {
182108d36a55SBen Walker 		assert(false);
182208d36a55SBen Walker 		return;
182308d36a55SBen Walker 	}
182402d75f62SBen Walker 
18256146c678SShuhei Matsumoto 	if (poller->thread != thread) {
1826d4255740SJohn Levon 		wrong_thread(__func__, poller->name, poller->thread, thread);
18276146c678SShuhei Matsumoto 		return;
18286146c678SShuhei Matsumoto 	}
18296146c678SShuhei Matsumoto 
1830b1906912SLiu Xiaodong 	if (spdk_interrupt_mode_is_enabled()) {
1831b1906912SLiu Xiaodong 		/* Release the interrupt resource for period or busy poller */
183201dca5edSBen Walker 		if (poller->intr != NULL) {
1833924d4bf3SLiu Xiaodong 			poller_interrupt_fini(poller);
18344bf6e4bbSLiu Xiaodong 		}
18354bf6e4bbSLiu Xiaodong 
18365ecfb221SBen Walker 		/* If there is not already a pending poller removal, generate
18375ecfb221SBen Walker 		 * a message to go process removals. */
18385ecfb221SBen Walker 		if (!thread->poller_unregistered) {
1839b1906912SLiu Xiaodong 			thread->poller_unregistered = true;
18405ecfb221SBen Walker 			spdk_thread_send_msg(thread, _thread_remove_pollers, thread);
18415ecfb221SBen Walker 		}
1842b1906912SLiu Xiaodong 	}
1843b1906912SLiu Xiaodong 
18449ec598d1SKonrad Sztyber 	/* If the poller was paused, put it on the active_pollers list so that
18459ec598d1SKonrad Sztyber 	 * its unregistration can be processed by spdk_thread_poll().
18469ec598d1SKonrad Sztyber 	 */
18479ec598d1SKonrad Sztyber 	if (poller->state == SPDK_POLLER_STATE_PAUSED) {
18489ec598d1SKonrad Sztyber 		TAILQ_REMOVE(&thread->paused_pollers, poller, tailq);
18499ec598d1SKonrad Sztyber 		TAILQ_INSERT_TAIL(&thread->active_pollers, poller, tailq);
18509ec598d1SKonrad Sztyber 		poller->period_ticks = 0;
18519ec598d1SKonrad Sztyber 	}
18529ec598d1SKonrad Sztyber 
1853836356f2SBen Walker 	/* Simply set the state to unregistered. The poller will get cleaned up
1854836356f2SBen Walker 	 * in a subsequent call to spdk_thread_poll().
185508d36a55SBen Walker 	 */
185608d36a55SBen Walker 	poller->state = SPDK_POLLER_STATE_UNREGISTERED;
185702d75f62SBen Walker }
185802d75f62SBen Walker 
18599ec598d1SKonrad Sztyber void
18609ec598d1SKonrad Sztyber spdk_poller_pause(struct spdk_poller *poller)
18619ec598d1SKonrad Sztyber {
18629ec598d1SKonrad Sztyber 	struct spdk_thread *thread;
18639ec598d1SKonrad Sztyber 
18649ec598d1SKonrad Sztyber 	thread = spdk_get_thread();
18659ec598d1SKonrad Sztyber 	if (!thread) {
18669ec598d1SKonrad Sztyber 		assert(false);
18679ec598d1SKonrad Sztyber 		return;
18689ec598d1SKonrad Sztyber 	}
18699ec598d1SKonrad Sztyber 
1870d5b7f3c5SShuhei Matsumoto 	if (poller->thread != thread) {
1871d4255740SJohn Levon 		wrong_thread(__func__, poller->name, poller->thread, thread);
1872d5b7f3c5SShuhei Matsumoto 		return;
1873d5b7f3c5SShuhei Matsumoto 	}
1874d5b7f3c5SShuhei Matsumoto 
18753ca15e33SShuhei Matsumoto 	/* We just set its state to SPDK_POLLER_STATE_PAUSING and let
18763ca15e33SShuhei Matsumoto 	 * spdk_thread_poll() move it. It allows a poller to be paused from
18773ca15e33SShuhei Matsumoto 	 * another one's context without breaking the TAILQ_FOREACH_REVERSE_SAFE
18783ca15e33SShuhei Matsumoto 	 * iteration, or from within itself without breaking the logic to always
18793ca15e33SShuhei Matsumoto 	 * remove the closest timed poller in the TAILQ_FOREACH_SAFE iteration.
18809ec598d1SKonrad Sztyber 	 */
188142ad32daSShuhei Matsumoto 	switch (poller->state) {
188242ad32daSShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSED:
188342ad32daSShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSING:
188442ad32daSShuhei Matsumoto 		break;
188542ad32daSShuhei Matsumoto 	case SPDK_POLLER_STATE_RUNNING:
188642ad32daSShuhei Matsumoto 	case SPDK_POLLER_STATE_WAITING:
188742ad32daSShuhei Matsumoto 		poller->state = SPDK_POLLER_STATE_PAUSING;
188842ad32daSShuhei Matsumoto 		break;
188942ad32daSShuhei Matsumoto 	default:
189042ad32daSShuhei Matsumoto 		assert(false);
189142ad32daSShuhei Matsumoto 		break;
18929ec598d1SKonrad Sztyber 	}
18939ec598d1SKonrad Sztyber }
18949ec598d1SKonrad Sztyber 
18959ec598d1SKonrad Sztyber void
18969ec598d1SKonrad Sztyber spdk_poller_resume(struct spdk_poller *poller)
18979ec598d1SKonrad Sztyber {
18989ec598d1SKonrad Sztyber 	struct spdk_thread *thread;
18999ec598d1SKonrad Sztyber 
19009ec598d1SKonrad Sztyber 	thread = spdk_get_thread();
19019ec598d1SKonrad Sztyber 	if (!thread) {
19029ec598d1SKonrad Sztyber 		assert(false);
19039ec598d1SKonrad Sztyber 		return;
19049ec598d1SKonrad Sztyber 	}
19059ec598d1SKonrad Sztyber 
1906d5b7f3c5SShuhei Matsumoto 	if (poller->thread != thread) {
1907d4255740SJohn Levon 		wrong_thread(__func__, poller->name, poller->thread, thread);
1908d5b7f3c5SShuhei Matsumoto 		return;
1909d5b7f3c5SShuhei Matsumoto 	}
1910d5b7f3c5SShuhei Matsumoto 
19119ec598d1SKonrad Sztyber 	/* If a poller is paused it has to be removed from the paused pollers
19124e9adb3bSShuhei Matsumoto 	 * list and put on the active list or timer tree depending on its
19139ec598d1SKonrad Sztyber 	 * period_ticks.  If a poller is still in the process of being paused,
19149ec598d1SKonrad Sztyber 	 * we just need to flip its state back to waiting, as it's already on
19154e9adb3bSShuhei Matsumoto 	 * the appropriate list or tree.
19169ec598d1SKonrad Sztyber 	 */
191742ad32daSShuhei Matsumoto 	switch (poller->state) {
191842ad32daSShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSED:
19199ec598d1SKonrad Sztyber 		TAILQ_REMOVE(&thread->paused_pollers, poller, tailq);
19200af754f0SSeth Howell 		thread_insert_poller(thread, poller);
192142ad32daSShuhei Matsumoto 	/* fallthrough */
192242ad32daSShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSING:
19239ec598d1SKonrad Sztyber 		poller->state = SPDK_POLLER_STATE_WAITING;
192442ad32daSShuhei Matsumoto 		break;
192542ad32daSShuhei Matsumoto 	case SPDK_POLLER_STATE_RUNNING:
192642ad32daSShuhei Matsumoto 	case SPDK_POLLER_STATE_WAITING:
192742ad32daSShuhei Matsumoto 		break;
192842ad32daSShuhei Matsumoto 	default:
192942ad32daSShuhei Matsumoto 		assert(false);
193042ad32daSShuhei Matsumoto 		break;
193142ad32daSShuhei Matsumoto 	}
19329ec598d1SKonrad Sztyber }
19339ec598d1SKonrad Sztyber 
19340b0f0c54SShuhei Matsumoto const char *
19351aec9334SShuhei Matsumoto spdk_poller_get_name(struct spdk_poller *poller)
19360b0f0c54SShuhei Matsumoto {
19371aec9334SShuhei Matsumoto 	return poller->name;
19381aec9334SShuhei Matsumoto }
19391aec9334SShuhei Matsumoto 
194029c0e0dcSMichael Piszczek uint64_t
194129c0e0dcSMichael Piszczek spdk_poller_get_id(struct spdk_poller *poller)
194229c0e0dcSMichael Piszczek {
194329c0e0dcSMichael Piszczek 	return poller->id;
194429c0e0dcSMichael Piszczek }
194529c0e0dcSMichael Piszczek 
19461aec9334SShuhei Matsumoto const char *
19471aec9334SShuhei Matsumoto spdk_poller_get_state_str(struct spdk_poller *poller)
19481aec9334SShuhei Matsumoto {
19491aec9334SShuhei Matsumoto 	switch (poller->state) {
19500b0f0c54SShuhei Matsumoto 	case SPDK_POLLER_STATE_WAITING:
19510b0f0c54SShuhei Matsumoto 		return "waiting";
19520b0f0c54SShuhei Matsumoto 	case SPDK_POLLER_STATE_RUNNING:
19530b0f0c54SShuhei Matsumoto 		return "running";
19540b0f0c54SShuhei Matsumoto 	case SPDK_POLLER_STATE_UNREGISTERED:
19550b0f0c54SShuhei Matsumoto 		return "unregistered";
19560b0f0c54SShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSING:
19570b0f0c54SShuhei Matsumoto 		return "pausing";
19580b0f0c54SShuhei Matsumoto 	case SPDK_POLLER_STATE_PAUSED:
19590b0f0c54SShuhei Matsumoto 		return "paused";
19600b0f0c54SShuhei Matsumoto 	default:
19610b0f0c54SShuhei Matsumoto 		return NULL;
19620b0f0c54SShuhei Matsumoto 	}
19630b0f0c54SShuhei Matsumoto }
19640b0f0c54SShuhei Matsumoto 
19651aec9334SShuhei Matsumoto uint64_t
19661aec9334SShuhei Matsumoto spdk_poller_get_period_ticks(struct spdk_poller *poller)
19671aec9334SShuhei Matsumoto {
19681aec9334SShuhei Matsumoto 	return poller->period_ticks;
19691aec9334SShuhei Matsumoto }
19701aec9334SShuhei Matsumoto 
19711aec9334SShuhei Matsumoto void
19721aec9334SShuhei Matsumoto spdk_poller_get_stats(struct spdk_poller *poller, struct spdk_poller_stats *stats)
19731aec9334SShuhei Matsumoto {
19741aec9334SShuhei Matsumoto 	stats->run_count = poller->run_count;
19751aec9334SShuhei Matsumoto 	stats->busy_count = poller->busy_count;
19761aec9334SShuhei Matsumoto }
19771aec9334SShuhei Matsumoto 
1978cd83ea4aSShuhei Matsumoto struct spdk_poller *
1979cd83ea4aSShuhei Matsumoto spdk_thread_get_first_active_poller(struct spdk_thread *thread)
1980cd83ea4aSShuhei Matsumoto {
1981cd83ea4aSShuhei Matsumoto 	return TAILQ_FIRST(&thread->active_pollers);
1982cd83ea4aSShuhei Matsumoto }
1983cd83ea4aSShuhei Matsumoto 
1984cd83ea4aSShuhei Matsumoto struct spdk_poller *
1985cd83ea4aSShuhei Matsumoto spdk_thread_get_next_active_poller(struct spdk_poller *prev)
1986cd83ea4aSShuhei Matsumoto {
1987cd83ea4aSShuhei Matsumoto 	return TAILQ_NEXT(prev, tailq);
1988cd83ea4aSShuhei Matsumoto }
1989cd83ea4aSShuhei Matsumoto 
1990cd83ea4aSShuhei Matsumoto struct spdk_poller *
1991cd83ea4aSShuhei Matsumoto spdk_thread_get_first_timed_poller(struct spdk_thread *thread)
1992cd83ea4aSShuhei Matsumoto {
19934e9adb3bSShuhei Matsumoto 	return RB_MIN(timed_pollers_tree, &thread->timed_pollers);
1994cd83ea4aSShuhei Matsumoto }
1995cd83ea4aSShuhei Matsumoto 
1996cd83ea4aSShuhei Matsumoto struct spdk_poller *
1997cd83ea4aSShuhei Matsumoto spdk_thread_get_next_timed_poller(struct spdk_poller *prev)
1998cd83ea4aSShuhei Matsumoto {
19994e9adb3bSShuhei Matsumoto 	return RB_NEXT(timed_pollers_tree, &thread->timed_pollers, prev);
2000cd83ea4aSShuhei Matsumoto }
2001cd83ea4aSShuhei Matsumoto 
2002cd83ea4aSShuhei Matsumoto struct spdk_poller *
2003cd83ea4aSShuhei Matsumoto spdk_thread_get_first_paused_poller(struct spdk_thread *thread)
2004cd83ea4aSShuhei Matsumoto {
20051180c390STomasz Zawadzki 	return TAILQ_FIRST(&thread->paused_pollers);
2006cd83ea4aSShuhei Matsumoto }
2007cd83ea4aSShuhei Matsumoto 
2008cd83ea4aSShuhei Matsumoto struct spdk_poller *
2009cd83ea4aSShuhei Matsumoto spdk_thread_get_next_paused_poller(struct spdk_poller *prev)
2010cd83ea4aSShuhei Matsumoto {
2011cd83ea4aSShuhei Matsumoto 	return TAILQ_NEXT(prev, tailq);
2012cd83ea4aSShuhei Matsumoto }
2013cd83ea4aSShuhei Matsumoto 
2014bd2fae2fSShuhei Matsumoto struct spdk_io_channel *
2015bd2fae2fSShuhei Matsumoto spdk_thread_get_first_io_channel(struct spdk_thread *thread)
2016bd2fae2fSShuhei Matsumoto {
2017df559ab6SJiewei Ke 	return RB_MIN(io_channel_tree, &thread->io_channels);
2018bd2fae2fSShuhei Matsumoto }
2019bd2fae2fSShuhei Matsumoto 
2020bd2fae2fSShuhei Matsumoto struct spdk_io_channel *
2021bd2fae2fSShuhei Matsumoto spdk_thread_get_next_io_channel(struct spdk_io_channel *prev)
2022bd2fae2fSShuhei Matsumoto {
2023df559ab6SJiewei Ke 	return RB_NEXT(io_channel_tree, &thread->io_channels, prev);
2024bd2fae2fSShuhei Matsumoto }
2025bd2fae2fSShuhei Matsumoto 
2026fa6aec49SAnisa Su uint16_t
2027fa6aec49SAnisa Su spdk_thread_get_trace_id(struct spdk_thread *thread)
2028fa6aec49SAnisa Su {
2029fa6aec49SAnisa Su 	return thread->trace_id;
2030fa6aec49SAnisa Su }
2031fa6aec49SAnisa Su 
203202d75f62SBen Walker struct call_thread {
203302d75f62SBen Walker 	struct spdk_thread *cur_thread;
20347b940538SBen Walker 	spdk_msg_fn fn;
203502d75f62SBen Walker 	void *ctx;
203602d75f62SBen Walker 
203702d75f62SBen Walker 	struct spdk_thread *orig_thread;
20387b940538SBen Walker 	spdk_msg_fn cpl;
203902d75f62SBen Walker };
204002d75f62SBen Walker 
204102d75f62SBen Walker static void
20426f4e0c95SShuhei Matsumoto _back_to_orig_thread(void *ctx)
20436f4e0c95SShuhei Matsumoto {
20446f4e0c95SShuhei Matsumoto 	struct call_thread *ct = ctx;
20456f4e0c95SShuhei Matsumoto 
20466f4e0c95SShuhei Matsumoto 	assert(ct->orig_thread->for_each_count > 0);
20476f4e0c95SShuhei Matsumoto 	ct->orig_thread->for_each_count--;
20486f4e0c95SShuhei Matsumoto 
20496f4e0c95SShuhei Matsumoto 	ct->cpl(ct->ctx);
20506f4e0c95SShuhei Matsumoto 	free(ctx);
20516f4e0c95SShuhei Matsumoto }
20526f4e0c95SShuhei Matsumoto 
20536f4e0c95SShuhei Matsumoto static void
20547a2bf6fcSSeth Howell _on_thread(void *ctx)
205502d75f62SBen Walker {
205602d75f62SBen Walker 	struct call_thread *ct = ctx;
20575feebd85SShuhei Matsumoto 	int rc __attribute__((unused));
205802d75f62SBen Walker 
205902d75f62SBen Walker 	ct->fn(ct->ctx);
206002d75f62SBen Walker 
206102d75f62SBen Walker 	pthread_mutex_lock(&g_devlist_mutex);
206202d75f62SBen Walker 	ct->cur_thread = TAILQ_NEXT(ct->cur_thread, tailq);
206389c1e5bfSApokleos 	while (ct->cur_thread && ct->cur_thread->state != SPDK_THREAD_STATE_RUNNING) {
206489c1e5bfSApokleos 		SPDK_DEBUGLOG(thread, "thread %s is not running but still not destroyed.\n",
206589c1e5bfSApokleos 			      ct->cur_thread->name);
206689c1e5bfSApokleos 		ct->cur_thread = TAILQ_NEXT(ct->cur_thread, tailq);
206789c1e5bfSApokleos 	}
206802d75f62SBen Walker 	pthread_mutex_unlock(&g_devlist_mutex);
206902d75f62SBen Walker 
207002d75f62SBen Walker 	if (!ct->cur_thread) {
20712172c432STomasz Zawadzki 		SPDK_DEBUGLOG(thread, "Completed thread iteration\n");
2072ec571793SBen Walker 
20736f4e0c95SShuhei Matsumoto 		rc = spdk_thread_send_msg(ct->orig_thread, _back_to_orig_thread, ctx);
207402d75f62SBen Walker 	} else {
20752172c432STomasz Zawadzki 		SPDK_DEBUGLOG(thread, "Continuing thread iteration to %s\n",
2076ec571793SBen Walker 			      ct->cur_thread->name);
2077ec571793SBen Walker 
20787a2bf6fcSSeth Howell 		rc = spdk_thread_send_msg(ct->cur_thread, _on_thread, ctx);
207902d75f62SBen Walker 	}
20805feebd85SShuhei Matsumoto 	assert(rc == 0);
208102d75f62SBen Walker }
208202d75f62SBen Walker 
208302d75f62SBen Walker void
20847b940538SBen Walker spdk_for_each_thread(spdk_msg_fn fn, void *ctx, spdk_msg_fn cpl)
208502d75f62SBen Walker {
208602d75f62SBen Walker 	struct call_thread *ct;
2087c4978672STomasz Zawadzki 	struct spdk_thread *thread;
20885feebd85SShuhei Matsumoto 	int rc __attribute__((unused));
208902d75f62SBen Walker 
209002d75f62SBen Walker 	ct = calloc(1, sizeof(*ct));
209102d75f62SBen Walker 	if (!ct) {
209202d75f62SBen Walker 		SPDK_ERRLOG("Unable to perform thread iteration\n");
209302d75f62SBen Walker 		cpl(ctx);
209402d75f62SBen Walker 		return;
209502d75f62SBen Walker 	}
209602d75f62SBen Walker 
209702d75f62SBen Walker 	ct->fn = fn;
209802d75f62SBen Walker 	ct->ctx = ctx;
209902d75f62SBen Walker 	ct->cpl = cpl;
210002d75f62SBen Walker 
2101c4978672STomasz Zawadzki 	thread = _get_thread();
2102c4978672STomasz Zawadzki 	if (!thread) {
2103c4978672STomasz Zawadzki 		SPDK_ERRLOG("No thread allocated\n");
2104c4978672STomasz Zawadzki 		free(ct);
2105c4978672STomasz Zawadzki 		cpl(ctx);
2106c4978672STomasz Zawadzki 		return;
2107c4978672STomasz Zawadzki 	}
2108c4978672STomasz Zawadzki 	ct->orig_thread = thread;
21097b3f4110SDarek Stojaczyk 
21106f4e0c95SShuhei Matsumoto 	ct->orig_thread->for_each_count++;
21116f4e0c95SShuhei Matsumoto 
21127b3f4110SDarek Stojaczyk 	pthread_mutex_lock(&g_devlist_mutex);
211302d75f62SBen Walker 	ct->cur_thread = TAILQ_FIRST(&g_threads);
211402d75f62SBen Walker 	pthread_mutex_unlock(&g_devlist_mutex);
211502d75f62SBen Walker 
21162172c432STomasz Zawadzki 	SPDK_DEBUGLOG(thread, "Starting thread iteration from %s\n",
2117ec571793SBen Walker 		      ct->orig_thread->name);
2118ec571793SBen Walker 
21197a2bf6fcSSeth Howell 	rc = spdk_thread_send_msg(ct->cur_thread, _on_thread, ct);
21205feebd85SShuhei Matsumoto 	assert(rc == 0);
212102d75f62SBen Walker }
212202d75f62SBen Walker 
2123c7cf48ddSLiu Xiaodong static inline void
2124c7cf48ddSLiu Xiaodong poller_set_interrupt_mode(struct spdk_poller *poller, bool interrupt_mode)
2125c7cf48ddSLiu Xiaodong {
2126c7cf48ddSLiu Xiaodong 	if (poller->state == SPDK_POLLER_STATE_UNREGISTERED) {
2127c7cf48ddSLiu Xiaodong 		return;
2128c7cf48ddSLiu Xiaodong 	}
2129c7cf48ddSLiu Xiaodong 
21305c782a70SJim Harris 	if (poller->set_intr_cb_fn) {
2131c7cf48ddSLiu Xiaodong 		poller->set_intr_cb_fn(poller, poller->set_intr_cb_arg, interrupt_mode);
2132c7cf48ddSLiu Xiaodong 	}
21335c782a70SJim Harris }
2134c7cf48ddSLiu Xiaodong 
213502d75f62SBen Walker void
21362d52c6a1SLiu Xiaodong spdk_thread_set_interrupt_mode(bool enable_interrupt)
21372d52c6a1SLiu Xiaodong {
21382d52c6a1SLiu Xiaodong 	struct spdk_thread *thread = _get_thread();
2139c7cf48ddSLiu Xiaodong 	struct spdk_poller *poller, *tmp;
21402d52c6a1SLiu Xiaodong 
21412d52c6a1SLiu Xiaodong 	assert(thread);
21422d52c6a1SLiu Xiaodong 	assert(spdk_interrupt_mode_is_enabled());
21432d52c6a1SLiu Xiaodong 
21447ff8688cSLiu Xiaodong 	SPDK_NOTICELOG("Set spdk_thread (%s) to %s mode from %s mode.\n",
21457ff8688cSLiu Xiaodong 		       thread->name,  enable_interrupt ? "intr" : "poll",
21467ff8688cSLiu Xiaodong 		       thread->in_interrupt ? "intr" : "poll");
21477ff8688cSLiu Xiaodong 
21482d52c6a1SLiu Xiaodong 	if (thread->in_interrupt == enable_interrupt) {
21492d52c6a1SLiu Xiaodong 		return;
21502d52c6a1SLiu Xiaodong 	}
21512d52c6a1SLiu Xiaodong 
2152c7cf48ddSLiu Xiaodong 	/* Set pollers to expected mode */
21534e9adb3bSShuhei Matsumoto 	RB_FOREACH_SAFE(poller, timed_pollers_tree, &thread->timed_pollers, tmp) {
2154c7cf48ddSLiu Xiaodong 		poller_set_interrupt_mode(poller, enable_interrupt);
2155c7cf48ddSLiu Xiaodong 	}
2156c7cf48ddSLiu Xiaodong 	TAILQ_FOREACH_SAFE(poller, &thread->active_pollers, tailq, tmp) {
2157c7cf48ddSLiu Xiaodong 		poller_set_interrupt_mode(poller, enable_interrupt);
2158c7cf48ddSLiu Xiaodong 	}
2159c7cf48ddSLiu Xiaodong 	/* All paused pollers will go to work in interrupt mode */
2160c7cf48ddSLiu Xiaodong 	TAILQ_FOREACH_SAFE(poller, &thread->paused_pollers, tailq, tmp) {
2161c7cf48ddSLiu Xiaodong 		poller_set_interrupt_mode(poller, enable_interrupt);
2162c7cf48ddSLiu Xiaodong 	}
21632d52c6a1SLiu Xiaodong 
21642d52c6a1SLiu Xiaodong 	thread->in_interrupt = enable_interrupt;
21652d52c6a1SLiu Xiaodong 	return;
21662d52c6a1SLiu Xiaodong }
21672d52c6a1SLiu Xiaodong 
216849c6afbfSJiewei Ke static struct io_device *
216949c6afbfSJiewei Ke io_device_get(void *io_device)
217049c6afbfSJiewei Ke {
217149c6afbfSJiewei Ke 	struct io_device find = {};
217249c6afbfSJiewei Ke 
217349c6afbfSJiewei Ke 	find.io_device = io_device;
217449c6afbfSJiewei Ke 	return RB_FIND(io_device_tree, &g_io_devices, &find);
217549c6afbfSJiewei Ke }
217649c6afbfSJiewei Ke 
21772d52c6a1SLiu Xiaodong void
217802d75f62SBen Walker spdk_io_device_register(void *io_device, spdk_io_channel_create_cb create_cb,
2179c9402000SBen Walker 			spdk_io_channel_destroy_cb destroy_cb, uint32_t ctx_size,
2180c9402000SBen Walker 			const char *name)
218102d75f62SBen Walker {
218202d75f62SBen Walker 	struct io_device *dev, *tmp;
2183c4978672STomasz Zawadzki 	struct spdk_thread *thread;
218402d75f62SBen Walker 
218502d75f62SBen Walker 	assert(io_device != NULL);
218602d75f62SBen Walker 	assert(create_cb != NULL);
218702d75f62SBen Walker 	assert(destroy_cb != NULL);
218802d75f62SBen Walker 
2189c4978672STomasz Zawadzki 	thread = spdk_get_thread();
2190c4978672STomasz Zawadzki 	if (!thread) {
21918a252783SSeth Howell 		SPDK_ERRLOG("called from non-SPDK thread\n");
2192c4978672STomasz Zawadzki 		assert(false);
2193c4978672STomasz Zawadzki 		return;
2194c4978672STomasz Zawadzki 	}
2195c4978672STomasz Zawadzki 
219602d75f62SBen Walker 	dev = calloc(1, sizeof(struct io_device));
219702d75f62SBen Walker 	if (dev == NULL) {
219802d75f62SBen Walker 		SPDK_ERRLOG("could not allocate io_device\n");
219902d75f62SBen Walker 		return;
220002d75f62SBen Walker 	}
220102d75f62SBen Walker 
220202d75f62SBen Walker 	dev->io_device = io_device;
2203c9402000SBen Walker 	if (name) {
22043d1995c3SShuhei Matsumoto 		snprintf(dev->name, sizeof(dev->name), "%s", name);
2205c9402000SBen Walker 	} else {
22063d1995c3SShuhei Matsumoto 		snprintf(dev->name, sizeof(dev->name), "%p", dev);
2207c9402000SBen Walker 	}
220802d75f62SBen Walker 	dev->create_cb = create_cb;
220902d75f62SBen Walker 	dev->destroy_cb = destroy_cb;
221002d75f62SBen Walker 	dev->unregister_cb = NULL;
221102d75f62SBen Walker 	dev->ctx_size = ctx_size;
221202d75f62SBen Walker 	dev->for_each_count = 0;
221302d75f62SBen Walker 	dev->unregistered = false;
221402d75f62SBen Walker 	dev->refcnt = 0;
221502d75f62SBen Walker 
22162172c432STomasz Zawadzki 	SPDK_DEBUGLOG(thread, "Registering io_device %s (%p) on thread %s\n",
2217c4978672STomasz Zawadzki 		      dev->name, dev->io_device, thread->name);
2218ec571793SBen Walker 
221902d75f62SBen Walker 	pthread_mutex_lock(&g_devlist_mutex);
222049c6afbfSJiewei Ke 	tmp = RB_INSERT(io_device_tree, &g_io_devices, dev);
222149c6afbfSJiewei Ke 	if (tmp != NULL) {
2222f3d72156SJim Harris 		SPDK_ERRLOG("io_device %p already registered (old:%s new:%s)\n",
2223f3d72156SJim Harris 			    io_device, tmp->name, dev->name);
222449c6afbfSJiewei Ke 		free(dev);
222502d75f62SBen Walker 	}
222649c6afbfSJiewei Ke 
222702d75f62SBen Walker 	pthread_mutex_unlock(&g_devlist_mutex);
222802d75f62SBen Walker }
222902d75f62SBen Walker 
223002d75f62SBen Walker static void
223102d75f62SBen Walker _finish_unregister(void *arg)
223202d75f62SBen Walker {
223302d75f62SBen Walker 	struct io_device *dev = arg;
22346fdc71ecSShuhei Matsumoto 	struct spdk_thread *thread;
22356fdc71ecSShuhei Matsumoto 
22366fdc71ecSShuhei Matsumoto 	thread = spdk_get_thread();
22376fdc71ecSShuhei Matsumoto 	assert(thread == dev->unregister_thread);
223802d75f62SBen Walker 
22392172c432STomasz Zawadzki 	SPDK_DEBUGLOG(thread, "Finishing unregistration of io_device %s (%p) on thread %s\n",
22406fdc71ecSShuhei Matsumoto 		      dev->name, dev->io_device, thread->name);
22416fdc71ecSShuhei Matsumoto 
22426fdc71ecSShuhei Matsumoto 	assert(thread->pending_unregister_count > 0);
22436fdc71ecSShuhei Matsumoto 	thread->pending_unregister_count--;
2244ec571793SBen Walker 
224502d75f62SBen Walker 	dev->unregister_cb(dev->io_device);
224602d75f62SBen Walker 	free(dev);
224702d75f62SBen Walker }
224802d75f62SBen Walker 
224902d75f62SBen Walker static void
22500af754f0SSeth Howell io_device_free(struct io_device *dev)
225102d75f62SBen Walker {
22525feebd85SShuhei Matsumoto 	int rc __attribute__((unused));
22535feebd85SShuhei Matsumoto 
225402d75f62SBen Walker 	if (dev->unregister_cb == NULL) {
225502d75f62SBen Walker 		free(dev);
225602d75f62SBen Walker 	} else {
225702d75f62SBen Walker 		assert(dev->unregister_thread != NULL);
22582172c432STomasz Zawadzki 		SPDK_DEBUGLOG(thread, "io_device %s (%p) needs to unregister from thread %s\n",
2259c9402000SBen Walker 			      dev->name, dev->io_device, dev->unregister_thread->name);
22605feebd85SShuhei Matsumoto 		rc = spdk_thread_send_msg(dev->unregister_thread, _finish_unregister, dev);
22615feebd85SShuhei Matsumoto 		assert(rc == 0);
226202d75f62SBen Walker 	}
226302d75f62SBen Walker }
226402d75f62SBen Walker 
226502d75f62SBen Walker void
226602d75f62SBen Walker spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregister_cb)
226702d75f62SBen Walker {
226802d75f62SBen Walker 	struct io_device *dev;
226902d75f62SBen Walker 	uint32_t refcnt;
227002d75f62SBen Walker 	struct spdk_thread *thread;
227102d75f62SBen Walker 
227202d75f62SBen Walker 	thread = spdk_get_thread();
2273c4978672STomasz Zawadzki 	if (!thread) {
22748a252783SSeth Howell 		SPDK_ERRLOG("called from non-SPDK thread\n");
2275c4978672STomasz Zawadzki 		assert(false);
2276c4978672STomasz Zawadzki 		return;
2277c4978672STomasz Zawadzki 	}
227802d75f62SBen Walker 
227902d75f62SBen Walker 	pthread_mutex_lock(&g_devlist_mutex);
228049c6afbfSJiewei Ke 	dev = io_device_get(io_device);
228102d75f62SBen Walker 	if (!dev) {
228202d75f62SBen Walker 		SPDK_ERRLOG("io_device %p not found\n", io_device);
228369679c80SJim Harris 		assert(false);
228402d75f62SBen Walker 		pthread_mutex_unlock(&g_devlist_mutex);
228502d75f62SBen Walker 		return;
228602d75f62SBen Walker 	}
228702d75f62SBen Walker 
2288d33497d3SJim Harris 	/* The for_each_count check differentiates the user attempting to unregister the
2289d33497d3SJim Harris 	 * device a second time, from the internal call to this function that occurs
2290d33497d3SJim Harris 	 * after the for_each_count reaches 0.
2291d33497d3SJim Harris 	 */
2292d33497d3SJim Harris 	if (dev->pending_unregister && dev->for_each_count > 0) {
2293d33497d3SJim Harris 		SPDK_ERRLOG("io_device %p already has a pending unregister\n", io_device);
2294d33497d3SJim Harris 		assert(false);
229502d75f62SBen Walker 		pthread_mutex_unlock(&g_devlist_mutex);
229602d75f62SBen Walker 		return;
229702d75f62SBen Walker 	}
229802d75f62SBen Walker 
229902d75f62SBen Walker 	dev->unregister_cb = unregister_cb;
2300d33497d3SJim Harris 	dev->unregister_thread = thread;
2301d33497d3SJim Harris 
2302d33497d3SJim Harris 	if (dev->for_each_count > 0) {
2303d33497d3SJim Harris 		SPDK_WARNLOG("io_device %s (%p) has %u for_each calls outstanding\n",
2304d33497d3SJim Harris 			     dev->name, io_device, dev->for_each_count);
2305d33497d3SJim Harris 		dev->pending_unregister = true;
2306d33497d3SJim Harris 		pthread_mutex_unlock(&g_devlist_mutex);
2307d33497d3SJim Harris 		return;
2308d33497d3SJim Harris 	}
2309d33497d3SJim Harris 
231002d75f62SBen Walker 	dev->unregistered = true;
231149c6afbfSJiewei Ke 	RB_REMOVE(io_device_tree, &g_io_devices, dev);
231202d75f62SBen Walker 	refcnt = dev->refcnt;
231302d75f62SBen Walker 	pthread_mutex_unlock(&g_devlist_mutex);
231402d75f62SBen Walker 
23152172c432STomasz Zawadzki 	SPDK_DEBUGLOG(thread, "Unregistering io_device %s (%p) from thread %s\n",
2316c9402000SBen Walker 		      dev->name, dev->io_device, thread->name);
2317ec571793SBen Walker 
23186fdc71ecSShuhei Matsumoto 	if (unregister_cb) {
23196fdc71ecSShuhei Matsumoto 		thread->pending_unregister_count++;
23206fdc71ecSShuhei Matsumoto 	}
23216fdc71ecSShuhei Matsumoto 
232202d75f62SBen Walker 	if (refcnt > 0) {
232302d75f62SBen Walker 		/* defer deletion */
232402d75f62SBen Walker 		return;
232502d75f62SBen Walker 	}
232602d75f62SBen Walker 
23270af754f0SSeth Howell 	io_device_free(dev);
232802d75f62SBen Walker }
232902d75f62SBen Walker 
2330eb9d8f34SShuhei Matsumoto const char *
2331eb9d8f34SShuhei Matsumoto spdk_io_device_get_name(struct io_device *dev)
2332eb9d8f34SShuhei Matsumoto {
2333eb9d8f34SShuhei Matsumoto 	return dev->name;
2334eb9d8f34SShuhei Matsumoto }
2335eb9d8f34SShuhei Matsumoto 
2336df559ab6SJiewei Ke static struct spdk_io_channel *
2337df559ab6SJiewei Ke thread_get_io_channel(struct spdk_thread *thread, struct io_device *dev)
2338df559ab6SJiewei Ke {
2339df559ab6SJiewei Ke 	struct spdk_io_channel find = {};
2340df559ab6SJiewei Ke 
2341df559ab6SJiewei Ke 	find.dev = dev;
2342df559ab6SJiewei Ke 	return RB_FIND(io_channel_tree, &thread->io_channels, &find);
2343df559ab6SJiewei Ke }
2344df559ab6SJiewei Ke 
234502d75f62SBen Walker struct spdk_io_channel *
234602d75f62SBen Walker spdk_get_io_channel(void *io_device)
234702d75f62SBen Walker {
234802d75f62SBen Walker 	struct spdk_io_channel *ch;
234902d75f62SBen Walker 	struct spdk_thread *thread;
235002d75f62SBen Walker 	struct io_device *dev;
235102d75f62SBen Walker 	int rc;
235202d75f62SBen Walker 
235302d75f62SBen Walker 	pthread_mutex_lock(&g_devlist_mutex);
235449c6afbfSJiewei Ke 	dev = io_device_get(io_device);
235502d75f62SBen Walker 	if (dev == NULL) {
235602d75f62SBen Walker 		SPDK_ERRLOG("could not find io_device %p\n", io_device);
235702d75f62SBen Walker 		pthread_mutex_unlock(&g_devlist_mutex);
235802d75f62SBen Walker 		return NULL;
235902d75f62SBen Walker 	}
236002d75f62SBen Walker 
236102d75f62SBen Walker 	thread = _get_thread();
236202d75f62SBen Walker 	if (!thread) {
236302d75f62SBen Walker 		SPDK_ERRLOG("No thread allocated\n");
236402d75f62SBen Walker 		pthread_mutex_unlock(&g_devlist_mutex);
236502d75f62SBen Walker 		return NULL;
236602d75f62SBen Walker 	}
236702d75f62SBen Walker 
23686397735bSShuhei Matsumoto 	if (spdk_unlikely(thread->state == SPDK_THREAD_STATE_EXITED)) {
236913595495SShuhei Matsumoto 		SPDK_ERRLOG("Thread %s is marked as exited\n", thread->name);
237013595495SShuhei Matsumoto 		pthread_mutex_unlock(&g_devlist_mutex);
237113595495SShuhei Matsumoto 		return NULL;
237213595495SShuhei Matsumoto 	}
237313595495SShuhei Matsumoto 
2374df559ab6SJiewei Ke 	ch = thread_get_io_channel(thread, dev);
2375df559ab6SJiewei Ke 	if (ch != NULL) {
237602d75f62SBen Walker 		ch->ref++;
2377ec571793SBen Walker 
23782172c432STomasz Zawadzki 		SPDK_DEBUGLOG(thread, "Get io_channel %p for io_device %s (%p) on thread %s refcnt %u\n",
2379c9402000SBen Walker 			      ch, dev->name, dev->io_device, thread->name, ch->ref);
2380ec571793SBen Walker 
238102d75f62SBen Walker 		/*
238202d75f62SBen Walker 		 * An I/O channel already exists for this device on this
238302d75f62SBen Walker 		 *  thread, so return it.
238402d75f62SBen Walker 		 */
238502d75f62SBen Walker 		pthread_mutex_unlock(&g_devlist_mutex);
2386462eb754SKonrad Sztyber 		spdk_trace_record(TRACE_THREAD_IOCH_GET, 0, 0,
2387d87afb4cSKonrad Sztyber 				  (uint64_t)spdk_io_channel_get_ctx(ch), ch->ref);
238802d75f62SBen Walker 		return ch;
238902d75f62SBen Walker 	}
239002d75f62SBen Walker 
239102d75f62SBen Walker 	ch = calloc(1, sizeof(*ch) + dev->ctx_size);
239202d75f62SBen Walker 	if (ch == NULL) {
239302d75f62SBen Walker 		SPDK_ERRLOG("could not calloc spdk_io_channel\n");
239402d75f62SBen Walker 		pthread_mutex_unlock(&g_devlist_mutex);
239502d75f62SBen Walker 		return NULL;
239602d75f62SBen Walker 	}
239702d75f62SBen Walker 
239802d75f62SBen Walker 	ch->dev = dev;
239902d75f62SBen Walker 	ch->destroy_cb = dev->destroy_cb;
240002d75f62SBen Walker 	ch->thread = thread;
240102d75f62SBen Walker 	ch->ref = 1;
2402399a38a5SBen Walker 	ch->destroy_ref = 0;
2403df559ab6SJiewei Ke 	RB_INSERT(io_channel_tree, &thread->io_channels, ch);
240402d75f62SBen Walker 
24052172c432STomasz Zawadzki 	SPDK_DEBUGLOG(thread, "Get io_channel %p for io_device %s (%p) on thread %s refcnt %u\n",
2406c9402000SBen Walker 		      ch, dev->name, dev->io_device, thread->name, ch->ref);
2407ec571793SBen Walker 
240802d75f62SBen Walker 	dev->refcnt++;
240902d75f62SBen Walker 
241002d75f62SBen Walker 	pthread_mutex_unlock(&g_devlist_mutex);
241102d75f62SBen Walker 
241202d75f62SBen Walker 	rc = dev->create_cb(io_device, (uint8_t *)ch + sizeof(*ch));
2413bb2486a4Syidong0635 	if (rc != 0) {
241402d75f62SBen Walker 		pthread_mutex_lock(&g_devlist_mutex);
2415df559ab6SJiewei Ke 		RB_REMOVE(io_channel_tree, &ch->thread->io_channels, ch);
241602d75f62SBen Walker 		dev->refcnt--;
241702d75f62SBen Walker 		free(ch);
2418f77ea250SDenis Barakhtanov 		SPDK_ERRLOG("could not create io_channel for io_device %s (%p): %s (rc=%d)\n",
2419f77ea250SDenis Barakhtanov 			    dev->name, io_device, spdk_strerror(-rc), rc);
242002d75f62SBen Walker 		pthread_mutex_unlock(&g_devlist_mutex);
242102d75f62SBen Walker 		return NULL;
242202d75f62SBen Walker 	}
242302d75f62SBen Walker 
2424d87afb4cSKonrad Sztyber 	spdk_trace_record(TRACE_THREAD_IOCH_GET, 0, 0, (uint64_t)spdk_io_channel_get_ctx(ch), 1);
242502d75f62SBen Walker 	return ch;
242602d75f62SBen Walker }
242702d75f62SBen Walker 
242802d75f62SBen Walker static void
24290af754f0SSeth Howell put_io_channel(void *arg)
243002d75f62SBen Walker {
243102d75f62SBen Walker 	struct spdk_io_channel *ch = arg;
243202d75f62SBen Walker 	bool do_remove_dev = true;
2433c4978672STomasz Zawadzki 	struct spdk_thread *thread;
2434c4978672STomasz Zawadzki 
2435c4978672STomasz Zawadzki 	thread = spdk_get_thread();
2436c4978672STomasz Zawadzki 	if (!thread) {
24378a252783SSeth Howell 		SPDK_ERRLOG("called from non-SPDK thread\n");
2438c4978672STomasz Zawadzki 		assert(false);
2439c4978672STomasz Zawadzki 		return;
2440c4978672STomasz Zawadzki 	}
244102d75f62SBen Walker 
24422172c432STomasz Zawadzki 	SPDK_DEBUGLOG(thread,
2443655e8e16SShuhei Matsumoto 		      "Releasing io_channel %p for io_device %s (%p) on thread %s\n",
2444655e8e16SShuhei Matsumoto 		      ch, ch->dev->name, ch->dev->io_device, thread->name);
2445ec571793SBen Walker 
2446c4978672STomasz Zawadzki 	assert(ch->thread == thread);
244702d75f62SBen Walker 
2448399a38a5SBen Walker 	ch->destroy_ref--;
2449399a38a5SBen Walker 
2450399a38a5SBen Walker 	if (ch->ref > 0 || ch->destroy_ref > 0) {
24519ddf6438SJim Harris 		/*
24529ddf6438SJim Harris 		 * Another reference to the associated io_device was requested
24539ddf6438SJim Harris 		 *  after this message was sent but before it had a chance to
24549ddf6438SJim Harris 		 *  execute.
24559ddf6438SJim Harris 		 */
24569ddf6438SJim Harris 		return;
24579ddf6438SJim Harris 	}
24589ddf6438SJim Harris 
24599ddf6438SJim Harris 	pthread_mutex_lock(&g_devlist_mutex);
2460df559ab6SJiewei Ke 	RB_REMOVE(io_channel_tree, &ch->thread->io_channels, ch);
24619ddf6438SJim Harris 	pthread_mutex_unlock(&g_devlist_mutex);
24629ddf6438SJim Harris 
24639ddf6438SJim Harris 	/* Don't hold the devlist mutex while the destroy_cb is called. */
246402d75f62SBen Walker 	ch->destroy_cb(ch->dev->io_device, spdk_io_channel_get_ctx(ch));
246502d75f62SBen Walker 
246602d75f62SBen Walker 	pthread_mutex_lock(&g_devlist_mutex);
246702d75f62SBen Walker 	ch->dev->refcnt--;
246802d75f62SBen Walker 
246902d75f62SBen Walker 	if (!ch->dev->unregistered) {
247002d75f62SBen Walker 		do_remove_dev = false;
247102d75f62SBen Walker 	}
247202d75f62SBen Walker 
247302d75f62SBen Walker 	if (ch->dev->refcnt > 0) {
247402d75f62SBen Walker 		do_remove_dev = false;
247502d75f62SBen Walker 	}
247602d75f62SBen Walker 
247702d75f62SBen Walker 	pthread_mutex_unlock(&g_devlist_mutex);
247802d75f62SBen Walker 
247902d75f62SBen Walker 	if (do_remove_dev) {
24800af754f0SSeth Howell 		io_device_free(ch->dev);
248102d75f62SBen Walker 	}
248202d75f62SBen Walker 	free(ch);
248302d75f62SBen Walker }
248402d75f62SBen Walker 
248502d75f62SBen Walker void
248602d75f62SBen Walker spdk_put_io_channel(struct spdk_io_channel *ch)
248702d75f62SBen Walker {
2488655e8e16SShuhei Matsumoto 	struct spdk_thread *thread;
24895feebd85SShuhei Matsumoto 	int rc __attribute__((unused));
2490655e8e16SShuhei Matsumoto 
2491462eb754SKonrad Sztyber 	spdk_trace_record(TRACE_THREAD_IOCH_PUT, 0, 0,
2492d87afb4cSKonrad Sztyber 			  (uint64_t)spdk_io_channel_get_ctx(ch), ch->ref);
2493462eb754SKonrad Sztyber 
2494655e8e16SShuhei Matsumoto 	thread = spdk_get_thread();
2495655e8e16SShuhei Matsumoto 	if (!thread) {
2496655e8e16SShuhei Matsumoto 		SPDK_ERRLOG("called from non-SPDK thread\n");
2497655e8e16SShuhei Matsumoto 		assert(false);
2498655e8e16SShuhei Matsumoto 		return;
2499655e8e16SShuhei Matsumoto 	}
2500655e8e16SShuhei Matsumoto 
2501655e8e16SShuhei Matsumoto 	if (ch->thread != thread) {
2502d4255740SJohn Levon 		wrong_thread(__func__, "ch", ch->thread, thread);
2503655e8e16SShuhei Matsumoto 		return;
2504655e8e16SShuhei Matsumoto 	}
2505655e8e16SShuhei Matsumoto 
25062172c432STomasz Zawadzki 	SPDK_DEBUGLOG(thread,
2507c9402000SBen Walker 		      "Putting io_channel %p for io_device %s (%p) on thread %s refcnt %u\n",
2508655e8e16SShuhei Matsumoto 		      ch, ch->dev->name, ch->dev->io_device, thread->name, ch->ref);
2509ec571793SBen Walker 
251002d75f62SBen Walker 	ch->ref--;
251102d75f62SBen Walker 
251202d75f62SBen Walker 	if (ch->ref == 0) {
2513399a38a5SBen Walker 		ch->destroy_ref++;
25140af754f0SSeth Howell 		rc = spdk_thread_send_msg(thread, put_io_channel, ch);
25155feebd85SShuhei Matsumoto 		assert(rc == 0);
251602d75f62SBen Walker 	}
251702d75f62SBen Walker }
251802d75f62SBen Walker 
251902d75f62SBen Walker struct spdk_io_channel *
252002d75f62SBen Walker spdk_io_channel_from_ctx(void *ctx)
252102d75f62SBen Walker {
252202d75f62SBen Walker 	return (struct spdk_io_channel *)((uint8_t *)ctx - sizeof(struct spdk_io_channel));
252302d75f62SBen Walker }
252402d75f62SBen Walker 
252502d75f62SBen Walker struct spdk_thread *
252602d75f62SBen Walker spdk_io_channel_get_thread(struct spdk_io_channel *ch)
252702d75f62SBen Walker {
252802d75f62SBen Walker 	return ch->thread;
252902d75f62SBen Walker }
253002d75f62SBen Walker 
25310247a994SShuhei Matsumoto void *
25320247a994SShuhei Matsumoto spdk_io_channel_get_io_device(struct spdk_io_channel *ch)
25330247a994SShuhei Matsumoto {
25340247a994SShuhei Matsumoto 	return ch->dev->io_device;
25350247a994SShuhei Matsumoto }
25360247a994SShuhei Matsumoto 
25375fc0475cSJiewei Ke const char *
25385fc0475cSJiewei Ke spdk_io_channel_get_io_device_name(struct spdk_io_channel *ch)
25395fc0475cSJiewei Ke {
25405fc0475cSJiewei Ke 	return spdk_io_device_get_name(ch->dev);
25415fc0475cSJiewei Ke }
25425fc0475cSJiewei Ke 
2543bd2fae2fSShuhei Matsumoto int
2544bd2fae2fSShuhei Matsumoto spdk_io_channel_get_ref_count(struct spdk_io_channel *ch)
2545bd2fae2fSShuhei Matsumoto {
2546bd2fae2fSShuhei Matsumoto 	return ch->ref;
2547bd2fae2fSShuhei Matsumoto }
2548bd2fae2fSShuhei Matsumoto 
254902d75f62SBen Walker struct spdk_io_channel_iter {
255002d75f62SBen Walker 	void *io_device;
255102d75f62SBen Walker 	struct io_device *dev;
255202d75f62SBen Walker 	spdk_channel_msg fn;
255302d75f62SBen Walker 	int status;
255402d75f62SBen Walker 	void *ctx;
255502d75f62SBen Walker 	struct spdk_io_channel *ch;
255602d75f62SBen Walker 
255702d75f62SBen Walker 	struct spdk_thread *cur_thread;
255802d75f62SBen Walker 
255902d75f62SBen Walker 	struct spdk_thread *orig_thread;
256002d75f62SBen Walker 	spdk_channel_for_each_cpl cpl;
256102d75f62SBen Walker };
256202d75f62SBen Walker 
256302d75f62SBen Walker void *
256402d75f62SBen Walker spdk_io_channel_iter_get_io_device(struct spdk_io_channel_iter *i)
256502d75f62SBen Walker {
256602d75f62SBen Walker 	return i->io_device;
256702d75f62SBen Walker }
256802d75f62SBen Walker 
256902d75f62SBen Walker struct spdk_io_channel *
257002d75f62SBen Walker spdk_io_channel_iter_get_channel(struct spdk_io_channel_iter *i)
257102d75f62SBen Walker {
257202d75f62SBen Walker 	return i->ch;
257302d75f62SBen Walker }
257402d75f62SBen Walker 
257502d75f62SBen Walker void *
257602d75f62SBen Walker spdk_io_channel_iter_get_ctx(struct spdk_io_channel_iter *i)
257702d75f62SBen Walker {
257802d75f62SBen Walker 	return i->ctx;
257902d75f62SBen Walker }
258002d75f62SBen Walker 
258102d75f62SBen Walker static void
258202d75f62SBen Walker _call_completion(void *ctx)
258302d75f62SBen Walker {
258402d75f62SBen Walker 	struct spdk_io_channel_iter *i = ctx;
258502d75f62SBen Walker 
25866f4e0c95SShuhei Matsumoto 	assert(i->orig_thread->for_each_count > 0);
25876f4e0c95SShuhei Matsumoto 	i->orig_thread->for_each_count--;
25886f4e0c95SShuhei Matsumoto 
258902d75f62SBen Walker 	if (i->cpl != NULL) {
259002d75f62SBen Walker 		i->cpl(i, i->status);
259102d75f62SBen Walker 	}
259202d75f62SBen Walker 	free(i);
259302d75f62SBen Walker }
259402d75f62SBen Walker 
259502d75f62SBen Walker static void
259602d75f62SBen Walker _call_channel(void *ctx)
259702d75f62SBen Walker {
259802d75f62SBen Walker 	struct spdk_io_channel_iter *i = ctx;
259902d75f62SBen Walker 	struct spdk_io_channel *ch;
260002d75f62SBen Walker 
260102d75f62SBen Walker 	/*
260202d75f62SBen Walker 	 * It is possible that the channel was deleted before this
260302d75f62SBen Walker 	 *  message had a chance to execute.  If so, skip calling
260402d75f62SBen Walker 	 *  the fn() on this thread.
260502d75f62SBen Walker 	 */
260602d75f62SBen Walker 	pthread_mutex_lock(&g_devlist_mutex);
2607df559ab6SJiewei Ke 	ch = thread_get_io_channel(i->cur_thread, i->dev);
260802d75f62SBen Walker 	pthread_mutex_unlock(&g_devlist_mutex);
260902d75f62SBen Walker 
261002d75f62SBen Walker 	if (ch) {
261102d75f62SBen Walker 		i->fn(i);
261202d75f62SBen Walker 	} else {
261302d75f62SBen Walker 		spdk_for_each_channel_continue(i, 0);
261402d75f62SBen Walker 	}
261502d75f62SBen Walker }
261602d75f62SBen Walker 
261702d75f62SBen Walker void
261802d75f62SBen Walker spdk_for_each_channel(void *io_device, spdk_channel_msg fn, void *ctx,
261902d75f62SBen Walker 		      spdk_channel_for_each_cpl cpl)
262002d75f62SBen Walker {
262102d75f62SBen Walker 	struct spdk_thread *thread;
262202d75f62SBen Walker 	struct spdk_io_channel *ch;
262302d75f62SBen Walker 	struct spdk_io_channel_iter *i;
2624750f2b4bSJim Harris 	int rc __attribute__((unused));
262502d75f62SBen Walker 
262602d75f62SBen Walker 	i = calloc(1, sizeof(*i));
262702d75f62SBen Walker 	if (!i) {
262802d75f62SBen Walker 		SPDK_ERRLOG("Unable to allocate iterator\n");
262913c7a98dSGangCao 		assert(false);
263002d75f62SBen Walker 		return;
263102d75f62SBen Walker 	}
263202d75f62SBen Walker 
263302d75f62SBen Walker 	i->io_device = io_device;
263402d75f62SBen Walker 	i->fn = fn;
263502d75f62SBen Walker 	i->ctx = ctx;
263602d75f62SBen Walker 	i->cpl = cpl;
2637e45450d2SJiewei Ke 	i->orig_thread = _get_thread();
263802d75f62SBen Walker 
26396f4e0c95SShuhei Matsumoto 	i->orig_thread->for_each_count++;
26406f4e0c95SShuhei Matsumoto 
264102d75f62SBen Walker 	pthread_mutex_lock(&g_devlist_mutex);
2642e45450d2SJiewei Ke 	i->dev = io_device_get(io_device);
2643e45450d2SJiewei Ke 	if (i->dev == NULL) {
2644e45450d2SJiewei Ke 		SPDK_ERRLOG("could not find io_device %p\n", io_device);
2645e45450d2SJiewei Ke 		assert(false);
2646821e673cSJim Harris 		i->status = -ENODEV;
2647e45450d2SJiewei Ke 		goto end;
2648e45450d2SJiewei Ke 	}
264902d75f62SBen Walker 
2650d33497d3SJim Harris 	/* Do not allow new for_each operations if we are already waiting to unregister
2651d33497d3SJim Harris 	 * the device for other for_each operations to complete.
2652d33497d3SJim Harris 	 */
2653d33497d3SJim Harris 	if (i->dev->pending_unregister) {
2654d33497d3SJim Harris 		SPDK_ERRLOG("io_device %p has a pending unregister\n", io_device);
2655d33497d3SJim Harris 		i->status = -ENODEV;
2656d33497d3SJim Harris 		goto end;
2657d33497d3SJim Harris 	}
2658d33497d3SJim Harris 
265902d75f62SBen Walker 	TAILQ_FOREACH(thread, &g_threads, tailq) {
2660df559ab6SJiewei Ke 		ch = thread_get_io_channel(thread, i->dev);
2661df559ab6SJiewei Ke 		if (ch != NULL) {
266202d75f62SBen Walker 			ch->dev->for_each_count++;
266302d75f62SBen Walker 			i->cur_thread = thread;
266402d75f62SBen Walker 			i->ch = ch;
266502d75f62SBen Walker 			pthread_mutex_unlock(&g_devlist_mutex);
26664036f95bSJim Harris 			rc = spdk_thread_send_msg(thread, _call_channel, i);
26674036f95bSJim Harris 			assert(rc == 0);
266802d75f62SBen Walker 			return;
266902d75f62SBen Walker 		}
267002d75f62SBen Walker 	}
267102d75f62SBen Walker 
2672e45450d2SJiewei Ke end:
267302d75f62SBen Walker 	pthread_mutex_unlock(&g_devlist_mutex);
267402d75f62SBen Walker 
26754036f95bSJim Harris 	rc = spdk_thread_send_msg(i->orig_thread, _call_completion, i);
26764036f95bSJim Harris 	assert(rc == 0);
267702d75f62SBen Walker }
267802d75f62SBen Walker 
2679d33497d3SJim Harris static void
2680d33497d3SJim Harris __pending_unregister(void *arg)
2681d33497d3SJim Harris {
2682d33497d3SJim Harris 	struct io_device *dev = arg;
2683d33497d3SJim Harris 
2684d33497d3SJim Harris 	assert(dev->pending_unregister);
2685d33497d3SJim Harris 	assert(dev->for_each_count == 0);
2686d33497d3SJim Harris 	spdk_io_device_unregister(dev->io_device, dev->unregister_cb);
2687d33497d3SJim Harris }
2688d33497d3SJim Harris 
268902d75f62SBen Walker void
269002d75f62SBen Walker spdk_for_each_channel_continue(struct spdk_io_channel_iter *i, int status)
269102d75f62SBen Walker {
269202d75f62SBen Walker 	struct spdk_thread *thread;
269302d75f62SBen Walker 	struct spdk_io_channel *ch;
2694d33497d3SJim Harris 	struct io_device *dev;
26955feebd85SShuhei Matsumoto 	int rc __attribute__((unused));
269602d75f62SBen Walker 
269702d75f62SBen Walker 	assert(i->cur_thread == spdk_get_thread());
269802d75f62SBen Walker 
269902d75f62SBen Walker 	i->status = status;
270002d75f62SBen Walker 
270102d75f62SBen Walker 	pthread_mutex_lock(&g_devlist_mutex);
2702d33497d3SJim Harris 	dev = i->dev;
270302d75f62SBen Walker 	if (status) {
270402d75f62SBen Walker 		goto end;
270502d75f62SBen Walker 	}
2706d33497d3SJim Harris 
270702d75f62SBen Walker 	thread = TAILQ_NEXT(i->cur_thread, tailq);
270802d75f62SBen Walker 	while (thread) {
2709d33497d3SJim Harris 		ch = thread_get_io_channel(thread, dev);
2710df559ab6SJiewei Ke 		if (ch != NULL) {
271102d75f62SBen Walker 			i->cur_thread = thread;
271202d75f62SBen Walker 			i->ch = ch;
271302d75f62SBen Walker 			pthread_mutex_unlock(&g_devlist_mutex);
27145feebd85SShuhei Matsumoto 			rc = spdk_thread_send_msg(thread, _call_channel, i);
27155feebd85SShuhei Matsumoto 			assert(rc == 0);
271602d75f62SBen Walker 			return;
271702d75f62SBen Walker 		}
271802d75f62SBen Walker 		thread = TAILQ_NEXT(thread, tailq);
271902d75f62SBen Walker 	}
272002d75f62SBen Walker 
272102d75f62SBen Walker end:
2722d33497d3SJim Harris 	dev->for_each_count--;
272302d75f62SBen Walker 	i->ch = NULL;
272402d75f62SBen Walker 	pthread_mutex_unlock(&g_devlist_mutex);
272502d75f62SBen Walker 
27265feebd85SShuhei Matsumoto 	rc = spdk_thread_send_msg(i->orig_thread, _call_completion, i);
27275feebd85SShuhei Matsumoto 	assert(rc == 0);
2728d33497d3SJim Harris 
2729d33497d3SJim Harris 	pthread_mutex_lock(&g_devlist_mutex);
2730d33497d3SJim Harris 	if (dev->pending_unregister && dev->for_each_count == 0) {
2731d33497d3SJim Harris 		rc = spdk_thread_send_msg(dev->unregister_thread, __pending_unregister, dev);
2732d33497d3SJim Harris 		assert(rc == 0);
2733d33497d3SJim Harris 	}
2734d33497d3SJim Harris 	pthread_mutex_unlock(&g_devlist_mutex);
273502d75f62SBen Walker }
2736ec571793SBen Walker 
27374bf6e4bbSLiu Xiaodong static void
27384bf6e4bbSLiu Xiaodong thread_interrupt_destroy(struct spdk_thread *thread)
27394bf6e4bbSLiu Xiaodong {
27404bf6e4bbSLiu Xiaodong 	struct spdk_fd_group *fgrp = thread->fgrp;
27414bf6e4bbSLiu Xiaodong 
27424bf6e4bbSLiu Xiaodong 	SPDK_INFOLOG(thread, "destroy fgrp for thread (%s)\n", thread->name);
27434bf6e4bbSLiu Xiaodong 
274443607106SLiu Xiaodong 	if (thread->msg_fd < 0) {
27454bf6e4bbSLiu Xiaodong 		return;
27464bf6e4bbSLiu Xiaodong 	}
27474bf6e4bbSLiu Xiaodong 
27484bf6e4bbSLiu Xiaodong 	spdk_fd_group_remove(fgrp, thread->msg_fd);
27494bf6e4bbSLiu Xiaodong 	close(thread->msg_fd);
275043607106SLiu Xiaodong 	thread->msg_fd = -1;
27514bf6e4bbSLiu Xiaodong 
27524bf6e4bbSLiu Xiaodong 	spdk_fd_group_destroy(fgrp);
27534bf6e4bbSLiu Xiaodong 	thread->fgrp = NULL;
27544bf6e4bbSLiu Xiaodong }
27554bf6e4bbSLiu Xiaodong 
27564bf6e4bbSLiu Xiaodong #ifdef __linux__
27574bf6e4bbSLiu Xiaodong static int
27584bf6e4bbSLiu Xiaodong thread_interrupt_msg_process(void *arg)
27594bf6e4bbSLiu Xiaodong {
27604bf6e4bbSLiu Xiaodong 	struct spdk_thread *thread = arg;
2761f3c1b59aSBen Walker 	struct spdk_thread *orig_thread;
27624bf6e4bbSLiu Xiaodong 	uint32_t msg_count;
27634bf6e4bbSLiu Xiaodong 	spdk_msg_fn critical_msg;
27644bf6e4bbSLiu Xiaodong 	int rc = 0;
27654e8032abSLiu Xiaodong 	uint64_t notify = 1;
27664e8032abSLiu Xiaodong 
27674e8032abSLiu Xiaodong 	assert(spdk_interrupt_mode_is_enabled());
27684e8032abSLiu Xiaodong 
2769f3c1b59aSBen Walker 	orig_thread = spdk_get_thread();
2770f3c1b59aSBen Walker 	spdk_set_thread(thread);
2771f3c1b59aSBen Walker 
27724bf6e4bbSLiu Xiaodong 	critical_msg = thread->critical_msg;
27734bf6e4bbSLiu Xiaodong 	if (spdk_unlikely(critical_msg != NULL)) {
27744bf6e4bbSLiu Xiaodong 		critical_msg(NULL);
27754bf6e4bbSLiu Xiaodong 		thread->critical_msg = NULL;
2776b8db1af4SLiu Xiaodong 		rc = 1;
27774bf6e4bbSLiu Xiaodong 	}
27784bf6e4bbSLiu Xiaodong 
27794bf6e4bbSLiu Xiaodong 	msg_count = msg_queue_run_batch(thread, 0);
27804bf6e4bbSLiu Xiaodong 	if (msg_count) {
27814bf6e4bbSLiu Xiaodong 		rc = 1;
27824bf6e4bbSLiu Xiaodong 	}
27834bf6e4bbSLiu Xiaodong 
2784f3c1b59aSBen Walker 	SPIN_ASSERT(thread->lock_count == 0, SPIN_ERR_HOLD_DURING_SWITCH);
2785f3c1b59aSBen Walker 	if (spdk_unlikely(!thread->in_interrupt)) {
2786f3c1b59aSBen Walker 		/* The thread transitioned to poll mode in a msg during the above processing.
2787f3c1b59aSBen Walker 		 * Clear msg_fd since thread messages will be polled directly in poll mode.
2788f3c1b59aSBen Walker 		 */
2789f3c1b59aSBen Walker 		rc = read(thread->msg_fd, &notify, sizeof(notify));
2790f3c1b59aSBen Walker 		if (rc < 0 && errno != EAGAIN) {
2791f3c1b59aSBen Walker 			SPDK_ERRLOG("failed to acknowledge msg queue: %s.\n", spdk_strerror(errno));
2792f3c1b59aSBen Walker 		}
2793f3c1b59aSBen Walker 	}
2794f3c1b59aSBen Walker 
2795f3c1b59aSBen Walker 	spdk_set_thread(orig_thread);
27964bf6e4bbSLiu Xiaodong 	return rc;
27974bf6e4bbSLiu Xiaodong }
27984bf6e4bbSLiu Xiaodong 
27994bf6e4bbSLiu Xiaodong static int
28004bf6e4bbSLiu Xiaodong thread_interrupt_create(struct spdk_thread *thread)
28014bf6e4bbSLiu Xiaodong {
2802*7219bd1aSAnkit Kumar 	struct spdk_event_handler_opts opts = {};
28034bf6e4bbSLiu Xiaodong 	int rc;
28044bf6e4bbSLiu Xiaodong 
28054bf6e4bbSLiu Xiaodong 	SPDK_INFOLOG(thread, "Create fgrp for thread (%s)\n", thread->name);
28064bf6e4bbSLiu Xiaodong 
28074bf6e4bbSLiu Xiaodong 	rc = spdk_fd_group_create(&thread->fgrp);
28084bf6e4bbSLiu Xiaodong 	if (rc) {
2809*7219bd1aSAnkit Kumar 		thread->msg_fd = -1;
28104bf6e4bbSLiu Xiaodong 		return rc;
28114bf6e4bbSLiu Xiaodong 	}
28124bf6e4bbSLiu Xiaodong 
28134bf6e4bbSLiu Xiaodong 	thread->msg_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
28144bf6e4bbSLiu Xiaodong 	if (thread->msg_fd < 0) {
28154bf6e4bbSLiu Xiaodong 		rc = -errno;
28164bf6e4bbSLiu Xiaodong 		spdk_fd_group_destroy(thread->fgrp);
28174bf6e4bbSLiu Xiaodong 		thread->fgrp = NULL;
28184bf6e4bbSLiu Xiaodong 
28194bf6e4bbSLiu Xiaodong 		return rc;
28204bf6e4bbSLiu Xiaodong 	}
28214bf6e4bbSLiu Xiaodong 
2822*7219bd1aSAnkit Kumar 	spdk_fd_group_get_default_event_handler_opts(&opts, sizeof(opts));
2823*7219bd1aSAnkit Kumar 	opts.fd_type = SPDK_FD_TYPE_EVENTFD;
2824*7219bd1aSAnkit Kumar 
2825*7219bd1aSAnkit Kumar 	return SPDK_FD_GROUP_ADD_EXT(thread->fgrp, thread->msg_fd,
2826*7219bd1aSAnkit Kumar 				     thread_interrupt_msg_process, thread, &opts);
28274bf6e4bbSLiu Xiaodong }
28284bf6e4bbSLiu Xiaodong #else
28294bf6e4bbSLiu Xiaodong static int
28304bf6e4bbSLiu Xiaodong thread_interrupt_create(struct spdk_thread *thread)
28314bf6e4bbSLiu Xiaodong {
28324bf6e4bbSLiu Xiaodong 	return -ENOTSUP;
28334bf6e4bbSLiu Xiaodong }
28344bf6e4bbSLiu Xiaodong #endif
28354bf6e4bbSLiu Xiaodong 
2836f3c1b59aSBen Walker static int
2837f3c1b59aSBen Walker _interrupt_wrapper(void *ctx)
2838f3c1b59aSBen Walker {
2839f3c1b59aSBen Walker 	struct spdk_interrupt *intr = ctx;
2840f3c1b59aSBen Walker 	struct spdk_thread *orig_thread, *thread;
2841f3c1b59aSBen Walker 	int rc;
2842f3c1b59aSBen Walker 
2843f3c1b59aSBen Walker 	orig_thread = spdk_get_thread();
2844f3c1b59aSBen Walker 	thread = intr->thread;
2845f3c1b59aSBen Walker 
2846f3c1b59aSBen Walker 	spdk_set_thread(thread);
2847f3c1b59aSBen Walker 
2848f3c1b59aSBen Walker 	SPDK_DTRACE_PROBE4(interrupt_fd_process, intr->name, intr->efd,
2849f3c1b59aSBen Walker 			   intr->fn, intr->arg);
2850f3c1b59aSBen Walker 
2851f3c1b59aSBen Walker 	rc = intr->fn(intr->arg);
2852f3c1b59aSBen Walker 
2853f3c1b59aSBen Walker 	SPIN_ASSERT(thread->lock_count == 0, SPIN_ERR_HOLD_DURING_SWITCH);
2854f3c1b59aSBen Walker 
2855f3c1b59aSBen Walker 	spdk_set_thread(orig_thread);
2856f3c1b59aSBen Walker 
2857f3c1b59aSBen Walker 	return rc;
2858f3c1b59aSBen Walker }
2859f3c1b59aSBen Walker 
28604bf6e4bbSLiu Xiaodong struct spdk_interrupt *
28614bf6e4bbSLiu Xiaodong spdk_interrupt_register(int efd, spdk_interrupt_fn fn,
28624bf6e4bbSLiu Xiaodong 			void *arg, const char *name)
28634bf6e4bbSLiu Xiaodong {
2864df96ddccSKrzysztof Goreczny 	return spdk_interrupt_register_for_events(efd, SPDK_INTERRUPT_EVENT_IN, fn, arg, name);
2865df96ddccSKrzysztof Goreczny }
2866df96ddccSKrzysztof Goreczny 
2867df96ddccSKrzysztof Goreczny struct spdk_interrupt *
2868df96ddccSKrzysztof Goreczny spdk_interrupt_register_for_events(int efd, uint32_t events, spdk_interrupt_fn fn, void *arg,
2869df96ddccSKrzysztof Goreczny 				   const char *name)
2870df96ddccSKrzysztof Goreczny {
28713f50defdSAnkit Kumar 	struct spdk_event_handler_opts opts = {};
28723f50defdSAnkit Kumar 
28733f50defdSAnkit Kumar 	spdk_fd_group_get_default_event_handler_opts(&opts, sizeof(opts));
28743f50defdSAnkit Kumar 	opts.events = events;
28753f50defdSAnkit Kumar 	opts.fd_type = SPDK_FD_TYPE_DEFAULT;
28763f50defdSAnkit Kumar 
28773f50defdSAnkit Kumar 	return spdk_interrupt_register_ext(efd, fn, arg, name, &opts);
28783f50defdSAnkit Kumar }
28793f50defdSAnkit Kumar 
2880851f166eSKonrad Sztyber static struct spdk_interrupt *
2881969b360dSKonrad Sztyber alloc_interrupt(int efd, struct spdk_fd_group *fgrp, spdk_interrupt_fn fn, void *arg,
2882969b360dSKonrad Sztyber 		const char *name)
28833f50defdSAnkit Kumar {
28844bf6e4bbSLiu Xiaodong 	struct spdk_thread *thread;
28854bf6e4bbSLiu Xiaodong 	struct spdk_interrupt *intr;
28864bf6e4bbSLiu Xiaodong 
28874bf6e4bbSLiu Xiaodong 	thread = spdk_get_thread();
28884bf6e4bbSLiu Xiaodong 	if (!thread) {
28894bf6e4bbSLiu Xiaodong 		assert(false);
28904bf6e4bbSLiu Xiaodong 		return NULL;
28914bf6e4bbSLiu Xiaodong 	}
28924bf6e4bbSLiu Xiaodong 
28934bf6e4bbSLiu Xiaodong 	if (spdk_unlikely(thread->state != SPDK_THREAD_STATE_RUNNING)) {
28944bf6e4bbSLiu Xiaodong 		SPDK_ERRLOG("thread %s is marked as exited\n", thread->name);
28954bf6e4bbSLiu Xiaodong 		return NULL;
28964bf6e4bbSLiu Xiaodong 	}
28974bf6e4bbSLiu Xiaodong 
28984bf6e4bbSLiu Xiaodong 	intr = calloc(1, sizeof(*intr));
28994bf6e4bbSLiu Xiaodong 	if (intr == NULL) {
29004bf6e4bbSLiu Xiaodong 		SPDK_ERRLOG("Interrupt handler allocation failed\n");
29014bf6e4bbSLiu Xiaodong 		return NULL;
29024bf6e4bbSLiu Xiaodong 	}
29034bf6e4bbSLiu Xiaodong 
29044bf6e4bbSLiu Xiaodong 	if (name) {
29054bf6e4bbSLiu Xiaodong 		snprintf(intr->name, sizeof(intr->name), "%s", name);
29064bf6e4bbSLiu Xiaodong 	} else {
29074bf6e4bbSLiu Xiaodong 		snprintf(intr->name, sizeof(intr->name), "%p", fn);
29084bf6e4bbSLiu Xiaodong 	}
29094bf6e4bbSLiu Xiaodong 
2910969b360dSKonrad Sztyber 	assert(efd < 0 || fgrp == NULL);
29114bf6e4bbSLiu Xiaodong 	intr->efd = efd;
2912969b360dSKonrad Sztyber 	intr->fgrp = fgrp;
29134bf6e4bbSLiu Xiaodong 	intr->thread = thread;
2914f3c1b59aSBen Walker 	intr->fn = fn;
2915f3c1b59aSBen Walker 	intr->arg = arg;
29164bf6e4bbSLiu Xiaodong 
2917851f166eSKonrad Sztyber 	return intr;
2918851f166eSKonrad Sztyber }
291985478eccSBen Walker 
2920851f166eSKonrad Sztyber struct spdk_interrupt *
2921851f166eSKonrad Sztyber spdk_interrupt_register_ext(int efd, spdk_interrupt_fn fn, void *arg, const char *name,
2922851f166eSKonrad Sztyber 			    struct spdk_event_handler_opts *opts)
2923851f166eSKonrad Sztyber {
2924851f166eSKonrad Sztyber 	struct spdk_interrupt *intr;
2925851f166eSKonrad Sztyber 	int ret;
2926851f166eSKonrad Sztyber 
2927969b360dSKonrad Sztyber 	intr = alloc_interrupt(efd, NULL, fn, arg, name);
2928851f166eSKonrad Sztyber 	if (intr == NULL) {
2929851f166eSKonrad Sztyber 		return NULL;
2930851f166eSKonrad Sztyber 	}
2931851f166eSKonrad Sztyber 
2932851f166eSKonrad Sztyber 	ret = spdk_fd_group_add_ext(intr->thread->fgrp, efd,
2933851f166eSKonrad Sztyber 				    _interrupt_wrapper, intr, intr->name, opts);
293485478eccSBen Walker 	if (ret != 0) {
293585478eccSBen Walker 		SPDK_ERRLOG("thread %s: failed to add fd %d: %s\n",
2936851f166eSKonrad Sztyber 			    intr->thread->name, efd, spdk_strerror(-ret));
293785478eccSBen Walker 		free(intr);
293885478eccSBen Walker 		return NULL;
293985478eccSBen Walker 	}
294085478eccSBen Walker 
29414bf6e4bbSLiu Xiaodong 	return intr;
29424bf6e4bbSLiu Xiaodong }
29434bf6e4bbSLiu Xiaodong 
2944969b360dSKonrad Sztyber static int
2945969b360dSKonrad Sztyber interrupt_fd_group_wrapper(void *wrap_ctx, spdk_fd_fn cb_fn, void *cb_ctx)
2946969b360dSKonrad Sztyber {
2947969b360dSKonrad Sztyber 	struct spdk_interrupt *intr = wrap_ctx;
2948969b360dSKonrad Sztyber 	struct spdk_thread *orig_thread, *thread;
2949969b360dSKonrad Sztyber 	int rc;
2950969b360dSKonrad Sztyber 
2951969b360dSKonrad Sztyber 	orig_thread = spdk_get_thread();
2952969b360dSKonrad Sztyber 	thread = intr->thread;
2953969b360dSKonrad Sztyber 
2954969b360dSKonrad Sztyber 	spdk_set_thread(thread);
2955969b360dSKonrad Sztyber 	rc = cb_fn(cb_ctx);
2956969b360dSKonrad Sztyber 	SPIN_ASSERT(thread->lock_count == 0, SPIN_ERR_HOLD_DURING_SWITCH);
2957969b360dSKonrad Sztyber 	spdk_set_thread(orig_thread);
2958969b360dSKonrad Sztyber 
2959969b360dSKonrad Sztyber 	return rc;
2960969b360dSKonrad Sztyber }
2961969b360dSKonrad Sztyber 
2962969b360dSKonrad Sztyber struct spdk_interrupt *
2963969b360dSKonrad Sztyber spdk_interrupt_register_fd_group(struct spdk_fd_group *fgrp, const char *name)
2964969b360dSKonrad Sztyber {
2965969b360dSKonrad Sztyber 	struct spdk_interrupt *intr;
2966969b360dSKonrad Sztyber 	int rc;
2967969b360dSKonrad Sztyber 
2968969b360dSKonrad Sztyber 	intr = alloc_interrupt(-1, fgrp, NULL, NULL, name);
2969969b360dSKonrad Sztyber 	if (intr == NULL) {
2970969b360dSKonrad Sztyber 		return NULL;
2971969b360dSKonrad Sztyber 	}
2972969b360dSKonrad Sztyber 
2973969b360dSKonrad Sztyber 	rc = spdk_fd_group_set_wrapper(fgrp, interrupt_fd_group_wrapper, intr);
2974969b360dSKonrad Sztyber 	if (rc != 0) {
2975969b360dSKonrad Sztyber 		SPDK_ERRLOG("thread %s: failed to set wrapper for fd_group %d: %s\n",
2976969b360dSKonrad Sztyber 			    intr->thread->name, spdk_fd_group_get_fd(fgrp), spdk_strerror(-rc));
2977969b360dSKonrad Sztyber 		free(intr);
2978969b360dSKonrad Sztyber 		return NULL;
2979969b360dSKonrad Sztyber 	}
2980969b360dSKonrad Sztyber 
2981969b360dSKonrad Sztyber 	rc = spdk_fd_group_nest(intr->thread->fgrp, fgrp);
2982969b360dSKonrad Sztyber 	if (rc != 0) {
2983969b360dSKonrad Sztyber 		SPDK_ERRLOG("thread %s: failed to nest fd_group %d: %s\n",
2984969b360dSKonrad Sztyber 			    intr->thread->name, spdk_fd_group_get_fd(fgrp), spdk_strerror(-rc));
2985969b360dSKonrad Sztyber 		spdk_fd_group_set_wrapper(fgrp, NULL, NULL);
2986969b360dSKonrad Sztyber 		free(intr);
2987969b360dSKonrad Sztyber 		return NULL;
2988969b360dSKonrad Sztyber 	}
2989969b360dSKonrad Sztyber 
2990969b360dSKonrad Sztyber 	return intr;
2991969b360dSKonrad Sztyber }
2992969b360dSKonrad Sztyber 
29934bf6e4bbSLiu Xiaodong void
29944bf6e4bbSLiu Xiaodong spdk_interrupt_unregister(struct spdk_interrupt **pintr)
29954bf6e4bbSLiu Xiaodong {
29964bf6e4bbSLiu Xiaodong 	struct spdk_thread *thread;
29974bf6e4bbSLiu Xiaodong 	struct spdk_interrupt *intr;
29984bf6e4bbSLiu Xiaodong 
29994bf6e4bbSLiu Xiaodong 	intr = *pintr;
30004bf6e4bbSLiu Xiaodong 	if (intr == NULL) {
30014bf6e4bbSLiu Xiaodong 		return;
30024bf6e4bbSLiu Xiaodong 	}
30034bf6e4bbSLiu Xiaodong 
30044bf6e4bbSLiu Xiaodong 	*pintr = NULL;
30054bf6e4bbSLiu Xiaodong 
30064bf6e4bbSLiu Xiaodong 	thread = spdk_get_thread();
30074bf6e4bbSLiu Xiaodong 	if (!thread) {
30084bf6e4bbSLiu Xiaodong 		assert(false);
30094bf6e4bbSLiu Xiaodong 		return;
30104bf6e4bbSLiu Xiaodong 	}
30114bf6e4bbSLiu Xiaodong 
30124bf6e4bbSLiu Xiaodong 	if (intr->thread != thread) {
3013d4255740SJohn Levon 		wrong_thread(__func__, intr->name, intr->thread, thread);
30144bf6e4bbSLiu Xiaodong 		return;
30154bf6e4bbSLiu Xiaodong 	}
30164bf6e4bbSLiu Xiaodong 
3017969b360dSKonrad Sztyber 	if (intr->fgrp != NULL) {
3018969b360dSKonrad Sztyber 		assert(intr->efd < 0);
3019969b360dSKonrad Sztyber 		spdk_fd_group_unnest(thread->fgrp, intr->fgrp);
3020969b360dSKonrad Sztyber 		spdk_fd_group_set_wrapper(thread->fgrp, NULL, NULL);
3021969b360dSKonrad Sztyber 	} else {
30224bf6e4bbSLiu Xiaodong 		spdk_fd_group_remove(thread->fgrp, intr->efd);
3023969b360dSKonrad Sztyber 	}
3024969b360dSKonrad Sztyber 
30254bf6e4bbSLiu Xiaodong 	free(intr);
30264bf6e4bbSLiu Xiaodong }
30274bf6e4bbSLiu Xiaodong 
30284bf6e4bbSLiu Xiaodong int
30294bf6e4bbSLiu Xiaodong spdk_interrupt_set_event_types(struct spdk_interrupt *intr,
30304bf6e4bbSLiu Xiaodong 			       enum spdk_interrupt_event_types event_types)
30314bf6e4bbSLiu Xiaodong {
30324bf6e4bbSLiu Xiaodong 	struct spdk_thread *thread;
30334bf6e4bbSLiu Xiaodong 
30344bf6e4bbSLiu Xiaodong 	thread = spdk_get_thread();
30354bf6e4bbSLiu Xiaodong 	if (!thread) {
30364bf6e4bbSLiu Xiaodong 		assert(false);
30374bf6e4bbSLiu Xiaodong 		return -EINVAL;
30384bf6e4bbSLiu Xiaodong 	}
30394bf6e4bbSLiu Xiaodong 
30404bf6e4bbSLiu Xiaodong 	if (intr->thread != thread) {
3041d4255740SJohn Levon 		wrong_thread(__func__, intr->name, intr->thread, thread);
30424bf6e4bbSLiu Xiaodong 		return -EINVAL;
30434bf6e4bbSLiu Xiaodong 	}
30444bf6e4bbSLiu Xiaodong 
3045969b360dSKonrad Sztyber 	if (intr->efd < 0) {
3046969b360dSKonrad Sztyber 		assert(false);
3047969b360dSKonrad Sztyber 		return -EINVAL;
3048969b360dSKonrad Sztyber 	}
3049969b360dSKonrad Sztyber 
30504bf6e4bbSLiu Xiaodong 	return spdk_fd_group_event_modify(thread->fgrp, intr->efd, event_types);
30514bf6e4bbSLiu Xiaodong }
30524bf6e4bbSLiu Xiaodong 
30534bf6e4bbSLiu Xiaodong int
30544bf6e4bbSLiu Xiaodong spdk_thread_get_interrupt_fd(struct spdk_thread *thread)
30554bf6e4bbSLiu Xiaodong {
30564bf6e4bbSLiu Xiaodong 	return spdk_fd_group_get_fd(thread->fgrp);
30574bf6e4bbSLiu Xiaodong }
30584bf6e4bbSLiu Xiaodong 
305907ca24ecSBen Walker struct spdk_fd_group *
306007ca24ecSBen Walker spdk_thread_get_interrupt_fd_group(struct spdk_thread *thread)
306107ca24ecSBen Walker {
306207ca24ecSBen Walker 	return thread->fgrp;
306307ca24ecSBen Walker }
306407ca24ecSBen Walker 
30654bf6e4bbSLiu Xiaodong static bool g_interrupt_mode = false;
30664bf6e4bbSLiu Xiaodong 
30674bf6e4bbSLiu Xiaodong int
30684bf6e4bbSLiu Xiaodong spdk_interrupt_mode_enable(void)
30694bf6e4bbSLiu Xiaodong {
3070fbd6c30bSLiu Xiaodong 	/* It must be called once prior to initializing the threading library.
3071fbd6c30bSLiu Xiaodong 	 * g_spdk_msg_mempool will be valid if thread library is initialized.
3072fbd6c30bSLiu Xiaodong 	 */
3073fbd6c30bSLiu Xiaodong 	if (g_spdk_msg_mempool) {
3074cc6920a4SJosh Soref 		SPDK_ERRLOG("Failed due to threading library is already initialized.\n");
3075fbd6c30bSLiu Xiaodong 		return -1;
3076fbd6c30bSLiu Xiaodong 	}
3077fbd6c30bSLiu Xiaodong 
30784bf6e4bbSLiu Xiaodong #ifdef __linux__
30794bf6e4bbSLiu Xiaodong 	SPDK_NOTICELOG("Set SPDK running in interrupt mode.\n");
30804bf6e4bbSLiu Xiaodong 	g_interrupt_mode = true;
30814bf6e4bbSLiu Xiaodong 	return 0;
30824bf6e4bbSLiu Xiaodong #else
30834bf6e4bbSLiu Xiaodong 	SPDK_ERRLOG("SPDK interrupt mode supports only Linux platform now.\n");
30844bf6e4bbSLiu Xiaodong 	g_interrupt_mode = false;
30854bf6e4bbSLiu Xiaodong 	return -ENOTSUP;
30864bf6e4bbSLiu Xiaodong #endif
30874bf6e4bbSLiu Xiaodong }
30884bf6e4bbSLiu Xiaodong 
30894bf6e4bbSLiu Xiaodong bool
30904bf6e4bbSLiu Xiaodong spdk_interrupt_mode_is_enabled(void)
30914bf6e4bbSLiu Xiaodong {
30924bf6e4bbSLiu Xiaodong 	return g_interrupt_mode;
30934bf6e4bbSLiu Xiaodong }
3094ec571793SBen Walker 
3095531258aaSMike Gerdts #define SSPIN_DEBUG_STACK_FRAMES 16
3096531258aaSMike Gerdts 
3097531258aaSMike Gerdts struct sspin_stack {
3098531258aaSMike Gerdts 	void *addrs[SSPIN_DEBUG_STACK_FRAMES];
3099531258aaSMike Gerdts 	uint32_t depth;
3100531258aaSMike Gerdts };
3101531258aaSMike Gerdts 
3102531258aaSMike Gerdts struct spdk_spinlock_internal {
3103531258aaSMike Gerdts 	struct sspin_stack init_stack;
3104531258aaSMike Gerdts 	struct sspin_stack lock_stack;
3105531258aaSMike Gerdts 	struct sspin_stack unlock_stack;
3106531258aaSMike Gerdts };
3107531258aaSMike Gerdts 
3108531258aaSMike Gerdts static void
3109531258aaSMike Gerdts sspin_init_internal(struct spdk_spinlock *sspin)
3110531258aaSMike Gerdts {
3111531258aaSMike Gerdts #ifdef DEBUG
3112531258aaSMike Gerdts 	sspin->internal = calloc(1, sizeof(*sspin->internal));
3113531258aaSMike Gerdts #endif
3114531258aaSMike Gerdts }
3115531258aaSMike Gerdts 
3116531258aaSMike Gerdts static void
3117531258aaSMike Gerdts sspin_fini_internal(struct spdk_spinlock *sspin)
3118531258aaSMike Gerdts {
3119531258aaSMike Gerdts #ifdef DEBUG
3120531258aaSMike Gerdts 	free(sspin->internal);
3121531258aaSMike Gerdts 	sspin->internal = NULL;
3122531258aaSMike Gerdts #endif
3123531258aaSMike Gerdts }
3124531258aaSMike Gerdts 
312501452b1bSDuncan Bellamy #if defined(DEBUG) && defined(SPDK_HAVE_EXECINFO_H)
3126531258aaSMike Gerdts #define SSPIN_GET_STACK(sspin, which) \
3127531258aaSMike Gerdts 	do { \
3128531258aaSMike Gerdts 		if (sspin->internal != NULL) { \
3129531258aaSMike Gerdts 			struct sspin_stack *stack = &sspin->internal->which ## _stack; \
3130531258aaSMike Gerdts 			stack->depth = backtrace(stack->addrs, SPDK_COUNTOF(stack->addrs)); \
3131531258aaSMike Gerdts 		} \
3132531258aaSMike Gerdts 	} while (0)
3133531258aaSMike Gerdts #else
3134531258aaSMike Gerdts #define SSPIN_GET_STACK(sspin, which) do { } while (0)
3135531258aaSMike Gerdts #endif
3136531258aaSMike Gerdts 
31373d9395c6SMike Gerdts static void
31383d9395c6SMike Gerdts sspin_stack_print(const char *title, const struct sspin_stack *sspin_stack)
31393d9395c6SMike Gerdts {
314001452b1bSDuncan Bellamy #ifdef SPDK_HAVE_EXECINFO_H
31413d9395c6SMike Gerdts 	char **stack;
31423d9395c6SMike Gerdts 	size_t i;
31433d9395c6SMike Gerdts 
31443d9395c6SMike Gerdts 	stack = backtrace_symbols(sspin_stack->addrs, sspin_stack->depth);
31453d9395c6SMike Gerdts 	if (stack == NULL) {
31463d9395c6SMike Gerdts 		SPDK_ERRLOG("Out of memory while allocate stack for %s\n", title);
31473d9395c6SMike Gerdts 		return;
31483d9395c6SMike Gerdts 	}
31493d9395c6SMike Gerdts 	SPDK_ERRLOG("  %s:\n", title);
31503d9395c6SMike Gerdts 	for (i = 0; i < sspin_stack->depth; i++) {
31513d9395c6SMike Gerdts 		/*
31523d9395c6SMike Gerdts 		 * This does not print line numbers. In gdb, use something like "list *0x444b6b" or
31533d9395c6SMike Gerdts 		 * "list *sspin_stack->addrs[0]".  Or more conveniently, load the spdk gdb macros
31543d9395c6SMike Gerdts 		 * and use use "print *sspin" or "print sspin->internal.lock_stack".  See
31553d9395c6SMike Gerdts 		 * gdb_macros.md in the docs directory for details.
31563d9395c6SMike Gerdts 		 */
31573d9395c6SMike Gerdts 		SPDK_ERRLOG("    #%" PRIu64 ": %s\n", i, stack[i]);
31583d9395c6SMike Gerdts 	}
31593d9395c6SMike Gerdts 	free(stack);
316001452b1bSDuncan Bellamy #endif /* SPDK_HAVE_EXECINFO_H */
31613d9395c6SMike Gerdts }
31623d9395c6SMike Gerdts 
31633d9395c6SMike Gerdts static void
31643d9395c6SMike Gerdts sspin_stacks_print(const struct spdk_spinlock *sspin)
31653d9395c6SMike Gerdts {
31663d9395c6SMike Gerdts 	if (sspin->internal == NULL) {
31673d9395c6SMike Gerdts 		return;
31683d9395c6SMike Gerdts 	}
31693d9395c6SMike Gerdts 	SPDK_ERRLOG("spinlock %p\n", sspin);
317034edd9f1SKamil Godzwon 	sspin_stack_print("Lock initialized at", &sspin->internal->init_stack);
31713d9395c6SMike Gerdts 	sspin_stack_print("Last locked at", &sspin->internal->lock_stack);
31723d9395c6SMike Gerdts 	sspin_stack_print("Last unlocked at", &sspin->internal->unlock_stack);
31733d9395c6SMike Gerdts }
31743d9395c6SMike Gerdts 
3175cd2bcf10SMike Gerdts void
3176cd2bcf10SMike Gerdts spdk_spin_init(struct spdk_spinlock *sspin)
3177cd2bcf10SMike Gerdts {
3178cd2bcf10SMike Gerdts 	int rc;
3179cd2bcf10SMike Gerdts 
3180cd2bcf10SMike Gerdts 	memset(sspin, 0, sizeof(*sspin));
3181cd2bcf10SMike Gerdts 	rc = pthread_spin_init(&sspin->spinlock, PTHREAD_PROCESS_PRIVATE);
31823d9395c6SMike Gerdts 	SPIN_ASSERT_LOG_STACKS(rc == 0, SPIN_ERR_PTHREAD, sspin);
3183531258aaSMike Gerdts 	sspin_init_internal(sspin);
3184531258aaSMike Gerdts 	SSPIN_GET_STACK(sspin, init);
3185c9f3613fSMike Gerdts 	sspin->initialized = true;
3186cd2bcf10SMike Gerdts }
3187cd2bcf10SMike Gerdts 
3188cd2bcf10SMike Gerdts void
3189cd2bcf10SMike Gerdts spdk_spin_destroy(struct spdk_spinlock *sspin)
3190cd2bcf10SMike Gerdts {
3191cd2bcf10SMike Gerdts 	int rc;
3192cd2bcf10SMike Gerdts 
3193c9f3613fSMike Gerdts 	SPIN_ASSERT_LOG_STACKS(!sspin->destroyed, SPIN_ERR_DESTROYED, sspin);
3194c9f3613fSMike Gerdts 	SPIN_ASSERT_LOG_STACKS(sspin->initialized, SPIN_ERR_NOT_INITIALIZED, sspin);
31953d9395c6SMike Gerdts 	SPIN_ASSERT_LOG_STACKS(sspin->thread == NULL, SPIN_ERR_LOCK_HELD, sspin);
3196cd2bcf10SMike Gerdts 
3197cd2bcf10SMike Gerdts 	rc = pthread_spin_destroy(&sspin->spinlock);
31983d9395c6SMike Gerdts 	SPIN_ASSERT_LOG_STACKS(rc == 0, SPIN_ERR_PTHREAD, sspin);
3199531258aaSMike Gerdts 
3200531258aaSMike Gerdts 	sspin_fini_internal(sspin);
3201c9f3613fSMike Gerdts 	sspin->initialized = false;
3202c9f3613fSMike Gerdts 	sspin->destroyed = true;
3203cd2bcf10SMike Gerdts }
3204cd2bcf10SMike Gerdts 
3205cd2bcf10SMike Gerdts void
3206cd2bcf10SMike Gerdts spdk_spin_lock(struct spdk_spinlock *sspin)
3207cd2bcf10SMike Gerdts {
3208cd2bcf10SMike Gerdts 	struct spdk_thread *thread = spdk_get_thread();
3209cd2bcf10SMike Gerdts 	int rc;
3210cd2bcf10SMike Gerdts 
3211c9f3613fSMike Gerdts 	SPIN_ASSERT_LOG_STACKS(!sspin->destroyed, SPIN_ERR_DESTROYED, sspin);
3212c9f3613fSMike Gerdts 	SPIN_ASSERT_LOG_STACKS(sspin->initialized, SPIN_ERR_NOT_INITIALIZED, sspin);
32133d9395c6SMike Gerdts 	SPIN_ASSERT_LOG_STACKS(thread != NULL, SPIN_ERR_NOT_SPDK_THREAD, sspin);
32143d9395c6SMike Gerdts 	SPIN_ASSERT_LOG_STACKS(thread != sspin->thread, SPIN_ERR_DEADLOCK, sspin);
3215cd2bcf10SMike Gerdts 
3216cd2bcf10SMike Gerdts 	rc = pthread_spin_lock(&sspin->spinlock);
32173d9395c6SMike Gerdts 	SPIN_ASSERT_LOG_STACKS(rc == 0, SPIN_ERR_PTHREAD, sspin);
3218cd2bcf10SMike Gerdts 
3219cd2bcf10SMike Gerdts 	sspin->thread = thread;
3220cd2bcf10SMike Gerdts 	sspin->thread->lock_count++;
3221531258aaSMike Gerdts 
3222531258aaSMike Gerdts 	SSPIN_GET_STACK(sspin, lock);
3223cd2bcf10SMike Gerdts }
3224cd2bcf10SMike Gerdts 
3225cd2bcf10SMike Gerdts void
3226cd2bcf10SMike Gerdts spdk_spin_unlock(struct spdk_spinlock *sspin)
3227cd2bcf10SMike Gerdts {
3228cd2bcf10SMike Gerdts 	struct spdk_thread *thread = spdk_get_thread();
3229cd2bcf10SMike Gerdts 	int rc;
3230cd2bcf10SMike Gerdts 
3231c9f3613fSMike Gerdts 	SPIN_ASSERT_LOG_STACKS(!sspin->destroyed, SPIN_ERR_DESTROYED, sspin);
3232c9f3613fSMike Gerdts 	SPIN_ASSERT_LOG_STACKS(sspin->initialized, SPIN_ERR_NOT_INITIALIZED, sspin);
32333d9395c6SMike Gerdts 	SPIN_ASSERT_LOG_STACKS(thread != NULL, SPIN_ERR_NOT_SPDK_THREAD, sspin);
32343d9395c6SMike Gerdts 	SPIN_ASSERT_LOG_STACKS(thread == sspin->thread, SPIN_ERR_WRONG_THREAD, sspin);
3235cd2bcf10SMike Gerdts 
32363d9395c6SMike Gerdts 	SPIN_ASSERT_LOG_STACKS(thread->lock_count > 0, SPIN_ERR_LOCK_COUNT, sspin);
3237cd2bcf10SMike Gerdts 	thread->lock_count--;
3238cd2bcf10SMike Gerdts 	sspin->thread = NULL;
3239cd2bcf10SMike Gerdts 
3240531258aaSMike Gerdts 	SSPIN_GET_STACK(sspin, unlock);
3241531258aaSMike Gerdts 
3242cd2bcf10SMike Gerdts 	rc = pthread_spin_unlock(&sspin->spinlock);
32433d9395c6SMike Gerdts 	SPIN_ASSERT_LOG_STACKS(rc == 0, SPIN_ERR_PTHREAD, sspin);
3244cd2bcf10SMike Gerdts }
3245cd2bcf10SMike Gerdts 
3246cd2bcf10SMike Gerdts bool
3247cd2bcf10SMike Gerdts spdk_spin_held(struct spdk_spinlock *sspin)
3248cd2bcf10SMike Gerdts {
3249cd2bcf10SMike Gerdts 	struct spdk_thread *thread = spdk_get_thread();
3250cd2bcf10SMike Gerdts 
3251cd2bcf10SMike Gerdts 	SPIN_ASSERT_RETURN(thread != NULL, SPIN_ERR_NOT_SPDK_THREAD, false);
3252cd2bcf10SMike Gerdts 
3253cd2bcf10SMike Gerdts 	return sspin->thread == thread;
3254cd2bcf10SMike Gerdts }
3255cd2bcf10SMike Gerdts 
325698eca6faSAlexey Marchuk void
325798eca6faSAlexey Marchuk spdk_thread_register_post_poller_handler(spdk_post_poller_fn fn, void *fn_arg)
325898eca6faSAlexey Marchuk {
325998eca6faSAlexey Marchuk 	struct spdk_thread *thr;
326098eca6faSAlexey Marchuk 
326198eca6faSAlexey Marchuk 	thr = _get_thread();
326298eca6faSAlexey Marchuk 	assert(thr);
326398eca6faSAlexey Marchuk 	if (spdk_unlikely(thr->num_pp_handlers == SPDK_THREAD_MAX_POST_POLLER_HANDLERS)) {
326498eca6faSAlexey Marchuk 		SPDK_ERRLOG("Too many handlers registered");
326598eca6faSAlexey Marchuk 		return;
326698eca6faSAlexey Marchuk 	}
326798eca6faSAlexey Marchuk 
326898eca6faSAlexey Marchuk 	thr->pp_handlers[thr->num_pp_handlers].fn = fn;
326998eca6faSAlexey Marchuk 	thr->pp_handlers[thr->num_pp_handlers].fn_arg = fn_arg;
327098eca6faSAlexey Marchuk 	thr->num_pp_handlers++;
327198eca6faSAlexey Marchuk }
327298eca6faSAlexey Marchuk 
32732172c432STomasz Zawadzki SPDK_LOG_REGISTER_COMPONENT(thread)
3274