xref: /spdk/lib/event/reactor.c (revision 1a5bdab325d2bb6c7cf6257050726e29e3aaf414)
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2016 Intel Corporation.
 *   All rights reserved.
 */
5eeeac667SDaniel Verkamp 
6b961d9ccSBen Walker #include "spdk/stdinc.h"
7b9be940aSLance Hartmann #include "spdk/likely.h"
8eeeac667SDaniel Verkamp 
9462fd69eSSeungYeon Shin #include "event_internal.h"
10462fd69eSSeungYeon Shin 
11b961d9ccSBen Walker #include "spdk_internal/event.h"
1270f3606bSJohn Levon #include "spdk_internal/usdt.h"
13eeeac667SDaniel Verkamp 
14eeeac667SDaniel Verkamp #include "spdk/log.h"
15a83f91c2SBen Walker #include "spdk/thread.h"
160aa29864SBen Walker #include "spdk/env.h"
1790dfc392SDariusz Stojaczyk #include "spdk/util.h"
18a86e40f3STomasz Zawadzki #include "spdk/scheduler.h"
194bf6e4bbSLiu Xiaodong #include "spdk/string.h"
204bf6e4bbSLiu Xiaodong #include "spdk/fd_group.h"
2182c46626SAnisa Su #include "spdk/trace.h"
2282c46626SAnisa Su #include "spdk_internal/trace_defs.h"
23eeeac667SDaniel Verkamp 
2438527b07SShuhei Matsumoto #ifdef __linux__
2538527b07SShuhei Matsumoto #include <sys/prctl.h>
264bf6e4bbSLiu Xiaodong #include <sys/eventfd.h>
2738527b07SShuhei Matsumoto #endif
2838527b07SShuhei Matsumoto 
2938527b07SShuhei Matsumoto #ifdef __FreeBSD__
3038527b07SShuhei Matsumoto #include <pthread_np.h>
3138527b07SShuhei Matsumoto #endif
3238527b07SShuhei Matsumoto 
3352bbb267SDaniel Verkamp #define SPDK_EVENT_BATCH_SIZE		8
3452bbb267SDaniel Verkamp 
35d34d32f6SShuhei Matsumoto static struct spdk_reactor *g_reactors;
36992b168eSLiu Xiaodong static uint32_t g_reactor_count;
37ae4360f0SShuhei Matsumoto static struct spdk_cpuset g_reactor_core_mask;
38f7ddfcf7SDarek Stojaczyk static enum spdk_reactor_state	g_reactor_state = SPDK_REACTOR_STATE_UNINITIALIZED;
39eeeac667SDaniel Verkamp 
4025c5e3f5SShuhei Matsumoto static bool g_framework_context_switch_monitor_enabled = true;
4170c3e1f2SDaniel Verkamp 
42e1ec5c60SBen Walker static struct spdk_mempool *g_spdk_event_mempool = NULL;
43eeeac667SDaniel Verkamp 
442a146cd9SMaciej Szwed TAILQ_HEAD(, spdk_scheduler) g_scheduler_list
452a146cd9SMaciej Szwed 	= TAILQ_HEAD_INITIALIZER(g_scheduler_list);
462a146cd9SMaciej Szwed 
47d6c4f8cfSTomasz Zawadzki static struct spdk_scheduler *g_scheduler = NULL;
482cffc800SVitaliy Mysak static struct spdk_reactor *g_scheduling_reactor;
49cff96883STomasz Zawadzki bool g_scheduling_in_progress = false;
50f01c6f2dSJim Harris static uint64_t g_scheduler_period_in_tsc = 0;
51f01c6f2dSJim Harris static uint64_t g_scheduler_period_in_us;
526859a49aSMaciej Szwed static uint32_t g_scheduler_core_number;
532cffc800SVitaliy Mysak static struct spdk_scheduler_core_info *g_core_infos = NULL;
54462fd69eSSeungYeon Shin static struct spdk_cpuset g_scheduler_isolated_core_mask;
552a146cd9SMaciej Szwed 
56c6adf304SMaciej Szwed TAILQ_HEAD(, spdk_governor) g_governor_list
57c6adf304SMaciej Szwed 	= TAILQ_HEAD_INITIALIZER(g_governor_list);
58c6adf304SMaciej Szwed 
59a09bf113STomasz Zawadzki static struct spdk_governor *g_governor = NULL;
60c6adf304SMaciej Szwed 
614bf6e4bbSLiu Xiaodong static int reactor_interrupt_init(struct spdk_reactor *reactor);
624bf6e4bbSLiu Xiaodong static void reactor_interrupt_fini(struct spdk_reactor *reactor);
634bf6e4bbSLiu Xiaodong 
64c4f086b6SJim Harris static pthread_mutex_t g_stopping_reactors_mtx = PTHREAD_MUTEX_INITIALIZER;
65c4f086b6SJim Harris static bool g_stopping_reactors = false;
66c4f086b6SJim Harris 
672a146cd9SMaciej Szwed static struct spdk_scheduler *
68a86c94b3STomasz Zawadzki _scheduler_find(const char *name)
692a146cd9SMaciej Szwed {
702a146cd9SMaciej Szwed 	struct spdk_scheduler *tmp;
712a146cd9SMaciej Szwed 
722a146cd9SMaciej Szwed 	TAILQ_FOREACH(tmp, &g_scheduler_list, link) {
732a146cd9SMaciej Szwed 		if (strcmp(name, tmp->name) == 0) {
742a146cd9SMaciej Szwed 			return tmp;
752a146cd9SMaciej Szwed 		}
762a146cd9SMaciej Szwed 	}
772a146cd9SMaciej Szwed 
782a146cd9SMaciej Szwed 	return NULL;
792a146cd9SMaciej Szwed }
802a146cd9SMaciej Szwed 
812a146cd9SMaciej Szwed int
82a86e40f3STomasz Zawadzki spdk_scheduler_set(const char *name)
832a146cd9SMaciej Szwed {
842a146cd9SMaciej Szwed 	struct spdk_scheduler *scheduler;
85d6c4f8cfSTomasz Zawadzki 	int rc = 0;
86d6c4f8cfSTomasz Zawadzki 
87d6c4f8cfSTomasz Zawadzki 	/* NULL scheduler was specifically requested */
88d6c4f8cfSTomasz Zawadzki 	if (name == NULL) {
89d6c4f8cfSTomasz Zawadzki 		if (g_scheduler) {
90d6c4f8cfSTomasz Zawadzki 			g_scheduler->deinit();
91d6c4f8cfSTomasz Zawadzki 		}
92d6c4f8cfSTomasz Zawadzki 		g_scheduler = NULL;
93d6c4f8cfSTomasz Zawadzki 		return 0;
94d6c4f8cfSTomasz Zawadzki 	}
952a146cd9SMaciej Szwed 
962a146cd9SMaciej Szwed 	scheduler = _scheduler_find(name);
972a146cd9SMaciej Szwed 	if (scheduler == NULL) {
987148f333SVitaliy Mysak 		SPDK_ERRLOG("Requested scheduler is missing\n");
99d6c4f8cfSTomasz Zawadzki 		return -EINVAL;
1002a146cd9SMaciej Szwed 	}
1012a146cd9SMaciej Szwed 
102d6c4f8cfSTomasz Zawadzki 	if (g_scheduler == scheduler) {
103d6c4f8cfSTomasz Zawadzki 		return 0;
10475022aa2SMaciej Szwed 	}
105d6c4f8cfSTomasz Zawadzki 
106d6c4f8cfSTomasz Zawadzki 	if (g_scheduler) {
1075bf2973eSTomasz Zawadzki 		g_scheduler->deinit();
108c6adf304SMaciej Szwed 	}
109f2a6a84aSJim Harris 
110f2a6a84aSJim Harris 	rc = scheduler->init();
111f2a6a84aSJim Harris 	if (rc == 0) {
1121b1e52cbSMaciej Szwed 		g_scheduler = scheduler;
113f2a6a84aSJim Harris 	} else {
114f2a6a84aSJim Harris 		/* Could not switch to the new scheduler, so keep the old
115ba9e050eSMarcin Spiewak 		 * one. We need to check if it wasn't NULL, and ->init() it again.
116f2a6a84aSJim Harris 		 */
117ba9e050eSMarcin Spiewak 		if (g_scheduler) {
118f2a6a84aSJim Harris 			SPDK_ERRLOG("Could not ->init() '%s' scheduler, reverting to '%s'\n",
119f2a6a84aSJim Harris 				    name, g_scheduler->name);
120f2a6a84aSJim Harris 			g_scheduler->init();
121ba9e050eSMarcin Spiewak 		} else {
122ba9e050eSMarcin Spiewak 			SPDK_ERRLOG("Could not ->init() '%s' scheduler.\n", name);
123ba9e050eSMarcin Spiewak 		}
1241b1e52cbSMaciej Szwed 	}
1251b1e52cbSMaciej Szwed 
126d6c4f8cfSTomasz Zawadzki 	return rc;
1272a146cd9SMaciej Szwed }
1282a146cd9SMaciej Szwed 
129abf52d7dSKrzysztof Karas struct spdk_scheduler *
130a86e40f3STomasz Zawadzki spdk_scheduler_get(void)
131abf52d7dSKrzysztof Karas {
132abf52d7dSKrzysztof Karas 	return g_scheduler;
133abf52d7dSKrzysztof Karas }
134abf52d7dSKrzysztof Karas 
135abf52d7dSKrzysztof Karas uint64_t
136a86e40f3STomasz Zawadzki spdk_scheduler_get_period(void)
137abf52d7dSKrzysztof Karas {
138f01c6f2dSJim Harris 	return g_scheduler_period_in_us;
139abf52d7dSKrzysztof Karas }
140abf52d7dSKrzysztof Karas 
1412a146cd9SMaciej Szwed void
142a86e40f3STomasz Zawadzki spdk_scheduler_set_period(uint64_t period)
1432a146cd9SMaciej Szwed {
144f01c6f2dSJim Harris 	g_scheduler_period_in_us = period;
145f01c6f2dSJim Harris 	g_scheduler_period_in_tsc = period * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
1462a146cd9SMaciej Szwed }
1472a146cd9SMaciej Szwed 
1482a146cd9SMaciej Szwed void
149a86e40f3STomasz Zawadzki spdk_scheduler_register(struct spdk_scheduler *scheduler)
1502a146cd9SMaciej Szwed {
1512a146cd9SMaciej Szwed 	if (_scheduler_find(scheduler->name)) {
1522a146cd9SMaciej Szwed 		SPDK_ERRLOG("scheduler named '%s' already registered.\n", scheduler->name);
1532a146cd9SMaciej Szwed 		assert(false);
1542a146cd9SMaciej Szwed 		return;
1552a146cd9SMaciej Szwed 	}
1562a146cd9SMaciej Szwed 
1572a146cd9SMaciej Szwed 	TAILQ_INSERT_TAIL(&g_scheduler_list, scheduler, link);
1582a146cd9SMaciej Szwed }
1592a146cd9SMaciej Szwed 
160758225a3Ssyeon.shin uint32_t
161758225a3Ssyeon.shin spdk_scheduler_get_scheduling_lcore(void)
162758225a3Ssyeon.shin {
163758225a3Ssyeon.shin 	return g_scheduling_reactor->lcore;
164758225a3Ssyeon.shin }
165758225a3Ssyeon.shin 
166462fd69eSSeungYeon Shin bool
167462fd69eSSeungYeon Shin spdk_scheduler_set_scheduling_lcore(uint32_t core)
168462fd69eSSeungYeon Shin {
169462fd69eSSeungYeon Shin 	struct spdk_reactor *reactor = spdk_reactor_get(core);
170462fd69eSSeungYeon Shin 	if (reactor == NULL) {
171462fd69eSSeungYeon Shin 		SPDK_ERRLOG("Failed to set scheduling reactor. Reactor(lcore:%d) does not exist", core);
172462fd69eSSeungYeon Shin 		return false;
173462fd69eSSeungYeon Shin 	}
174462fd69eSSeungYeon Shin 
175462fd69eSSeungYeon Shin 	g_scheduling_reactor = reactor;
176462fd69eSSeungYeon Shin 	return true;
177462fd69eSSeungYeon Shin }
178462fd69eSSeungYeon Shin 
179462fd69eSSeungYeon Shin bool
180462fd69eSSeungYeon Shin scheduler_set_isolated_core_mask(struct spdk_cpuset isolated_core_mask)
181462fd69eSSeungYeon Shin {
182462fd69eSSeungYeon Shin 	struct spdk_cpuset tmp_mask;
183462fd69eSSeungYeon Shin 
184462fd69eSSeungYeon Shin 	spdk_cpuset_copy(&tmp_mask, spdk_app_get_core_mask());
185462fd69eSSeungYeon Shin 	spdk_cpuset_or(&tmp_mask, &isolated_core_mask);
186462fd69eSSeungYeon Shin 	if (spdk_cpuset_equal(&tmp_mask, spdk_app_get_core_mask()) == false) {
187462fd69eSSeungYeon Shin 		SPDK_ERRLOG("Isolated core mask is not included in app core mask.\n");
188462fd69eSSeungYeon Shin 		return false;
189462fd69eSSeungYeon Shin 	}
190462fd69eSSeungYeon Shin 	spdk_cpuset_copy(&g_scheduler_isolated_core_mask, &isolated_core_mask);
191462fd69eSSeungYeon Shin 	return true;
192462fd69eSSeungYeon Shin }
193462fd69eSSeungYeon Shin 
194462fd69eSSeungYeon Shin const char *
195462fd69eSSeungYeon Shin scheduler_get_isolated_core_mask(void)
196462fd69eSSeungYeon Shin {
197462fd69eSSeungYeon Shin 	return spdk_cpuset_fmt(&g_scheduler_isolated_core_mask);
198462fd69eSSeungYeon Shin }
199462fd69eSSeungYeon Shin 
200462fd69eSSeungYeon Shin static bool
201462fd69eSSeungYeon Shin scheduler_is_isolated_core(uint32_t core)
202462fd69eSSeungYeon Shin {
203462fd69eSSeungYeon Shin 	return spdk_cpuset_get_cpu(&g_scheduler_isolated_core_mask, core);
204462fd69eSSeungYeon Shin }
205462fd69eSSeungYeon Shin 
2062139bfa9SBen Walker static void
207a7592dbeSSeth Howell reactor_construct(struct spdk_reactor *reactor, uint32_t lcore)
2082139bfa9SBen Walker {
2092139bfa9SBen Walker 	reactor->lcore = lcore;
2107f9c41beSBen Walker 	reactor->flags.is_valid = true;
2112139bfa9SBen Walker 
2122139bfa9SBen Walker 	TAILQ_INIT(&reactor->threads);
213ea863bb0SShuhei Matsumoto 	reactor->thread_count = 0;
214eff5b149SLiu Xiaodong 	spdk_cpuset_zero(&reactor->notify_cpuset);
2152139bfa9SBen Walker 
216186b109dSJim Harris 	reactor->events = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 65536, SPDK_ENV_NUMA_ID_ANY);
2174237d2d8Ssunshihao520 	if (reactor->events == NULL) {
2184237d2d8Ssunshihao520 		SPDK_ERRLOG("Failed to allocate events ring\n");
2194237d2d8Ssunshihao520 		assert(false);
2204237d2d8Ssunshihao520 	}
2214bf6e4bbSLiu Xiaodong 
222eff5b149SLiu Xiaodong 	/* Always initialize interrupt facilities for reactor */
223eff5b149SLiu Xiaodong 	if (reactor_interrupt_init(reactor) != 0) {
22434edd9f1SKamil Godzwon 		/* Reactor interrupt facilities are necessary if setting app to interrupt mode. */
2254bf6e4bbSLiu Xiaodong 		if (spdk_interrupt_mode_is_enabled()) {
226eff5b149SLiu Xiaodong 			SPDK_ERRLOG("Failed to prepare intr facilities\n");
227eff5b149SLiu Xiaodong 			assert(false);
228eff5b149SLiu Xiaodong 		}
229eff5b149SLiu Xiaodong 		return;
230eff5b149SLiu Xiaodong 	}
231eff5b149SLiu Xiaodong 
232eff5b149SLiu Xiaodong 	/* If application runs with full interrupt ability,
233eff5b149SLiu Xiaodong 	 * all reactors are going to run in interrupt mode.
234eff5b149SLiu Xiaodong 	 */
235eff5b149SLiu Xiaodong 	if (spdk_interrupt_mode_is_enabled()) {
236eff5b149SLiu Xiaodong 		uint32_t i;
237eff5b149SLiu Xiaodong 
238eff5b149SLiu Xiaodong 		SPDK_ENV_FOREACH_CORE(i) {
239eff5b149SLiu Xiaodong 			spdk_cpuset_set_cpu(&reactor->notify_cpuset, i, true);
240eff5b149SLiu Xiaodong 		}
241eff5b149SLiu Xiaodong 		reactor->in_interrupt = true;
2424bf6e4bbSLiu Xiaodong 	}
2432139bfa9SBen Walker }
2442139bfa9SBen Walker 
245abbd6ed8SShuhei Matsumoto struct spdk_reactor *
246eeeac667SDaniel Verkamp spdk_reactor_get(uint32_t lcore)
247eeeac667SDaniel Verkamp {
248eeeac667SDaniel Verkamp 	struct spdk_reactor *reactor;
249085ade57SBen Walker 
250fd0f0364SBen Walker 	if (g_reactors == NULL) {
251fd0f0364SBen Walker 		SPDK_WARNLOG("Called spdk_reactor_get() while the g_reactors array was NULL!\n");
252fd0f0364SBen Walker 		return NULL;
253fd0f0364SBen Walker 	}
254085ade57SBen Walker 
2555a499ac0SLiu Xiaodong 	if (lcore >= g_reactor_count) {
2565a499ac0SLiu Xiaodong 		return NULL;
2575a499ac0SLiu Xiaodong 	}
2585a499ac0SLiu Xiaodong 
259085ade57SBen Walker 	reactor = &g_reactors[lcore];
260085ade57SBen Walker 
2617f9c41beSBen Walker 	if (reactor->flags.is_valid == false) {
262085ade57SBen Walker 		return NULL;
263085ade57SBen Walker 	}
264085ade57SBen Walker 
265eeeac667SDaniel Verkamp 	return reactor;
266eeeac667SDaniel Verkamp }
267eeeac667SDaniel Verkamp 
268a7592dbeSSeth Howell static int reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op);
269a7592dbeSSeth Howell static bool reactor_thread_op_supported(enum spdk_thread_op op);
2702139bfa9SBen Walker 
2716895e9d9SJim Harris /* Power of 2 minus 1 is optimal for memory consumption */
2726895e9d9SJim Harris #define EVENT_MSG_MEMPOOL_SHIFT 14 /* 2^14 = 16384 */
2736895e9d9SJim Harris #define EVENT_MSG_MEMPOOL_SIZE ((1 << EVENT_MSG_MEMPOOL_SHIFT) - 1)
2746895e9d9SJim Harris 
2752139bfa9SBen Walker int
276a71cd521SAlexis Lescouet spdk_reactors_init(size_t msg_mempool_size)
2772139bfa9SBen Walker {
27875022aa2SMaciej Szwed 	struct spdk_reactor *reactor;
2792139bfa9SBen Walker 	int rc;
280992b168eSLiu Xiaodong 	uint32_t i, current_core;
2812139bfa9SBen Walker 	char mempool_name[32];
2822139bfa9SBen Walker 
2832139bfa9SBen Walker 	snprintf(mempool_name, sizeof(mempool_name), "evtpool_%d", getpid());
2842139bfa9SBen Walker 	g_spdk_event_mempool = spdk_mempool_create(mempool_name,
2856895e9d9SJim Harris 			       EVENT_MSG_MEMPOOL_SIZE,
2862139bfa9SBen Walker 			       sizeof(struct spdk_event),
2872139bfa9SBen Walker 			       SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
288186b109dSJim Harris 			       SPDK_ENV_NUMA_ID_ANY);
2892139bfa9SBen Walker 
2902139bfa9SBen Walker 	if (g_spdk_event_mempool == NULL) {
2912139bfa9SBen Walker 		SPDK_ERRLOG("spdk_event_mempool creation failed\n");
2922139bfa9SBen Walker 		return -1;
2932139bfa9SBen Walker 	}
2942139bfa9SBen Walker 
2952139bfa9SBen Walker 	/* struct spdk_reactor must be aligned on 64 byte boundary */
296992b168eSLiu Xiaodong 	g_reactor_count = spdk_env_get_last_core() + 1;
2972139bfa9SBen Walker 	rc = posix_memalign((void **)&g_reactors, 64,
298992b168eSLiu Xiaodong 			    g_reactor_count * sizeof(struct spdk_reactor));
2992139bfa9SBen Walker 	if (rc != 0) {
3002139bfa9SBen Walker 		SPDK_ERRLOG("Could not allocate array size=%u for g_reactors\n",
301992b168eSLiu Xiaodong 			    g_reactor_count);
3022139bfa9SBen Walker 		spdk_mempool_free(g_spdk_event_mempool);
3032139bfa9SBen Walker 		return -1;
3042139bfa9SBen Walker 	}
3052139bfa9SBen Walker 
306992b168eSLiu Xiaodong 	g_core_infos = calloc(g_reactor_count, sizeof(*g_core_infos));
3072cffc800SVitaliy Mysak 	if (g_core_infos == NULL) {
3082cffc800SVitaliy Mysak 		SPDK_ERRLOG("Could not allocate memory for g_core_infos\n");
3092cffc800SVitaliy Mysak 		spdk_mempool_free(g_spdk_event_mempool);
3102cffc800SVitaliy Mysak 		free(g_reactors);
3112cffc800SVitaliy Mysak 		return -ENOMEM;
3122cffc800SVitaliy Mysak 	}
3132cffc800SVitaliy Mysak 
314992b168eSLiu Xiaodong 	memset(g_reactors, 0, (g_reactor_count) * sizeof(struct spdk_reactor));
3152139bfa9SBen Walker 
3165de98ef8Syidong0635 	rc = spdk_thread_lib_init_ext(reactor_thread_op, reactor_thread_op_supported,
317a71cd521SAlexis Lescouet 				      sizeof(struct spdk_lw_thread), msg_mempool_size);
3185de98ef8Syidong0635 	if (rc != 0) {
3195de98ef8Syidong0635 		SPDK_ERRLOG("Initialize spdk thread lib failed\n");
3205de98ef8Syidong0635 		spdk_mempool_free(g_spdk_event_mempool);
3215de98ef8Syidong0635 		free(g_reactors);
3225de98ef8Syidong0635 		free(g_core_infos);
3235de98ef8Syidong0635 		return rc;
3245de98ef8Syidong0635 	}
3252139bfa9SBen Walker 
3262139bfa9SBen Walker 	SPDK_ENV_FOREACH_CORE(i) {
327a7592dbeSSeth Howell 		reactor_construct(&g_reactors[i], i);
3282139bfa9SBen Walker 	}
3292139bfa9SBen Walker 
33075022aa2SMaciej Szwed 	current_core = spdk_env_get_current_core();
33175022aa2SMaciej Szwed 	reactor = spdk_reactor_get(current_core);
33275022aa2SMaciej Szwed 	assert(reactor != NULL);
33375022aa2SMaciej Szwed 	g_scheduling_reactor = reactor;
33475022aa2SMaciej Szwed 
3352139bfa9SBen Walker 	g_reactor_state = SPDK_REACTOR_STATE_INITIALIZED;
3362139bfa9SBen Walker 
3372139bfa9SBen Walker 	return 0;
3382139bfa9SBen Walker }
3392139bfa9SBen Walker 
3406aff44ccSBen Walker void
3416aff44ccSBen Walker spdk_reactors_fini(void)
3426aff44ccSBen Walker {
3436aff44ccSBen Walker 	uint32_t i;
3446aff44ccSBen Walker 	struct spdk_reactor *reactor;
3456aff44ccSBen Walker 
346f7ddfcf7SDarek Stojaczyk 	if (g_reactor_state == SPDK_REACTOR_STATE_UNINITIALIZED) {
347f7ddfcf7SDarek Stojaczyk 		return;
348f7ddfcf7SDarek Stojaczyk 	}
349f7ddfcf7SDarek Stojaczyk 
3506aff44ccSBen Walker 	spdk_thread_lib_fini();
3516aff44ccSBen Walker 
3526aff44ccSBen Walker 	SPDK_ENV_FOREACH_CORE(i) {
3536aff44ccSBen Walker 		reactor = spdk_reactor_get(i);
3542e69975fSSeth Howell 		assert(reactor != NULL);
355ea863bb0SShuhei Matsumoto 		assert(reactor->thread_count == 0);
3562e69975fSSeth Howell 		if (reactor->events != NULL) {
3576aff44ccSBen Walker 			spdk_ring_free(reactor->events);
3586aff44ccSBen Walker 		}
3594bf6e4bbSLiu Xiaodong 
3604bf6e4bbSLiu Xiaodong 		reactor_interrupt_fini(reactor);
3612cffc800SVitaliy Mysak 
3622cffc800SVitaliy Mysak 		if (g_core_infos != NULL) {
363b74b6133STomasz Zawadzki 			free(g_core_infos[i].thread_infos);
3642cffc800SVitaliy Mysak 		}
3656aff44ccSBen Walker 	}
3666aff44ccSBen Walker 
3676aff44ccSBen Walker 	spdk_mempool_free(g_spdk_event_mempool);
3686aff44ccSBen Walker 
3696aff44ccSBen Walker 	free(g_reactors);
3706aff44ccSBen Walker 	g_reactors = NULL;
3712cffc800SVitaliy Mysak 	free(g_core_infos);
3722cffc800SVitaliy Mysak 	g_core_infos = NULL;
3736aff44ccSBen Walker }
3746aff44ccSBen Walker 
375a2074554SLiu Xiaodong static void _reactor_set_interrupt_mode(void *arg1, void *arg2);
376a2074554SLiu Xiaodong 
377a2074554SLiu Xiaodong static void
378a2074554SLiu Xiaodong _reactor_set_notify_cpuset(void *arg1, void *arg2)
379a2074554SLiu Xiaodong {
380a2074554SLiu Xiaodong 	struct spdk_reactor *target = arg1;
381a2074554SLiu Xiaodong 	struct spdk_reactor *reactor = spdk_reactor_get(spdk_env_get_current_core());
382a2074554SLiu Xiaodong 
383ebeac5deSGangCao 	assert(reactor != NULL);
384a2074554SLiu Xiaodong 	spdk_cpuset_set_cpu(&reactor->notify_cpuset, target->lcore, target->new_in_interrupt);
385a2074554SLiu Xiaodong }
386a2074554SLiu Xiaodong 
387a2074554SLiu Xiaodong static void
3889e81535eSyidong0635 _event_call(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
3899e81535eSyidong0635 {
3909e81535eSyidong0635 	struct spdk_event *ev;
3919e81535eSyidong0635 
3929e81535eSyidong0635 	ev = spdk_event_allocate(lcore, fn, arg1, arg2);
3939e81535eSyidong0635 	assert(ev);
3949e81535eSyidong0635 	spdk_event_call(ev);
3959e81535eSyidong0635 }
3969e81535eSyidong0635 
3979e81535eSyidong0635 static void
398a2074554SLiu Xiaodong _reactor_set_notify_cpuset_cpl(void *arg1, void *arg2)
399a2074554SLiu Xiaodong {
400a2074554SLiu Xiaodong 	struct spdk_reactor *target = arg1;
401a2074554SLiu Xiaodong 
402a2074554SLiu Xiaodong 	if (target->new_in_interrupt == false) {
403a2074554SLiu Xiaodong 		target->set_interrupt_mode_in_progress = false;
40411ff66feSsyeon.shin 		_event_call(spdk_scheduler_get_scheduling_lcore(), target->set_interrupt_mode_cb_fn,
40511ff66feSsyeon.shin 			    target->set_interrupt_mode_cb_arg, NULL);
406a2074554SLiu Xiaodong 	} else {
4079e81535eSyidong0635 		_event_call(target->lcore, _reactor_set_interrupt_mode, target, NULL);
408a2074554SLiu Xiaodong 	}
409a2074554SLiu Xiaodong }
410a2074554SLiu Xiaodong 
411a2074554SLiu Xiaodong static void
412b763ebfeSLiu Xiaodong _reactor_set_thread_interrupt_mode(void *ctx)
413b763ebfeSLiu Xiaodong {
414b763ebfeSLiu Xiaodong 	struct spdk_reactor *reactor = ctx;
415b763ebfeSLiu Xiaodong 
416b763ebfeSLiu Xiaodong 	spdk_thread_set_interrupt_mode(reactor->in_interrupt);
417b763ebfeSLiu Xiaodong }
418b763ebfeSLiu Xiaodong 
419b763ebfeSLiu Xiaodong static void
420a2074554SLiu Xiaodong _reactor_set_interrupt_mode(void *arg1, void *arg2)
421a2074554SLiu Xiaodong {
422a2074554SLiu Xiaodong 	struct spdk_reactor *target = arg1;
423b763ebfeSLiu Xiaodong 	struct spdk_thread *thread;
424fe2d64ceSBen Walker 	struct spdk_fd_group *grp;
425b763ebfeSLiu Xiaodong 	struct spdk_lw_thread *lw_thread, *tmp;
426a2074554SLiu Xiaodong 
427de3878ecSyidong0635 	assert(target == spdk_reactor_get(spdk_env_get_current_core()));
428a2074554SLiu Xiaodong 	assert(target != NULL);
429a2074554SLiu Xiaodong 	assert(target->in_interrupt != target->new_in_interrupt);
430a2074554SLiu Xiaodong 	SPDK_DEBUGLOG(reactor, "Do reactor set on core %u from %s to state %s\n",
431c07a6a94SRichael Zhuang 		      target->lcore, target->in_interrupt ? "intr" : "poll", target->new_in_interrupt ? "intr" : "poll");
432a2074554SLiu Xiaodong 
433a2074554SLiu Xiaodong 	target->in_interrupt = target->new_in_interrupt;
434a2074554SLiu Xiaodong 
435f98ac63eSTomasz Zawadzki 	if (spdk_interrupt_mode_is_enabled()) {
436b763ebfeSLiu Xiaodong 		/* Align spdk_thread with reactor to interrupt mode or poll mode */
437b763ebfeSLiu Xiaodong 		TAILQ_FOREACH_SAFE(lw_thread, &target->threads, link, tmp) {
438b763ebfeSLiu Xiaodong 			thread = spdk_thread_get_from_ctx(lw_thread);
439fe2d64ceSBen Walker 			if (target->in_interrupt) {
440fe2d64ceSBen Walker 				grp = spdk_thread_get_interrupt_fd_group(thread);
441fe2d64ceSBen Walker 				spdk_fd_group_nest(target->fgrp, grp);
442fe2d64ceSBen Walker 			} else {
443fe2d64ceSBen Walker 				grp = spdk_thread_get_interrupt_fd_group(thread);
444fe2d64ceSBen Walker 				spdk_fd_group_unnest(target->fgrp, grp);
445fe2d64ceSBen Walker 			}
446fe2d64ceSBen Walker 
447b763ebfeSLiu Xiaodong 			spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, target);
448b763ebfeSLiu Xiaodong 		}
449f98ac63eSTomasz Zawadzki 	}
450b763ebfeSLiu Xiaodong 
451a2074554SLiu Xiaodong 	if (target->new_in_interrupt == false) {
452da11c9d2STomasz Zawadzki 		/* Reactor is no longer in interrupt mode. Refresh the tsc_last to accurately
453da11c9d2STomasz Zawadzki 		 * track reactor stats. */
454da11c9d2STomasz Zawadzki 		target->tsc_last = spdk_get_ticks();
455a2074554SLiu Xiaodong 		spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl);
456a2074554SLiu Xiaodong 	} else {
457a2074554SLiu Xiaodong 		uint64_t notify = 1;
458a2074554SLiu Xiaodong 		int rc = 0;
459a2074554SLiu Xiaodong 
460a2074554SLiu Xiaodong 		/* Always trigger spdk_event and resched event in case of race condition */
461a2074554SLiu Xiaodong 		rc = write(target->events_fd, &notify, sizeof(notify));
462a2074554SLiu Xiaodong 		if (rc < 0) {
463a2074554SLiu Xiaodong 			SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
464a2074554SLiu Xiaodong 		}
465a2074554SLiu Xiaodong 		rc = write(target->resched_fd, &notify, sizeof(notify));
466a2074554SLiu Xiaodong 		if (rc < 0) {
467a2074554SLiu Xiaodong 			SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno));
468a2074554SLiu Xiaodong 		}
469a2074554SLiu Xiaodong 
470a2074554SLiu Xiaodong 		target->set_interrupt_mode_in_progress = false;
47111ff66feSsyeon.shin 		_event_call(spdk_scheduler_get_scheduling_lcore(), target->set_interrupt_mode_cb_fn,
47211ff66feSsyeon.shin 			    target->set_interrupt_mode_cb_arg, NULL);
473a2074554SLiu Xiaodong 	}
474a2074554SLiu Xiaodong }
475a2074554SLiu Xiaodong 
476a2074554SLiu Xiaodong int
477a2074554SLiu Xiaodong spdk_reactor_set_interrupt_mode(uint32_t lcore, bool new_in_interrupt,
478a2074554SLiu Xiaodong 				spdk_reactor_set_interrupt_mode_cb cb_fn, void *cb_arg)
479a2074554SLiu Xiaodong {
480a2074554SLiu Xiaodong 	struct spdk_reactor *target;
481a2074554SLiu Xiaodong 
482a2074554SLiu Xiaodong 	target = spdk_reactor_get(lcore);
483a2074554SLiu Xiaodong 	if (target == NULL) {
484a2074554SLiu Xiaodong 		return -EINVAL;
485a2074554SLiu Xiaodong 	}
486a2074554SLiu Xiaodong 
487a7b15178STomasz Zawadzki 	/* Eventfd has to be supported in order to use interrupt functionality. */
488a7b15178STomasz Zawadzki 	if (target->fgrp == NULL) {
489a7b15178STomasz Zawadzki 		return -ENOTSUP;
490a7b15178STomasz Zawadzki 	}
491a7b15178STomasz Zawadzki 
492f470a0dcSJim Harris 	if (spdk_env_get_current_core() != g_scheduling_reactor->lcore) {
493f470a0dcSJim Harris 		SPDK_ERRLOG("It is only permitted within scheduling reactor.\n");
494a2074554SLiu Xiaodong 		return -EPERM;
495a2074554SLiu Xiaodong 	}
496a2074554SLiu Xiaodong 
497a2074554SLiu Xiaodong 	if (target->in_interrupt == new_in_interrupt) {
49811ff66feSsyeon.shin 		cb_fn(cb_arg, NULL);
499a2074554SLiu Xiaodong 		return 0;
500a2074554SLiu Xiaodong 	}
501a2074554SLiu Xiaodong 
502a2074554SLiu Xiaodong 	if (target->set_interrupt_mode_in_progress) {
503a2074554SLiu Xiaodong 		SPDK_NOTICELOG("Reactor(%u) is already in progress to set interrupt mode\n", lcore);
504a2074554SLiu Xiaodong 		return -EBUSY;
505a2074554SLiu Xiaodong 	}
506a2074554SLiu Xiaodong 	target->set_interrupt_mode_in_progress = true;
507a2074554SLiu Xiaodong 
508a2074554SLiu Xiaodong 	target->new_in_interrupt = new_in_interrupt;
509a2074554SLiu Xiaodong 	target->set_interrupt_mode_cb_fn = cb_fn;
510a2074554SLiu Xiaodong 	target->set_interrupt_mode_cb_arg = cb_arg;
511a2074554SLiu Xiaodong 
512a2074554SLiu Xiaodong 	SPDK_DEBUGLOG(reactor, "Starting reactor event from %d to %d\n",
513a2074554SLiu Xiaodong 		      spdk_env_get_current_core(), lcore);
514a2074554SLiu Xiaodong 
515a2074554SLiu Xiaodong 	if (new_in_interrupt == false) {
516a2074554SLiu Xiaodong 		/* For potential race cases, when setting the reactor to poll mode,
517a2074554SLiu Xiaodong 		 * first change the mode of the reactor and then clear the corresponding
518a2074554SLiu Xiaodong 		 * bit of the notify_cpuset of each reactor.
519a2074554SLiu Xiaodong 		 */
5209e81535eSyidong0635 		_event_call(lcore, _reactor_set_interrupt_mode, target, NULL);
521a2074554SLiu Xiaodong 	} else {
522cc6920a4SJosh Soref 		/* For race cases, when setting the reactor to interrupt mode, first set the
523a2074554SLiu Xiaodong 		 * corresponding bit of the notify_cpuset of each reactor and then change the mode.
524a2074554SLiu Xiaodong 		 */
525a2074554SLiu Xiaodong 		spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl);
526a2074554SLiu Xiaodong 	}
527a2074554SLiu Xiaodong 
528a2074554SLiu Xiaodong 	return 0;
529a2074554SLiu Xiaodong }
530a2074554SLiu Xiaodong 
531c3ede774SDaniel Verkamp struct spdk_event *
5327ac9a4ecSDaniel Verkamp spdk_event_allocate(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
533eeeac667SDaniel Verkamp {
534eeeac667SDaniel Verkamp 	struct spdk_event *event = NULL;
5355135af43SDaniel Verkamp 	struct spdk_reactor *reactor = spdk_reactor_get(lcore);
53652bbb267SDaniel Verkamp 
5376518338fSBen Walker 	if (!reactor) {
5386518338fSBen Walker 		assert(false);
5396518338fSBen Walker 		return NULL;
5406518338fSBen Walker 	}
5416518338fSBen Walker 
542e1ec5c60SBen Walker 	event = spdk_mempool_get(g_spdk_event_mempool);
5437f5b671dSBen Walker 	if (event == NULL) {
544a17ad921SBen Walker 		assert(false);
545a17ad921SBen Walker 		return NULL;
546a17ad921SBen Walker 	}
547eeeac667SDaniel Verkamp 
548eeeac667SDaniel Verkamp 	event->lcore = lcore;
549eeeac667SDaniel Verkamp 	event->fn = fn;
550eeeac667SDaniel Verkamp 	event->arg1 = arg1;
551eeeac667SDaniel Verkamp 	event->arg2 = arg2;
552eeeac667SDaniel Verkamp 
553eeeac667SDaniel Verkamp 	return event;
554eeeac667SDaniel Verkamp }
555eeeac667SDaniel Verkamp 
556eeeac667SDaniel Verkamp void
557c3ede774SDaniel Verkamp spdk_event_call(struct spdk_event *event)
558eeeac667SDaniel Verkamp {
559eeeac667SDaniel Verkamp 	int rc;
560eeeac667SDaniel Verkamp 	struct spdk_reactor *reactor;
561eff5b149SLiu Xiaodong 	struct spdk_reactor *local_reactor = NULL;
562eff5b149SLiu Xiaodong 	uint32_t current_core = spdk_env_get_current_core();
563eeeac667SDaniel Verkamp 
564eeeac667SDaniel Verkamp 	reactor = spdk_reactor_get(event->lcore);
565eeeac667SDaniel Verkamp 
566085ade57SBen Walker 	assert(reactor != NULL);
567a17ad921SBen Walker 	assert(reactor->events != NULL);
568085ade57SBen Walker 
5691554a344SShuhei Matsumoto 	rc = spdk_ring_enqueue(reactor->events, (void **)&event, 1, NULL);
57042491fb8SBen Walker 	if (rc != 1) {
571a17ad921SBen Walker 		assert(false);
572a17ad921SBen Walker 	}
5734bf6e4bbSLiu Xiaodong 
574eff5b149SLiu Xiaodong 	if (current_core != SPDK_ENV_LCORE_ID_ANY) {
575eff5b149SLiu Xiaodong 		local_reactor = spdk_reactor_get(current_core);
576eff5b149SLiu Xiaodong 	}
577eff5b149SLiu Xiaodong 
578eff5b149SLiu Xiaodong 	/* If spdk_event_call isn't called on a reactor, always send a notification.
579eff5b149SLiu Xiaodong 	 * If it is called on a reactor, send a notification if the destination reactor
580eff5b149SLiu Xiaodong 	 * is indicated in interrupt mode state.
581eff5b149SLiu Xiaodong 	 */
582eff5b149SLiu Xiaodong 	if (spdk_unlikely(local_reactor == NULL) ||
583eff5b149SLiu Xiaodong 	    spdk_unlikely(spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, event->lcore))) {
5844bf6e4bbSLiu Xiaodong 		uint64_t notify = 1;
5854bf6e4bbSLiu Xiaodong 
5864bf6e4bbSLiu Xiaodong 		rc = write(reactor->events_fd, &notify, sizeof(notify));
5874bf6e4bbSLiu Xiaodong 		if (rc < 0) {
5884bf6e4bbSLiu Xiaodong 			SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
5894bf6e4bbSLiu Xiaodong 		}
5904bf6e4bbSLiu Xiaodong 	}
591eeeac667SDaniel Verkamp }
592eeeac667SDaniel Verkamp 
5932d48b7dcSJohn Levon static inline int
5942d48b7dcSJohn Levon event_queue_run_batch(void *arg)
595eeeac667SDaniel Verkamp {
5962d48b7dcSJohn Levon 	struct spdk_reactor *reactor = arg;
5972d48b7dcSJohn Levon 	size_t count, i;
59852bbb267SDaniel Verkamp 	void *events[SPDK_EVENT_BATCH_SIZE];
599eeeac667SDaniel Verkamp 
60093d63599SDaniel Verkamp #ifdef DEBUG
60193d63599SDaniel Verkamp 	/*
60242491fb8SBen Walker 	 * spdk_ring_dequeue() fills events and returns how many entries it wrote,
60393d63599SDaniel Verkamp 	 * so we will never actually read uninitialized data from events, but just to be sure
60493d63599SDaniel Verkamp 	 * (and to silence a static analyzer false positive), initialize the array to NULL pointers.
60593d63599SDaniel Verkamp 	 */
60693d63599SDaniel Verkamp 	memset(events, 0, sizeof(events));
60793d63599SDaniel Verkamp #endif
60842491fb8SBen Walker 
609eff5b149SLiu Xiaodong 	/* Operate event notification if this reactor currently runs in interrupt state */
610eff5b149SLiu Xiaodong 	if (spdk_unlikely(reactor->in_interrupt)) {
6114bf6e4bbSLiu Xiaodong 		uint64_t notify = 1;
6124bf6e4bbSLiu Xiaodong 		int rc;
6134bf6e4bbSLiu Xiaodong 
61442491fb8SBen Walker 		count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);
6154bf6e4bbSLiu Xiaodong 
6164bf6e4bbSLiu Xiaodong 		if (spdk_ring_count(reactor->events) != 0) {
6174bf6e4bbSLiu Xiaodong 			/* Trigger new notification if there are still events in event-queue waiting for processing. */
6184bf6e4bbSLiu Xiaodong 			rc = write(reactor->events_fd, &notify, sizeof(notify));
6194bf6e4bbSLiu Xiaodong 			if (rc < 0) {
6204bf6e4bbSLiu Xiaodong 				SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
6214bf6e4bbSLiu Xiaodong 				return -errno;
6224bf6e4bbSLiu Xiaodong 			}
6234bf6e4bbSLiu Xiaodong 		}
6244bf6e4bbSLiu Xiaodong 	} else {
6254bf6e4bbSLiu Xiaodong 		count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);
6264bf6e4bbSLiu Xiaodong 	}
6274bf6e4bbSLiu Xiaodong 
62852bbb267SDaniel Verkamp 	if (count == 0) {
629eeeac667SDaniel Verkamp 		return 0;
630eeeac667SDaniel Verkamp 	}
631eeeac667SDaniel Verkamp 
63252bbb267SDaniel Verkamp 	for (i = 0; i < count; i++) {
63352bbb267SDaniel Verkamp 		struct spdk_event *event = events[i];
634eeeac667SDaniel Verkamp 
63593d63599SDaniel Verkamp 		assert(event != NULL);
636f470a0dcSJim Harris 		assert(spdk_get_thread() == NULL);
63770f3606bSJohn Levon 		SPDK_DTRACE_PROBE3(event_exec, event->fn,
63870f3606bSJohn Levon 				   event->arg1, event->arg2);
63944ef085bSDaniel Verkamp 		event->fn(event->arg1, event->arg2);
6400598e484STomasz Zawadzki 	}
641605e530aSBen Walker 
642e1ec5c60SBen Walker 	spdk_mempool_put_bulk(g_spdk_event_mempool, events, count);
643086346a4SBen Walker 
6442d48b7dcSJohn Levon 	return (int)count;
645eeeac667SDaniel Verkamp }
646eeeac667SDaniel Verkamp 
64725baf714Ssuhua /* 1s */
64886a21aeeSBen Walker #define CONTEXT_SWITCH_MONITOR_PERIOD 1000000
64986a21aeeSBen Walker 
650c3bc40a6SRoman Sudarikov static int
65186a21aeeSBen Walker get_rusage(struct spdk_reactor *reactor)
6528d70322aSCunyin Chang {
6538d70322aSCunyin Chang 	struct rusage		rusage;
6548d70322aSCunyin Chang 
6558d70322aSCunyin Chang 	if (getrusage(RUSAGE_THREAD, &rusage) != 0) {
656c3bc40a6SRoman Sudarikov 		return -1;
6578d70322aSCunyin Chang 	}
6588d70322aSCunyin Chang 
6598d70322aSCunyin Chang 	if (rusage.ru_nvcsw != reactor->rusage.ru_nvcsw || rusage.ru_nivcsw != reactor->rusage.ru_nivcsw) {
6602172c432STomasz Zawadzki 		SPDK_INFOLOG(reactor,
6618d70322aSCunyin Chang 			     "Reactor %d: %ld voluntary context switches and %ld involuntary context switches in the last second.\n",
6628d70322aSCunyin Chang 			     reactor->lcore, rusage.ru_nvcsw - reactor->rusage.ru_nvcsw,
6638d70322aSCunyin Chang 			     rusage.ru_nivcsw - reactor->rusage.ru_nivcsw);
6648d70322aSCunyin Chang 	}
6658d70322aSCunyin Chang 	reactor->rusage = rusage;
666c3bc40a6SRoman Sudarikov 
667c3bc40a6SRoman Sudarikov 	return -1;
6688d70322aSCunyin Chang }
66970c3e1f2SDaniel Verkamp 
67070c3e1f2SDaniel Verkamp void
67125c5e3f5SShuhei Matsumoto spdk_framework_enable_context_switch_monitor(bool enable)
67270c3e1f2SDaniel Verkamp {
67386a21aeeSBen Walker 	/* This global is being read by multiple threads, so this isn't
67486a21aeeSBen Walker 	 * strictly thread safe. However, we're toggling between true and
67586a21aeeSBen Walker 	 * false here, and if a thread sees the value update later than it
67686a21aeeSBen Walker 	 * should, it's no big deal. */
67725c5e3f5SShuhei Matsumoto 	g_framework_context_switch_monitor_enabled = enable;
67870c3e1f2SDaniel Verkamp }
67970c3e1f2SDaniel Verkamp 
68070c3e1f2SDaniel Verkamp bool
68125c5e3f5SShuhei Matsumoto spdk_framework_context_switch_monitor_enabled(void)
68270c3e1f2SDaniel Verkamp {
68325c5e3f5SShuhei Matsumoto 	return g_framework_context_switch_monitor_enabled;
68470c3e1f2SDaniel Verkamp }
6858d70322aSCunyin Chang 
68638527b07SShuhei Matsumoto static void
68738527b07SShuhei Matsumoto _set_thread_name(const char *thread_name)
68838527b07SShuhei Matsumoto {
68938527b07SShuhei Matsumoto #if defined(__linux__)
69038527b07SShuhei Matsumoto 	prctl(PR_SET_NAME, thread_name, 0, 0, 0);
69138527b07SShuhei Matsumoto #elif defined(__FreeBSD__)
69238527b07SShuhei Matsumoto 	pthread_set_name_np(pthread_self(), thread_name);
69338527b07SShuhei Matsumoto #else
694a14deb22SNick Connolly 	pthread_setname_np(pthread_self(), thread_name);
69538527b07SShuhei Matsumoto #endif
69638527b07SShuhei Matsumoto }
69738527b07SShuhei Matsumoto 
6982cffc800SVitaliy Mysak static void
6992cffc800SVitaliy Mysak _init_thread_stats(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
7002cffc800SVitaliy Mysak {
7012cffc800SVitaliy Mysak 	struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
702d9f5da13STomasz Zawadzki 	struct spdk_thread_stats prev_total_stats;
70347a6578eSTomasz Zawadzki 
704d9f5da13STomasz Zawadzki 	/* Read total_stats before updating it to calculate stats during the last scheduling period. */
705d9f5da13STomasz Zawadzki 	prev_total_stats = lw_thread->total_stats;
7062cffc800SVitaliy Mysak 
7072cffc800SVitaliy Mysak 	spdk_set_thread(thread);
708d9f5da13STomasz Zawadzki 	spdk_thread_get_stats(&lw_thread->total_stats);
70947a6578eSTomasz Zawadzki 	spdk_set_thread(NULL);
71047a6578eSTomasz Zawadzki 
711d9f5da13STomasz Zawadzki 	lw_thread->current_stats.busy_tsc = lw_thread->total_stats.busy_tsc - prev_total_stats.busy_tsc;
712d9f5da13STomasz Zawadzki 	lw_thread->current_stats.idle_tsc = lw_thread->total_stats.idle_tsc - prev_total_stats.idle_tsc;
7132cffc800SVitaliy Mysak }
7142cffc800SVitaliy Mysak 
7152cffc800SVitaliy Mysak static void
716b74b6133STomasz Zawadzki _threads_reschedule_thread(struct spdk_scheduler_thread_info *thread_info)
717b74b6133STomasz Zawadzki {
718b74b6133STomasz Zawadzki 	struct spdk_lw_thread *lw_thread;
719b74b6133STomasz Zawadzki 	struct spdk_thread *thread;
720b74b6133STomasz Zawadzki 
721b74b6133STomasz Zawadzki 	thread = spdk_thread_get_by_id(thread_info->thread_id);
722b74b6133STomasz Zawadzki 	if (thread == NULL) {
723b74b6133STomasz Zawadzki 		/* Thread no longer exists. */
724b74b6133STomasz Zawadzki 		return;
725b74b6133STomasz Zawadzki 	}
726b74b6133STomasz Zawadzki 	lw_thread = spdk_thread_get_ctx(thread);
727b74b6133STomasz Zawadzki 	assert(lw_thread != NULL);
728b74b6133STomasz Zawadzki 
729b74b6133STomasz Zawadzki 	lw_thread->lcore = thread_info->lcore;
730b74b6133STomasz Zawadzki 	lw_thread->resched = true;
731b74b6133STomasz Zawadzki }
732b74b6133STomasz Zawadzki 
733b74b6133STomasz Zawadzki static void
7342cffc800SVitaliy Mysak _threads_reschedule(struct spdk_scheduler_core_info *cores_info)
7352cffc800SVitaliy Mysak {
7362cffc800SVitaliy Mysak 	struct spdk_scheduler_core_info *core;
737b74b6133STomasz Zawadzki 	struct spdk_scheduler_thread_info *thread_info;
7382cffc800SVitaliy Mysak 	uint32_t i, j;
7392cffc800SVitaliy Mysak 
7402cffc800SVitaliy Mysak 	SPDK_ENV_FOREACH_CORE(i) {
7412cffc800SVitaliy Mysak 		core = &cores_info[i];
7422cffc800SVitaliy Mysak 		for (j = 0; j < core->threads_count; j++) {
743b74b6133STomasz Zawadzki 			thread_info = &core->thread_infos[j];
744b74b6133STomasz Zawadzki 			if (thread_info->lcore != i) {
745462fd69eSSeungYeon Shin 				if (core->isolated || cores_info[thread_info->lcore].isolated) {
746462fd69eSSeungYeon Shin 					SPDK_ERRLOG("A thread cannot be moved from an isolated core or \
747462fd69eSSeungYeon Shin 								moved to an isolated core. Skip rescheduling thread\n");
748462fd69eSSeungYeon Shin 					continue;
749462fd69eSSeungYeon Shin 				}
750b74b6133STomasz Zawadzki 				_threads_reschedule_thread(thread_info);
7512cffc800SVitaliy Mysak 			}
7522cffc800SVitaliy Mysak 		}
753fba4a97dSTomasz Zawadzki 		core->threads_count = 0;
754fba4a97dSTomasz Zawadzki 		free(core->thread_infos);
755fba4a97dSTomasz Zawadzki 		core->thread_infos = NULL;
7562cffc800SVitaliy Mysak 	}
7572cffc800SVitaliy Mysak }
7582cffc800SVitaliy Mysak 
7592cffc800SVitaliy Mysak static void
7606859a49aSMaciej Szwed _reactors_scheduler_fini(void)
7612cffc800SVitaliy Mysak {
7622cffc800SVitaliy Mysak 	/* Reschedule based on the balancing output */
7632cffc800SVitaliy Mysak 	_threads_reschedule(g_core_infos);
7642cffc800SVitaliy Mysak 
765cff96883STomasz Zawadzki 	g_scheduling_in_progress = false;
7662cffc800SVitaliy Mysak }
7676859a49aSMaciej Szwed 
7686859a49aSMaciej Szwed static void
76911ff66feSsyeon.shin _reactors_scheduler_update_core_mode(void *ctx1, void *ctx2)
7706859a49aSMaciej Szwed {
7716859a49aSMaciej Szwed 	struct spdk_reactor *reactor;
772cf155f23STomasz Zawadzki 	uint32_t i;
7736859a49aSMaciej Szwed 	int rc = 0;
7746859a49aSMaciej Szwed 
775cf155f23STomasz Zawadzki 	for (i = g_scheduler_core_number; i < SPDK_ENV_LCORE_ID_ANY; i = spdk_env_get_next_core(i)) {
776cf155f23STomasz Zawadzki 		reactor = spdk_reactor_get(i);
777ebeac5deSGangCao 		assert(reactor != NULL);
778cf155f23STomasz Zawadzki 		if (reactor->in_interrupt != g_core_infos[i].interrupt_mode) {
7796859a49aSMaciej Szwed 			/* Switch next found reactor to new state */
780cf155f23STomasz Zawadzki 			rc = spdk_reactor_set_interrupt_mode(i, g_core_infos[i].interrupt_mode,
781cf155f23STomasz Zawadzki 							     _reactors_scheduler_update_core_mode, NULL);
7826859a49aSMaciej Szwed 			if (rc == 0) {
783cf155f23STomasz Zawadzki 				/* Set core to start with after callback completes */
784cf155f23STomasz Zawadzki 				g_scheduler_core_number = spdk_env_get_next_core(i);
7856859a49aSMaciej Szwed 				return;
7866859a49aSMaciej Szwed 			}
7876859a49aSMaciej Szwed 		}
788cf155f23STomasz Zawadzki 	}
789cf155f23STomasz Zawadzki 	_reactors_scheduler_fini();
7906859a49aSMaciej Szwed }
7916859a49aSMaciej Szwed 
7926859a49aSMaciej Szwed static void
793b518cbe2STomasz Zawadzki _reactors_scheduler_cancel(void *arg1, void *arg2)
794b518cbe2STomasz Zawadzki {
795b518cbe2STomasz Zawadzki 	struct spdk_scheduler_core_info *core;
796b518cbe2STomasz Zawadzki 	uint32_t i;
797b518cbe2STomasz Zawadzki 
798b518cbe2STomasz Zawadzki 	SPDK_ENV_FOREACH_CORE(i) {
799b518cbe2STomasz Zawadzki 		core = &g_core_infos[i];
800b518cbe2STomasz Zawadzki 		core->threads_count = 0;
801b518cbe2STomasz Zawadzki 		free(core->thread_infos);
802b518cbe2STomasz Zawadzki 		core->thread_infos = NULL;
803b518cbe2STomasz Zawadzki 	}
804b518cbe2STomasz Zawadzki 
805b518cbe2STomasz Zawadzki 	g_scheduling_in_progress = false;
806b518cbe2STomasz Zawadzki }
807b518cbe2STomasz Zawadzki 
808b518cbe2STomasz Zawadzki static void
8096859a49aSMaciej Szwed _reactors_scheduler_balance(void *arg1, void *arg2)
8106859a49aSMaciej Szwed {
811a86e40f3STomasz Zawadzki 	struct spdk_scheduler *scheduler = spdk_scheduler_get();
812d6c4f8cfSTomasz Zawadzki 
813b518cbe2STomasz Zawadzki 	if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING || scheduler == NULL) {
814b518cbe2STomasz Zawadzki 		_reactors_scheduler_cancel(NULL, NULL);
815b518cbe2STomasz Zawadzki 		return;
816b518cbe2STomasz Zawadzki 	}
817b518cbe2STomasz Zawadzki 
818d6c4f8cfSTomasz Zawadzki 	scheduler->balance(g_core_infos, g_reactor_count);
8196859a49aSMaciej Szwed 
820cf155f23STomasz Zawadzki 	g_scheduler_core_number = spdk_env_get_first_core();
82111ff66feSsyeon.shin 	_reactors_scheduler_update_core_mode(NULL, NULL);
8226859a49aSMaciej Szwed }
823e2f773aaSLiu Xiaodong 
8242cffc800SVitaliy Mysak /* Phase 1 of thread scheduling is to gather metrics on the existing threads */
8252cffc800SVitaliy Mysak static void
8262cffc800SVitaliy Mysak _reactors_scheduler_gather_metrics(void *arg1, void *arg2)
8272cffc800SVitaliy Mysak {
8282cffc800SVitaliy Mysak 	struct spdk_scheduler_core_info *core_info;
8292cffc800SVitaliy Mysak 	struct spdk_lw_thread *lw_thread;
830b74b6133STomasz Zawadzki 	struct spdk_thread *thread;
8312cffc800SVitaliy Mysak 	struct spdk_reactor *reactor;
8322cffc800SVitaliy Mysak 	uint32_t next_core;
833fba4a97dSTomasz Zawadzki 	uint32_t i = 0;
8342cffc800SVitaliy Mysak 
8352cffc800SVitaliy Mysak 	reactor = spdk_reactor_get(spdk_env_get_current_core());
8365e01bdb4SWeifeng Su 	assert(reactor != NULL);
8372cffc800SVitaliy Mysak 	core_info = &g_core_infos[reactor->lcore];
8382cffc800SVitaliy Mysak 	core_info->lcore = reactor->lcore;
839abbfa1a5STomasz Zawadzki 	core_info->current_idle_tsc = reactor->idle_tsc - core_info->total_idle_tsc;
840e837ba3bSTomasz Zawadzki 	core_info->total_idle_tsc = reactor->idle_tsc;
841abbfa1a5STomasz Zawadzki 	core_info->current_busy_tsc = reactor->busy_tsc - core_info->total_busy_tsc;
842e837ba3bSTomasz Zawadzki 	core_info->total_busy_tsc = reactor->busy_tsc;
8439cde1509SMaciej Szwed 	core_info->interrupt_mode = reactor->in_interrupt;
844fba4a97dSTomasz Zawadzki 	core_info->threads_count = 0;
845462fd69eSSeungYeon Shin 	core_info->isolated = scheduler_is_isolated_core(reactor->lcore);
8462cffc800SVitaliy Mysak 
8472cffc800SVitaliy Mysak 	SPDK_DEBUGLOG(reactor, "Gathering metrics on %u\n", reactor->lcore);
8482cffc800SVitaliy Mysak 
84982c46626SAnisa Su 	spdk_trace_record(TRACE_SCHEDULER_CORE_STATS, reactor->trace_id, 0, 0,
85082c46626SAnisa Su 			  core_info->current_busy_tsc,
85182c46626SAnisa Su 			  core_info->current_idle_tsc);
85282c46626SAnisa Su 
853fba4a97dSTomasz Zawadzki 	if (reactor->thread_count > 0) {
854fba4a97dSTomasz Zawadzki 		core_info->thread_infos = calloc(reactor->thread_count, sizeof(*core_info->thread_infos));
855b74b6133STomasz Zawadzki 		if (core_info->thread_infos == NULL) {
856e2f773aaSLiu Xiaodong 			SPDK_ERRLOG("Failed to allocate memory when gathering metrics on %u\n", reactor->lcore);
857e2f773aaSLiu Xiaodong 
858e2f773aaSLiu Xiaodong 			/* Cancel this round of schedule work */
859462fd69eSSeungYeon Shin 			_event_call(spdk_scheduler_get_scheduling_lcore(), _reactors_scheduler_cancel, NULL, NULL);
860e2f773aaSLiu Xiaodong 			return;
861e2f773aaSLiu Xiaodong 		}
8622cffc800SVitaliy Mysak 
8632cffc800SVitaliy Mysak 		TAILQ_FOREACH(lw_thread, &reactor->threads, link) {
864fba4a97dSTomasz Zawadzki 			_init_thread_stats(reactor, lw_thread);
865fba4a97dSTomasz Zawadzki 
866b74b6133STomasz Zawadzki 			core_info->thread_infos[i].lcore = lw_thread->lcore;
867b74b6133STomasz Zawadzki 			thread = spdk_thread_get_from_ctx(lw_thread);
868fba4a97dSTomasz Zawadzki 			assert(thread != NULL);
869b74b6133STomasz Zawadzki 			core_info->thread_infos[i].thread_id = spdk_thread_get_id(thread);
870b74b6133STomasz Zawadzki 			core_info->thread_infos[i].total_stats = lw_thread->total_stats;
871b74b6133STomasz Zawadzki 			core_info->thread_infos[i].current_stats = lw_thread->current_stats;
872fba4a97dSTomasz Zawadzki 			core_info->threads_count++;
873fba4a97dSTomasz Zawadzki 			assert(core_info->threads_count <= reactor->thread_count);
87482c46626SAnisa Su 
87582c46626SAnisa Su 			spdk_trace_record(TRACE_SCHEDULER_THREAD_STATS, spdk_thread_get_trace_id(thread), 0, 0,
87682c46626SAnisa Su 					  lw_thread->current_stats.busy_tsc,
87782c46626SAnisa Su 					  lw_thread->current_stats.idle_tsc);
87882c46626SAnisa Su 
8792cffc800SVitaliy Mysak 			i++;
8802cffc800SVitaliy Mysak 		}
8812cffc800SVitaliy Mysak 	}
8822cffc800SVitaliy Mysak 
8832cffc800SVitaliy Mysak 	next_core = spdk_env_get_next_core(reactor->lcore);
8842cffc800SVitaliy Mysak 	if (next_core == UINT32_MAX) {
8852cffc800SVitaliy Mysak 		next_core = spdk_env_get_first_core();
8862cffc800SVitaliy Mysak 	}
8872cffc800SVitaliy Mysak 
8882cffc800SVitaliy Mysak 	/* If we've looped back around to the scheduler thread, move to the next phase */
889462fd69eSSeungYeon Shin 	if (next_core == spdk_scheduler_get_scheduling_lcore()) {
8902cffc800SVitaliy Mysak 		/* Phase 2 of scheduling is rebalancing - deciding which threads to move where */
8919e81535eSyidong0635 		_event_call(next_core, _reactors_scheduler_balance, NULL, NULL);
8922cffc800SVitaliy Mysak 		return;
8932cffc800SVitaliy Mysak 	}
8942cffc800SVitaliy Mysak 
8959e81535eSyidong0635 	_event_call(next_core, _reactors_scheduler_gather_metrics, NULL, NULL);
8962cffc800SVitaliy Mysak }
8972cffc800SVitaliy Mysak 
898a4335febSShuhei Matsumoto static int _reactor_schedule_thread(struct spdk_thread *thread);
89968161ffcSShuhei Matsumoto static uint64_t g_rusage_period;
900a4335febSShuhei Matsumoto 
9010859c837Syidong0635 static void
9020859c837Syidong0635 _reactor_remove_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
9037d19e50aSLiu Xiaodong {
9047d19e50aSLiu Xiaodong 	struct spdk_thread	*thread = spdk_thread_get_from_ctx(lw_thread);
905fe2d64ceSBen Walker 	struct spdk_fd_group	*grp;
9067d19e50aSLiu Xiaodong 
9077d19e50aSLiu Xiaodong 	TAILQ_REMOVE(&reactor->threads, lw_thread, link);
9087d19e50aSLiu Xiaodong 	assert(reactor->thread_count > 0);
9097d19e50aSLiu Xiaodong 	reactor->thread_count--;
9104bf6e4bbSLiu Xiaodong 
911eff5b149SLiu Xiaodong 	/* Operate thread intr if running with full interrupt ability */
912eff5b149SLiu Xiaodong 	if (spdk_interrupt_mode_is_enabled()) {
913fe2d64ceSBen Walker 		if (reactor->in_interrupt) {
914fe2d64ceSBen Walker 			grp = spdk_thread_get_interrupt_fd_group(thread);
915fe2d64ceSBen Walker 			spdk_fd_group_unnest(reactor->fgrp, grp);
916fe2d64ceSBen Walker 		}
9174bf6e4bbSLiu Xiaodong 	}
9180859c837Syidong0635 }
9190859c837Syidong0635 
9200859c837Syidong0635 static bool
9210859c837Syidong0635 reactor_post_process_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
9220859c837Syidong0635 {
9230859c837Syidong0635 	struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
9240859c837Syidong0635 
9257d19e50aSLiu Xiaodong 	if (spdk_unlikely(spdk_thread_is_exited(thread) &&
9267d19e50aSLiu Xiaodong 			  spdk_thread_is_idle(thread))) {
9270859c837Syidong0635 		_reactor_remove_lw_thread(reactor, lw_thread);
9287d19e50aSLiu Xiaodong 		spdk_thread_destroy(thread);
9297d19e50aSLiu Xiaodong 		return true;
9307d19e50aSLiu Xiaodong 	}
9317d19e50aSLiu Xiaodong 
9323f4b2c67SYifan Bian 	if (spdk_unlikely(lw_thread->resched && !spdk_thread_is_bound(thread))) {
93389c1e5bfSApokleos 		lw_thread->resched = false;
93489c1e5bfSApokleos 		_reactor_remove_lw_thread(reactor, lw_thread);
93589c1e5bfSApokleos 		_reactor_schedule_thread(thread);
93689c1e5bfSApokleos 		return true;
93789c1e5bfSApokleos 	}
93889c1e5bfSApokleos 
9397d19e50aSLiu Xiaodong 	return false;
9407d19e50aSLiu Xiaodong }
9417d19e50aSLiu Xiaodong 
942fbb77a56SShuhei Matsumoto static void
9434bf6e4bbSLiu Xiaodong reactor_interrupt_run(struct spdk_reactor *reactor)
9444bf6e4bbSLiu Xiaodong {
9454bf6e4bbSLiu Xiaodong 	int block_timeout = -1; /* _EPOLL_WAIT_FOREVER */
9464bf6e4bbSLiu Xiaodong 
9474bf6e4bbSLiu Xiaodong 	spdk_fd_group_wait(reactor->fgrp, block_timeout);
9484bf6e4bbSLiu Xiaodong }
9494bf6e4bbSLiu Xiaodong 
9504bf6e4bbSLiu Xiaodong static void
951fd361206SSeth Howell _reactor_run(struct spdk_reactor *reactor)
952eeeac667SDaniel Verkamp {
953032920f2SBen Walker 	struct spdk_thread	*thread;
95412759ab5SBen Walker 	struct spdk_lw_thread	*lw_thread, *tmp;
95578accbf4SShuhei Matsumoto 	uint64_t		now;
95678accbf4SShuhei Matsumoto 	int			rc;
95712759ab5SBen Walker 
958fd361206SSeth Howell 	event_queue_run_batch(reactor);
9592d0aa1adSBen Walker 
9607438c388STomasz Zawadzki 	/* If no threads are present on the reactor,
9617438c388STomasz Zawadzki 	 * tsc_last gets outdated. Update it to track
9627438c388STomasz Zawadzki 	 * thread execution time correctly. */
9637438c388STomasz Zawadzki 	if (spdk_unlikely(TAILQ_EMPTY(&reactor->threads))) {
964e6531062STomasz Zawadzki 		now = spdk_get_ticks();
965e6531062STomasz Zawadzki 		reactor->idle_tsc += now - reactor->tsc_last;
966e6531062STomasz Zawadzki 		reactor->tsc_last = now;
9677438c388STomasz Zawadzki 		return;
9687438c388STomasz Zawadzki 	}
9697438c388STomasz Zawadzki 
97012759ab5SBen Walker 	TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
97112759ab5SBen Walker 		thread = spdk_thread_get_from_ctx(lw_thread);
97278accbf4SShuhei Matsumoto 		rc = spdk_thread_poll(thread, 0, reactor->tsc_last);
97378accbf4SShuhei Matsumoto 
97478accbf4SShuhei Matsumoto 		now = spdk_thread_get_last_tsc(thread);
97578accbf4SShuhei Matsumoto 		if (rc == 0) {
97678accbf4SShuhei Matsumoto 			reactor->idle_tsc += now - reactor->tsc_last;
97778accbf4SShuhei Matsumoto 		} else if (rc > 0) {
97878accbf4SShuhei Matsumoto 			reactor->busy_tsc += now - reactor->tsc_last;
97978accbf4SShuhei Matsumoto 		}
98078accbf4SShuhei Matsumoto 		reactor->tsc_last = now;
981abb942bdSShuhei Matsumoto 
9827d19e50aSLiu Xiaodong 		reactor_post_process_lw_thread(reactor, lw_thread);
98312759ab5SBen Walker 	}
984eeeac667SDaniel Verkamp }
985eeeac667SDaniel Verkamp 
986fbb77a56SShuhei Matsumoto static int
987fd361206SSeth Howell reactor_run(void *arg)
988fbb77a56SShuhei Matsumoto {
989fbb77a56SShuhei Matsumoto 	struct spdk_reactor	*reactor = arg;
990fbb77a56SShuhei Matsumoto 	struct spdk_thread	*thread;
991fbb77a56SShuhei Matsumoto 	struct spdk_lw_thread	*lw_thread, *tmp;
992fbb77a56SShuhei Matsumoto 	char			thread_name[32];
9932cffc800SVitaliy Mysak 	uint64_t		last_sched = 0;
994fbb77a56SShuhei Matsumoto 
995fbb77a56SShuhei Matsumoto 	SPDK_NOTICELOG("Reactor started on core %u\n", reactor->lcore);
996fbb77a56SShuhei Matsumoto 
997fbb77a56SShuhei Matsumoto 	/* Rename the POSIX thread because the reactor is tied to the POSIX
998fbb77a56SShuhei Matsumoto 	 * thread in the SPDK event library.
999fbb77a56SShuhei Matsumoto 	 */
1000fbb77a56SShuhei Matsumoto 	snprintf(thread_name, sizeof(thread_name), "reactor_%u", reactor->lcore);
1001fbb77a56SShuhei Matsumoto 	_set_thread_name(thread_name);
1002fbb77a56SShuhei Matsumoto 
100382c46626SAnisa Su 	reactor->trace_id = spdk_trace_register_owner(OWNER_TYPE_REACTOR, thread_name);
100482c46626SAnisa Su 
100580323e20SShuhei Matsumoto 	reactor->tsc_last = spdk_get_ticks();
100680323e20SShuhei Matsumoto 
1007fbb77a56SShuhei Matsumoto 	while (1) {
1008eff5b149SLiu Xiaodong 		/* Execute interrupt process fn if this reactor currently runs in interrupt state */
1009eff5b149SLiu Xiaodong 		if (spdk_unlikely(reactor->in_interrupt)) {
10104bf6e4bbSLiu Xiaodong 			reactor_interrupt_run(reactor);
10114bf6e4bbSLiu Xiaodong 		} else {
1012fd361206SSeth Howell 			_reactor_run(reactor);
10134bf6e4bbSLiu Xiaodong 		}
1014fbb77a56SShuhei Matsumoto 
1015ba7aac83SLiu Xiaodong 		if (g_framework_context_switch_monitor_enabled) {
1016ba7aac83SLiu Xiaodong 			if ((reactor->last_rusage + g_rusage_period) < reactor->tsc_last) {
1017ba7aac83SLiu Xiaodong 				get_rusage(reactor);
1018ba7aac83SLiu Xiaodong 				reactor->last_rusage = reactor->tsc_last;
1019ba7aac83SLiu Xiaodong 			}
1020ba7aac83SLiu Xiaodong 		}
1021ba7aac83SLiu Xiaodong 
1022f01c6f2dSJim Harris 		if (spdk_unlikely(g_scheduler_period_in_tsc > 0 &&
1023f01c6f2dSJim Harris 				  (reactor->tsc_last - last_sched) > g_scheduler_period_in_tsc &&
10242cffc800SVitaliy Mysak 				  reactor == g_scheduling_reactor &&
1025cff96883STomasz Zawadzki 				  !g_scheduling_in_progress)) {
10262cffc800SVitaliy Mysak 			last_sched = reactor->tsc_last;
1027cff96883STomasz Zawadzki 			g_scheduling_in_progress = true;
102882c46626SAnisa Su 			spdk_trace_record(TRACE_SCHEDULER_PERIOD_START, 0, 0, 0);
10292cffc800SVitaliy Mysak 			_reactors_scheduler_gather_metrics(NULL, NULL);
10302cffc800SVitaliy Mysak 		}
10312cffc800SVitaliy Mysak 
1032fbb77a56SShuhei Matsumoto 		if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) {
1033fbb77a56SShuhei Matsumoto 			break;
1034fbb77a56SShuhei Matsumoto 		}
1035fbb77a56SShuhei Matsumoto 	}
1036fbb77a56SShuhei Matsumoto 
1037e9aec674SShuhei Matsumoto 	TAILQ_FOREACH(lw_thread, &reactor->threads, link) {
1038e9aec674SShuhei Matsumoto 		thread = spdk_thread_get_from_ctx(lw_thread);
10391d2700d4SJim Harris 		/* All threads should have already had spdk_thread_exit() called on them, except
10401d2700d4SJim Harris 		 * for the app thread.
10411d2700d4SJim Harris 		 */
10421d2700d4SJim Harris 		if (spdk_thread_is_running(thread)) {
10431b1967bdSJim Harris 			if (!spdk_thread_is_app_thread(thread)) {
10441d2700d4SJim Harris 				SPDK_ERRLOG("spdk_thread_exit() was not called on thread '%s'\n",
10451d2700d4SJim Harris 					    spdk_thread_get_name(thread));
10461d2700d4SJim Harris 				SPDK_ERRLOG("This will result in a non-zero exit code in a future release.\n");
10471d2700d4SJim Harris 			}
1048e9aec674SShuhei Matsumoto 			spdk_set_thread(thread);
1049e9aec674SShuhei Matsumoto 			spdk_thread_exit(thread);
1050e9aec674SShuhei Matsumoto 		}
10511d2700d4SJim Harris 	}
1052e9aec674SShuhei Matsumoto 
1053e9aec674SShuhei Matsumoto 	while (!TAILQ_EMPTY(&reactor->threads)) {
1054032920f2SBen Walker 		TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
1055032920f2SBen Walker 			thread = spdk_thread_get_from_ctx(lw_thread);
1056e9aec674SShuhei Matsumoto 			spdk_set_thread(thread);
1057e9aec674SShuhei Matsumoto 			if (spdk_thread_is_exited(thread)) {
10580859c837Syidong0635 				_reactor_remove_lw_thread(reactor, lw_thread);
1059e9aec674SShuhei Matsumoto 				spdk_thread_destroy(thread);
1060e9aec674SShuhei Matsumoto 			} else {
1061abd932d6SJim Harris 				if (spdk_unlikely(reactor->in_interrupt)) {
1062abd932d6SJim Harris 					reactor_interrupt_run(reactor);
1063abd932d6SJim Harris 				} else {
1064e7ead00bSShuhei Matsumoto 					spdk_thread_poll(thread, 0, 0);
1065e7ead00bSShuhei Matsumoto 				}
1066e9aec674SShuhei Matsumoto 			}
1067032920f2SBen Walker 		}
1068abd932d6SJim Harris 	}
106912759ab5SBen Walker 
1070eeeac667SDaniel Verkamp 	return 0;
1071eeeac667SDaniel Verkamp }
1072eeeac667SDaniel Verkamp 
/*
 * Parse the core mask string into cpumask and restrict the result to the
 * cores in the application's core mask.
 *
 * Returns 0 on success, or the negative value returned by spdk_cpuset_parse()
 * if parsing failed.
 */
int
spdk_app_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
{
	int rc = spdk_cpuset_parse(cpumask, mask);

	if (rc >= 0) {
		/* Drop every core that is not part of the application's mask. */
		spdk_cpuset_and(cpumask, spdk_app_get_core_mask());
		rc = 0;
	}

	return rc;
}
1089eeeac667SDaniel Verkamp 
1090276c31cdSChangpeng Liu const struct spdk_cpuset *
1091eeeac667SDaniel Verkamp spdk_app_get_core_mask(void)
1092eeeac667SDaniel Verkamp {
1093ae4360f0SShuhei Matsumoto 	return &g_reactor_core_mask;
1094eeeac667SDaniel Verkamp }
1095eeeac667SDaniel Verkamp 
1096eeeac667SDaniel Verkamp void
1097f0d2e550SCunyin Chang spdk_reactors_start(void)
1098eeeac667SDaniel Verkamp {
1099eeeac667SDaniel Verkamp 	struct spdk_reactor *reactor;
11007f7c03a9SBen Walker 	uint32_t i, current_core;
11019b8ddd22SBen Walker 	int rc;
1102eeeac667SDaniel Verkamp 
110368161ffcSShuhei Matsumoto 	g_rusage_period = (CONTEXT_SWITCH_MONITOR_PERIOD * spdk_get_ticks_hz()) / SPDK_SEC_TO_USEC;
1104eeeac667SDaniel Verkamp 	g_reactor_state = SPDK_REACTOR_STATE_RUNNING;
1105c4f086b6SJim Harris 	/* Reinitialize to false, in case the app framework is restarting in the same process. */
1106c4f086b6SJim Harris 	g_stopping_reactors = false;
1107eeeac667SDaniel Verkamp 
11087f7c03a9SBen Walker 	current_core = spdk_env_get_current_core();
11097f7c03a9SBen Walker 	SPDK_ENV_FOREACH_CORE(i) {
11107f7c03a9SBen Walker 		if (i != current_core) {
1111eeeac667SDaniel Verkamp 			reactor = spdk_reactor_get(i);
1112085ade57SBen Walker 			if (reactor == NULL) {
1113085ade57SBen Walker 				continue;
1114085ade57SBen Walker 			}
1115085ade57SBen Walker 
1116fd361206SSeth Howell 			rc = spdk_env_thread_launch_pinned(reactor->lcore, reactor_run, reactor);
11179b8ddd22SBen Walker 			if (rc < 0) {
11189b8ddd22SBen Walker 				SPDK_ERRLOG("Unable to start reactor thread on core %u\n", reactor->lcore);
11199b8ddd22SBen Walker 				assert(false);
11209b8ddd22SBen Walker 				return;
11219b8ddd22SBen Walker 			}
1122eeeac667SDaniel Verkamp 		}
1123ae4360f0SShuhei Matsumoto 		spdk_cpuset_set_cpu(&g_reactor_core_mask, i, true);
11247f7c03a9SBen Walker 	}
1125eeeac667SDaniel Verkamp 
11263795c1cfSJim Harris 	/* Start the main reactor */
11277f7c03a9SBen Walker 	reactor = spdk_reactor_get(current_core);
1128085ade57SBen Walker 	assert(reactor != NULL);
1129fd361206SSeth Howell 	reactor_run(reactor);
1130eeeac667SDaniel Verkamp 
11319b8ddd22SBen Walker 	spdk_env_thread_wait_all();
1132eeeac667SDaniel Verkamp 
1133eeeac667SDaniel Verkamp 	g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN;
1134eeeac667SDaniel Verkamp }
1135eeeac667SDaniel Verkamp 
1136c4f086b6SJim Harris static void
1137c4f086b6SJim Harris _reactors_stop(void *arg1, void *arg2)
1138eeeac667SDaniel Verkamp {
11394bf6e4bbSLiu Xiaodong 	uint32_t i;
11404bf6e4bbSLiu Xiaodong 	int rc;
11414bf6e4bbSLiu Xiaodong 	struct spdk_reactor *reactor;
1142eff5b149SLiu Xiaodong 	struct spdk_reactor *local_reactor;
11434bf6e4bbSLiu Xiaodong 	uint64_t notify = 1;
11444bf6e4bbSLiu Xiaodong 
1145eeeac667SDaniel Verkamp 	g_reactor_state = SPDK_REACTOR_STATE_EXITING;
1146eff5b149SLiu Xiaodong 	local_reactor = spdk_reactor_get(spdk_env_get_current_core());
11474bf6e4bbSLiu Xiaodong 
11484bf6e4bbSLiu Xiaodong 	SPDK_ENV_FOREACH_CORE(i) {
1149eff5b149SLiu Xiaodong 		/* If spdk_event_call isn't called  on a reactor, always send a notification.
1150eff5b149SLiu Xiaodong 		 * If it is called on a reactor, send a notification if the destination reactor
1151eff5b149SLiu Xiaodong 		 * is indicated in interrupt mode state.
1152eff5b149SLiu Xiaodong 		 */
1153eff5b149SLiu Xiaodong 		if (local_reactor == NULL || spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, i)) {
11544bf6e4bbSLiu Xiaodong 			reactor = spdk_reactor_get(i);
11555e01bdb4SWeifeng Su 			assert(reactor != NULL);
11564bf6e4bbSLiu Xiaodong 			rc = write(reactor->events_fd, &notify, sizeof(notify));
11574bf6e4bbSLiu Xiaodong 			if (rc < 0) {
11584bf6e4bbSLiu Xiaodong 				SPDK_ERRLOG("failed to notify event queue for reactor(%u): %s.\n", i, spdk_strerror(errno));
11594bf6e4bbSLiu Xiaodong 				continue;
11604bf6e4bbSLiu Xiaodong 			}
11614bf6e4bbSLiu Xiaodong 		}
11624bf6e4bbSLiu Xiaodong 	}
1163eeeac667SDaniel Verkamp }
1164eeeac667SDaniel Verkamp 
/*
 * Event function that intentionally does nothing; spdk_reactors_stop() passes
 * it as the per-reactor function.
 */
static void
nop(void *arg1, void *arg2)
{
	/* Intentionally empty. */
}
1169c4f086b6SJim Harris 
1170c4f086b6SJim Harris void
1171c4f086b6SJim Harris spdk_reactors_stop(void *arg1)
1172c4f086b6SJim Harris {
1173abd932d6SJim Harris 	spdk_for_each_reactor(nop, NULL, NULL, _reactors_stop);
1174c4f086b6SJim Harris }
1175c4f086b6SJim Harris 
1176032920f2SBen Walker static pthread_mutex_t g_scheduler_mtx = PTHREAD_MUTEX_INITIALIZER;
1177032920f2SBen Walker static uint32_t g_next_core = UINT32_MAX;
1178032920f2SBen Walker 
1179032920f2SBen Walker static void
1180032920f2SBen Walker _schedule_thread(void *arg1, void *arg2)
1181032920f2SBen Walker {
1182032920f2SBen Walker 	struct spdk_lw_thread *lw_thread = arg1;
11833db73426STomasz Zawadzki 	struct spdk_thread *thread;
1184032920f2SBen Walker 	struct spdk_reactor *reactor;
118516ea979dSShuhei Matsumoto 	uint32_t current_core;
1186fe2d64ceSBen Walker 	struct spdk_fd_group *grp;
1187032920f2SBen Walker 
118816ea979dSShuhei Matsumoto 	current_core = spdk_env_get_current_core();
118916ea979dSShuhei Matsumoto 	reactor = spdk_reactor_get(current_core);
1190085ade57SBen Walker 	assert(reactor != NULL);
1191032920f2SBen Walker 
1192d9f5da13STomasz Zawadzki 	/* Update total_stats to reflect state of thread
11933db73426STomasz Zawadzki 	* at the end of the move. */
11943db73426STomasz Zawadzki 	thread = spdk_thread_get_from_ctx(lw_thread);
11953db73426STomasz Zawadzki 	spdk_set_thread(thread);
1196d9f5da13STomasz Zawadzki 	spdk_thread_get_stats(&lw_thread->total_stats);
11973db73426STomasz Zawadzki 	spdk_set_thread(NULL);
11983db73426STomasz Zawadzki 
1199118c273aSJim Harris 	if (lw_thread->initial_lcore == SPDK_ENV_LCORE_ID_ANY) {
1200118c273aSJim Harris 		lw_thread->initial_lcore = current_core;
1201118c273aSJim Harris 	}
120218667806STomasz Zawadzki 	lw_thread->lcore = current_core;
120318667806STomasz Zawadzki 
1204032920f2SBen Walker 	TAILQ_INSERT_TAIL(&reactor->threads, lw_thread, link);
1205ea863bb0SShuhei Matsumoto 	reactor->thread_count++;
12064bf6e4bbSLiu Xiaodong 
1207eff5b149SLiu Xiaodong 	/* Operate thread intr if running with full interrupt ability */
1208eff5b149SLiu Xiaodong 	if (spdk_interrupt_mode_is_enabled()) {
12094bf6e4bbSLiu Xiaodong 		int rc;
12104bf6e4bbSLiu Xiaodong 
1211fe2d64ceSBen Walker 		if (reactor->in_interrupt) {
1212fe2d64ceSBen Walker 			grp = spdk_thread_get_interrupt_fd_group(thread);
1213fe2d64ceSBen Walker 			rc = spdk_fd_group_nest(reactor->fgrp, grp);
12144bf6e4bbSLiu Xiaodong 			if (rc < 0) {
12154bf6e4bbSLiu Xiaodong 				SPDK_ERRLOG("Failed to schedule spdk_thread: %s.\n", spdk_strerror(-rc));
12164bf6e4bbSLiu Xiaodong 			}
1217fe2d64ceSBen Walker 		}
1218b763ebfeSLiu Xiaodong 
1219b763ebfeSLiu Xiaodong 		/* Align spdk_thread with reactor to interrupt mode or poll mode */
1220b763ebfeSLiu Xiaodong 		spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, reactor);
12214bf6e4bbSLiu Xiaodong 	}
1222032920f2SBen Walker }
1223032920f2SBen Walker 
1224835d21a2SBen Walker static int
12251368aec8SShuhei Matsumoto _reactor_schedule_thread(struct spdk_thread *thread)
1226032920f2SBen Walker {
1227118c273aSJim Harris 	uint32_t core, initial_core;
1228032920f2SBen Walker 	struct spdk_lw_thread *lw_thread;
122967234b6aSBen Walker 	struct spdk_event *evt = NULL;
123067234b6aSBen Walker 	struct spdk_cpuset *cpumask;
123167234b6aSBen Walker 	uint32_t i;
1232227c8b81SLiu Xiaodong 	struct spdk_reactor *local_reactor = NULL;
1233227c8b81SLiu Xiaodong 	uint32_t current_lcore = spdk_env_get_current_core();
1234227c8b81SLiu Xiaodong 	struct spdk_cpuset polling_cpumask;
1235227c8b81SLiu Xiaodong 	struct spdk_cpuset valid_cpumask;
123667234b6aSBen Walker 
123767234b6aSBen Walker 	cpumask = spdk_thread_get_cpumask(thread);
1238032920f2SBen Walker 
1239032920f2SBen Walker 	lw_thread = spdk_thread_get_ctx(thread);
1240032920f2SBen Walker 	assert(lw_thread != NULL);
12412cffc800SVitaliy Mysak 	core = lw_thread->lcore;
1242118c273aSJim Harris 	initial_core = lw_thread->initial_lcore;
1243032920f2SBen Walker 	memset(lw_thread, 0, sizeof(*lw_thread));
1244118c273aSJim Harris 	lw_thread->initial_lcore = initial_core;
1245032920f2SBen Walker 
1246227c8b81SLiu Xiaodong 	if (current_lcore != SPDK_ENV_LCORE_ID_ANY) {
1247227c8b81SLiu Xiaodong 		local_reactor = spdk_reactor_get(current_lcore);
1248227c8b81SLiu Xiaodong 		assert(local_reactor);
1249227c8b81SLiu Xiaodong 	}
1250227c8b81SLiu Xiaodong 
1251227c8b81SLiu Xiaodong 	/* When interrupt ability of spdk_thread is not enabled and the current
1252227c8b81SLiu Xiaodong 	 * reactor runs on DPDK thread, skip reactors which are in interrupt mode.
1253227c8b81SLiu Xiaodong 	 */
1254227c8b81SLiu Xiaodong 	if (!spdk_interrupt_mode_is_enabled() && local_reactor != NULL) {
1255227c8b81SLiu Xiaodong 		/* Get the cpumask of all reactors in polling */
1256227c8b81SLiu Xiaodong 		spdk_cpuset_zero(&polling_cpumask);
1257227c8b81SLiu Xiaodong 		SPDK_ENV_FOREACH_CORE(i) {
1258227c8b81SLiu Xiaodong 			spdk_cpuset_set_cpu(&polling_cpumask, i, true);
1259227c8b81SLiu Xiaodong 		}
1260227c8b81SLiu Xiaodong 		spdk_cpuset_xor(&polling_cpumask, &local_reactor->notify_cpuset);
1261227c8b81SLiu Xiaodong 
1262227c8b81SLiu Xiaodong 		if (core == SPDK_ENV_LCORE_ID_ANY) {
1263227c8b81SLiu Xiaodong 			/* Get the cpumask of all valid reactors which are suggested and also in polling */
1264227c8b81SLiu Xiaodong 			spdk_cpuset_copy(&valid_cpumask, &polling_cpumask);
1265227c8b81SLiu Xiaodong 			spdk_cpuset_and(&valid_cpumask, spdk_thread_get_cpumask(thread));
1266227c8b81SLiu Xiaodong 
1267227c8b81SLiu Xiaodong 			/* If there are any valid reactors, spdk_thread should be scheduled
1268227c8b81SLiu Xiaodong 			 * into one of the valid reactors.
1269227c8b81SLiu Xiaodong 			 * If there is no valid reactors, spdk_thread should be scheduled
1270227c8b81SLiu Xiaodong 			 * into one of the polling reactors.
1271227c8b81SLiu Xiaodong 			 */
1272227c8b81SLiu Xiaodong 			if (spdk_cpuset_count(&valid_cpumask) != 0) {
1273227c8b81SLiu Xiaodong 				cpumask = &valid_cpumask;
1274227c8b81SLiu Xiaodong 			} else {
1275227c8b81SLiu Xiaodong 				cpumask = &polling_cpumask;
1276227c8b81SLiu Xiaodong 			}
1277227c8b81SLiu Xiaodong 		} else if (!spdk_cpuset_get_cpu(&polling_cpumask, core)) {
1278227c8b81SLiu Xiaodong 			/* If specified reactor is not in polling, spdk_thread should be scheduled
1279227c8b81SLiu Xiaodong 			 * into one of the polling reactors.
1280227c8b81SLiu Xiaodong 			 */
1281227c8b81SLiu Xiaodong 			core = SPDK_ENV_LCORE_ID_ANY;
1282227c8b81SLiu Xiaodong 			cpumask = &polling_cpumask;
1283227c8b81SLiu Xiaodong 		}
1284227c8b81SLiu Xiaodong 	}
1285227c8b81SLiu Xiaodong 
1286032920f2SBen Walker 	pthread_mutex_lock(&g_scheduler_mtx);
12872cffc800SVitaliy Mysak 	if (core == SPDK_ENV_LCORE_ID_ANY) {
128867234b6aSBen Walker 		for (i = 0; i < spdk_env_get_core_count(); i++) {
1289992b168eSLiu Xiaodong 			if (g_next_core >= g_reactor_count) {
1290032920f2SBen Walker 				g_next_core = spdk_env_get_first_core();
1291032920f2SBen Walker 			}
1292032920f2SBen Walker 			core = g_next_core;
1293032920f2SBen Walker 			g_next_core = spdk_env_get_next_core(g_next_core);
129467234b6aSBen Walker 
129567234b6aSBen Walker 			if (spdk_cpuset_get_cpu(cpumask, core)) {
129667234b6aSBen Walker 				break;
129767234b6aSBen Walker 			}
129867234b6aSBen Walker 		}
12992cffc800SVitaliy Mysak 	}
13002cffc800SVitaliy Mysak 
13012cffc800SVitaliy Mysak 	evt = spdk_event_allocate(core, _schedule_thread, lw_thread, NULL);
13022cffc800SVitaliy Mysak 
130382c46626SAnisa Su 	if (current_lcore != core) {
130482c46626SAnisa Su 		spdk_trace_record(TRACE_SCHEDULER_MOVE_THREAD, spdk_thread_get_trace_id(thread), 0, 0,
130582c46626SAnisa Su 				  current_lcore, core);
130682c46626SAnisa Su 	}
130782c46626SAnisa Su 
1308032920f2SBen Walker 	pthread_mutex_unlock(&g_scheduler_mtx);
1309032920f2SBen Walker 
131067234b6aSBen Walker 	assert(evt != NULL);
131167234b6aSBen Walker 	if (evt == NULL) {
131267234b6aSBen Walker 		SPDK_ERRLOG("Unable to schedule thread on requested core mask.\n");
131367234b6aSBen Walker 		return -1;
131467234b6aSBen Walker 	}
131567234b6aSBen Walker 
1316f7e9e764SShuhei Matsumoto 	lw_thread->tsc_start = spdk_get_ticks();
1317f7e9e764SShuhei Matsumoto 
1318032920f2SBen Walker 	spdk_event_call(evt);
1319835d21a2SBen Walker 
1320835d21a2SBen Walker 	return 0;
1321032920f2SBen Walker }
1322032920f2SBen Walker 
1323a4335febSShuhei Matsumoto static void
1324a4335febSShuhei Matsumoto _reactor_request_thread_reschedule(struct spdk_thread *thread)
1325a4335febSShuhei Matsumoto {
1326a4335febSShuhei Matsumoto 	struct spdk_lw_thread *lw_thread;
13274bf6e4bbSLiu Xiaodong 	struct spdk_reactor *reactor;
13284bf6e4bbSLiu Xiaodong 	uint32_t current_core;
1329a4335febSShuhei Matsumoto 
1330a4335febSShuhei Matsumoto 	assert(thread == spdk_get_thread());
1331a4335febSShuhei Matsumoto 
1332a4335febSShuhei Matsumoto 	lw_thread = spdk_thread_get_ctx(thread);
1333a4335febSShuhei Matsumoto 
1334902c9e4dSTomasz Zawadzki 	assert(lw_thread != NULL);
1335902c9e4dSTomasz Zawadzki 	lw_thread->resched = true;
1336902c9e4dSTomasz Zawadzki 	lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY;
13374bf6e4bbSLiu Xiaodong 
13384bf6e4bbSLiu Xiaodong 	current_core = spdk_env_get_current_core();
13394bf6e4bbSLiu Xiaodong 	reactor = spdk_reactor_get(current_core);
13404bf6e4bbSLiu Xiaodong 	assert(reactor != NULL);
1341eff5b149SLiu Xiaodong 
1342eff5b149SLiu Xiaodong 	/* Send a notification if the destination reactor is indicated in intr mode state */
1343eff5b149SLiu Xiaodong 	if (spdk_unlikely(spdk_cpuset_get_cpu(&reactor->notify_cpuset, reactor->lcore))) {
13444bf6e4bbSLiu Xiaodong 		uint64_t notify = 1;
13454bf6e4bbSLiu Xiaodong 
13464bf6e4bbSLiu Xiaodong 		if (write(reactor->resched_fd, &notify, sizeof(notify)) < 0) {
13474bf6e4bbSLiu Xiaodong 			SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno));
13484bf6e4bbSLiu Xiaodong 		}
13494bf6e4bbSLiu Xiaodong 	}
1350a4335febSShuhei Matsumoto }
1351a4335febSShuhei Matsumoto 
13521368aec8SShuhei Matsumoto static int
1353a7592dbeSSeth Howell reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op)
13541368aec8SShuhei Matsumoto {
13552cffc800SVitaliy Mysak 	struct spdk_lw_thread *lw_thread;
13562cffc800SVitaliy Mysak 
13571368aec8SShuhei Matsumoto 	switch (op) {
13581368aec8SShuhei Matsumoto 	case SPDK_THREAD_OP_NEW:
13592cffc800SVitaliy Mysak 		lw_thread = spdk_thread_get_ctx(thread);
13602cffc800SVitaliy Mysak 		lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY;
1361118c273aSJim Harris 		lw_thread->initial_lcore = SPDK_ENV_LCORE_ID_ANY;
13621368aec8SShuhei Matsumoto 		return _reactor_schedule_thread(thread);
1363a4335febSShuhei Matsumoto 	case SPDK_THREAD_OP_RESCHED:
1364a4335febSShuhei Matsumoto 		_reactor_request_thread_reschedule(thread);
1365a4335febSShuhei Matsumoto 		return 0;
13661368aec8SShuhei Matsumoto 	default:
13671368aec8SShuhei Matsumoto 		return -ENOTSUP;
13681368aec8SShuhei Matsumoto 	}
13691368aec8SShuhei Matsumoto }
13701368aec8SShuhei Matsumoto 
13711368aec8SShuhei Matsumoto static bool
1372a7592dbeSSeth Howell reactor_thread_op_supported(enum spdk_thread_op op)
13731368aec8SShuhei Matsumoto {
13741368aec8SShuhei Matsumoto 	switch (op) {
13751368aec8SShuhei Matsumoto 	case SPDK_THREAD_OP_NEW:
1376a4335febSShuhei Matsumoto 	case SPDK_THREAD_OP_RESCHED:
13771368aec8SShuhei Matsumoto 		return true;
13781368aec8SShuhei Matsumoto 	default:
13791368aec8SShuhei Matsumoto 		return false;
13801368aec8SShuhei Matsumoto 	}
13811368aec8SShuhei Matsumoto }
13821368aec8SShuhei Matsumoto 
138360eb6da8SShuhei Matsumoto struct call_reactor {
138460eb6da8SShuhei Matsumoto 	uint32_t cur_core;
138560eb6da8SShuhei Matsumoto 	spdk_event_fn fn;
138660eb6da8SShuhei Matsumoto 	void *arg1;
138760eb6da8SShuhei Matsumoto 	void *arg2;
138860eb6da8SShuhei Matsumoto 
138960eb6da8SShuhei Matsumoto 	uint32_t orig_core;
139060eb6da8SShuhei Matsumoto 	spdk_event_fn cpl;
139160eb6da8SShuhei Matsumoto };
139260eb6da8SShuhei Matsumoto 
139360eb6da8SShuhei Matsumoto static void
1394a7592dbeSSeth Howell on_reactor(void *arg1, void *arg2)
139560eb6da8SShuhei Matsumoto {
139660eb6da8SShuhei Matsumoto 	struct call_reactor *cr = arg1;
139760eb6da8SShuhei Matsumoto 	struct spdk_event *evt;
139860eb6da8SShuhei Matsumoto 
139960eb6da8SShuhei Matsumoto 	cr->fn(cr->arg1, cr->arg2);
140060eb6da8SShuhei Matsumoto 
140160eb6da8SShuhei Matsumoto 	cr->cur_core = spdk_env_get_next_core(cr->cur_core);
140260eb6da8SShuhei Matsumoto 
1403992b168eSLiu Xiaodong 	if (cr->cur_core >= g_reactor_count) {
14042172c432STomasz Zawadzki 		SPDK_DEBUGLOG(reactor, "Completed reactor iteration\n");
140560eb6da8SShuhei Matsumoto 
140660eb6da8SShuhei Matsumoto 		evt = spdk_event_allocate(cr->orig_core, cr->cpl, cr->arg1, cr->arg2);
140760eb6da8SShuhei Matsumoto 		free(cr);
140860eb6da8SShuhei Matsumoto 	} else {
14092172c432STomasz Zawadzki 		SPDK_DEBUGLOG(reactor, "Continuing reactor iteration to %d\n",
141060eb6da8SShuhei Matsumoto 			      cr->cur_core);
141160eb6da8SShuhei Matsumoto 
1412a7592dbeSSeth Howell 		evt = spdk_event_allocate(cr->cur_core, on_reactor, arg1, NULL);
141360eb6da8SShuhei Matsumoto 	}
141460eb6da8SShuhei Matsumoto 	assert(evt != NULL);
141560eb6da8SShuhei Matsumoto 	spdk_event_call(evt);
141660eb6da8SShuhei Matsumoto }
141760eb6da8SShuhei Matsumoto 
141860eb6da8SShuhei Matsumoto void
141960eb6da8SShuhei Matsumoto spdk_for_each_reactor(spdk_event_fn fn, void *arg1, void *arg2, spdk_event_fn cpl)
142060eb6da8SShuhei Matsumoto {
142160eb6da8SShuhei Matsumoto 	struct call_reactor *cr;
142260eb6da8SShuhei Matsumoto 
1423c4f086b6SJim Harris 	/* When the application framework is shutting down, we will send one
1424c4f086b6SJim Harris 	 * final for_each_reactor operation with completion callback _reactors_stop,
1425c4f086b6SJim Harris 	 * to flush any existing for_each_reactor operations to avoid any memory
1426c4f086b6SJim Harris 	 * leaks. We use a mutex here to protect a boolean flag that will ensure
1427c4f086b6SJim Harris 	 * we don't start any more operations once we've started shutting down.
1428c4f086b6SJim Harris 	 */
1429c4f086b6SJim Harris 	pthread_mutex_lock(&g_stopping_reactors_mtx);
1430c4f086b6SJim Harris 	if (g_stopping_reactors) {
1431c4f086b6SJim Harris 		pthread_mutex_unlock(&g_stopping_reactors_mtx);
1432c4f086b6SJim Harris 		return;
1433c4f086b6SJim Harris 	} else if (cpl == _reactors_stop) {
1434c4f086b6SJim Harris 		g_stopping_reactors = true;
1435c4f086b6SJim Harris 	}
1436c4f086b6SJim Harris 	pthread_mutex_unlock(&g_stopping_reactors_mtx);
1437c4f086b6SJim Harris 
143860eb6da8SShuhei Matsumoto 	cr = calloc(1, sizeof(*cr));
143960eb6da8SShuhei Matsumoto 	if (!cr) {
144060eb6da8SShuhei Matsumoto 		SPDK_ERRLOG("Unable to perform reactor iteration\n");
144160eb6da8SShuhei Matsumoto 		cpl(arg1, arg2);
144260eb6da8SShuhei Matsumoto 		return;
144360eb6da8SShuhei Matsumoto 	}
144460eb6da8SShuhei Matsumoto 
144560eb6da8SShuhei Matsumoto 	cr->fn = fn;
144660eb6da8SShuhei Matsumoto 	cr->arg1 = arg1;
144760eb6da8SShuhei Matsumoto 	cr->arg2 = arg2;
144860eb6da8SShuhei Matsumoto 	cr->cpl = cpl;
144960eb6da8SShuhei Matsumoto 	cr->orig_core = spdk_env_get_current_core();
145060eb6da8SShuhei Matsumoto 	cr->cur_core = spdk_env_get_first_core();
145160eb6da8SShuhei Matsumoto 
14522172c432STomasz Zawadzki 	SPDK_DEBUGLOG(reactor, "Starting reactor iteration from %d\n", cr->orig_core);
145360eb6da8SShuhei Matsumoto 
14549e81535eSyidong0635 	_event_call(cr->cur_core, on_reactor, cr, NULL);
145560eb6da8SShuhei Matsumoto }
145660eb6da8SShuhei Matsumoto 
14574bf6e4bbSLiu Xiaodong #ifdef __linux__
14584bf6e4bbSLiu Xiaodong static int
14594bf6e4bbSLiu Xiaodong reactor_schedule_thread_event(void *arg)
14604bf6e4bbSLiu Xiaodong {
14614bf6e4bbSLiu Xiaodong 	struct spdk_reactor *reactor = arg;
14624bf6e4bbSLiu Xiaodong 	struct spdk_lw_thread *lw_thread, *tmp;
14634bf6e4bbSLiu Xiaodong 	uint32_t count = 0;
14644bf6e4bbSLiu Xiaodong 
1465eff5b149SLiu Xiaodong 	assert(reactor->in_interrupt);
14664bf6e4bbSLiu Xiaodong 
14674bf6e4bbSLiu Xiaodong 	TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
14684bf6e4bbSLiu Xiaodong 		count += reactor_post_process_lw_thread(reactor, lw_thread) ? 1 : 0;
14694bf6e4bbSLiu Xiaodong 	}
14704bf6e4bbSLiu Xiaodong 
14714bf6e4bbSLiu Xiaodong 	return count;
14724bf6e4bbSLiu Xiaodong }
14734bf6e4bbSLiu Xiaodong 
14744bf6e4bbSLiu Xiaodong static int
14754bf6e4bbSLiu Xiaodong reactor_interrupt_init(struct spdk_reactor *reactor)
14764bf6e4bbSLiu Xiaodong {
1477*1a5bdab3SAnkit Kumar 	struct spdk_event_handler_opts opts = {};
14784bf6e4bbSLiu Xiaodong 	int rc;
14794bf6e4bbSLiu Xiaodong 
14804bf6e4bbSLiu Xiaodong 	rc = spdk_fd_group_create(&reactor->fgrp);
14814bf6e4bbSLiu Xiaodong 	if (rc != 0) {
14824bf6e4bbSLiu Xiaodong 		return rc;
14834bf6e4bbSLiu Xiaodong 	}
14844bf6e4bbSLiu Xiaodong 
14854bf6e4bbSLiu Xiaodong 	reactor->resched_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
14864bf6e4bbSLiu Xiaodong 	if (reactor->resched_fd < 0) {
14874bf6e4bbSLiu Xiaodong 		rc = -EBADF;
14884bf6e4bbSLiu Xiaodong 		goto err;
14894bf6e4bbSLiu Xiaodong 	}
14904bf6e4bbSLiu Xiaodong 
1491*1a5bdab3SAnkit Kumar 	spdk_fd_group_get_default_event_handler_opts(&opts, sizeof(opts));
1492*1a5bdab3SAnkit Kumar 	opts.fd_type = SPDK_FD_TYPE_EVENTFD;
1493*1a5bdab3SAnkit Kumar 
1494*1a5bdab3SAnkit Kumar 	rc = SPDK_FD_GROUP_ADD_EXT(reactor->fgrp, reactor->resched_fd,
1495*1a5bdab3SAnkit Kumar 				   reactor_schedule_thread_event, reactor, &opts);
14964bf6e4bbSLiu Xiaodong 	if (rc) {
14974bf6e4bbSLiu Xiaodong 		close(reactor->resched_fd);
14984bf6e4bbSLiu Xiaodong 		goto err;
14994bf6e4bbSLiu Xiaodong 	}
15004bf6e4bbSLiu Xiaodong 
15014bf6e4bbSLiu Xiaodong 	reactor->events_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
15024bf6e4bbSLiu Xiaodong 	if (reactor->events_fd < 0) {
15034bf6e4bbSLiu Xiaodong 		spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd);
15044bf6e4bbSLiu Xiaodong 		close(reactor->resched_fd);
15054bf6e4bbSLiu Xiaodong 
15064bf6e4bbSLiu Xiaodong 		rc = -EBADF;
15074bf6e4bbSLiu Xiaodong 		goto err;
15084bf6e4bbSLiu Xiaodong 	}
15094bf6e4bbSLiu Xiaodong 
1510*1a5bdab3SAnkit Kumar 	rc = SPDK_FD_GROUP_ADD_EXT(reactor->fgrp, reactor->events_fd,
1511*1a5bdab3SAnkit Kumar 				   event_queue_run_batch, reactor, &opts);
15124bf6e4bbSLiu Xiaodong 	if (rc) {
15134bf6e4bbSLiu Xiaodong 		spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd);
15144bf6e4bbSLiu Xiaodong 		close(reactor->resched_fd);
15154bf6e4bbSLiu Xiaodong 		close(reactor->events_fd);
15164bf6e4bbSLiu Xiaodong 		goto err;
15174bf6e4bbSLiu Xiaodong 	}
15184bf6e4bbSLiu Xiaodong 
15194bf6e4bbSLiu Xiaodong 	return 0;
15204bf6e4bbSLiu Xiaodong 
15214bf6e4bbSLiu Xiaodong err:
15224bf6e4bbSLiu Xiaodong 	spdk_fd_group_destroy(reactor->fgrp);
1523eff5b149SLiu Xiaodong 	reactor->fgrp = NULL;
15244bf6e4bbSLiu Xiaodong 	return rc;
15254bf6e4bbSLiu Xiaodong }
15264bf6e4bbSLiu Xiaodong #else
/* Non-Linux build: reactor interrupt facilities are not available here. */
static int
reactor_interrupt_init(struct spdk_reactor *reactor)
{
	return -ENOTSUP;
}
15324bf6e4bbSLiu Xiaodong #endif
15334bf6e4bbSLiu Xiaodong 
15344bf6e4bbSLiu Xiaodong static void
15354bf6e4bbSLiu Xiaodong reactor_interrupt_fini(struct spdk_reactor *reactor)
15364bf6e4bbSLiu Xiaodong {
15374bf6e4bbSLiu Xiaodong 	struct spdk_fd_group *fgrp = reactor->fgrp;
15384bf6e4bbSLiu Xiaodong 
15394bf6e4bbSLiu Xiaodong 	if (!fgrp) {
15404bf6e4bbSLiu Xiaodong 		return;
15414bf6e4bbSLiu Xiaodong 	}
15424bf6e4bbSLiu Xiaodong 
15434bf6e4bbSLiu Xiaodong 	spdk_fd_group_remove(fgrp, reactor->events_fd);
15444bf6e4bbSLiu Xiaodong 	spdk_fd_group_remove(fgrp, reactor->resched_fd);
15454bf6e4bbSLiu Xiaodong 
15464bf6e4bbSLiu Xiaodong 	close(reactor->events_fd);
15474bf6e4bbSLiu Xiaodong 	close(reactor->resched_fd);
15484bf6e4bbSLiu Xiaodong 
15494bf6e4bbSLiu Xiaodong 	spdk_fd_group_destroy(fgrp);
15504bf6e4bbSLiu Xiaodong 	reactor->fgrp = NULL;
15514bf6e4bbSLiu Xiaodong }
15524bf6e4bbSLiu Xiaodong 
1553c6adf304SMaciej Szwed static struct spdk_governor *
1554ece94716STomasz Zawadzki _governor_find(const char *name)
1555c6adf304SMaciej Szwed {
1556c6adf304SMaciej Szwed 	struct spdk_governor *governor, *tmp;
1557c6adf304SMaciej Szwed 
1558c6adf304SMaciej Szwed 	TAILQ_FOREACH_SAFE(governor, &g_governor_list, link, tmp) {
1559c6adf304SMaciej Szwed 		if (strcmp(name, governor->name) == 0) {
1560c6adf304SMaciej Szwed 			return governor;
1561c6adf304SMaciej Szwed 		}
1562c6adf304SMaciej Szwed 	}
1563c6adf304SMaciej Szwed 
1564c6adf304SMaciej Szwed 	return NULL;
1565c6adf304SMaciej Szwed }
1566c6adf304SMaciej Szwed 
1567c6adf304SMaciej Szwed int
1568a86e40f3STomasz Zawadzki spdk_governor_set(const char *name)
1569c6adf304SMaciej Szwed {
1570c6adf304SMaciej Szwed 	struct spdk_governor *governor;
15715bf2973eSTomasz Zawadzki 	int rc = 0;
1572c6adf304SMaciej Szwed 
15732e394521STomasz Zawadzki 	/* NULL governor was specifically requested */
15742e394521STomasz Zawadzki 	if (name == NULL) {
15752e394521STomasz Zawadzki 		if (g_governor) {
15762e394521STomasz Zawadzki 			g_governor->deinit();
15772e394521STomasz Zawadzki 		}
15782e394521STomasz Zawadzki 		g_governor = NULL;
15792e394521STomasz Zawadzki 		return 0;
15802e394521STomasz Zawadzki 	}
15812e394521STomasz Zawadzki 
1582c6adf304SMaciej Szwed 	governor = _governor_find(name);
1583c6adf304SMaciej Szwed 	if (governor == NULL) {
1584c6adf304SMaciej Szwed 		return -EINVAL;
1585c6adf304SMaciej Szwed 	}
1586c6adf304SMaciej Szwed 
15872e394521STomasz Zawadzki 	if (g_governor == governor) {
15882e394521STomasz Zawadzki 		return 0;
15895bf2973eSTomasz Zawadzki 	}
15905bf2973eSTomasz Zawadzki 
15912e394521STomasz Zawadzki 	rc = governor->init();
15925bf2973eSTomasz Zawadzki 	if (rc == 0) {
15932e394521STomasz Zawadzki 		if (g_governor) {
15942e394521STomasz Zawadzki 			g_governor->deinit();
15955bf2973eSTomasz Zawadzki 		}
15962e394521STomasz Zawadzki 		g_governor = governor;
15972e394521STomasz Zawadzki 	}
15982e394521STomasz Zawadzki 
1599c6adf304SMaciej Szwed 	return rc;
1600c6adf304SMaciej Szwed }
1601c6adf304SMaciej Szwed 
1602abf52d7dSKrzysztof Karas struct spdk_governor *
1603a86e40f3STomasz Zawadzki spdk_governor_get(void)
1604abf52d7dSKrzysztof Karas {
16052e394521STomasz Zawadzki 	return g_governor;
1606abf52d7dSKrzysztof Karas }
1607abf52d7dSKrzysztof Karas 
1608c6adf304SMaciej Szwed void
1609a86e40f3STomasz Zawadzki spdk_governor_register(struct spdk_governor *governor)
1610c6adf304SMaciej Szwed {
1611c6adf304SMaciej Szwed 	if (_governor_find(governor->name)) {
1612c6adf304SMaciej Szwed 		SPDK_ERRLOG("governor named '%s' already registered.\n", governor->name);
1613c6adf304SMaciej Szwed 		assert(false);
1614c6adf304SMaciej Szwed 		return;
1615c6adf304SMaciej Szwed 	}
1616c6adf304SMaciej Szwed 
1617c6adf304SMaciej Szwed 	TAILQ_INSERT_TAIL(&g_governor_list, governor, link);
1618c6adf304SMaciej Szwed }
1619c6adf304SMaciej Szwed 
16202172c432STomasz Zawadzki SPDK_LOG_REGISTER_COMPONENT(reactor)
162182c46626SAnisa Su 
162282c46626SAnisa Su static void
162382c46626SAnisa Su scheduler_trace(void)
162482c46626SAnisa Su {
162582c46626SAnisa Su 	struct spdk_trace_tpoint_opts opts[] = {
162682c46626SAnisa Su 		{
162782c46626SAnisa Su 			"SCHEDULER_PERIOD_START", TRACE_SCHEDULER_PERIOD_START,
162882c46626SAnisa Su 			OWNER_TYPE_NONE, OBJECT_NONE, 0,
162982c46626SAnisa Su 			{
163082c46626SAnisa Su 
163182c46626SAnisa Su 			}
163282c46626SAnisa Su 		},
163382c46626SAnisa Su 		{
163482c46626SAnisa Su 			"SCHEDULER_CORE_STATS", TRACE_SCHEDULER_CORE_STATS,
163582c46626SAnisa Su 			OWNER_TYPE_REACTOR, OBJECT_NONE, 0,
163682c46626SAnisa Su 			{
163782c46626SAnisa Su 				{ "busy", SPDK_TRACE_ARG_TYPE_INT, 8},
163882c46626SAnisa Su 				{ "idle", SPDK_TRACE_ARG_TYPE_INT, 8}
163982c46626SAnisa Su 			}
164082c46626SAnisa Su 		},
164182c46626SAnisa Su 		{
164282c46626SAnisa Su 			"SCHEDULER_THREAD_STATS", TRACE_SCHEDULER_THREAD_STATS,
164382c46626SAnisa Su 			OWNER_TYPE_THREAD, OBJECT_NONE, 0,
164482c46626SAnisa Su 			{
164582c46626SAnisa Su 				{ "busy", SPDK_TRACE_ARG_TYPE_INT, 8},
164682c46626SAnisa Su 				{ "idle", SPDK_TRACE_ARG_TYPE_INT, 8}
164782c46626SAnisa Su 			}
164882c46626SAnisa Su 		},
164982c46626SAnisa Su 		{
165082c46626SAnisa Su 			"SCHEDULER_MOVE_THREAD", TRACE_SCHEDULER_MOVE_THREAD,
165182c46626SAnisa Su 			OWNER_TYPE_THREAD, OBJECT_NONE, 0,
165282c46626SAnisa Su 			{
165382c46626SAnisa Su 				{ "src", SPDK_TRACE_ARG_TYPE_INT, 8 },
165482c46626SAnisa Su 				{ "dst", SPDK_TRACE_ARG_TYPE_INT, 8 }
165582c46626SAnisa Su 			}
165682c46626SAnisa Su 		}
165782c46626SAnisa Su 	};
165882c46626SAnisa Su 
165982c46626SAnisa Su 	spdk_trace_register_owner_type(OWNER_TYPE_REACTOR, 'r');
166082c46626SAnisa Su 	spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
166182c46626SAnisa Su 
166282c46626SAnisa Su }
166382c46626SAnisa Su 
166482c46626SAnisa Su SPDK_TRACE_REGISTER_FN(scheduler_trace, "scheduler", TRACE_GROUP_SCHEDULER)
1665