1488570ebSJim Harris /* SPDX-License-Identifier: BSD-3-Clause 2a6dbe372Spaul luse * Copyright (C) 2016 Intel Corporation. 3eeeac667SDaniel Verkamp * All rights reserved. 4eeeac667SDaniel Verkamp */ 5eeeac667SDaniel Verkamp 6b961d9ccSBen Walker #include "spdk/stdinc.h" 7b9be940aSLance Hartmann #include "spdk/likely.h" 8eeeac667SDaniel Verkamp 9462fd69eSSeungYeon Shin #include "event_internal.h" 10462fd69eSSeungYeon Shin 11b961d9ccSBen Walker #include "spdk_internal/event.h" 1270f3606bSJohn Levon #include "spdk_internal/usdt.h" 13eeeac667SDaniel Verkamp 14eeeac667SDaniel Verkamp #include "spdk/log.h" 15a83f91c2SBen Walker #include "spdk/thread.h" 160aa29864SBen Walker #include "spdk/env.h" 1790dfc392SDariusz Stojaczyk #include "spdk/util.h" 18a86e40f3STomasz Zawadzki #include "spdk/scheduler.h" 194bf6e4bbSLiu Xiaodong #include "spdk/string.h" 204bf6e4bbSLiu Xiaodong #include "spdk/fd_group.h" 2182c46626SAnisa Su #include "spdk/trace.h" 2282c46626SAnisa Su #include "spdk_internal/trace_defs.h" 23eeeac667SDaniel Verkamp 2438527b07SShuhei Matsumoto #ifdef __linux__ 2538527b07SShuhei Matsumoto #include <sys/prctl.h> 264bf6e4bbSLiu Xiaodong #include <sys/eventfd.h> 2738527b07SShuhei Matsumoto #endif 2838527b07SShuhei Matsumoto 2938527b07SShuhei Matsumoto #ifdef __FreeBSD__ 3038527b07SShuhei Matsumoto #include <pthread_np.h> 3138527b07SShuhei Matsumoto #endif 3238527b07SShuhei Matsumoto 3352bbb267SDaniel Verkamp #define SPDK_EVENT_BATCH_SIZE 8 3452bbb267SDaniel Verkamp 35d34d32f6SShuhei Matsumoto static struct spdk_reactor *g_reactors; 36992b168eSLiu Xiaodong static uint32_t g_reactor_count; 37ae4360f0SShuhei Matsumoto static struct spdk_cpuset g_reactor_core_mask; 38f7ddfcf7SDarek Stojaczyk static enum spdk_reactor_state g_reactor_state = SPDK_REACTOR_STATE_UNINITIALIZED; 39eeeac667SDaniel Verkamp 4025c5e3f5SShuhei Matsumoto static bool g_framework_context_switch_monitor_enabled = true; 4170c3e1f2SDaniel Verkamp 42e1ec5c60SBen Walker static struct spdk_mempool *g_spdk_event_mempool = NULL; 43eeeac667SDaniel Verkamp 442a146cd9SMaciej Szwed TAILQ_HEAD(, spdk_scheduler) g_scheduler_list 452a146cd9SMaciej Szwed = TAILQ_HEAD_INITIALIZER(g_scheduler_list); 462a146cd9SMaciej Szwed 47d6c4f8cfSTomasz Zawadzki static struct spdk_scheduler *g_scheduler = NULL; 482cffc800SVitaliy Mysak static struct spdk_reactor *g_scheduling_reactor; 49cff96883STomasz Zawadzki bool g_scheduling_in_progress = false; 50f01c6f2dSJim Harris static uint64_t g_scheduler_period_in_tsc = 0; 51f01c6f2dSJim Harris static uint64_t g_scheduler_period_in_us; 526859a49aSMaciej Szwed static uint32_t g_scheduler_core_number; 532cffc800SVitaliy Mysak static struct spdk_scheduler_core_info *g_core_infos = NULL; 54462fd69eSSeungYeon Shin static struct spdk_cpuset g_scheduler_isolated_core_mask; 552a146cd9SMaciej Szwed 56c6adf304SMaciej Szwed TAILQ_HEAD(, spdk_governor) g_governor_list 57c6adf304SMaciej Szwed = TAILQ_HEAD_INITIALIZER(g_governor_list); 58c6adf304SMaciej Szwed 59a09bf113STomasz Zawadzki static struct spdk_governor *g_governor = NULL; 60c6adf304SMaciej Szwed 614bf6e4bbSLiu Xiaodong static int reactor_interrupt_init(struct spdk_reactor *reactor); 624bf6e4bbSLiu Xiaodong static void reactor_interrupt_fini(struct spdk_reactor *reactor); 634bf6e4bbSLiu Xiaodong 64c4f086b6SJim Harris static pthread_mutex_t g_stopping_reactors_mtx = PTHREAD_MUTEX_INITIALIZER; 65c4f086b6SJim Harris static bool g_stopping_reactors = false; 66c4f086b6SJim Harris 672a146cd9SMaciej Szwed static struct spdk_scheduler * 68a86c94b3STomasz Zawadzki _scheduler_find(const char *name) 692a146cd9SMaciej Szwed { 702a146cd9SMaciej Szwed struct spdk_scheduler *tmp; 712a146cd9SMaciej Szwed 722a146cd9SMaciej Szwed TAILQ_FOREACH(tmp, &g_scheduler_list, link) { 732a146cd9SMaciej Szwed if (strcmp(name, tmp->name) == 0) { 742a146cd9SMaciej Szwed return tmp; 752a146cd9SMaciej Szwed } 762a146cd9SMaciej Szwed } 772a146cd9SMaciej Szwed 782a146cd9SMaciej Szwed return NULL; 792a146cd9SMaciej Szwed } 802a146cd9SMaciej Szwed 812a146cd9SMaciej Szwed int 82a86e40f3STomasz Zawadzki spdk_scheduler_set(const char *name) 832a146cd9SMaciej Szwed { 842a146cd9SMaciej Szwed struct spdk_scheduler *scheduler; 85d6c4f8cfSTomasz Zawadzki int rc = 0; 86d6c4f8cfSTomasz Zawadzki 87d6c4f8cfSTomasz Zawadzki /* NULL scheduler was specifically requested */ 88d6c4f8cfSTomasz Zawadzki if (name == NULL) { 89d6c4f8cfSTomasz Zawadzki if (g_scheduler) { 90d6c4f8cfSTomasz Zawadzki g_scheduler->deinit(); 91d6c4f8cfSTomasz Zawadzki } 92d6c4f8cfSTomasz Zawadzki g_scheduler = NULL; 93d6c4f8cfSTomasz Zawadzki return 0; 94d6c4f8cfSTomasz Zawadzki } 952a146cd9SMaciej Szwed 962a146cd9SMaciej Szwed scheduler = _scheduler_find(name); 972a146cd9SMaciej Szwed if (scheduler == NULL) { 987148f333SVitaliy Mysak SPDK_ERRLOG("Requested scheduler is missing\n"); 99d6c4f8cfSTomasz Zawadzki return -EINVAL; 1002a146cd9SMaciej Szwed } 1012a146cd9SMaciej Szwed 102d6c4f8cfSTomasz Zawadzki if (g_scheduler == scheduler) { 103d6c4f8cfSTomasz Zawadzki return 0; 10475022aa2SMaciej Szwed } 105d6c4f8cfSTomasz Zawadzki 106d6c4f8cfSTomasz Zawadzki if (g_scheduler) { 1075bf2973eSTomasz Zawadzki g_scheduler->deinit(); 108c6adf304SMaciej Szwed } 109f2a6a84aSJim Harris 110f2a6a84aSJim Harris rc = scheduler->init(); 111f2a6a84aSJim Harris if (rc == 0) { 1121b1e52cbSMaciej Szwed g_scheduler = scheduler; 113f2a6a84aSJim Harris } else { 114f2a6a84aSJim Harris /* Could not switch to the new scheduler, so keep the old 115ba9e050eSMarcin Spiewak * one. We need to check if it wasn't NULL, and ->init() it again. 116f2a6a84aSJim Harris */ 117ba9e050eSMarcin Spiewak if (g_scheduler) { 118f2a6a84aSJim Harris SPDK_ERRLOG("Could not ->init() '%s' scheduler, reverting to '%s'\n", 119f2a6a84aSJim Harris name, g_scheduler->name); 120f2a6a84aSJim Harris g_scheduler->init(); 121ba9e050eSMarcin Spiewak } else { 122ba9e050eSMarcin Spiewak SPDK_ERRLOG("Could not ->init() '%s' scheduler.\n", name); 123ba9e050eSMarcin Spiewak } 1241b1e52cbSMaciej Szwed } 1251b1e52cbSMaciej Szwed 126d6c4f8cfSTomasz Zawadzki return rc; 1272a146cd9SMaciej Szwed } 1282a146cd9SMaciej Szwed 129abf52d7dSKrzysztof Karas struct spdk_scheduler * 130a86e40f3STomasz Zawadzki spdk_scheduler_get(void) 131abf52d7dSKrzysztof Karas { 132abf52d7dSKrzysztof Karas return g_scheduler; 133abf52d7dSKrzysztof Karas } 134abf52d7dSKrzysztof Karas 135abf52d7dSKrzysztof Karas uint64_t 136a86e40f3STomasz Zawadzki spdk_scheduler_get_period(void) 137abf52d7dSKrzysztof Karas { 138f01c6f2dSJim Harris return g_scheduler_period_in_us; 139abf52d7dSKrzysztof Karas } 140abf52d7dSKrzysztof Karas 1412a146cd9SMaciej Szwed void 142a86e40f3STomasz Zawadzki spdk_scheduler_set_period(uint64_t period) 1432a146cd9SMaciej Szwed { 144f01c6f2dSJim Harris g_scheduler_period_in_us = period; 145f01c6f2dSJim Harris g_scheduler_period_in_tsc = period * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC; 1462a146cd9SMaciej Szwed } 1472a146cd9SMaciej Szwed 1482a146cd9SMaciej Szwed void 149a86e40f3STomasz Zawadzki spdk_scheduler_register(struct spdk_scheduler *scheduler) 1502a146cd9SMaciej Szwed { 1512a146cd9SMaciej Szwed if (_scheduler_find(scheduler->name)) { 1522a146cd9SMaciej Szwed SPDK_ERRLOG("scheduler named '%s' already registered.\n", scheduler->name); 1532a146cd9SMaciej Szwed assert(false); 1542a146cd9SMaciej Szwed return; 1552a146cd9SMaciej Szwed } 1562a146cd9SMaciej Szwed 1572a146cd9SMaciej Szwed TAILQ_INSERT_TAIL(&g_scheduler_list, scheduler, link); 1582a146cd9SMaciej Szwed } 1592a146cd9SMaciej Szwed 160758225a3Ssyeon.shin uint32_t 161758225a3Ssyeon.shin spdk_scheduler_get_scheduling_lcore(void) 162758225a3Ssyeon.shin { 163758225a3Ssyeon.shin return g_scheduling_reactor->lcore; 164758225a3Ssyeon.shin } 165758225a3Ssyeon.shin 166462fd69eSSeungYeon Shin bool 167462fd69eSSeungYeon Shin spdk_scheduler_set_scheduling_lcore(uint32_t core) 168462fd69eSSeungYeon Shin { 169462fd69eSSeungYeon Shin struct spdk_reactor *reactor = spdk_reactor_get(core); 170462fd69eSSeungYeon Shin if (reactor == NULL) { 171462fd69eSSeungYeon Shin SPDK_ERRLOG("Failed to set scheduling reactor. Reactor(lcore:%d) does not exist", core); 172462fd69eSSeungYeon Shin return false; 173462fd69eSSeungYeon Shin } 174462fd69eSSeungYeon Shin 175462fd69eSSeungYeon Shin g_scheduling_reactor = reactor; 176462fd69eSSeungYeon Shin return true; 177462fd69eSSeungYeon Shin } 178462fd69eSSeungYeon Shin 179462fd69eSSeungYeon Shin bool 180462fd69eSSeungYeon Shin scheduler_set_isolated_core_mask(struct spdk_cpuset isolated_core_mask) 181462fd69eSSeungYeon Shin { 182462fd69eSSeungYeon Shin struct spdk_cpuset tmp_mask; 183462fd69eSSeungYeon Shin 184462fd69eSSeungYeon Shin spdk_cpuset_copy(&tmp_mask, spdk_app_get_core_mask()); 185462fd69eSSeungYeon Shin spdk_cpuset_or(&tmp_mask, &isolated_core_mask); 186462fd69eSSeungYeon Shin if (spdk_cpuset_equal(&tmp_mask, spdk_app_get_core_mask()) == false) { 187462fd69eSSeungYeon Shin SPDK_ERRLOG("Isolated core mask is not included in app core mask.\n"); 188462fd69eSSeungYeon Shin return false; 189462fd69eSSeungYeon Shin } 190462fd69eSSeungYeon Shin spdk_cpuset_copy(&g_scheduler_isolated_core_mask, &isolated_core_mask); 191462fd69eSSeungYeon Shin return true; 192462fd69eSSeungYeon Shin } 193462fd69eSSeungYeon Shin 194462fd69eSSeungYeon Shin const char * 195462fd69eSSeungYeon Shin scheduler_get_isolated_core_mask(void) 196462fd69eSSeungYeon Shin { 197462fd69eSSeungYeon Shin return spdk_cpuset_fmt(&g_scheduler_isolated_core_mask); 198462fd69eSSeungYeon Shin } 199462fd69eSSeungYeon Shin 200462fd69eSSeungYeon Shin static bool 201462fd69eSSeungYeon Shin scheduler_is_isolated_core(uint32_t core) 202462fd69eSSeungYeon Shin { 203462fd69eSSeungYeon Shin return spdk_cpuset_get_cpu(&g_scheduler_isolated_core_mask, core); 204462fd69eSSeungYeon Shin } 205462fd69eSSeungYeon Shin 2062139bfa9SBen Walker static void 207a7592dbeSSeth Howell reactor_construct(struct spdk_reactor *reactor, uint32_t lcore) 2082139bfa9SBen Walker { 2092139bfa9SBen Walker reactor->lcore = lcore; 2107f9c41beSBen Walker reactor->flags.is_valid = true; 2112139bfa9SBen Walker 2122139bfa9SBen Walker TAILQ_INIT(&reactor->threads); 213ea863bb0SShuhei Matsumoto reactor->thread_count = 0; 214eff5b149SLiu Xiaodong spdk_cpuset_zero(&reactor->notify_cpuset); 2152139bfa9SBen Walker 216186b109dSJim Harris reactor->events = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 65536, SPDK_ENV_NUMA_ID_ANY); 2174237d2d8Ssunshihao520 if (reactor->events == NULL) { 2184237d2d8Ssunshihao520 SPDK_ERRLOG("Failed to allocate events ring\n"); 2194237d2d8Ssunshihao520 assert(false); 2204237d2d8Ssunshihao520 } 2214bf6e4bbSLiu Xiaodong 222eff5b149SLiu Xiaodong /* Always initialize interrupt facilities for reactor */ 223eff5b149SLiu Xiaodong if (reactor_interrupt_init(reactor) != 0) { 22434edd9f1SKamil Godzwon /* Reactor interrupt facilities are necessary if setting app to interrupt mode. */ 2254bf6e4bbSLiu Xiaodong if (spdk_interrupt_mode_is_enabled()) { 226eff5b149SLiu Xiaodong SPDK_ERRLOG("Failed to prepare intr facilities\n"); 227eff5b149SLiu Xiaodong assert(false); 228eff5b149SLiu Xiaodong } 229eff5b149SLiu Xiaodong return; 230eff5b149SLiu Xiaodong } 231eff5b149SLiu Xiaodong 232eff5b149SLiu Xiaodong /* If application runs with full interrupt ability, 233eff5b149SLiu Xiaodong * all reactors are going to run in interrupt mode. 234eff5b149SLiu Xiaodong */ 235eff5b149SLiu Xiaodong if (spdk_interrupt_mode_is_enabled()) { 236eff5b149SLiu Xiaodong uint32_t i; 237eff5b149SLiu Xiaodong 238eff5b149SLiu Xiaodong SPDK_ENV_FOREACH_CORE(i) { 239eff5b149SLiu Xiaodong spdk_cpuset_set_cpu(&reactor->notify_cpuset, i, true); 240eff5b149SLiu Xiaodong } 241eff5b149SLiu Xiaodong reactor->in_interrupt = true; 2424bf6e4bbSLiu Xiaodong } 2432139bfa9SBen Walker } 2442139bfa9SBen Walker 245abbd6ed8SShuhei Matsumoto struct spdk_reactor * 246eeeac667SDaniel Verkamp spdk_reactor_get(uint32_t lcore) 247eeeac667SDaniel Verkamp { 248eeeac667SDaniel Verkamp struct spdk_reactor *reactor; 249085ade57SBen Walker 250fd0f0364SBen Walker if (g_reactors == NULL) { 251fd0f0364SBen Walker SPDK_WARNLOG("Called spdk_reactor_get() while the g_reactors array was NULL!\n"); 252fd0f0364SBen Walker return NULL; 253fd0f0364SBen Walker } 254085ade57SBen Walker 2555a499ac0SLiu Xiaodong if (lcore >= g_reactor_count) { 2565a499ac0SLiu Xiaodong return NULL; 2575a499ac0SLiu Xiaodong } 2585a499ac0SLiu Xiaodong 259085ade57SBen Walker reactor = &g_reactors[lcore]; 260085ade57SBen Walker 2617f9c41beSBen Walker if (reactor->flags.is_valid == false) { 262085ade57SBen Walker return NULL; 263085ade57SBen Walker } 264085ade57SBen Walker 265eeeac667SDaniel Verkamp return reactor; 266eeeac667SDaniel Verkamp } 267eeeac667SDaniel Verkamp 268a7592dbeSSeth Howell static int reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op); 269a7592dbeSSeth Howell static bool reactor_thread_op_supported(enum spdk_thread_op op); 2702139bfa9SBen Walker 2716895e9d9SJim Harris /* Power of 2 minus 1 is optimal for memory consumption */ 2726895e9d9SJim Harris #define EVENT_MSG_MEMPOOL_SHIFT 14 /* 2^14 = 16384 */ 2736895e9d9SJim Harris #define EVENT_MSG_MEMPOOL_SIZE ((1 << EVENT_MSG_MEMPOOL_SHIFT) - 1) 2746895e9d9SJim Harris 2752139bfa9SBen Walker int 276a71cd521SAlexis Lescouet spdk_reactors_init(size_t msg_mempool_size) 2772139bfa9SBen Walker { 27875022aa2SMaciej Szwed struct spdk_reactor *reactor; 2792139bfa9SBen Walker int rc; 280992b168eSLiu Xiaodong uint32_t i, current_core; 2812139bfa9SBen Walker char mempool_name[32]; 2822139bfa9SBen Walker 2832139bfa9SBen Walker snprintf(mempool_name, sizeof(mempool_name), "evtpool_%d", getpid()); 2842139bfa9SBen Walker g_spdk_event_mempool = spdk_mempool_create(mempool_name, 2856895e9d9SJim Harris EVENT_MSG_MEMPOOL_SIZE, 2862139bfa9SBen Walker sizeof(struct spdk_event), 2872139bfa9SBen Walker SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, 288186b109dSJim Harris SPDK_ENV_NUMA_ID_ANY); 2892139bfa9SBen Walker 2902139bfa9SBen Walker if (g_spdk_event_mempool == NULL) { 2912139bfa9SBen Walker SPDK_ERRLOG("spdk_event_mempool creation failed\n"); 2922139bfa9SBen Walker return -1; 2932139bfa9SBen Walker } 2942139bfa9SBen Walker 2952139bfa9SBen Walker /* struct spdk_reactor must be aligned on 64 byte boundary */ 296992b168eSLiu Xiaodong g_reactor_count = spdk_env_get_last_core() + 1; 2972139bfa9SBen Walker rc = posix_memalign((void **)&g_reactors, 64, 298992b168eSLiu Xiaodong g_reactor_count * sizeof(struct spdk_reactor)); 2992139bfa9SBen Walker if (rc != 0) { 3002139bfa9SBen Walker SPDK_ERRLOG("Could not allocate array size=%u for g_reactors\n", 301992b168eSLiu Xiaodong g_reactor_count); 3022139bfa9SBen Walker spdk_mempool_free(g_spdk_event_mempool); 3032139bfa9SBen Walker return -1; 3042139bfa9SBen Walker } 3052139bfa9SBen Walker 306992b168eSLiu Xiaodong g_core_infos = calloc(g_reactor_count, sizeof(*g_core_infos)); 3072cffc800SVitaliy Mysak if (g_core_infos == NULL) { 3082cffc800SVitaliy Mysak SPDK_ERRLOG("Could not allocate memory for g_core_infos\n"); 3092cffc800SVitaliy Mysak spdk_mempool_free(g_spdk_event_mempool); 3102cffc800SVitaliy Mysak free(g_reactors); 3112cffc800SVitaliy Mysak return -ENOMEM; 3122cffc800SVitaliy Mysak } 3132cffc800SVitaliy Mysak 314992b168eSLiu Xiaodong memset(g_reactors, 0, (g_reactor_count) * sizeof(struct spdk_reactor)); 3152139bfa9SBen Walker 3165de98ef8Syidong0635 rc = spdk_thread_lib_init_ext(reactor_thread_op, reactor_thread_op_supported, 317a71cd521SAlexis Lescouet sizeof(struct spdk_lw_thread), msg_mempool_size); 3185de98ef8Syidong0635 if (rc != 0) { 3195de98ef8Syidong0635 SPDK_ERRLOG("Initialize spdk thread lib failed\n"); 3205de98ef8Syidong0635 spdk_mempool_free(g_spdk_event_mempool); 3215de98ef8Syidong0635 free(g_reactors); 3225de98ef8Syidong0635 free(g_core_infos); 3235de98ef8Syidong0635 return rc; 3245de98ef8Syidong0635 } 3252139bfa9SBen Walker 3262139bfa9SBen Walker SPDK_ENV_FOREACH_CORE(i) { 327a7592dbeSSeth Howell reactor_construct(&g_reactors[i], i); 3282139bfa9SBen Walker } 3292139bfa9SBen Walker 33075022aa2SMaciej Szwed current_core = spdk_env_get_current_core(); 33175022aa2SMaciej Szwed reactor = spdk_reactor_get(current_core); 33275022aa2SMaciej Szwed assert(reactor != NULL); 33375022aa2SMaciej Szwed g_scheduling_reactor = reactor; 33475022aa2SMaciej Szwed 3352139bfa9SBen Walker g_reactor_state = SPDK_REACTOR_STATE_INITIALIZED; 3362139bfa9SBen Walker 3372139bfa9SBen Walker return 0; 3382139bfa9SBen Walker } 3392139bfa9SBen Walker 3406aff44ccSBen Walker void 3416aff44ccSBen Walker spdk_reactors_fini(void) 3426aff44ccSBen Walker { 3436aff44ccSBen Walker uint32_t i; 3446aff44ccSBen Walker struct spdk_reactor *reactor; 3456aff44ccSBen Walker 346f7ddfcf7SDarek Stojaczyk if (g_reactor_state == SPDK_REACTOR_STATE_UNINITIALIZED) { 347f7ddfcf7SDarek Stojaczyk return; 348f7ddfcf7SDarek Stojaczyk } 349f7ddfcf7SDarek Stojaczyk 3506aff44ccSBen Walker spdk_thread_lib_fini(); 3516aff44ccSBen Walker 3526aff44ccSBen Walker SPDK_ENV_FOREACH_CORE(i) { 3536aff44ccSBen Walker reactor = spdk_reactor_get(i); 3542e69975fSSeth Howell assert(reactor != NULL); 355ea863bb0SShuhei Matsumoto assert(reactor->thread_count == 0); 3562e69975fSSeth Howell if (reactor->events != NULL) { 3576aff44ccSBen Walker spdk_ring_free(reactor->events); 3586aff44ccSBen Walker } 3594bf6e4bbSLiu Xiaodong 3604bf6e4bbSLiu Xiaodong reactor_interrupt_fini(reactor); 3612cffc800SVitaliy Mysak 3622cffc800SVitaliy Mysak if (g_core_infos != NULL) { 363b74b6133STomasz Zawadzki free(g_core_infos[i].thread_infos); 3642cffc800SVitaliy Mysak } 3656aff44ccSBen Walker } 3666aff44ccSBen Walker 3676aff44ccSBen Walker spdk_mempool_free(g_spdk_event_mempool); 3686aff44ccSBen Walker 3696aff44ccSBen Walker free(g_reactors); 3706aff44ccSBen Walker g_reactors = NULL; 3712cffc800SVitaliy Mysak free(g_core_infos); 3722cffc800SVitaliy Mysak g_core_infos = NULL; 3736aff44ccSBen Walker } 3746aff44ccSBen Walker 375a2074554SLiu Xiaodong static void _reactor_set_interrupt_mode(void *arg1, void *arg2); 376a2074554SLiu Xiaodong 377a2074554SLiu Xiaodong static void 378a2074554SLiu Xiaodong _reactor_set_notify_cpuset(void *arg1, void *arg2) 379a2074554SLiu Xiaodong { 380a2074554SLiu Xiaodong struct spdk_reactor *target = arg1; 381a2074554SLiu Xiaodong struct spdk_reactor *reactor = spdk_reactor_get(spdk_env_get_current_core()); 382a2074554SLiu Xiaodong 383ebeac5deSGangCao assert(reactor != NULL); 384a2074554SLiu Xiaodong spdk_cpuset_set_cpu(&reactor->notify_cpuset, target->lcore, target->new_in_interrupt); 385a2074554SLiu Xiaodong } 386a2074554SLiu Xiaodong 387a2074554SLiu Xiaodong static void 3889e81535eSyidong0635 _event_call(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2) 3899e81535eSyidong0635 { 3909e81535eSyidong0635 struct spdk_event *ev; 3919e81535eSyidong0635 3929e81535eSyidong0635 ev = spdk_event_allocate(lcore, fn, arg1, arg2); 3939e81535eSyidong0635 assert(ev); 3949e81535eSyidong0635 spdk_event_call(ev); 3959e81535eSyidong0635 } 3969e81535eSyidong0635 3979e81535eSyidong0635 static void 398a2074554SLiu Xiaodong _reactor_set_notify_cpuset_cpl(void *arg1, void *arg2) 399a2074554SLiu Xiaodong { 400a2074554SLiu Xiaodong struct spdk_reactor *target = arg1; 401a2074554SLiu Xiaodong 402a2074554SLiu Xiaodong if (target->new_in_interrupt == false) { 403a2074554SLiu Xiaodong target->set_interrupt_mode_in_progress = false; 40411ff66feSsyeon.shin _event_call(spdk_scheduler_get_scheduling_lcore(), target->set_interrupt_mode_cb_fn, 40511ff66feSsyeon.shin target->set_interrupt_mode_cb_arg, NULL); 406a2074554SLiu Xiaodong } else { 4079e81535eSyidong0635 _event_call(target->lcore, _reactor_set_interrupt_mode, target, NULL); 408a2074554SLiu Xiaodong } 409a2074554SLiu Xiaodong } 410a2074554SLiu Xiaodong 411a2074554SLiu Xiaodong static void 412b763ebfeSLiu Xiaodong _reactor_set_thread_interrupt_mode(void *ctx) 413b763ebfeSLiu Xiaodong { 414b763ebfeSLiu Xiaodong struct spdk_reactor *reactor = ctx; 415b763ebfeSLiu Xiaodong 416b763ebfeSLiu Xiaodong spdk_thread_set_interrupt_mode(reactor->in_interrupt); 417b763ebfeSLiu Xiaodong } 418b763ebfeSLiu Xiaodong 419b763ebfeSLiu Xiaodong static void 420a2074554SLiu Xiaodong _reactor_set_interrupt_mode(void *arg1, void *arg2) 421a2074554SLiu Xiaodong { 422a2074554SLiu Xiaodong struct spdk_reactor *target = arg1; 423b763ebfeSLiu Xiaodong struct spdk_thread *thread; 424fe2d64ceSBen Walker struct spdk_fd_group *grp; 425b763ebfeSLiu Xiaodong struct spdk_lw_thread *lw_thread, *tmp; 426a2074554SLiu Xiaodong 427de3878ecSyidong0635 assert(target == spdk_reactor_get(spdk_env_get_current_core())); 428a2074554SLiu Xiaodong assert(target != NULL); 429a2074554SLiu Xiaodong assert(target->in_interrupt != target->new_in_interrupt); 430a2074554SLiu Xiaodong SPDK_DEBUGLOG(reactor, "Do reactor set on core %u from %s to state %s\n", 431c07a6a94SRichael Zhuang target->lcore, target->in_interrupt ? "intr" : "poll", target->new_in_interrupt ? "intr" : "poll"); 432a2074554SLiu Xiaodong 433a2074554SLiu Xiaodong target->in_interrupt = target->new_in_interrupt; 434a2074554SLiu Xiaodong 435f98ac63eSTomasz Zawadzki if (spdk_interrupt_mode_is_enabled()) { 436b763ebfeSLiu Xiaodong /* Align spdk_thread with reactor to interrupt mode or poll mode */ 437b763ebfeSLiu Xiaodong TAILQ_FOREACH_SAFE(lw_thread, &target->threads, link, tmp) { 438b763ebfeSLiu Xiaodong thread = spdk_thread_get_from_ctx(lw_thread); 439fe2d64ceSBen Walker if (target->in_interrupt) { 440fe2d64ceSBen Walker grp = spdk_thread_get_interrupt_fd_group(thread); 441fe2d64ceSBen Walker spdk_fd_group_nest(target->fgrp, grp); 442fe2d64ceSBen Walker } else { 443fe2d64ceSBen Walker grp = spdk_thread_get_interrupt_fd_group(thread); 444fe2d64ceSBen Walker spdk_fd_group_unnest(target->fgrp, grp); 445fe2d64ceSBen Walker } 446fe2d64ceSBen Walker 447b763ebfeSLiu Xiaodong spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, target); 448b763ebfeSLiu Xiaodong } 449f98ac63eSTomasz Zawadzki } 450b763ebfeSLiu Xiaodong 451a2074554SLiu Xiaodong if (target->new_in_interrupt == false) { 452da11c9d2STomasz Zawadzki /* Reactor is no longer in interrupt mode. Refresh the tsc_last to accurately 453da11c9d2STomasz Zawadzki * track reactor stats. */ 454da11c9d2STomasz Zawadzki target->tsc_last = spdk_get_ticks(); 455a2074554SLiu Xiaodong spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl); 456a2074554SLiu Xiaodong } else { 457a2074554SLiu Xiaodong uint64_t notify = 1; 458a2074554SLiu Xiaodong int rc = 0; 459a2074554SLiu Xiaodong 460a2074554SLiu Xiaodong /* Always trigger spdk_event and resched event in case of race condition */ 461a2074554SLiu Xiaodong rc = write(target->events_fd, ¬ify, sizeof(notify)); 462a2074554SLiu Xiaodong if (rc < 0) { 463a2074554SLiu Xiaodong SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno)); 464a2074554SLiu Xiaodong } 465a2074554SLiu Xiaodong rc = write(target->resched_fd, ¬ify, sizeof(notify)); 466a2074554SLiu Xiaodong if (rc < 0) { 467a2074554SLiu Xiaodong SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno)); 468a2074554SLiu Xiaodong } 469a2074554SLiu Xiaodong 470a2074554SLiu Xiaodong target->set_interrupt_mode_in_progress = false; 47111ff66feSsyeon.shin _event_call(spdk_scheduler_get_scheduling_lcore(), target->set_interrupt_mode_cb_fn, 47211ff66feSsyeon.shin target->set_interrupt_mode_cb_arg, NULL); 473a2074554SLiu Xiaodong } 474a2074554SLiu Xiaodong } 475a2074554SLiu Xiaodong 476a2074554SLiu Xiaodong int 477a2074554SLiu Xiaodong spdk_reactor_set_interrupt_mode(uint32_t lcore, bool new_in_interrupt, 478a2074554SLiu Xiaodong spdk_reactor_set_interrupt_mode_cb cb_fn, void *cb_arg) 479a2074554SLiu Xiaodong { 480a2074554SLiu Xiaodong struct spdk_reactor *target; 481a2074554SLiu Xiaodong 482a2074554SLiu Xiaodong target = spdk_reactor_get(lcore); 483a2074554SLiu Xiaodong if (target == NULL) { 484a2074554SLiu Xiaodong return -EINVAL; 485a2074554SLiu Xiaodong } 486a2074554SLiu Xiaodong 487a7b15178STomasz Zawadzki /* Eventfd has to be supported in order to use interrupt functionality. */ 488a7b15178STomasz Zawadzki if (target->fgrp == NULL) { 489a7b15178STomasz Zawadzki return -ENOTSUP; 490a7b15178STomasz Zawadzki } 491a7b15178STomasz Zawadzki 492f470a0dcSJim Harris if (spdk_env_get_current_core() != g_scheduling_reactor->lcore) { 493f470a0dcSJim Harris SPDK_ERRLOG("It is only permitted within scheduling reactor.\n"); 494a2074554SLiu Xiaodong return -EPERM; 495a2074554SLiu Xiaodong } 496a2074554SLiu Xiaodong 497a2074554SLiu Xiaodong if (target->in_interrupt == new_in_interrupt) { 49811ff66feSsyeon.shin cb_fn(cb_arg, NULL); 499a2074554SLiu Xiaodong return 0; 500a2074554SLiu Xiaodong } 501a2074554SLiu Xiaodong 502a2074554SLiu Xiaodong if (target->set_interrupt_mode_in_progress) { 503a2074554SLiu Xiaodong SPDK_NOTICELOG("Reactor(%u) is already in progress to set interrupt mode\n", lcore); 504a2074554SLiu Xiaodong return -EBUSY; 505a2074554SLiu Xiaodong } 506a2074554SLiu Xiaodong target->set_interrupt_mode_in_progress = true; 507a2074554SLiu Xiaodong 508a2074554SLiu Xiaodong target->new_in_interrupt = new_in_interrupt; 509a2074554SLiu Xiaodong target->set_interrupt_mode_cb_fn = cb_fn; 510a2074554SLiu Xiaodong target->set_interrupt_mode_cb_arg = cb_arg; 511a2074554SLiu Xiaodong 512a2074554SLiu Xiaodong SPDK_DEBUGLOG(reactor, "Starting reactor event from %d to %d\n", 513a2074554SLiu Xiaodong spdk_env_get_current_core(), lcore); 514a2074554SLiu Xiaodong 515a2074554SLiu Xiaodong if (new_in_interrupt == false) { 516a2074554SLiu Xiaodong /* For potential race cases, when setting the reactor to poll mode, 517a2074554SLiu Xiaodong * first change the mode of the reactor and then clear the corresponding 518a2074554SLiu Xiaodong * bit of the notify_cpuset of each reactor. 519a2074554SLiu Xiaodong */ 5209e81535eSyidong0635 _event_call(lcore, _reactor_set_interrupt_mode, target, NULL); 521a2074554SLiu Xiaodong } else { 522cc6920a4SJosh Soref /* For race cases, when setting the reactor to interrupt mode, first set the 523a2074554SLiu Xiaodong * corresponding bit of the notify_cpuset of each reactor and then change the mode. 524a2074554SLiu Xiaodong */ 525a2074554SLiu Xiaodong spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl); 526a2074554SLiu Xiaodong } 527a2074554SLiu Xiaodong 528a2074554SLiu Xiaodong return 0; 529a2074554SLiu Xiaodong } 530a2074554SLiu Xiaodong 531c3ede774SDaniel Verkamp struct spdk_event * 5327ac9a4ecSDaniel Verkamp spdk_event_allocate(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2) 533eeeac667SDaniel Verkamp { 534eeeac667SDaniel Verkamp struct spdk_event *event = NULL; 5355135af43SDaniel Verkamp struct spdk_reactor *reactor = spdk_reactor_get(lcore); 53652bbb267SDaniel Verkamp 5376518338fSBen Walker if (!reactor) { 5386518338fSBen Walker assert(false); 5396518338fSBen Walker return NULL; 5406518338fSBen Walker } 5416518338fSBen Walker 542e1ec5c60SBen Walker event = spdk_mempool_get(g_spdk_event_mempool); 5437f5b671dSBen Walker if (event == NULL) { 544a17ad921SBen Walker assert(false); 545a17ad921SBen Walker return NULL; 546a17ad921SBen Walker } 547eeeac667SDaniel Verkamp 548eeeac667SDaniel Verkamp event->lcore = lcore; 549eeeac667SDaniel Verkamp event->fn = fn; 550eeeac667SDaniel Verkamp event->arg1 = arg1; 551eeeac667SDaniel Verkamp event->arg2 = arg2; 552eeeac667SDaniel Verkamp 553eeeac667SDaniel Verkamp return event; 554eeeac667SDaniel Verkamp } 555eeeac667SDaniel Verkamp 556eeeac667SDaniel Verkamp void 557c3ede774SDaniel Verkamp spdk_event_call(struct spdk_event *event) 558eeeac667SDaniel Verkamp { 559eeeac667SDaniel Verkamp int rc; 560eeeac667SDaniel Verkamp struct spdk_reactor *reactor; 561eff5b149SLiu Xiaodong struct spdk_reactor *local_reactor = NULL; 562eff5b149SLiu Xiaodong uint32_t current_core = spdk_env_get_current_core(); 563eeeac667SDaniel Verkamp 564eeeac667SDaniel Verkamp reactor = spdk_reactor_get(event->lcore); 565eeeac667SDaniel Verkamp 566085ade57SBen Walker assert(reactor != NULL); 567a17ad921SBen Walker assert(reactor->events != NULL); 568085ade57SBen Walker 5691554a344SShuhei Matsumoto rc = spdk_ring_enqueue(reactor->events, (void **)&event, 1, NULL); 57042491fb8SBen Walker if (rc != 1) { 571a17ad921SBen Walker assert(false); 572a17ad921SBen Walker } 5734bf6e4bbSLiu Xiaodong 574eff5b149SLiu Xiaodong if (current_core != SPDK_ENV_LCORE_ID_ANY) { 575eff5b149SLiu Xiaodong local_reactor = spdk_reactor_get(current_core); 576eff5b149SLiu Xiaodong } 577eff5b149SLiu Xiaodong 578eff5b149SLiu Xiaodong /* If spdk_event_call isn't called on a reactor, always send a notification. 579eff5b149SLiu Xiaodong * If it is called on a reactor, send a notification if the destination reactor 580eff5b149SLiu Xiaodong * is indicated in interrupt mode state. 581eff5b149SLiu Xiaodong */ 582eff5b149SLiu Xiaodong if (spdk_unlikely(local_reactor == NULL) || 583eff5b149SLiu Xiaodong spdk_unlikely(spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, event->lcore))) { 5844bf6e4bbSLiu Xiaodong uint64_t notify = 1; 5854bf6e4bbSLiu Xiaodong 5864bf6e4bbSLiu Xiaodong rc = write(reactor->events_fd, ¬ify, sizeof(notify)); 5874bf6e4bbSLiu Xiaodong if (rc < 0) { 5884bf6e4bbSLiu Xiaodong SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno)); 5894bf6e4bbSLiu Xiaodong } 5904bf6e4bbSLiu Xiaodong } 591eeeac667SDaniel Verkamp } 592eeeac667SDaniel Verkamp 5932d48b7dcSJohn Levon static inline int 5942d48b7dcSJohn Levon event_queue_run_batch(void *arg) 595eeeac667SDaniel Verkamp { 5962d48b7dcSJohn Levon struct spdk_reactor *reactor = arg; 5972d48b7dcSJohn Levon size_t count, i; 59852bbb267SDaniel Verkamp void *events[SPDK_EVENT_BATCH_SIZE]; 599eeeac667SDaniel Verkamp 60093d63599SDaniel Verkamp #ifdef DEBUG 60193d63599SDaniel Verkamp /* 60242491fb8SBen Walker * spdk_ring_dequeue() fills events and returns how many entries it wrote, 60393d63599SDaniel Verkamp * so we will never actually read uninitialized data from events, but just to be sure 60493d63599SDaniel Verkamp * (and to silence a static analyzer false positive), initialize the array to NULL pointers. 60593d63599SDaniel Verkamp */ 60693d63599SDaniel Verkamp memset(events, 0, sizeof(events)); 60793d63599SDaniel Verkamp #endif 60842491fb8SBen Walker 609eff5b149SLiu Xiaodong /* Operate event notification if this reactor currently runs in interrupt state */ 610eff5b149SLiu Xiaodong if (spdk_unlikely(reactor->in_interrupt)) { 6114bf6e4bbSLiu Xiaodong uint64_t notify = 1; 6124bf6e4bbSLiu Xiaodong int rc; 6134bf6e4bbSLiu Xiaodong 61442491fb8SBen Walker count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE); 6154bf6e4bbSLiu Xiaodong 6164bf6e4bbSLiu Xiaodong if (spdk_ring_count(reactor->events) != 0) { 6174bf6e4bbSLiu Xiaodong /* Trigger new notification if there are still events in event-queue waiting for processing. */ 6184bf6e4bbSLiu Xiaodong rc = write(reactor->events_fd, ¬ify, sizeof(notify)); 6194bf6e4bbSLiu Xiaodong if (rc < 0) { 6204bf6e4bbSLiu Xiaodong SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno)); 6214bf6e4bbSLiu Xiaodong return -errno; 6224bf6e4bbSLiu Xiaodong } 6234bf6e4bbSLiu Xiaodong } 6244bf6e4bbSLiu Xiaodong } else { 6254bf6e4bbSLiu Xiaodong count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE); 6264bf6e4bbSLiu Xiaodong } 6274bf6e4bbSLiu Xiaodong 62852bbb267SDaniel Verkamp if (count == 0) { 629eeeac667SDaniel Verkamp return 0; 630eeeac667SDaniel Verkamp } 631eeeac667SDaniel Verkamp 63252bbb267SDaniel Verkamp for (i = 0; i < count; i++) { 63352bbb267SDaniel Verkamp struct spdk_event *event = events[i]; 634eeeac667SDaniel Verkamp 63593d63599SDaniel Verkamp assert(event != NULL); 636f470a0dcSJim Harris assert(spdk_get_thread() == NULL); 63770f3606bSJohn Levon SPDK_DTRACE_PROBE3(event_exec, event->fn, 63870f3606bSJohn Levon event->arg1, event->arg2); 63944ef085bSDaniel Verkamp event->fn(event->arg1, event->arg2); 6400598e484STomasz Zawadzki } 641605e530aSBen Walker 642e1ec5c60SBen Walker spdk_mempool_put_bulk(g_spdk_event_mempool, events, count); 643086346a4SBen Walker 6442d48b7dcSJohn Levon return (int)count; 645eeeac667SDaniel Verkamp } 646eeeac667SDaniel Verkamp 64725baf714Ssuhua /* 1s */ 64886a21aeeSBen Walker #define CONTEXT_SWITCH_MONITOR_PERIOD 1000000 64986a21aeeSBen Walker 650c3bc40a6SRoman Sudarikov static int 65186a21aeeSBen Walker get_rusage(struct spdk_reactor *reactor) 6528d70322aSCunyin Chang { 6538d70322aSCunyin Chang struct rusage rusage; 6548d70322aSCunyin Chang 6558d70322aSCunyin Chang if (getrusage(RUSAGE_THREAD, &rusage) != 0) { 656c3bc40a6SRoman Sudarikov return -1; 6578d70322aSCunyin Chang } 6588d70322aSCunyin Chang 6598d70322aSCunyin Chang if (rusage.ru_nvcsw != reactor->rusage.ru_nvcsw || rusage.ru_nivcsw != reactor->rusage.ru_nivcsw) { 6602172c432STomasz Zawadzki SPDK_INFOLOG(reactor, 6618d70322aSCunyin Chang "Reactor %d: %ld voluntary context switches and %ld involuntary context switches in the last second.\n", 6628d70322aSCunyin Chang reactor->lcore, rusage.ru_nvcsw - reactor->rusage.ru_nvcsw, 6638d70322aSCunyin Chang rusage.ru_nivcsw - reactor->rusage.ru_nivcsw); 6648d70322aSCunyin Chang } 6658d70322aSCunyin Chang reactor->rusage = rusage; 666c3bc40a6SRoman Sudarikov 667c3bc40a6SRoman Sudarikov return -1; 6688d70322aSCunyin Chang } 66970c3e1f2SDaniel Verkamp 67070c3e1f2SDaniel Verkamp void 67125c5e3f5SShuhei Matsumoto spdk_framework_enable_context_switch_monitor(bool enable) 67270c3e1f2SDaniel Verkamp { 67386a21aeeSBen Walker /* This global is being read by multiple threads, so this isn't 67486a21aeeSBen Walker * strictly thread safe. However, we're toggling between true and 67586a21aeeSBen Walker * false here, and if a thread sees the value update later than it 67686a21aeeSBen Walker * should, it's no big deal. */ 67725c5e3f5SShuhei Matsumoto g_framework_context_switch_monitor_enabled = enable; 67870c3e1f2SDaniel Verkamp } 67970c3e1f2SDaniel Verkamp 68070c3e1f2SDaniel Verkamp bool 68125c5e3f5SShuhei Matsumoto spdk_framework_context_switch_monitor_enabled(void) 68270c3e1f2SDaniel Verkamp { 68325c5e3f5SShuhei Matsumoto return g_framework_context_switch_monitor_enabled; 68470c3e1f2SDaniel Verkamp } 6858d70322aSCunyin Chang 68638527b07SShuhei Matsumoto static void 68738527b07SShuhei Matsumoto _set_thread_name(const char *thread_name) 68838527b07SShuhei Matsumoto { 68938527b07SShuhei Matsumoto #if defined(__linux__) 69038527b07SShuhei Matsumoto prctl(PR_SET_NAME, thread_name, 0, 0, 0); 69138527b07SShuhei Matsumoto #elif defined(__FreeBSD__) 69238527b07SShuhei Matsumoto pthread_set_name_np(pthread_self(), thread_name); 69338527b07SShuhei Matsumoto #else 694a14deb22SNick Connolly pthread_setname_np(pthread_self(), thread_name); 69538527b07SShuhei Matsumoto #endif 69638527b07SShuhei Matsumoto } 69738527b07SShuhei Matsumoto 6982cffc800SVitaliy Mysak static void 6992cffc800SVitaliy Mysak _init_thread_stats(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread) 7002cffc800SVitaliy Mysak { 7012cffc800SVitaliy Mysak struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread); 702d9f5da13STomasz Zawadzki struct spdk_thread_stats prev_total_stats; 70347a6578eSTomasz Zawadzki 704d9f5da13STomasz Zawadzki /* Read total_stats before updating it to calculate stats during the last scheduling period. */ 705d9f5da13STomasz Zawadzki prev_total_stats = lw_thread->total_stats; 7062cffc800SVitaliy Mysak 7072cffc800SVitaliy Mysak spdk_set_thread(thread); 708d9f5da13STomasz Zawadzki spdk_thread_get_stats(&lw_thread->total_stats); 70947a6578eSTomasz Zawadzki spdk_set_thread(NULL); 71047a6578eSTomasz Zawadzki 711d9f5da13STomasz Zawadzki lw_thread->current_stats.busy_tsc = lw_thread->total_stats.busy_tsc - prev_total_stats.busy_tsc; 712d9f5da13STomasz Zawadzki lw_thread->current_stats.idle_tsc = lw_thread->total_stats.idle_tsc - prev_total_stats.idle_tsc; 7132cffc800SVitaliy Mysak } 7142cffc800SVitaliy Mysak 7152cffc800SVitaliy Mysak static void 716b74b6133STomasz Zawadzki _threads_reschedule_thread(struct spdk_scheduler_thread_info *thread_info) 717b74b6133STomasz Zawadzki { 718b74b6133STomasz Zawadzki struct spdk_lw_thread *lw_thread; 719b74b6133STomasz Zawadzki struct spdk_thread *thread; 720b74b6133STomasz Zawadzki 721b74b6133STomasz Zawadzki thread = spdk_thread_get_by_id(thread_info->thread_id); 722b74b6133STomasz Zawadzki if (thread == NULL) { 723b74b6133STomasz Zawadzki /* Thread no longer exists. */ 724b74b6133STomasz Zawadzki return; 725b74b6133STomasz Zawadzki } 726b74b6133STomasz Zawadzki lw_thread = spdk_thread_get_ctx(thread); 727b74b6133STomasz Zawadzki assert(lw_thread != NULL); 728b74b6133STomasz Zawadzki 729b74b6133STomasz Zawadzki lw_thread->lcore = thread_info->lcore; 730b74b6133STomasz Zawadzki lw_thread->resched = true; 731b74b6133STomasz Zawadzki } 732b74b6133STomasz Zawadzki 733b74b6133STomasz Zawadzki static void 7342cffc800SVitaliy Mysak _threads_reschedule(struct spdk_scheduler_core_info *cores_info) 7352cffc800SVitaliy Mysak { 7362cffc800SVitaliy Mysak struct spdk_scheduler_core_info *core; 737b74b6133STomasz Zawadzki struct spdk_scheduler_thread_info *thread_info; 7382cffc800SVitaliy Mysak uint32_t i, j; 7392cffc800SVitaliy Mysak 7402cffc800SVitaliy Mysak SPDK_ENV_FOREACH_CORE(i) { 7412cffc800SVitaliy Mysak core = &cores_info[i]; 7422cffc800SVitaliy Mysak for (j = 0; j < core->threads_count; j++) { 743b74b6133STomasz Zawadzki thread_info = &core->thread_infos[j]; 744b74b6133STomasz Zawadzki if (thread_info->lcore != i) { 745462fd69eSSeungYeon Shin if (core->isolated || cores_info[thread_info->lcore].isolated) { 746462fd69eSSeungYeon Shin SPDK_ERRLOG("A thread cannot be moved from an isolated core or \ 747462fd69eSSeungYeon Shin moved to an isolated core. Skip rescheduling thread\n"); 748462fd69eSSeungYeon Shin continue; 749462fd69eSSeungYeon Shin } 750b74b6133STomasz Zawadzki _threads_reschedule_thread(thread_info); 7512cffc800SVitaliy Mysak } 7522cffc800SVitaliy Mysak } 753fba4a97dSTomasz Zawadzki core->threads_count = 0; 754fba4a97dSTomasz Zawadzki free(core->thread_infos); 755fba4a97dSTomasz Zawadzki core->thread_infos = NULL; 7562cffc800SVitaliy Mysak } 7572cffc800SVitaliy Mysak } 7582cffc800SVitaliy Mysak 7592cffc800SVitaliy Mysak static void 7606859a49aSMaciej Szwed _reactors_scheduler_fini(void) 7612cffc800SVitaliy Mysak { 7622cffc800SVitaliy Mysak /* Reschedule based on the balancing output */ 7632cffc800SVitaliy Mysak _threads_reschedule(g_core_infos); 7642cffc800SVitaliy Mysak 765cff96883STomasz Zawadzki g_scheduling_in_progress = false; 7662cffc800SVitaliy Mysak } 7676859a49aSMaciej Szwed 7686859a49aSMaciej Szwed static void 76911ff66feSsyeon.shin _reactors_scheduler_update_core_mode(void *ctx1, void *ctx2) 7706859a49aSMaciej Szwed { 7716859a49aSMaciej Szwed struct spdk_reactor *reactor; 772cf155f23STomasz Zawadzki uint32_t i; 7736859a49aSMaciej Szwed int rc = 0; 7746859a49aSMaciej Szwed 775cf155f23STomasz Zawadzki for (i = g_scheduler_core_number; i < SPDK_ENV_LCORE_ID_ANY; i = spdk_env_get_next_core(i)) { 776cf155f23STomasz Zawadzki reactor = spdk_reactor_get(i); 777ebeac5deSGangCao assert(reactor != NULL); 778cf155f23STomasz Zawadzki if (reactor->in_interrupt != g_core_infos[i].interrupt_mode) { 7796859a49aSMaciej Szwed /* Switch next found reactor to new state */ 780cf155f23STomasz Zawadzki rc = spdk_reactor_set_interrupt_mode(i, g_core_infos[i].interrupt_mode, 781cf155f23STomasz Zawadzki _reactors_scheduler_update_core_mode, NULL); 7826859a49aSMaciej Szwed if (rc == 0) { 783cf155f23STomasz Zawadzki /* Set core to start with after callback completes */ 784cf155f23STomasz Zawadzki g_scheduler_core_number = spdk_env_get_next_core(i); 7856859a49aSMaciej Szwed return; 7866859a49aSMaciej Szwed } 7876859a49aSMaciej Szwed } 788cf155f23STomasz Zawadzki } 789cf155f23STomasz Zawadzki _reactors_scheduler_fini(); 7906859a49aSMaciej Szwed } 7916859a49aSMaciej Szwed 7926859a49aSMaciej Szwed static void 793b518cbe2STomasz Zawadzki _reactors_scheduler_cancel(void *arg1, void *arg2) 794b518cbe2STomasz Zawadzki { 795b518cbe2STomasz Zawadzki struct spdk_scheduler_core_info *core; 796b518cbe2STomasz Zawadzki uint32_t i; 797b518cbe2STomasz Zawadzki 798b518cbe2STomasz Zawadzki SPDK_ENV_FOREACH_CORE(i) { 799b518cbe2STomasz Zawadzki core = &g_core_infos[i]; 800b518cbe2STomasz Zawadzki core->threads_count = 0; 801b518cbe2STomasz Zawadzki free(core->thread_infos); 802b518cbe2STomasz Zawadzki core->thread_infos = NULL; 803b518cbe2STomasz Zawadzki } 804b518cbe2STomasz Zawadzki 805b518cbe2STomasz Zawadzki g_scheduling_in_progress = false; 806b518cbe2STomasz Zawadzki } 807b518cbe2STomasz Zawadzki 808b518cbe2STomasz Zawadzki static void 8096859a49aSMaciej Szwed _reactors_scheduler_balance(void *arg1, void *arg2) 8106859a49aSMaciej Szwed { 811a86e40f3STomasz Zawadzki struct spdk_scheduler *scheduler = spdk_scheduler_get(); 812d6c4f8cfSTomasz Zawadzki 813b518cbe2STomasz Zawadzki if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING || scheduler == NULL) { 814b518cbe2STomasz Zawadzki _reactors_scheduler_cancel(NULL, NULL); 815b518cbe2STomasz Zawadzki return; 816b518cbe2STomasz Zawadzki } 817b518cbe2STomasz Zawadzki 818d6c4f8cfSTomasz Zawadzki scheduler->balance(g_core_infos, g_reactor_count); 8196859a49aSMaciej Szwed 820cf155f23STomasz Zawadzki g_scheduler_core_number = spdk_env_get_first_core(); 82111ff66feSsyeon.shin _reactors_scheduler_update_core_mode(NULL, NULL); 8226859a49aSMaciej Szwed } 823e2f773aaSLiu Xiaodong 8242cffc800SVitaliy Mysak /* Phase 1 of thread scheduling is to gather metrics on the existing threads */ 8252cffc800SVitaliy Mysak static void 8262cffc800SVitaliy Mysak _reactors_scheduler_gather_metrics(void *arg1, void *arg2) 8272cffc800SVitaliy Mysak { 8282cffc800SVitaliy Mysak struct spdk_scheduler_core_info *core_info; 8292cffc800SVitaliy Mysak struct spdk_lw_thread *lw_thread; 830b74b6133STomasz Zawadzki struct spdk_thread *thread; 8312cffc800SVitaliy Mysak struct spdk_reactor *reactor; 8322cffc800SVitaliy Mysak uint32_t next_core; 833fba4a97dSTomasz Zawadzki uint32_t i = 0; 8342cffc800SVitaliy Mysak 8352cffc800SVitaliy Mysak reactor = spdk_reactor_get(spdk_env_get_current_core()); 8365e01bdb4SWeifeng Su assert(reactor != NULL); 8372cffc800SVitaliy Mysak core_info = &g_core_infos[reactor->lcore]; 8382cffc800SVitaliy Mysak core_info->lcore = reactor->lcore; 839abbfa1a5STomasz Zawadzki core_info->current_idle_tsc = reactor->idle_tsc - core_info->total_idle_tsc; 840e837ba3bSTomasz Zawadzki core_info->total_idle_tsc = reactor->idle_tsc; 841abbfa1a5STomasz Zawadzki core_info->current_busy_tsc = reactor->busy_tsc - core_info->total_busy_tsc; 842e837ba3bSTomasz Zawadzki core_info->total_busy_tsc = reactor->busy_tsc; 8439cde1509SMaciej Szwed core_info->interrupt_mode = reactor->in_interrupt; 844fba4a97dSTomasz Zawadzki core_info->threads_count = 0; 845462fd69eSSeungYeon Shin core_info->isolated = scheduler_is_isolated_core(reactor->lcore); 8462cffc800SVitaliy Mysak 8472cffc800SVitaliy Mysak SPDK_DEBUGLOG(reactor, "Gathering metrics on %u\n", reactor->lcore); 8482cffc800SVitaliy Mysak 84982c46626SAnisa Su spdk_trace_record(TRACE_SCHEDULER_CORE_STATS, reactor->trace_id, 0, 0, 85082c46626SAnisa Su core_info->current_busy_tsc, 85182c46626SAnisa Su core_info->current_idle_tsc); 85282c46626SAnisa Su 853fba4a97dSTomasz Zawadzki if (reactor->thread_count > 0) { 854fba4a97dSTomasz Zawadzki core_info->thread_infos = calloc(reactor->thread_count, sizeof(*core_info->thread_infos)); 855b74b6133STomasz Zawadzki if (core_info->thread_infos == NULL) { 856e2f773aaSLiu Xiaodong SPDK_ERRLOG("Failed to allocate memory when gathering metrics on %u\n", reactor->lcore); 857e2f773aaSLiu Xiaodong 858e2f773aaSLiu Xiaodong /* Cancel this round of schedule work */ 859462fd69eSSeungYeon Shin _event_call(spdk_scheduler_get_scheduling_lcore(), _reactors_scheduler_cancel, NULL, NULL); 860e2f773aaSLiu Xiaodong return; 861e2f773aaSLiu Xiaodong } 8622cffc800SVitaliy Mysak 8632cffc800SVitaliy Mysak TAILQ_FOREACH(lw_thread, &reactor->threads, link) { 864fba4a97dSTomasz Zawadzki _init_thread_stats(reactor, lw_thread); 865fba4a97dSTomasz Zawadzki 866b74b6133STomasz Zawadzki core_info->thread_infos[i].lcore = lw_thread->lcore; 867b74b6133STomasz Zawadzki thread = spdk_thread_get_from_ctx(lw_thread); 868fba4a97dSTomasz Zawadzki assert(thread != NULL); 869b74b6133STomasz Zawadzki core_info->thread_infos[i].thread_id = spdk_thread_get_id(thread); 870b74b6133STomasz Zawadzki core_info->thread_infos[i].total_stats = lw_thread->total_stats; 871b74b6133STomasz Zawadzki core_info->thread_infos[i].current_stats = lw_thread->current_stats; 872fba4a97dSTomasz Zawadzki core_info->threads_count++; 873fba4a97dSTomasz Zawadzki assert(core_info->threads_count <= reactor->thread_count); 87482c46626SAnisa Su 87582c46626SAnisa Su spdk_trace_record(TRACE_SCHEDULER_THREAD_STATS, spdk_thread_get_trace_id(thread), 0, 0, 87682c46626SAnisa Su lw_thread->current_stats.busy_tsc, 87782c46626SAnisa Su lw_thread->current_stats.idle_tsc); 87882c46626SAnisa Su 8792cffc800SVitaliy Mysak i++; 8802cffc800SVitaliy Mysak } 8812cffc800SVitaliy Mysak } 8822cffc800SVitaliy Mysak 8832cffc800SVitaliy Mysak next_core = spdk_env_get_next_core(reactor->lcore); 8842cffc800SVitaliy Mysak if (next_core == UINT32_MAX) { 8852cffc800SVitaliy Mysak next_core = spdk_env_get_first_core(); 8862cffc800SVitaliy Mysak } 8872cffc800SVitaliy Mysak 8882cffc800SVitaliy Mysak /* If we've looped back around to the scheduler thread, move to the next phase */ 889462fd69eSSeungYeon Shin if (next_core == spdk_scheduler_get_scheduling_lcore()) { 8902cffc800SVitaliy Mysak /* Phase 2 of scheduling is rebalancing - deciding which threads to move where */ 8919e81535eSyidong0635 _event_call(next_core, _reactors_scheduler_balance, NULL, NULL); 8922cffc800SVitaliy Mysak return; 8932cffc800SVitaliy Mysak } 8942cffc800SVitaliy Mysak 8959e81535eSyidong0635 _event_call(next_core, _reactors_scheduler_gather_metrics, NULL, NULL); 8962cffc800SVitaliy Mysak } 8972cffc800SVitaliy Mysak 898a4335febSShuhei Matsumoto static int _reactor_schedule_thread(struct spdk_thread *thread); 89968161ffcSShuhei Matsumoto static uint64_t g_rusage_period; 900a4335febSShuhei Matsumoto 9010859c837Syidong0635 static void 9020859c837Syidong0635 _reactor_remove_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread) 9037d19e50aSLiu Xiaodong { 9047d19e50aSLiu Xiaodong struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread); 905fe2d64ceSBen Walker struct spdk_fd_group *grp; 9067d19e50aSLiu Xiaodong 9077d19e50aSLiu Xiaodong TAILQ_REMOVE(&reactor->threads, lw_thread, link); 9087d19e50aSLiu Xiaodong assert(reactor->thread_count > 0); 9097d19e50aSLiu Xiaodong reactor->thread_count--; 9104bf6e4bbSLiu Xiaodong 911eff5b149SLiu Xiaodong /* Operate thread intr if running with full interrupt ability */ 912eff5b149SLiu Xiaodong if (spdk_interrupt_mode_is_enabled()) { 913fe2d64ceSBen Walker if (reactor->in_interrupt) { 914fe2d64ceSBen Walker grp = spdk_thread_get_interrupt_fd_group(thread); 915fe2d64ceSBen Walker spdk_fd_group_unnest(reactor->fgrp, grp); 916fe2d64ceSBen Walker } 9174bf6e4bbSLiu Xiaodong } 9180859c837Syidong0635 } 9190859c837Syidong0635 9200859c837Syidong0635 static bool 9210859c837Syidong0635 reactor_post_process_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread) 9220859c837Syidong0635 { 9230859c837Syidong0635 struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread); 9240859c837Syidong0635 9257d19e50aSLiu Xiaodong if (spdk_unlikely(spdk_thread_is_exited(thread) && 9267d19e50aSLiu Xiaodong spdk_thread_is_idle(thread))) { 9270859c837Syidong0635 _reactor_remove_lw_thread(reactor, lw_thread); 9287d19e50aSLiu Xiaodong spdk_thread_destroy(thread); 9297d19e50aSLiu Xiaodong return true; 9307d19e50aSLiu Xiaodong } 9317d19e50aSLiu Xiaodong 9323f4b2c67SYifan Bian if (spdk_unlikely(lw_thread->resched && !spdk_thread_is_bound(thread))) { 93389c1e5bfSApokleos lw_thread->resched = false; 93489c1e5bfSApokleos _reactor_remove_lw_thread(reactor, lw_thread); 93589c1e5bfSApokleos _reactor_schedule_thread(thread); 93689c1e5bfSApokleos return true; 93789c1e5bfSApokleos } 93889c1e5bfSApokleos 9397d19e50aSLiu Xiaodong return false; 9407d19e50aSLiu Xiaodong } 9417d19e50aSLiu Xiaodong 942fbb77a56SShuhei Matsumoto static void 9434bf6e4bbSLiu Xiaodong reactor_interrupt_run(struct spdk_reactor *reactor) 9444bf6e4bbSLiu Xiaodong { 9454bf6e4bbSLiu Xiaodong int block_timeout = -1; /* _EPOLL_WAIT_FOREVER */ 9464bf6e4bbSLiu Xiaodong 9474bf6e4bbSLiu Xiaodong spdk_fd_group_wait(reactor->fgrp, block_timeout); 9484bf6e4bbSLiu Xiaodong } 9494bf6e4bbSLiu Xiaodong 9504bf6e4bbSLiu Xiaodong static void 951fd361206SSeth Howell _reactor_run(struct spdk_reactor *reactor) 952eeeac667SDaniel Verkamp { 953032920f2SBen Walker struct spdk_thread *thread; 95412759ab5SBen Walker struct spdk_lw_thread *lw_thread, *tmp; 95578accbf4SShuhei Matsumoto uint64_t now; 95678accbf4SShuhei Matsumoto int rc; 95712759ab5SBen Walker 958fd361206SSeth Howell event_queue_run_batch(reactor); 9592d0aa1adSBen Walker 9607438c388STomasz Zawadzki /* If no threads are present on the reactor, 9617438c388STomasz Zawadzki * tsc_last gets outdated. Update it to track 9627438c388STomasz Zawadzki * thread execution time correctly. */ 9637438c388STomasz Zawadzki if (spdk_unlikely(TAILQ_EMPTY(&reactor->threads))) { 964e6531062STomasz Zawadzki now = spdk_get_ticks(); 965e6531062STomasz Zawadzki reactor->idle_tsc += now - reactor->tsc_last; 966e6531062STomasz Zawadzki reactor->tsc_last = now; 9677438c388STomasz Zawadzki return; 9687438c388STomasz Zawadzki } 9697438c388STomasz Zawadzki 97012759ab5SBen Walker TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) { 97112759ab5SBen Walker thread = spdk_thread_get_from_ctx(lw_thread); 97278accbf4SShuhei Matsumoto rc = spdk_thread_poll(thread, 0, reactor->tsc_last); 97378accbf4SShuhei Matsumoto 97478accbf4SShuhei Matsumoto now = spdk_thread_get_last_tsc(thread); 97578accbf4SShuhei Matsumoto if (rc == 0) { 97678accbf4SShuhei Matsumoto reactor->idle_tsc += now - reactor->tsc_last; 97778accbf4SShuhei Matsumoto } else if (rc > 0) { 97878accbf4SShuhei Matsumoto reactor->busy_tsc += now - reactor->tsc_last; 97978accbf4SShuhei Matsumoto } 98078accbf4SShuhei Matsumoto reactor->tsc_last = now; 981abb942bdSShuhei Matsumoto 9827d19e50aSLiu Xiaodong reactor_post_process_lw_thread(reactor, lw_thread); 98312759ab5SBen Walker } 984eeeac667SDaniel Verkamp } 985eeeac667SDaniel Verkamp 986fbb77a56SShuhei Matsumoto static int 987fd361206SSeth Howell reactor_run(void *arg) 988fbb77a56SShuhei Matsumoto { 989fbb77a56SShuhei Matsumoto struct spdk_reactor *reactor = arg; 990fbb77a56SShuhei Matsumoto struct spdk_thread *thread; 991fbb77a56SShuhei Matsumoto struct spdk_lw_thread *lw_thread, *tmp; 992fbb77a56SShuhei Matsumoto char thread_name[32]; 9932cffc800SVitaliy Mysak uint64_t last_sched = 0; 994fbb77a56SShuhei Matsumoto 995fbb77a56SShuhei Matsumoto SPDK_NOTICELOG("Reactor started on core %u\n", reactor->lcore); 996fbb77a56SShuhei Matsumoto 997fbb77a56SShuhei Matsumoto /* Rename the POSIX thread because the reactor is tied to the POSIX 998fbb77a56SShuhei Matsumoto * thread in the SPDK event library. 999fbb77a56SShuhei Matsumoto */ 1000fbb77a56SShuhei Matsumoto snprintf(thread_name, sizeof(thread_name), "reactor_%u", reactor->lcore); 1001fbb77a56SShuhei Matsumoto _set_thread_name(thread_name); 1002fbb77a56SShuhei Matsumoto 100382c46626SAnisa Su reactor->trace_id = spdk_trace_register_owner(OWNER_TYPE_REACTOR, thread_name); 100482c46626SAnisa Su 100580323e20SShuhei Matsumoto reactor->tsc_last = spdk_get_ticks(); 100680323e20SShuhei Matsumoto 1007fbb77a56SShuhei Matsumoto while (1) { 1008eff5b149SLiu Xiaodong /* Execute interrupt process fn if this reactor currently runs in interrupt state */ 1009eff5b149SLiu Xiaodong if (spdk_unlikely(reactor->in_interrupt)) { 10104bf6e4bbSLiu Xiaodong reactor_interrupt_run(reactor); 10114bf6e4bbSLiu Xiaodong } else { 1012fd361206SSeth Howell _reactor_run(reactor); 10134bf6e4bbSLiu Xiaodong } 1014fbb77a56SShuhei Matsumoto 1015ba7aac83SLiu Xiaodong if (g_framework_context_switch_monitor_enabled) { 1016ba7aac83SLiu Xiaodong if ((reactor->last_rusage + g_rusage_period) < reactor->tsc_last) { 1017ba7aac83SLiu Xiaodong get_rusage(reactor); 1018ba7aac83SLiu Xiaodong reactor->last_rusage = reactor->tsc_last; 1019ba7aac83SLiu Xiaodong } 1020ba7aac83SLiu Xiaodong } 1021ba7aac83SLiu Xiaodong 1022f01c6f2dSJim Harris if (spdk_unlikely(g_scheduler_period_in_tsc > 0 && 1023f01c6f2dSJim Harris (reactor->tsc_last - last_sched) > g_scheduler_period_in_tsc && 10242cffc800SVitaliy Mysak reactor == g_scheduling_reactor && 1025cff96883STomasz Zawadzki !g_scheduling_in_progress)) { 10262cffc800SVitaliy Mysak last_sched = reactor->tsc_last; 1027cff96883STomasz Zawadzki g_scheduling_in_progress = true; 102882c46626SAnisa Su spdk_trace_record(TRACE_SCHEDULER_PERIOD_START, 0, 0, 0); 10292cffc800SVitaliy Mysak _reactors_scheduler_gather_metrics(NULL, NULL); 10302cffc800SVitaliy Mysak } 10312cffc800SVitaliy Mysak 1032fbb77a56SShuhei Matsumoto if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) { 1033fbb77a56SShuhei Matsumoto break; 1034fbb77a56SShuhei Matsumoto } 1035fbb77a56SShuhei Matsumoto } 1036fbb77a56SShuhei Matsumoto 1037e9aec674SShuhei Matsumoto TAILQ_FOREACH(lw_thread, &reactor->threads, link) { 1038e9aec674SShuhei Matsumoto thread = spdk_thread_get_from_ctx(lw_thread); 10391d2700d4SJim Harris /* All threads should have already had spdk_thread_exit() called on them, except 10401d2700d4SJim Harris * for the app thread. 10411d2700d4SJim Harris */ 10421d2700d4SJim Harris if (spdk_thread_is_running(thread)) { 10431b1967bdSJim Harris if (!spdk_thread_is_app_thread(thread)) { 10441d2700d4SJim Harris SPDK_ERRLOG("spdk_thread_exit() was not called on thread '%s'\n", 10451d2700d4SJim Harris spdk_thread_get_name(thread)); 10461d2700d4SJim Harris SPDK_ERRLOG("This will result in a non-zero exit code in a future release.\n"); 10471d2700d4SJim Harris } 1048e9aec674SShuhei Matsumoto spdk_set_thread(thread); 1049e9aec674SShuhei Matsumoto spdk_thread_exit(thread); 1050e9aec674SShuhei Matsumoto } 10511d2700d4SJim Harris } 1052e9aec674SShuhei Matsumoto 1053e9aec674SShuhei Matsumoto while (!TAILQ_EMPTY(&reactor->threads)) { 1054032920f2SBen Walker TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) { 1055032920f2SBen Walker thread = spdk_thread_get_from_ctx(lw_thread); 1056e9aec674SShuhei Matsumoto spdk_set_thread(thread); 1057e9aec674SShuhei Matsumoto if (spdk_thread_is_exited(thread)) { 10580859c837Syidong0635 _reactor_remove_lw_thread(reactor, lw_thread); 1059e9aec674SShuhei Matsumoto spdk_thread_destroy(thread); 1060e9aec674SShuhei Matsumoto } else { 1061abd932d6SJim Harris if (spdk_unlikely(reactor->in_interrupt)) { 1062abd932d6SJim Harris reactor_interrupt_run(reactor); 1063abd932d6SJim Harris } else { 1064e7ead00bSShuhei Matsumoto spdk_thread_poll(thread, 0, 0); 1065e7ead00bSShuhei Matsumoto } 1066e9aec674SShuhei Matsumoto } 1067032920f2SBen Walker } 1068abd932d6SJim Harris } 106912759ab5SBen Walker 1070eeeac667SDaniel Verkamp return 0; 1071eeeac667SDaniel Verkamp } 1072eeeac667SDaniel Verkamp 1073eeeac667SDaniel Verkamp int 1074601bcbcfSTomasz Kulasek spdk_app_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask) 1075eeeac667SDaniel Verkamp { 1076601bcbcfSTomasz Kulasek int ret; 1077276c31cdSChangpeng Liu const struct spdk_cpuset *validmask; 1078eeeac667SDaniel Verkamp 1079601bcbcfSTomasz Kulasek ret = spdk_cpuset_parse(cpumask, mask); 1080601bcbcfSTomasz Kulasek if (ret < 0) { 1081601bcbcfSTomasz Kulasek return ret; 1082eeeac667SDaniel Verkamp } 1083eeeac667SDaniel Verkamp 1084601bcbcfSTomasz Kulasek validmask = spdk_app_get_core_mask(); 1085601bcbcfSTomasz Kulasek spdk_cpuset_and(cpumask, validmask); 1086d7134439SBen Walker 1087eeeac667SDaniel Verkamp return 0; 1088eeeac667SDaniel Verkamp } 1089eeeac667SDaniel Verkamp 1090276c31cdSChangpeng Liu const struct spdk_cpuset * 1091eeeac667SDaniel Verkamp spdk_app_get_core_mask(void) 1092eeeac667SDaniel Verkamp { 1093ae4360f0SShuhei Matsumoto return &g_reactor_core_mask; 1094eeeac667SDaniel Verkamp } 1095eeeac667SDaniel Verkamp 1096eeeac667SDaniel Verkamp void 1097f0d2e550SCunyin Chang spdk_reactors_start(void) 1098eeeac667SDaniel Verkamp { 1099eeeac667SDaniel Verkamp struct spdk_reactor *reactor; 11007f7c03a9SBen Walker uint32_t i, current_core; 11019b8ddd22SBen Walker int rc; 1102eeeac667SDaniel Verkamp 110368161ffcSShuhei Matsumoto g_rusage_period = (CONTEXT_SWITCH_MONITOR_PERIOD * spdk_get_ticks_hz()) / SPDK_SEC_TO_USEC; 1104eeeac667SDaniel Verkamp g_reactor_state = SPDK_REACTOR_STATE_RUNNING; 1105c4f086b6SJim Harris /* Reinitialize to false, in case the app framework is restarting in the same process. */ 1106c4f086b6SJim Harris g_stopping_reactors = false; 1107eeeac667SDaniel Verkamp 11087f7c03a9SBen Walker current_core = spdk_env_get_current_core(); 11097f7c03a9SBen Walker SPDK_ENV_FOREACH_CORE(i) { 11107f7c03a9SBen Walker if (i != current_core) { 1111eeeac667SDaniel Verkamp reactor = spdk_reactor_get(i); 1112085ade57SBen Walker if (reactor == NULL) { 1113085ade57SBen Walker continue; 1114085ade57SBen Walker } 1115085ade57SBen Walker 1116fd361206SSeth Howell rc = spdk_env_thread_launch_pinned(reactor->lcore, reactor_run, reactor); 11179b8ddd22SBen Walker if (rc < 0) { 11189b8ddd22SBen Walker SPDK_ERRLOG("Unable to start reactor thread on core %u\n", reactor->lcore); 11199b8ddd22SBen Walker assert(false); 11209b8ddd22SBen Walker return; 11219b8ddd22SBen Walker } 1122eeeac667SDaniel Verkamp } 1123ae4360f0SShuhei Matsumoto spdk_cpuset_set_cpu(&g_reactor_core_mask, i, true); 11247f7c03a9SBen Walker } 1125eeeac667SDaniel Verkamp 11263795c1cfSJim Harris /* Start the main reactor */ 11277f7c03a9SBen Walker reactor = spdk_reactor_get(current_core); 1128085ade57SBen Walker assert(reactor != NULL); 1129fd361206SSeth Howell reactor_run(reactor); 1130eeeac667SDaniel Verkamp 11319b8ddd22SBen Walker spdk_env_thread_wait_all(); 1132eeeac667SDaniel Verkamp 1133eeeac667SDaniel Verkamp g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN; 1134eeeac667SDaniel Verkamp } 1135eeeac667SDaniel Verkamp 1136c4f086b6SJim Harris static void 1137c4f086b6SJim Harris _reactors_stop(void *arg1, void *arg2) 1138eeeac667SDaniel Verkamp { 11394bf6e4bbSLiu Xiaodong uint32_t i; 11404bf6e4bbSLiu Xiaodong int rc; 11414bf6e4bbSLiu Xiaodong struct spdk_reactor *reactor; 1142eff5b149SLiu Xiaodong struct spdk_reactor *local_reactor; 11434bf6e4bbSLiu Xiaodong uint64_t notify = 1; 11444bf6e4bbSLiu Xiaodong 1145eeeac667SDaniel Verkamp g_reactor_state = SPDK_REACTOR_STATE_EXITING; 1146eff5b149SLiu Xiaodong local_reactor = spdk_reactor_get(spdk_env_get_current_core()); 11474bf6e4bbSLiu Xiaodong 11484bf6e4bbSLiu Xiaodong SPDK_ENV_FOREACH_CORE(i) { 1149eff5b149SLiu Xiaodong /* If spdk_event_call isn't called on a reactor, always send a notification. 1150eff5b149SLiu Xiaodong * If it is called on a reactor, send a notification if the destination reactor 1151eff5b149SLiu Xiaodong * is indicated in interrupt mode state. 1152eff5b149SLiu Xiaodong */ 1153eff5b149SLiu Xiaodong if (local_reactor == NULL || spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, i)) { 11544bf6e4bbSLiu Xiaodong reactor = spdk_reactor_get(i); 11555e01bdb4SWeifeng Su assert(reactor != NULL); 11564bf6e4bbSLiu Xiaodong rc = write(reactor->events_fd, ¬ify, sizeof(notify)); 11574bf6e4bbSLiu Xiaodong if (rc < 0) { 11584bf6e4bbSLiu Xiaodong SPDK_ERRLOG("failed to notify event queue for reactor(%u): %s.\n", i, spdk_strerror(errno)); 11594bf6e4bbSLiu Xiaodong continue; 11604bf6e4bbSLiu Xiaodong } 11614bf6e4bbSLiu Xiaodong } 11624bf6e4bbSLiu Xiaodong } 1163eeeac667SDaniel Verkamp } 1164eeeac667SDaniel Verkamp 1165c4f086b6SJim Harris static void 1166c4f086b6SJim Harris nop(void *arg1, void *arg2) 1167c4f086b6SJim Harris { 1168c4f086b6SJim Harris } 1169c4f086b6SJim Harris 1170c4f086b6SJim Harris void 1171c4f086b6SJim Harris spdk_reactors_stop(void *arg1) 1172c4f086b6SJim Harris { 1173abd932d6SJim Harris spdk_for_each_reactor(nop, NULL, NULL, _reactors_stop); 1174c4f086b6SJim Harris } 1175c4f086b6SJim Harris 1176032920f2SBen Walker static pthread_mutex_t g_scheduler_mtx = PTHREAD_MUTEX_INITIALIZER; 1177032920f2SBen Walker static uint32_t g_next_core = UINT32_MAX; 1178032920f2SBen Walker 1179032920f2SBen Walker static void 1180032920f2SBen Walker _schedule_thread(void *arg1, void *arg2) 1181032920f2SBen Walker { 1182032920f2SBen Walker struct spdk_lw_thread *lw_thread = arg1; 11833db73426STomasz Zawadzki struct spdk_thread *thread; 1184032920f2SBen Walker struct spdk_reactor *reactor; 118516ea979dSShuhei Matsumoto uint32_t current_core; 1186fe2d64ceSBen Walker struct spdk_fd_group *grp; 1187032920f2SBen Walker 118816ea979dSShuhei Matsumoto current_core = spdk_env_get_current_core(); 118916ea979dSShuhei Matsumoto reactor = spdk_reactor_get(current_core); 1190085ade57SBen Walker assert(reactor != NULL); 1191032920f2SBen Walker 1192d9f5da13STomasz Zawadzki /* Update total_stats to reflect state of thread 11933db73426STomasz Zawadzki * at the end of the move. */ 11943db73426STomasz Zawadzki thread = spdk_thread_get_from_ctx(lw_thread); 11953db73426STomasz Zawadzki spdk_set_thread(thread); 1196d9f5da13STomasz Zawadzki spdk_thread_get_stats(&lw_thread->total_stats); 11973db73426STomasz Zawadzki spdk_set_thread(NULL); 11983db73426STomasz Zawadzki 1199118c273aSJim Harris if (lw_thread->initial_lcore == SPDK_ENV_LCORE_ID_ANY) { 1200118c273aSJim Harris lw_thread->initial_lcore = current_core; 1201118c273aSJim Harris } 120218667806STomasz Zawadzki lw_thread->lcore = current_core; 120318667806STomasz Zawadzki 1204032920f2SBen Walker TAILQ_INSERT_TAIL(&reactor->threads, lw_thread, link); 1205ea863bb0SShuhei Matsumoto reactor->thread_count++; 12064bf6e4bbSLiu Xiaodong 1207eff5b149SLiu Xiaodong /* Operate thread intr if running with full interrupt ability */ 1208eff5b149SLiu Xiaodong if (spdk_interrupt_mode_is_enabled()) { 12094bf6e4bbSLiu Xiaodong int rc; 12104bf6e4bbSLiu Xiaodong 1211fe2d64ceSBen Walker if (reactor->in_interrupt) { 1212fe2d64ceSBen Walker grp = spdk_thread_get_interrupt_fd_group(thread); 1213fe2d64ceSBen Walker rc = spdk_fd_group_nest(reactor->fgrp, grp); 12144bf6e4bbSLiu Xiaodong if (rc < 0) { 12154bf6e4bbSLiu Xiaodong SPDK_ERRLOG("Failed to schedule spdk_thread: %s.\n", spdk_strerror(-rc)); 12164bf6e4bbSLiu Xiaodong } 1217fe2d64ceSBen Walker } 1218b763ebfeSLiu Xiaodong 1219b763ebfeSLiu Xiaodong /* Align spdk_thread with reactor to interrupt mode or poll mode */ 1220b763ebfeSLiu Xiaodong spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, reactor); 12214bf6e4bbSLiu Xiaodong } 1222032920f2SBen Walker } 1223032920f2SBen Walker 1224835d21a2SBen Walker static int 12251368aec8SShuhei Matsumoto _reactor_schedule_thread(struct spdk_thread *thread) 1226032920f2SBen Walker { 1227118c273aSJim Harris uint32_t core, initial_core; 1228032920f2SBen Walker struct spdk_lw_thread *lw_thread; 122967234b6aSBen Walker struct spdk_event *evt = NULL; 123067234b6aSBen Walker struct spdk_cpuset *cpumask; 123167234b6aSBen Walker uint32_t i; 1232227c8b81SLiu Xiaodong struct spdk_reactor *local_reactor = NULL; 1233227c8b81SLiu Xiaodong uint32_t current_lcore = spdk_env_get_current_core(); 1234227c8b81SLiu Xiaodong struct spdk_cpuset polling_cpumask; 1235227c8b81SLiu Xiaodong struct spdk_cpuset valid_cpumask; 123667234b6aSBen Walker 123767234b6aSBen Walker cpumask = spdk_thread_get_cpumask(thread); 1238032920f2SBen Walker 1239032920f2SBen Walker lw_thread = spdk_thread_get_ctx(thread); 1240032920f2SBen Walker assert(lw_thread != NULL); 12412cffc800SVitaliy Mysak core = lw_thread->lcore; 1242118c273aSJim Harris initial_core = lw_thread->initial_lcore; 1243032920f2SBen Walker memset(lw_thread, 0, sizeof(*lw_thread)); 1244118c273aSJim Harris lw_thread->initial_lcore = initial_core; 1245032920f2SBen Walker 1246227c8b81SLiu Xiaodong if (current_lcore != SPDK_ENV_LCORE_ID_ANY) { 1247227c8b81SLiu Xiaodong local_reactor = spdk_reactor_get(current_lcore); 1248227c8b81SLiu Xiaodong assert(local_reactor); 1249227c8b81SLiu Xiaodong } 1250227c8b81SLiu Xiaodong 1251227c8b81SLiu Xiaodong /* When interrupt ability of spdk_thread is not enabled and the current 1252227c8b81SLiu Xiaodong * reactor runs on DPDK thread, skip reactors which are in interrupt mode. 1253227c8b81SLiu Xiaodong */ 1254227c8b81SLiu Xiaodong if (!spdk_interrupt_mode_is_enabled() && local_reactor != NULL) { 1255227c8b81SLiu Xiaodong /* Get the cpumask of all reactors in polling */ 1256227c8b81SLiu Xiaodong spdk_cpuset_zero(&polling_cpumask); 1257227c8b81SLiu Xiaodong SPDK_ENV_FOREACH_CORE(i) { 1258227c8b81SLiu Xiaodong spdk_cpuset_set_cpu(&polling_cpumask, i, true); 1259227c8b81SLiu Xiaodong } 1260227c8b81SLiu Xiaodong spdk_cpuset_xor(&polling_cpumask, &local_reactor->notify_cpuset); 1261227c8b81SLiu Xiaodong 1262227c8b81SLiu Xiaodong if (core == SPDK_ENV_LCORE_ID_ANY) { 1263227c8b81SLiu Xiaodong /* Get the cpumask of all valid reactors which are suggested and also in polling */ 1264227c8b81SLiu Xiaodong spdk_cpuset_copy(&valid_cpumask, &polling_cpumask); 1265227c8b81SLiu Xiaodong spdk_cpuset_and(&valid_cpumask, spdk_thread_get_cpumask(thread)); 1266227c8b81SLiu Xiaodong 1267227c8b81SLiu Xiaodong /* If there are any valid reactors, spdk_thread should be scheduled 1268227c8b81SLiu Xiaodong * into one of the valid reactors. 1269227c8b81SLiu Xiaodong * If there is no valid reactors, spdk_thread should be scheduled 1270227c8b81SLiu Xiaodong * into one of the polling reactors. 1271227c8b81SLiu Xiaodong */ 1272227c8b81SLiu Xiaodong if (spdk_cpuset_count(&valid_cpumask) != 0) { 1273227c8b81SLiu Xiaodong cpumask = &valid_cpumask; 1274227c8b81SLiu Xiaodong } else { 1275227c8b81SLiu Xiaodong cpumask = &polling_cpumask; 1276227c8b81SLiu Xiaodong } 1277227c8b81SLiu Xiaodong } else if (!spdk_cpuset_get_cpu(&polling_cpumask, core)) { 1278227c8b81SLiu Xiaodong /* If specified reactor is not in polling, spdk_thread should be scheduled 1279227c8b81SLiu Xiaodong * into one of the polling reactors. 1280227c8b81SLiu Xiaodong */ 1281227c8b81SLiu Xiaodong core = SPDK_ENV_LCORE_ID_ANY; 1282227c8b81SLiu Xiaodong cpumask = &polling_cpumask; 1283227c8b81SLiu Xiaodong } 1284227c8b81SLiu Xiaodong } 1285227c8b81SLiu Xiaodong 1286032920f2SBen Walker pthread_mutex_lock(&g_scheduler_mtx); 12872cffc800SVitaliy Mysak if (core == SPDK_ENV_LCORE_ID_ANY) { 128867234b6aSBen Walker for (i = 0; i < spdk_env_get_core_count(); i++) { 1289992b168eSLiu Xiaodong if (g_next_core >= g_reactor_count) { 1290032920f2SBen Walker g_next_core = spdk_env_get_first_core(); 1291032920f2SBen Walker } 1292032920f2SBen Walker core = g_next_core; 1293032920f2SBen Walker g_next_core = spdk_env_get_next_core(g_next_core); 129467234b6aSBen Walker 129567234b6aSBen Walker if (spdk_cpuset_get_cpu(cpumask, core)) { 129667234b6aSBen Walker break; 129767234b6aSBen Walker } 129867234b6aSBen Walker } 12992cffc800SVitaliy Mysak } 13002cffc800SVitaliy Mysak 13012cffc800SVitaliy Mysak evt = spdk_event_allocate(core, _schedule_thread, lw_thread, NULL); 13022cffc800SVitaliy Mysak 130382c46626SAnisa Su if (current_lcore != core) { 130482c46626SAnisa Su spdk_trace_record(TRACE_SCHEDULER_MOVE_THREAD, spdk_thread_get_trace_id(thread), 0, 0, 130582c46626SAnisa Su current_lcore, core); 130682c46626SAnisa Su } 130782c46626SAnisa Su 1308032920f2SBen Walker pthread_mutex_unlock(&g_scheduler_mtx); 1309032920f2SBen Walker 131067234b6aSBen Walker assert(evt != NULL); 131167234b6aSBen Walker if (evt == NULL) { 131267234b6aSBen Walker SPDK_ERRLOG("Unable to schedule thread on requested core mask.\n"); 131367234b6aSBen Walker return -1; 131467234b6aSBen Walker } 131567234b6aSBen Walker 1316f7e9e764SShuhei Matsumoto lw_thread->tsc_start = spdk_get_ticks(); 1317f7e9e764SShuhei Matsumoto 1318032920f2SBen Walker spdk_event_call(evt); 1319835d21a2SBen Walker 1320835d21a2SBen Walker return 0; 1321032920f2SBen Walker } 1322032920f2SBen Walker 1323a4335febSShuhei Matsumoto static void 1324a4335febSShuhei Matsumoto _reactor_request_thread_reschedule(struct spdk_thread *thread) 1325a4335febSShuhei Matsumoto { 1326a4335febSShuhei Matsumoto struct spdk_lw_thread *lw_thread; 13274bf6e4bbSLiu Xiaodong struct spdk_reactor *reactor; 13284bf6e4bbSLiu Xiaodong uint32_t current_core; 1329a4335febSShuhei Matsumoto 1330a4335febSShuhei Matsumoto assert(thread == spdk_get_thread()); 1331a4335febSShuhei Matsumoto 1332a4335febSShuhei Matsumoto lw_thread = spdk_thread_get_ctx(thread); 1333a4335febSShuhei Matsumoto 1334902c9e4dSTomasz Zawadzki assert(lw_thread != NULL); 1335902c9e4dSTomasz Zawadzki lw_thread->resched = true; 1336902c9e4dSTomasz Zawadzki lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY; 13374bf6e4bbSLiu Xiaodong 13384bf6e4bbSLiu Xiaodong current_core = spdk_env_get_current_core(); 13394bf6e4bbSLiu Xiaodong reactor = spdk_reactor_get(current_core); 13404bf6e4bbSLiu Xiaodong assert(reactor != NULL); 1341eff5b149SLiu Xiaodong 1342eff5b149SLiu Xiaodong /* Send a notification if the destination reactor is indicated in intr mode state */ 1343eff5b149SLiu Xiaodong if (spdk_unlikely(spdk_cpuset_get_cpu(&reactor->notify_cpuset, reactor->lcore))) { 13444bf6e4bbSLiu Xiaodong uint64_t notify = 1; 13454bf6e4bbSLiu Xiaodong 13464bf6e4bbSLiu Xiaodong if (write(reactor->resched_fd, ¬ify, sizeof(notify)) < 0) { 13474bf6e4bbSLiu Xiaodong SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno)); 13484bf6e4bbSLiu Xiaodong } 13494bf6e4bbSLiu Xiaodong } 1350a4335febSShuhei Matsumoto } 1351a4335febSShuhei Matsumoto 13521368aec8SShuhei Matsumoto static int 1353a7592dbeSSeth Howell reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op) 13541368aec8SShuhei Matsumoto { 13552cffc800SVitaliy Mysak struct spdk_lw_thread *lw_thread; 13562cffc800SVitaliy Mysak 13571368aec8SShuhei Matsumoto switch (op) { 13581368aec8SShuhei Matsumoto case SPDK_THREAD_OP_NEW: 13592cffc800SVitaliy Mysak lw_thread = spdk_thread_get_ctx(thread); 13602cffc800SVitaliy Mysak lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY; 1361118c273aSJim Harris lw_thread->initial_lcore = SPDK_ENV_LCORE_ID_ANY; 13621368aec8SShuhei Matsumoto return _reactor_schedule_thread(thread); 1363a4335febSShuhei Matsumoto case SPDK_THREAD_OP_RESCHED: 1364a4335febSShuhei Matsumoto _reactor_request_thread_reschedule(thread); 1365a4335febSShuhei Matsumoto return 0; 13661368aec8SShuhei Matsumoto default: 13671368aec8SShuhei Matsumoto return -ENOTSUP; 13681368aec8SShuhei Matsumoto } 13691368aec8SShuhei Matsumoto } 13701368aec8SShuhei Matsumoto 13711368aec8SShuhei Matsumoto static bool 1372a7592dbeSSeth Howell reactor_thread_op_supported(enum spdk_thread_op op) 13731368aec8SShuhei Matsumoto { 13741368aec8SShuhei Matsumoto switch (op) { 13751368aec8SShuhei Matsumoto case SPDK_THREAD_OP_NEW: 1376a4335febSShuhei Matsumoto case SPDK_THREAD_OP_RESCHED: 13771368aec8SShuhei Matsumoto return true; 13781368aec8SShuhei Matsumoto default: 13791368aec8SShuhei Matsumoto return false; 13801368aec8SShuhei Matsumoto } 13811368aec8SShuhei Matsumoto } 13821368aec8SShuhei Matsumoto 138360eb6da8SShuhei Matsumoto struct call_reactor { 138460eb6da8SShuhei Matsumoto uint32_t cur_core; 138560eb6da8SShuhei Matsumoto spdk_event_fn fn; 138660eb6da8SShuhei Matsumoto void *arg1; 138760eb6da8SShuhei Matsumoto void *arg2; 138860eb6da8SShuhei Matsumoto 138960eb6da8SShuhei Matsumoto uint32_t orig_core; 139060eb6da8SShuhei Matsumoto spdk_event_fn cpl; 139160eb6da8SShuhei Matsumoto }; 139260eb6da8SShuhei Matsumoto 139360eb6da8SShuhei Matsumoto static void 1394a7592dbeSSeth Howell on_reactor(void *arg1, void *arg2) 139560eb6da8SShuhei Matsumoto { 139660eb6da8SShuhei Matsumoto struct call_reactor *cr = arg1; 139760eb6da8SShuhei Matsumoto struct spdk_event *evt; 139860eb6da8SShuhei Matsumoto 139960eb6da8SShuhei Matsumoto cr->fn(cr->arg1, cr->arg2); 140060eb6da8SShuhei Matsumoto 140160eb6da8SShuhei Matsumoto cr->cur_core = spdk_env_get_next_core(cr->cur_core); 140260eb6da8SShuhei Matsumoto 1403992b168eSLiu Xiaodong if (cr->cur_core >= g_reactor_count) { 14042172c432STomasz Zawadzki SPDK_DEBUGLOG(reactor, "Completed reactor iteration\n"); 140560eb6da8SShuhei Matsumoto 140660eb6da8SShuhei Matsumoto evt = spdk_event_allocate(cr->orig_core, cr->cpl, cr->arg1, cr->arg2); 140760eb6da8SShuhei Matsumoto free(cr); 140860eb6da8SShuhei Matsumoto } else { 14092172c432STomasz Zawadzki SPDK_DEBUGLOG(reactor, "Continuing reactor iteration to %d\n", 141060eb6da8SShuhei Matsumoto cr->cur_core); 141160eb6da8SShuhei Matsumoto 1412a7592dbeSSeth Howell evt = spdk_event_allocate(cr->cur_core, on_reactor, arg1, NULL); 141360eb6da8SShuhei Matsumoto } 141460eb6da8SShuhei Matsumoto assert(evt != NULL); 141560eb6da8SShuhei Matsumoto spdk_event_call(evt); 141660eb6da8SShuhei Matsumoto } 141760eb6da8SShuhei Matsumoto 141860eb6da8SShuhei Matsumoto void 141960eb6da8SShuhei Matsumoto spdk_for_each_reactor(spdk_event_fn fn, void *arg1, void *arg2, spdk_event_fn cpl) 142060eb6da8SShuhei Matsumoto { 142160eb6da8SShuhei Matsumoto struct call_reactor *cr; 142260eb6da8SShuhei Matsumoto 1423c4f086b6SJim Harris /* When the application framework is shutting down, we will send one 1424c4f086b6SJim Harris * final for_each_reactor operation with completion callback _reactors_stop, 1425c4f086b6SJim Harris * to flush any existing for_each_reactor operations to avoid any memory 1426c4f086b6SJim Harris * leaks. We use a mutex here to protect a boolean flag that will ensure 1427c4f086b6SJim Harris * we don't start any more operations once we've started shutting down. 1428c4f086b6SJim Harris */ 1429c4f086b6SJim Harris pthread_mutex_lock(&g_stopping_reactors_mtx); 1430c4f086b6SJim Harris if (g_stopping_reactors) { 1431c4f086b6SJim Harris pthread_mutex_unlock(&g_stopping_reactors_mtx); 1432c4f086b6SJim Harris return; 1433c4f086b6SJim Harris } else if (cpl == _reactors_stop) { 1434c4f086b6SJim Harris g_stopping_reactors = true; 1435c4f086b6SJim Harris } 1436c4f086b6SJim Harris pthread_mutex_unlock(&g_stopping_reactors_mtx); 1437c4f086b6SJim Harris 143860eb6da8SShuhei Matsumoto cr = calloc(1, sizeof(*cr)); 143960eb6da8SShuhei Matsumoto if (!cr) { 144060eb6da8SShuhei Matsumoto SPDK_ERRLOG("Unable to perform reactor iteration\n"); 144160eb6da8SShuhei Matsumoto cpl(arg1, arg2); 144260eb6da8SShuhei Matsumoto return; 144360eb6da8SShuhei Matsumoto } 144460eb6da8SShuhei Matsumoto 144560eb6da8SShuhei Matsumoto cr->fn = fn; 144660eb6da8SShuhei Matsumoto cr->arg1 = arg1; 144760eb6da8SShuhei Matsumoto cr->arg2 = arg2; 144860eb6da8SShuhei Matsumoto cr->cpl = cpl; 144960eb6da8SShuhei Matsumoto cr->orig_core = spdk_env_get_current_core(); 145060eb6da8SShuhei Matsumoto cr->cur_core = spdk_env_get_first_core(); 145160eb6da8SShuhei Matsumoto 14522172c432STomasz Zawadzki SPDK_DEBUGLOG(reactor, "Starting reactor iteration from %d\n", cr->orig_core); 145360eb6da8SShuhei Matsumoto 14549e81535eSyidong0635 _event_call(cr->cur_core, on_reactor, cr, NULL); 145560eb6da8SShuhei Matsumoto } 145660eb6da8SShuhei Matsumoto 14574bf6e4bbSLiu Xiaodong #ifdef __linux__ 14584bf6e4bbSLiu Xiaodong static int 14594bf6e4bbSLiu Xiaodong reactor_schedule_thread_event(void *arg) 14604bf6e4bbSLiu Xiaodong { 14614bf6e4bbSLiu Xiaodong struct spdk_reactor *reactor = arg; 14624bf6e4bbSLiu Xiaodong struct spdk_lw_thread *lw_thread, *tmp; 14634bf6e4bbSLiu Xiaodong uint32_t count = 0; 14644bf6e4bbSLiu Xiaodong 1465eff5b149SLiu Xiaodong assert(reactor->in_interrupt); 14664bf6e4bbSLiu Xiaodong 14674bf6e4bbSLiu Xiaodong TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) { 14684bf6e4bbSLiu Xiaodong count += reactor_post_process_lw_thread(reactor, lw_thread) ? 1 : 0; 14694bf6e4bbSLiu Xiaodong } 14704bf6e4bbSLiu Xiaodong 14714bf6e4bbSLiu Xiaodong return count; 14724bf6e4bbSLiu Xiaodong } 14734bf6e4bbSLiu Xiaodong 14744bf6e4bbSLiu Xiaodong static int 14754bf6e4bbSLiu Xiaodong reactor_interrupt_init(struct spdk_reactor *reactor) 14764bf6e4bbSLiu Xiaodong { 1477*1a5bdab3SAnkit Kumar struct spdk_event_handler_opts opts = {}; 14784bf6e4bbSLiu Xiaodong int rc; 14794bf6e4bbSLiu Xiaodong 14804bf6e4bbSLiu Xiaodong rc = spdk_fd_group_create(&reactor->fgrp); 14814bf6e4bbSLiu Xiaodong if (rc != 0) { 14824bf6e4bbSLiu Xiaodong return rc; 14834bf6e4bbSLiu Xiaodong } 14844bf6e4bbSLiu Xiaodong 14854bf6e4bbSLiu Xiaodong reactor->resched_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); 14864bf6e4bbSLiu Xiaodong if (reactor->resched_fd < 0) { 14874bf6e4bbSLiu Xiaodong rc = -EBADF; 14884bf6e4bbSLiu Xiaodong goto err; 14894bf6e4bbSLiu Xiaodong } 14904bf6e4bbSLiu Xiaodong 1491*1a5bdab3SAnkit Kumar spdk_fd_group_get_default_event_handler_opts(&opts, sizeof(opts)); 1492*1a5bdab3SAnkit Kumar opts.fd_type = SPDK_FD_TYPE_EVENTFD; 1493*1a5bdab3SAnkit Kumar 1494*1a5bdab3SAnkit Kumar rc = SPDK_FD_GROUP_ADD_EXT(reactor->fgrp, reactor->resched_fd, 1495*1a5bdab3SAnkit Kumar reactor_schedule_thread_event, reactor, &opts); 14964bf6e4bbSLiu Xiaodong if (rc) { 14974bf6e4bbSLiu Xiaodong close(reactor->resched_fd); 14984bf6e4bbSLiu Xiaodong goto err; 14994bf6e4bbSLiu Xiaodong } 15004bf6e4bbSLiu Xiaodong 15014bf6e4bbSLiu Xiaodong reactor->events_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); 15024bf6e4bbSLiu Xiaodong if (reactor->events_fd < 0) { 15034bf6e4bbSLiu Xiaodong spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd); 15044bf6e4bbSLiu Xiaodong close(reactor->resched_fd); 15054bf6e4bbSLiu Xiaodong 15064bf6e4bbSLiu Xiaodong rc = -EBADF; 15074bf6e4bbSLiu Xiaodong goto err; 15084bf6e4bbSLiu Xiaodong } 15094bf6e4bbSLiu Xiaodong 1510*1a5bdab3SAnkit Kumar rc = SPDK_FD_GROUP_ADD_EXT(reactor->fgrp, reactor->events_fd, 1511*1a5bdab3SAnkit Kumar event_queue_run_batch, reactor, &opts); 15124bf6e4bbSLiu Xiaodong if (rc) { 15134bf6e4bbSLiu Xiaodong spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd); 15144bf6e4bbSLiu Xiaodong close(reactor->resched_fd); 15154bf6e4bbSLiu Xiaodong close(reactor->events_fd); 15164bf6e4bbSLiu Xiaodong goto err; 15174bf6e4bbSLiu Xiaodong } 15184bf6e4bbSLiu Xiaodong 15194bf6e4bbSLiu Xiaodong return 0; 15204bf6e4bbSLiu Xiaodong 15214bf6e4bbSLiu Xiaodong err: 15224bf6e4bbSLiu Xiaodong spdk_fd_group_destroy(reactor->fgrp); 1523eff5b149SLiu Xiaodong reactor->fgrp = NULL; 15244bf6e4bbSLiu Xiaodong return rc; 15254bf6e4bbSLiu Xiaodong } 15264bf6e4bbSLiu Xiaodong #else 15274bf6e4bbSLiu Xiaodong static int 15284bf6e4bbSLiu Xiaodong reactor_interrupt_init(struct spdk_reactor *reactor) 15294bf6e4bbSLiu Xiaodong { 15304bf6e4bbSLiu Xiaodong return -ENOTSUP; 15314bf6e4bbSLiu Xiaodong } 15324bf6e4bbSLiu Xiaodong #endif 15334bf6e4bbSLiu Xiaodong 15344bf6e4bbSLiu Xiaodong static void 15354bf6e4bbSLiu Xiaodong reactor_interrupt_fini(struct spdk_reactor *reactor) 15364bf6e4bbSLiu Xiaodong { 15374bf6e4bbSLiu Xiaodong struct spdk_fd_group *fgrp = reactor->fgrp; 15384bf6e4bbSLiu Xiaodong 15394bf6e4bbSLiu Xiaodong if (!fgrp) { 15404bf6e4bbSLiu Xiaodong return; 15414bf6e4bbSLiu Xiaodong } 15424bf6e4bbSLiu Xiaodong 15434bf6e4bbSLiu Xiaodong spdk_fd_group_remove(fgrp, reactor->events_fd); 15444bf6e4bbSLiu Xiaodong spdk_fd_group_remove(fgrp, reactor->resched_fd); 15454bf6e4bbSLiu Xiaodong 15464bf6e4bbSLiu Xiaodong close(reactor->events_fd); 15474bf6e4bbSLiu Xiaodong close(reactor->resched_fd); 15484bf6e4bbSLiu Xiaodong 15494bf6e4bbSLiu Xiaodong spdk_fd_group_destroy(fgrp); 15504bf6e4bbSLiu Xiaodong reactor->fgrp = NULL; 15514bf6e4bbSLiu Xiaodong } 15524bf6e4bbSLiu Xiaodong 1553c6adf304SMaciej Szwed static struct spdk_governor * 1554ece94716STomasz Zawadzki _governor_find(const char *name) 1555c6adf304SMaciej Szwed { 1556c6adf304SMaciej Szwed struct spdk_governor *governor, *tmp; 1557c6adf304SMaciej Szwed 1558c6adf304SMaciej Szwed TAILQ_FOREACH_SAFE(governor, &g_governor_list, link, tmp) { 1559c6adf304SMaciej Szwed if (strcmp(name, governor->name) == 0) { 1560c6adf304SMaciej Szwed return governor; 1561c6adf304SMaciej Szwed } 1562c6adf304SMaciej Szwed } 1563c6adf304SMaciej Szwed 1564c6adf304SMaciej Szwed return NULL; 1565c6adf304SMaciej Szwed } 1566c6adf304SMaciej Szwed 1567c6adf304SMaciej Szwed int 1568a86e40f3STomasz Zawadzki spdk_governor_set(const char *name) 1569c6adf304SMaciej Szwed { 1570c6adf304SMaciej Szwed struct spdk_governor *governor; 15715bf2973eSTomasz Zawadzki int rc = 0; 1572c6adf304SMaciej Szwed 15732e394521STomasz Zawadzki /* NULL governor was specifically requested */ 15742e394521STomasz Zawadzki if (name == NULL) { 15752e394521STomasz Zawadzki if (g_governor) { 15762e394521STomasz Zawadzki g_governor->deinit(); 15772e394521STomasz Zawadzki } 15782e394521STomasz Zawadzki g_governor = NULL; 15792e394521STomasz Zawadzki return 0; 15802e394521STomasz Zawadzki } 15812e394521STomasz Zawadzki 1582c6adf304SMaciej Szwed governor = _governor_find(name); 1583c6adf304SMaciej Szwed if (governor == NULL) { 1584c6adf304SMaciej Szwed return -EINVAL; 1585c6adf304SMaciej Szwed } 1586c6adf304SMaciej Szwed 15872e394521STomasz Zawadzki if (g_governor == governor) { 15882e394521STomasz Zawadzki return 0; 15895bf2973eSTomasz Zawadzki } 15905bf2973eSTomasz Zawadzki 15912e394521STomasz Zawadzki rc = governor->init(); 15925bf2973eSTomasz Zawadzki if (rc == 0) { 15932e394521STomasz Zawadzki if (g_governor) { 15942e394521STomasz Zawadzki g_governor->deinit(); 15955bf2973eSTomasz Zawadzki } 15962e394521STomasz Zawadzki g_governor = governor; 15972e394521STomasz Zawadzki } 15982e394521STomasz Zawadzki 1599c6adf304SMaciej Szwed return rc; 1600c6adf304SMaciej Szwed } 1601c6adf304SMaciej Szwed 1602abf52d7dSKrzysztof Karas struct spdk_governor * 1603a86e40f3STomasz Zawadzki spdk_governor_get(void) 1604abf52d7dSKrzysztof Karas { 16052e394521STomasz Zawadzki return g_governor; 1606abf52d7dSKrzysztof Karas } 1607abf52d7dSKrzysztof Karas 1608c6adf304SMaciej Szwed void 1609a86e40f3STomasz Zawadzki spdk_governor_register(struct spdk_governor *governor) 1610c6adf304SMaciej Szwed { 1611c6adf304SMaciej Szwed if (_governor_find(governor->name)) { 1612c6adf304SMaciej Szwed SPDK_ERRLOG("governor named '%s' already registered.\n", governor->name); 1613c6adf304SMaciej Szwed assert(false); 1614c6adf304SMaciej Szwed return; 1615c6adf304SMaciej Szwed } 1616c6adf304SMaciej Szwed 1617c6adf304SMaciej Szwed TAILQ_INSERT_TAIL(&g_governor_list, governor, link); 1618c6adf304SMaciej Szwed } 1619c6adf304SMaciej Szwed 16202172c432STomasz Zawadzki SPDK_LOG_REGISTER_COMPONENT(reactor) 162182c46626SAnisa Su 162282c46626SAnisa Su static void 162382c46626SAnisa Su scheduler_trace(void) 162482c46626SAnisa Su { 162582c46626SAnisa Su struct spdk_trace_tpoint_opts opts[] = { 162682c46626SAnisa Su { 162782c46626SAnisa Su "SCHEDULER_PERIOD_START", TRACE_SCHEDULER_PERIOD_START, 162882c46626SAnisa Su OWNER_TYPE_NONE, OBJECT_NONE, 0, 162982c46626SAnisa Su { 163082c46626SAnisa Su 163182c46626SAnisa Su } 163282c46626SAnisa Su }, 163382c46626SAnisa Su { 163482c46626SAnisa Su "SCHEDULER_CORE_STATS", TRACE_SCHEDULER_CORE_STATS, 163582c46626SAnisa Su OWNER_TYPE_REACTOR, OBJECT_NONE, 0, 163682c46626SAnisa Su { 163782c46626SAnisa Su { "busy", SPDK_TRACE_ARG_TYPE_INT, 8}, 163882c46626SAnisa Su { "idle", SPDK_TRACE_ARG_TYPE_INT, 8} 163982c46626SAnisa Su } 164082c46626SAnisa Su }, 164182c46626SAnisa Su { 164282c46626SAnisa Su "SCHEDULER_THREAD_STATS", TRACE_SCHEDULER_THREAD_STATS, 164382c46626SAnisa Su OWNER_TYPE_THREAD, OBJECT_NONE, 0, 164482c46626SAnisa Su { 164582c46626SAnisa Su { "busy", SPDK_TRACE_ARG_TYPE_INT, 8}, 164682c46626SAnisa Su { "idle", SPDK_TRACE_ARG_TYPE_INT, 8} 164782c46626SAnisa Su } 164882c46626SAnisa Su }, 164982c46626SAnisa Su { 165082c46626SAnisa Su "SCHEDULER_MOVE_THREAD", TRACE_SCHEDULER_MOVE_THREAD, 165182c46626SAnisa Su OWNER_TYPE_THREAD, OBJECT_NONE, 0, 165282c46626SAnisa Su { 165382c46626SAnisa Su { "src", SPDK_TRACE_ARG_TYPE_INT, 8 }, 165482c46626SAnisa Su { "dst", SPDK_TRACE_ARG_TYPE_INT, 8 } 165582c46626SAnisa Su } 165682c46626SAnisa Su } 165782c46626SAnisa Su }; 165882c46626SAnisa Su 165982c46626SAnisa Su spdk_trace_register_owner_type(OWNER_TYPE_REACTOR, 'r'); 166082c46626SAnisa Su spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts)); 166182c46626SAnisa Su 166282c46626SAnisa Su } 166382c46626SAnisa Su 166482c46626SAnisa Su SPDK_TRACE_REGISTER_FN(scheduler_trace, "scheduler", TRACE_GROUP_SCHEDULER) 1665