/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <assert.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal_memconfig.h>
#include <rte_memory.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>
#include <rte_memzone.h>

#include "rte_timer.h"

/**
 * Per-lcore info for timers.
 */
struct priv_timer {
	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
	rte_spinlock_t list_lock;       /**< lock to protect list access */

	/** per-core variable that is true if a timer was updated on this
	 *  core since last reset of the variable */
	int updated;

	/** track the current depth of the skiplist */
	unsigned curr_skiplist_depth;

	unsigned prev_lcore;            /**< used for lcore round robin */

	/** running timer on this lcore now */
	struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	/** per-lcore statistics */
	struct rte_timer_debug_stats stats;
#endif
} __rte_cache_aligned;

#define FL_ALLOCATED	(1 << 0)
struct rte_timer_data {
	struct priv_timer priv_timer[RTE_MAX_LCORE];
	uint8_t internal_flags;
};

#define RTE_MAX_DATA_ELS 64
static const struct rte_memzone *rte_timer_data_mz;
static int *volatile rte_timer_mz_refcnt;
static struct rte_timer_data *rte_timer_data_arr;
static const uint32_t default_data_id;
static uint32_t rte_timer_subsystem_initialized;

/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(priv_timer, name, n) do {			\
		unsigned __lcore_id = rte_lcore_id();			\
		if (__lcore_id < RTE_MAX_LCORE)				\
			priv_timer[__lcore_id].stats.name += (n);	\
	} while (0)
#else
#define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
#endif

static inline int
timer_data_valid(uint32_t id)
{
	return rte_timer_data_arr &&
		(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
}

/* validate ID and retrieve timer data pointer, or return error value */
#define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {	\
	if (id >= RTE_MAX_DATA_ELS || !timer_data_valid(id))		\
		return retval;						\
	timer_data = &rte_timer_data_arr[id];				\
} while (0)

int
rte_timer_data_alloc(uint32_t *id_ptr)
{
	int i;
	struct rte_timer_data *data;

	if (!rte_timer_subsystem_initialized)
		return -ENOMEM;

	for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
		data = &rte_timer_data_arr[i];
		if (!(data->internal_flags & FL_ALLOCATED)) {
			data->internal_flags |= FL_ALLOCATED;

			if (id_ptr)
				*id_ptr = i;

			return 0;
		}
	}

	return -ENOSPC;
}

int
rte_timer_data_dealloc(uint32_t id)
{
	struct rte_timer_data *timer_data;
	TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);

	timer_data->internal_flags &= ~(FL_ALLOCATED);

	return 0;
}
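
/* Illustrative sketch (kept out of the build): one way an application might
 * use the rte_timer_data_alloc()/rte_timer_alt_*() API to drive a private
 * timer list instead of the default one.  The example_* names and the
 * busy-poll loop are assumptions made for this example, not part of the
 * library.
 */
#if 0
static void
example_timer_cb(struct rte_timer *tim, void *arg)
{
	/* per-timer callback, stored in tim->f by rte_timer_alt_reset() */
	RTE_SET_USED(tim);
	RTE_SET_USED(arg);
}

static void
example_dispatch_cb(struct rte_timer *tim)
{
	/* rte_timer_alt_manage() hands every expired timer to this single
	 * callback; dispatch to the per-timer callback saved at reset time */
	tim->f(tim, tim->arg);
}

static int
example_alt_usage(void)
{
	uint32_t timer_data_id;
	struct rte_timer tim;
	unsigned int poll_lcores[] = { rte_lcore_id() };

	rte_timer_subsystem_init();
	if (rte_timer_data_alloc(&timer_data_id) < 0)
		return -1;

	rte_timer_init(&tim);
	rte_timer_alt_reset(timer_data_id, &tim, rte_get_timer_hz(), SINGLE,
			    rte_lcore_id(), example_timer_cb, NULL);

	/* poll this lcore's pending list until the one-shot timer fires */
	while (rte_timer_pending(&tim))
		rte_timer_alt_manage(timer_data_id, poll_lcores,
				     RTE_DIM(poll_lcores),
				     example_dispatch_cb);

	rte_timer_data_dealloc(timer_data_id);
	return 0;
}
#endif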

/* Init the timer library. Allocate an array of timer data structs in shared
 * memory, and allocate the zeroth entry for use with original timer
 * APIs. Since the intersection of the sets of lcore ids in primary and
 * secondary processes should be empty, the zeroth entry can be shared by
 * multiple processes.
 */
int
rte_timer_subsystem_init(void)
{
	const struct rte_memzone *mz;
	struct rte_timer_data *data;
	int i, lcore_id;
	static const char *mz_name = "rte_timer_mz";
	const size_t data_arr_size =
			RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
	const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt);
	bool do_full_init = true;

	rte_mcfg_timer_lock();

	if (rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return -EALREADY;
	}

	mz = rte_memzone_lookup(mz_name);
	if (mz == NULL) {
		mz = rte_memzone_reserve_aligned(mz_name, mem_size,
				SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
		if (mz == NULL) {
			rte_mcfg_timer_unlock();
			return -ENOMEM;
		}
		do_full_init = true;
	} else
		do_full_init = false;

	rte_timer_data_mz = mz;
	rte_timer_data_arr = mz->addr;
	rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);

	if (do_full_init) {
		for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
			data = &rte_timer_data_arr[i];

			for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
			     lcore_id++) {
				rte_spinlock_init(
					&data->priv_timer[lcore_id].list_lock);
				data->priv_timer[lcore_id].prev_lcore =
					lcore_id;
			}
		}
	}

	rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
	(*rte_timer_mz_refcnt)++;

	rte_timer_subsystem_initialized = 1;

	rte_mcfg_timer_unlock();

	return 0;
}

void
rte_timer_subsystem_finalize(void)
{
	rte_mcfg_timer_lock();

	if (!rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return;
	}

	if (--(*rte_timer_mz_refcnt) == 0)
		rte_memzone_free(rte_timer_data_mz);

	rte_timer_subsystem_initialized = 0;

	rte_mcfg_timer_unlock();
}

/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
	union rte_timer_status status;

	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	rte_atomic_store_explicit(&tim->status.u32, status.u32,
		rte_memory_order_relaxed);
}
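
/* Illustrative sketch (kept out of the build): minimal use of the original,
 * default-list API.  A handle must go through rte_timer_init() before its
 * first rte_timer_reset(); one second is rte_get_timer_hz() ticks.  The
 * example_* names are invented for this example.
 */
#if 0
static void
example_single_shot_cb(struct rte_timer *tim, void *arg)
{
	/* called once from rte_timer_manage() on the lcore owning the timer */
	RTE_SET_USED(tim);
	RTE_SET_USED(arg);
}

static void
example_arm_timer(struct rte_timer *tim)
{
	rte_timer_subsystem_init();
	rte_timer_init(tim);

	/* fire once, one second from now; LCORE_ID_ANY lets the library pick
	 * the target lcore round-robin, and the timer fires when that lcore
	 * calls rte_timer_manage() */
	rte_timer_reset(tim, rte_get_timer_hz(), SINGLE, LCORE_ID_ANY,
			example_single_shot_cb, NULL);
}
#endif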

/*
 * if timer is pending or stopped (or running on the same core as
 * us), mark timer as configuring, and on success return the previous
 * status of the timer
 */
static int
timer_set_config_state(struct rte_timer *tim,
		       union rte_timer_status *ret_prev_status,
		       struct priv_timer *priv_timer)
{
	union rte_timer_status prev_status, status;
	int success = 0;
	unsigned lcore_id;

	lcore_id = rte_lcore_id();

	/* wait until the timer is in a correct state before updating,
	 * and mark it as being configured */
	prev_status.u32 = rte_atomic_load_explicit(&tim->status.u32,
		rte_memory_order_relaxed);

	while (success == 0) {
		/* timer is running on another core
		 * or ready to run on local core, exit
		 */
		if (prev_status.state == RTE_TIMER_RUNNING &&
		    (prev_status.owner != (uint16_t)lcore_id ||
		     tim != priv_timer[lcore_id].running_tim))
			return -1;

		/* timer is being configured on another core */
		if (prev_status.state == RTE_TIMER_CONFIG)
			return -1;

		/* here, we know that timer is stopped or pending,
		 * mark it atomically as being configured */
		status.state = RTE_TIMER_CONFIG;
		status.owner = (int16_t)lcore_id;
		/* CONFIG states are acting as locked states. If the
		 * timer is in CONFIG state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = rte_atomic_compare_exchange_strong_explicit(
					&tim->status.u32,
					(uint32_t *)(uintptr_t)&prev_status.u32,
					status.u32,
					rte_memory_order_acquire,
					rte_memory_order_relaxed);
	}

	ret_prev_status->u32 = prev_status.u32;
	return 0;
}

/*
 * if timer is pending, mark timer as running
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int success = 0;

	/* wait until the timer is in a correct state before updating,
	 * and mark it as running */
	prev_status.u32 = rte_atomic_load_explicit(&tim->status.u32,
		rte_memory_order_relaxed);

	while (success == 0) {
		/* timer is not pending anymore */
		if (prev_status.state != RTE_TIMER_PENDING)
			return -1;

		/* we know that the timer will be pending at this point
		 * mark it atomically as being running
		 */
		status.state = RTE_TIMER_RUNNING;
		status.owner = (int16_t)lcore_id;
		/* RUNNING states are acting as locked states. If the
		 * timer is in RUNNING state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = rte_atomic_compare_exchange_strong_explicit(
					&tim->status.u32,
					(uint32_t *)(uintptr_t)&prev_status.u32,
					status.u32,
					rte_memory_order_acquire,
					rte_memory_order_relaxed);
	}

	return 0;
}

/*
 * Return a skiplist level for a new entry.
 * This probabilistically gives a level with p=1/4 that an entry at level n
 * will also appear at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
	 * bit position of a (pseudo)random number.
	 */
	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;

	/* limit the levels used to one above our current level, so we don't,
	 * for instance, have a level 0 and a level 7 without anything between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH-1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
	count++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
	return level;
}
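
/* Worked example of the level computation above: bit 0 of the random value
 * is always cleared, so if the lowest set bit is at position 3, rte_bsf32()
 * returns 3 and level = (3 - 1) / 2 = 1.  Bits 1-2 map to level 0
 * (probability 3/4), bits 3-4 to level 1 (3/16), bits 5-6 to level 2 (3/64),
 * and so on, which gives the 1-in-4 promotion ratio described above.
 */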

/*
 * For a given time value, get the entries at each level which
 * are <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		       struct rte_timer **prev, struct priv_timer *priv_timer)
{
	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
	prev[lvl] = &priv_timer[tim_lcore].pending_head;
	while (lvl != 0) {
		lvl--;
		prev[lvl] = prev[lvl+1];
		while (prev[lvl]->sl_next[lvl] &&
		       prev[lvl]->sl_next[lvl]->expire <= time_val)
			prev[lvl] = prev[lvl]->sl_next[lvl];
	}
}

/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
				struct rte_timer **prev,
				struct priv_timer *priv_timer)
{
	int i;

	/* to get a specific entry in the list, look for entries just lower
	 * than the time value, then advance on each level individually as
	 * necessary
	 */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
		while (prev[i]->sl_next[i] != NULL &&
		       prev[i]->sl_next[i] != tim &&
		       prev[i]->sl_next[i]->expire <= tim->expire)
			prev[i] = prev[i]->sl_next[i];
	}
}

/* call with lock held as necessary
 * add in list
 * timer must be in config state
 * timer must not be in a list
 */
static void
timer_add(struct rte_timer *tim, unsigned int tim_lcore,
	  struct priv_timer *priv_timer)
{
	unsigned lvl;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* find where exactly this element goes in the list of elements
	 * for each depth. */
	timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);

	/* now assign it a new level and add at that level */
	const unsigned tim_level = timer_get_skiplist_level(
			priv_timer[tim_lcore].curr_skiplist_depth);
	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
		priv_timer[tim_lcore].curr_skiplist_depth++;

	lvl = tim_level;
	while (lvl > 0) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
		lvl--;
	}
	tim->sl_next[0] = prev[0]->sl_next[0];
	prev[0]->sl_next[0] = tim;

	/* save the lowest list entry into the expire field of the dummy hdr
	 * NOTE: this is not atomic on 32-bit */
	priv_timer[tim_lcore].pending_head.expire =
		priv_timer[tim_lcore].pending_head.sl_next[0]->expire;
}

/*
 * del from list, lock if needed
 * timer must be in config state
 * timer must be in a list
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
	  int local_is_locked, struct priv_timer *priv_timer)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned prev_owner = prev_status.owner;
	int i;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if the timer is pending on another core, we need to lock the
	 * list; if it is on local core, we need to lock if we are not
	 * called from rte_timer_manage() */
	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
		priv_timer[prev_owner].pending_head.expire =
				((tim->sl_next[0] == NULL) ?
				 0 : tim->sl_next[0]->expire);

	/* adjust pointers from previous entries to point past this */
	timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i]->sl_next[i] == tim)
			prev[i]->sl_next[i] = tim->sl_next[i];
	}

	/* in case we deleted last entry at a level, adjust down max level */
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
			priv_timer[prev_owner].curr_skiplist_depth--;
		else
			break;

	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}

/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked,
		  struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	int ret;
	unsigned lcore_id = rte_lcore_id();
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (lcore_id < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
				priv_timer[lcore_id].prev_lcore,
				0, 1);
			priv_timer[lcore_id].prev_lcore = tim_lcore;
		} else
			/* non-EAL threads do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore. */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
	}

	/* wait until the timer is in a correct state before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, reset, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	/* if timer needs to be scheduled on another core, we need to
	 * lock the destination list; if it is on local core, we need to lock if
	 * we are not called from rte_timer_manage()
	 */
	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	__TIMER_STAT_ADD(priv_timer, pending, 1);
	timer_add(tim, tim_lcore, priv_timer);

	/* update state: as we are in CONFIG state, only we can modify
	 * the state, so we don't need to use cmpset() here */
	status.state = RTE_TIMER_PENDING;
	status.owner = (int16_t)tim_lcore;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	rte_atomic_store_explicit(&tim->status.u32, status.u32,
		rte_memory_order_release);

	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);

	return 0;
}
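
/* Illustrative sketch (kept out of the build): the 'ticks' argument of the
 * public reset functions below is expressed in timer cycles, so wall-clock
 * durations are usually derived from rte_get_timer_hz().  The example_*
 * names are invented for this example.
 */
#if 0
static void
example_periodic_cb(struct rte_timer *tim, void *arg)
{
	RTE_SET_USED(tim);
	RTE_SET_USED(arg);
}

static void
example_start_100ms_periodic(struct rte_timer *tim)
{
	/* 100 ms expressed in timer cycles */
	uint64_t ticks = rte_get_timer_hz() / 10;

	/* with PERIODICAL, the period is kept equal to 'ticks', so the
	 * manage functions re-arm the timer after each callback */
	rte_timer_reset(tim, ticks, PERIODICAL, rte_lcore_id(),
			example_periodic_cb, NULL);
}
#endif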

/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
		enum rte_timer_type type, unsigned int tim_lcore,
		rte_timer_cb_t fct, void *arg)
{
	return rte_timer_alt_reset(default_data_id, tim, ticks, type,
				   tim_lcore, fct, arg);
}

int
rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
		    uint64_t ticks, enum rte_timer_type type,
		    unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
{
	uint64_t cur_time = rte_get_timer_cycles();
	uint64_t period;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	if (type == PERIODICAL)
		period = ticks;
	else
		period = 0;

	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
				 fct, arg, 0, timer_data);
}

/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		     enum rte_timer_type type, unsigned tim_lcore,
		     rte_timer_cb_t fct, void *arg)
{
	while (rte_timer_reset(tim, ticks, type, tim_lcore,
			       fct, arg) != 0)
		rte_pause();
}

static int
__rte_timer_stop(struct rte_timer *tim,
		 struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* wait until the timer is in a correct state before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, stop, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, 0, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	/* mark timer as stopped */
	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	rte_atomic_store_explicit(&tim->status.u32, status.u32,
		rte_memory_order_release);

	return 0;
}

/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
	return rte_timer_alt_stop(default_data_id, tim);
}

int
rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	return __rte_timer_stop(tim, timer_data);
}

/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	while (rte_timer_stop(tim) != 0)
		rte_pause();
}

/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
	return rte_atomic_load_explicit(&tim->status.state,
		rte_memory_order_relaxed) == RTE_TIMER_PENDING;
}
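
/* Illustrative sketch (kept out of the build): rte_timer_manage() only runs
 * callbacks for timers scheduled on the calling lcore, so every lcore that
 * owns timers must poll it.  Throttling the calls against a cached TSC
 * value, in the style of the DPDK sample applications, keeps the overhead
 * low; the resolution constant below is an arbitrary example value.
 */
#if 0
#define EXAMPLE_TIMER_RESOLUTION_CYCLES 20000000ULL /* ~10 ms at 2 GHz */

static void
example_lcore_main_loop(void)
{
	uint64_t prev_tsc = 0, cur_tsc, diff_tsc;

	while (1) {
		/* ... per-lcore packet processing work ... */

		cur_tsc = rte_rdtsc();
		diff_tsc = cur_tsc - prev_tsc;
		if (diff_tsc > EXAMPLE_TIMER_RESOLUTION_CYCLES) {
			rte_timer_manage();
			prev_tsc = cur_tsc;
		}
	}
}
#endif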

/* must be called periodically, run all timers that expired */
static void
__rte_timer_manage(struct rte_timer_data *timer_data)
{
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim;
	struct rte_timer *run_first_tim, **pprev;
	unsigned lcore_id = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(lcore_id < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(priv_timer, manage, 1);
	/* optimize for the case where per-cpu list is empty */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
		return;
	cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
	/* on 64-bit the value cached in pending_head.expire will be
	 * updated atomically, so we can consult that for a quick check here
	 * outside the lock */
	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
		return;
#endif

	/* browse ordered list, add expired timers in 'expired' list */
	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

	/* if nothing to do just unlock and return */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		return;
	}

	/* save start of list of expired timers */
	tim = priv_timer[lcore_id].pending_head.sl_next[0];

	/* break the existing list at current time point */
	timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
	for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i] == &priv_timer[lcore_id].pending_head)
			continue;
		priv_timer[lcore_id].pending_head.sl_next[i] =
			prev[i]->sl_next[i];
		if (prev[i]->sl_next[i] == NULL)
			priv_timer[lcore_id].curr_skiplist_depth--;
		prev[i]->sl_next[i] = NULL;
	}

	/* transition run-list from PENDING to RUNNING */
	run_first_tim = tim;
	pprev = &run_first_tim;

	for ( ; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];

		ret = timer_set_running_state(tim);
		if (likely(ret == 0)) {
			pprev = &tim->sl_next[0];
		} else {
			/* another core is trying to re-config this one,
			 * remove it from local expired list
			 */
			*pprev = next_tim;
		}
	}

	/* update the next to expire timer value */
	priv_timer[lcore_id].pending_head.expire =
		(priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
			priv_timer[lcore_id].pending_head.sl_next[0]->expire;

	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	/* now scan expired list and call callbacks */
	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];
		priv_timer[lcore_id].updated = 0;
		priv_timer[lcore_id].running_tim = tim;

		/* execute callback function with list unlocked */
		tim->f(tim, tim->arg);

		__TIMER_STAT_ADD(priv_timer, pending, -1);
		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here */
		if (priv_timer[lcore_id].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(priv_timer, pending, 1);
			status.owner = (int16_t)lcore_id;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, lcore_id, tim->f, tim->arg, 1,
				timer_data);
			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		}
	}
	priv_timer[lcore_id].running_tim = NULL;
}
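
/* Illustrative sketch (kept out of the build): a callback runs with the list
 * unlocked while its timer is in RUNNING state and owned by this lcore, so
 * it may legitimately call rte_timer_stop() or rte_timer_reset() on itself;
 * the manage function above detects that through the per-lcore 'updated'
 * flag and skips its own stop/re-arm step.  The example_ctx counter is
 * invented for this example.
 */
#if 0
struct example_ctx {
	unsigned int remaining;
};

static void
example_self_limiting_cb(struct rte_timer *tim, void *arg)
{
	struct example_ctx *ctx = arg;

	if (--ctx->remaining == 0)
		rte_timer_stop(tim); /* sets 'updated'; timer goes to STOP */
	/* otherwise a PERIODICAL timer is re-armed by rte_timer_manage() */
}
#endif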

int
rte_timer_manage(void)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	__rte_timer_manage(timer_data);

	return 0;
}

int
rte_timer_alt_manage(uint32_t timer_data_id,
		     unsigned int *poll_lcores,
		     int nb_poll_lcores,
		     rte_timer_alt_manage_cb_t f)
{
	unsigned int default_poll_lcores[] = {rte_lcore_id()};
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim, **pprev;
	struct rte_timer *run_first_tims[RTE_MAX_LCORE];
	unsigned int this_lcore = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, j, ret;
	int nb_runlists = 0;
	struct rte_timer_data *data;
	struct priv_timer *privp;
	uint32_t poll_lcore;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(this_lcore < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(data->priv_timer, manage, 1);

	if (poll_lcores == NULL) {
		poll_lcores = default_poll_lcores;
		nb_poll_lcores = RTE_DIM(default_poll_lcores);
	}

	for (i = 0; i < nb_poll_lcores; i++) {
		poll_lcore = poll_lcores[i];
		privp = &data->priv_timer[poll_lcore];

		/* optimize for the case where per-cpu list is empty */
		if (privp->pending_head.sl_next[0] == NULL)
			continue;
		cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
		/* on 64-bit the value cached in pending_head.expire will
		 * be updated atomically, so we can consult that for a quick
		 * check here outside the lock
		 */
		if (likely(privp->pending_head.expire > cur_time))
			continue;
#endif

		/* browse ordered list, add expired timers in 'expired' list */
		rte_spinlock_lock(&privp->list_lock);

		/* if nothing to do just unlock and return */
		if (privp->pending_head.sl_next[0] == NULL ||
		    privp->pending_head.sl_next[0]->expire > cur_time) {
			rte_spinlock_unlock(&privp->list_lock);
			continue;
		}

		/* save start of list of expired timers */
		tim = privp->pending_head.sl_next[0];

		/* break the existing list at current time point */
		timer_get_prev_entries(cur_time, poll_lcore, prev,
				       data->priv_timer);
		for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
			if (prev[j] == &privp->pending_head)
				continue;
			privp->pending_head.sl_next[j] =
				prev[j]->sl_next[j];
			if (prev[j]->sl_next[j] == NULL)
				privp->curr_skiplist_depth--;

			prev[j]->sl_next[j] = NULL;
		}

		/* transition run-list from PENDING to RUNNING */
		run_first_tims[nb_runlists] = tim;
		pprev = &run_first_tims[nb_runlists];
		nb_runlists++;

		for ( ; tim != NULL; tim = next_tim) {
			next_tim = tim->sl_next[0];

			ret = timer_set_running_state(tim);
			if (likely(ret == 0)) {
				pprev = &tim->sl_next[0];
			} else {
				/* another core is trying to re-config this
				 * one, remove it from local expired list
				 */
				*pprev = next_tim;
			}
		}

		/* update the next to expire timer value */
		privp->pending_head.expire =
			(privp->pending_head.sl_next[0] == NULL) ? 0 :
				privp->pending_head.sl_next[0]->expire;

		rte_spinlock_unlock(&privp->list_lock);
	}

	/* Now process the run lists */
	while (1) {
		bool done = true;
		uint64_t min_expire = UINT64_MAX;
		int min_idx = 0;

		/* Find the next oldest timer to process */
		for (i = 0; i < nb_runlists; i++) {
			tim = run_first_tims[i];

			if (tim != NULL && tim->expire < min_expire) {
				min_expire = tim->expire;
				min_idx = i;
				done = false;
			}
		}

		if (done)
			break;

		tim = run_first_tims[min_idx];

		/* Move down the runlist from which we picked a timer to
		 * execute
		 */
		run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];

		data->priv_timer[this_lcore].updated = 0;
		data->priv_timer[this_lcore].running_tim = tim;

		/* Call the provided callback function */
		f(tim);

		__TIMER_STAT_ADD(data->priv_timer, pending, -1);

		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here
		 */
		if (data->priv_timer[this_lcore].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(
				&data->priv_timer[this_lcore].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(data->priv_timer, pending, 1);
			status.owner = (int16_t)this_lcore;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, this_lcore, tim->f, tim->arg, 1,
				data);
			rte_spinlock_unlock(
				&data->priv_timer[this_lcore].list_lock);
		}

		data->priv_timer[this_lcore].running_tim = NULL;
	}

	return 0;
}

/* Walk pending lists, stopping timers and calling user-specified function */
int
rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
		   int nb_walk_lcores,
		   rte_timer_stop_all_cb_t f, void *f_arg)
{
	int i;
	struct priv_timer *priv_timer;
	uint32_t walk_lcore;
	struct rte_timer *tim, *next_tim;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	for (i = 0; i < nb_walk_lcores; i++) {
		walk_lcore = walk_lcores[i];
		priv_timer = &timer_data->priv_timer[walk_lcore];

		for (tim = priv_timer->pending_head.sl_next[0];
		     tim != NULL;
		     tim = next_tim) {
			next_tim = tim->sl_next[0];

			__rte_timer_stop(tim, timer_data);

			if (f)
				f(tim, f_arg);
		}
	}

	return 0;
}

int64_t
rte_timer_next_ticks(void)
{
	unsigned int lcore_id = rte_lcore_id();
	struct rte_timer_data *timer_data;
	struct priv_timer *priv_timer;
	const struct rte_timer *tm;
	uint64_t cur_time;
	int64_t left = -ENOENT;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	priv_timer = timer_data->priv_timer;
	cur_time = rte_get_timer_cycles();

	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
	tm = priv_timer[lcore_id].pending_head.sl_next[0];
	if (tm) {
		left = tm->expire - cur_time;
		if (left < 0)
			left = 0;
	}
	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	return left;
}

/* dump statistics about timers */
static void
__rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats sum;
	unsigned lcore_id;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.reset += priv_timer[lcore_id].stats.reset;
		sum.stop += priv_timer[lcore_id].stats.stop;
		sum.manage += priv_timer[lcore_id].stats.manage;
		sum.pending += priv_timer[lcore_id].stats.pending;
	}
	fprintf(f, "Timer statistics:\n");
	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}

int
rte_timer_dump_stats(FILE *f)
{
	return rte_timer_alt_dump_stats(default_data_id, f);
}

int
rte_timer_alt_dump_stats(uint32_t timer_data_id __rte_unused, FILE *f)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	__rte_timer_dump_stats(timer_data, f);

	return 0;
}
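
/* Illustrative sketch (kept out of the build): rte_timer_next_ticks() above
 * reports how many timer cycles remain until the earliest timer on the
 * calling lcore expires (0 if already overdue, -ENOENT if none), which an
 * application can use to sleep instead of busy-polling rte_timer_manage().
 * The helper name and sleep strategy are assumptions for this example.
 */
#if 0
static void
example_sleepy_poll(void)
{
	int64_t ticks = rte_timer_next_ticks();

	if (ticks > 0) {
		/* convert timer cycles to microseconds before sleeping */
		uint64_t us = (uint64_t)ticks * 1000000 / rte_get_timer_hz();

		rte_delay_us_sleep(us);
	}

	rte_timer_manage();
}
#endif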