/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <assert.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_errno.h>

#include "rte_timer.h"

/**
 * Per-lcore info for timers.
 */
struct priv_timer {
	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
	rte_spinlock_t list_lock;       /**< lock to protect list access */

	/** per-core variable that is true if a timer was updated on this
	 *  core since last reset of the variable */
	int updated;

	/** track the current depth of the skiplist */
	unsigned curr_skiplist_depth;

	unsigned prev_lcore;            /**< used for lcore round robin */

	/** running timer on this lcore now */
	struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	/** per-lcore statistics */
	struct rte_timer_debug_stats stats;
#endif
} __rte_cache_aligned;

#define FL_ALLOCATED	(1 << 0)
struct rte_timer_data {
	struct priv_timer priv_timer[RTE_MAX_LCORE];
	uint8_t internal_flags;
};

#define RTE_MAX_DATA_ELS 64
static const struct rte_memzone *rte_timer_data_mz;
static int *volatile rte_timer_mz_refcnt;
static struct rte_timer_data *rte_timer_data_arr;
static const uint32_t default_data_id;
static uint32_t rte_timer_subsystem_initialized;

/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(priv_timer, name, n) do {			\
		unsigned __lcore_id = rte_lcore_id();			\
		if (__lcore_id < RTE_MAX_LCORE)				\
			priv_timer[__lcore_id].stats.name += (n);	\
	} while (0)
#else
#define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
#endif

static inline int
timer_data_valid(uint32_t id)
{
	return rte_timer_data_arr &&
		(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
}

/* validate ID and retrieve timer data pointer, or return error value */
#define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {	\
	if (id >= RTE_MAX_DATA_ELS || !timer_data_valid(id))		\
		return retval;						\
	timer_data = &rte_timer_data_arr[id];				\
} while (0)

int
rte_timer_data_alloc(uint32_t *id_ptr)
{
	int i;
	struct rte_timer_data *data;

	if (!rte_timer_subsystem_initialized)
		return -ENOMEM;

	for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
		data = &rte_timer_data_arr[i];
		if (!(data->internal_flags & FL_ALLOCATED)) {
			data->internal_flags |= FL_ALLOCATED;

			if (id_ptr)
				*id_ptr = i;

			return 0;
		}
	}

	return -ENOSPC;
}

int
rte_timer_data_dealloc(uint32_t id)
{
	struct rte_timer_data *timer_data;
	TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);

	timer_data->internal_flags &= ~(FL_ALLOCATED);

	return 0;
}
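
/*
 * A minimal usage sketch (illustrative only, excluded from the build):
 * how an application might reserve its own timer data instance for use
 * with the rte_timer_alt_*() APIs. The names app_timer_data_id,
 * app_timer_data_setup and app_timer_data_teardown are hypothetical;
 * the subsystem must already be initialized via rte_timer_subsystem_init().
 */
#if 0
static uint32_t app_timer_data_id;	/* hypothetical application variable */

static int
app_timer_data_setup(void)
{
	int ret;

	/* claim a free slot in the shared rte_timer_data_arr[] */
	ret = rte_timer_data_alloc(&app_timer_data_id);
	if (ret < 0)
		return ret;	/* -ENOSPC once all RTE_MAX_DATA_ELS slots are taken */

	return 0;
}

static void
app_timer_data_teardown(void)
{
	/* release the slot so another user can claim it */
	rte_timer_data_dealloc(app_timer_data_id);
}
#endif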
/* Init the timer library. Allocate an array of timer data structs in shared
 * memory, and allocate the zeroth entry for use with original timer
 * APIs. Since the intersection of the sets of lcore ids in primary and
 * secondary processes should be empty, the zeroth entry can be shared by
 * multiple processes.
 */
int
rte_timer_subsystem_init(void)
{
	const struct rte_memzone *mz;
	struct rte_timer_data *data;
	int i, lcore_id;
	static const char *mz_name = "rte_timer_mz";
	const size_t data_arr_size =
			RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
	const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt);
	bool do_full_init = true;

	rte_mcfg_timer_lock();

	if (rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return -EALREADY;
	}

	mz = rte_memzone_lookup(mz_name);
	if (mz == NULL) {
		mz = rte_memzone_reserve_aligned(mz_name, mem_size,
				SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
		if (mz == NULL) {
			rte_mcfg_timer_unlock();
			return -ENOMEM;
		}
		do_full_init = true;
	} else
		do_full_init = false;

	rte_timer_data_mz = mz;
	rte_timer_data_arr = mz->addr;
	rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);

	if (do_full_init) {
		for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
			data = &rte_timer_data_arr[i];

			for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
			     lcore_id++) {
				rte_spinlock_init(
					&data->priv_timer[lcore_id].list_lock);
				data->priv_timer[lcore_id].prev_lcore =
					lcore_id;
			}
		}
	}

	rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
	(*rte_timer_mz_refcnt)++;

	rte_timer_subsystem_initialized = 1;

	rte_mcfg_timer_unlock();

	return 0;
}

void
rte_timer_subsystem_finalize(void)
{
	rte_mcfg_timer_lock();

	if (!rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return;
	}

	if (--(*rte_timer_mz_refcnt) == 0)
		rte_memzone_free(rte_timer_data_mz);

	rte_timer_subsystem_initialized = 0;

	rte_mcfg_timer_unlock();
}

/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
	union rte_timer_status status;

	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELAXED);
}
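
/*
 * A minimal usage sketch (illustrative only, excluded from the build):
 * a typical application calls rte_timer_subsystem_init() once after
 * rte_eal_init() and initializes each timer handle with rte_timer_init()
 * before arming it. The names app_tim and app_init_timers() are
 * hypothetical, not part of this library.
 */
#if 0
static struct rte_timer app_tim;	/* hypothetical application timer */

static int
app_init_timers(void)
{
	int ret;

	ret = rte_timer_subsystem_init();
	if (ret < 0 && ret != -EALREADY)
		return ret;	/* e.g. -ENOMEM if the memzone cannot be reserved */

	/* put the handle into the STOP/NO_OWNER state before first use */
	rte_timer_init(&app_tim);

	return 0;
}
#endif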
/*
 * if timer is pending or stopped (or running on the same core as
 * us), mark timer as configuring, and on success return the previous
 * status of the timer
 */
static int
timer_set_config_state(struct rte_timer *tim,
		       union rte_timer_status *ret_prev_status,
		       struct priv_timer *priv_timer)
{
	union rte_timer_status prev_status, status;
	int success = 0;
	unsigned lcore_id;

	lcore_id = rte_lcore_id();

	/* wait until the timer is in the correct status before updating,
	 * and mark it as being configured */
	prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

	while (success == 0) {
		/* timer is running on another core
		 * or ready to run on local core, exit
		 */
		if (prev_status.state == RTE_TIMER_RUNNING &&
		    (prev_status.owner != (uint16_t)lcore_id ||
		     tim != priv_timer[lcore_id].running_tim))
			return -1;

		/* timer is being configured on another core */
		if (prev_status.state == RTE_TIMER_CONFIG)
			return -1;

		/* here, we know that timer is stopped or pending,
		 * mark it atomically as being configured */
		status.state = RTE_TIMER_CONFIG;
		status.owner = (int16_t)lcore_id;
		/* CONFIG states are acting as locked states. If the
		 * timer is in CONFIG state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = __atomic_compare_exchange_n(&tim->status.u32,
					      &prev_status.u32,
					      status.u32, 0,
					      __ATOMIC_ACQUIRE,
					      __ATOMIC_RELAXED);
	}

	ret_prev_status->u32 = prev_status.u32;
	return 0;
}

/*
 * if timer is pending, mark timer as running
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int success = 0;

	/* wait until the timer is in the correct status before updating,
	 * and mark it as running */
	prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

	while (success == 0) {
		/* timer is not pending anymore */
		if (prev_status.state != RTE_TIMER_PENDING)
			return -1;

		/* we know that the timer will be pending at this point,
		 * mark it atomically as being running
		 */
		status.state = RTE_TIMER_RUNNING;
		status.owner = (int16_t)lcore_id;
		/* RUNNING states are acting as locked states. If the
		 * timer is in RUNNING state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = __atomic_compare_exchange_n(&tim->status.u32,
					      &prev_status.u32,
					      status.u32, 0,
					      __ATOMIC_ACQUIRE,
					      __ATOMIC_RELAXED);
	}

	return 0;
}

/*
 * Return a skiplist level for a new entry.
 * This probabilistically gives a level with p=1/4 that an entry at level n
 * will also appear at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
	 * bit position of a (pseudo)random number.
	 */
	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;

	/* limit the levels used to one above our current level, so we don't,
	 * for instance, have a level 0 and a level 7 without anything between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH-1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
	count++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
	return level;
}
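
/*
 * A standalone sketch (illustrative only, excluded from the build) of the
 * level-assignment math above: clearing bit 0 and halving the index of the
 * lowest set bit yields a geometric distribution with p = 1/4 per level.
 * It assumes __builtin_ctz() behaves like rte_bsf32() (0-based index of the
 * lowest set bit); DEMO_MAX_DEPTH stands in for MAX_SKIPLIST_DEPTH and the
 * result is approximate, depending on the quality of rand()'s low bits.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

#define DEMO_MAX_DEPTH 10

int
main(void)
{
	unsigned long hist[DEMO_MAX_DEPTH] = {0};
	int i;

	for (i = 0; i < 1000000; i++) {
		/* clear bit 0 so the lowest set bit is at position >= 1 */
		unsigned int r = (unsigned int)rand() & ~1u;
		unsigned int level = (r == 0) ? DEMO_MAX_DEPTH - 1 :
				((unsigned int)__builtin_ctz(r) - 1) / 2;

		if (level >= DEMO_MAX_DEPTH)
			level = DEMO_MAX_DEPTH - 1;
		hist[level]++;
	}

	/* each level should hold roughly 1/4 of the entries of the one below */
	for (i = 0; i < DEMO_MAX_DEPTH; i++)
		printf("level %d: %lu\n", i, hist[i]);

	return 0;
}
#endif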
/*
 * For a given time value, get the entries at each level which
 * are <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		       struct rte_timer **prev, struct priv_timer *priv_timer)
{
	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
	prev[lvl] = &priv_timer[tim_lcore].pending_head;
	while (lvl != 0) {
		lvl--;
		prev[lvl] = prev[lvl+1];
		while (prev[lvl]->sl_next[lvl] &&
		       prev[lvl]->sl_next[lvl]->expire <= time_val)
			prev[lvl] = prev[lvl]->sl_next[lvl];
	}
}

/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
				struct rte_timer **prev,
				struct priv_timer *priv_timer)
{
	int i;

	/* to get a specific entry in the list, look for entries just lower
	 * than the timer's expiry time, and then advance on each level
	 * individually if necessary
	 */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
		while (prev[i]->sl_next[i] != NULL &&
		       prev[i]->sl_next[i] != tim &&
		       prev[i]->sl_next[i]->expire <= tim->expire)
			prev[i] = prev[i]->sl_next[i];
	}
}

/* call with lock held as necessary
 * add in list
 * timer must be in config state
 * timer must not be in a list
 */
static void
timer_add(struct rte_timer *tim, unsigned int tim_lcore,
	  struct priv_timer *priv_timer)
{
	unsigned lvl;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* find where exactly this element goes in the list of elements
	 * for each depth. */
	timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);

	/* now assign it a new level and add at that level */
	const unsigned tim_level = timer_get_skiplist_level(
			priv_timer[tim_lcore].curr_skiplist_depth);
	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
		priv_timer[tim_lcore].curr_skiplist_depth++;

	lvl = tim_level;
	while (lvl > 0) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
		lvl--;
	}
	tim->sl_next[0] = prev[0]->sl_next[0];
	prev[0]->sl_next[0] = tim;

	/* save the lowest list entry into the expire field of the dummy hdr
	 * NOTE: this is not atomic on 32-bit */
	priv_timer[tim_lcore].pending_head.expire =
		priv_timer[tim_lcore].pending_head.sl_next[0]->expire;
}
/*
 * del from list, lock if needed
 * timer must be in config state
 * timer must be in a list
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
	  int local_is_locked, struct priv_timer *priv_timer)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned prev_owner = prev_status.owner;
	int i;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if the timer is pending on another core, we need to lock the
	 * list; if it is on the local core, we need to lock unless we are
	 * called from rte_timer_manage() */
	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
		priv_timer[prev_owner].pending_head.expire =
				((tim->sl_next[0] == NULL) ?
						0 : tim->sl_next[0]->expire);

	/* adjust pointers from previous entries to point past this */
	timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i]->sl_next[i] == tim)
			prev[i]->sl_next[i] = tim->sl_next[i];
	}

	/* in case we deleted last entry at a level, adjust down max level */
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
			priv_timer[prev_owner].curr_skiplist_depth--;
		else
			break;

	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}

/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked,
		  struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	int ret;
	unsigned lcore_id = rte_lcore_id();
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (lcore_id < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
				priv_timer[lcore_id].prev_lcore,
				0, 1);
			priv_timer[lcore_id].prev_lcore = tim_lcore;
		} else
			/* non-EAL threads do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore. */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
	}

	/* wait until the timer is in the correct status before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, reset, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	/* if the timer needs to be scheduled on another core, we need to
	 * lock the destination list; if it is on the local core, we need to
	 * lock unless we are called from rte_timer_manage()
	 */
	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	__TIMER_STAT_ADD(priv_timer, pending, 1);
	timer_add(tim, tim_lcore, priv_timer);

	/* update state: as we are in CONFIG state, only we can modify
	 * the state, so we don't need to use cmpset() here */
	status.state = RTE_TIMER_PENDING;
	status.owner = (int16_t)tim_lcore;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);

	return 0;
}

/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
		enum rte_timer_type type, unsigned int tim_lcore,
		rte_timer_cb_t fct, void *arg)
{
	return rte_timer_alt_reset(default_data_id, tim, ticks, type,
				   tim_lcore, fct, arg);
}

int
rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
		    uint64_t ticks, enum rte_timer_type type,
		    unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
{
	uint64_t cur_time = rte_get_timer_cycles();
	uint64_t period;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	if (type == PERIODICAL)
		period = ticks;
	else
		period = 0;

	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
				 fct, arg, 0, timer_data);
}

/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		     enum rte_timer_type type, unsigned tim_lcore,
		     rte_timer_cb_t fct, void *arg)
{
	while (rte_timer_reset(tim, ticks, type, tim_lcore,
			       fct, arg) != 0)
		rte_pause();
}
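
/*
 * A minimal usage sketch (illustrative only, excluded from the build):
 * arming a periodic timer on the calling lcore. The callback name
 * app_stats_cb, the helper app_arm_periodic and the one-second period are
 * hypothetical; the only requirement is that some lcore later services the
 * list by calling rte_timer_manage() (or rte_timer_alt_manage()).
 */
#if 0
static void
app_stats_cb(struct rte_timer *tim __rte_unused, void *arg __rte_unused)
{
	/* runs on the lcore that calls rte_timer_manage() */
	printf("tick\n");
}

static void
app_arm_periodic(struct rte_timer *tim)
{
	uint64_t hz = rte_get_timer_hz();	/* timer cycles per second */

	rte_timer_init(tim);
	/* PERIODICAL: automatically re-armed with the same period after each
	 * expiry; rte_timer_reset() returns -1 if the timer is currently
	 * being configured or run by another lcore.
	 */
	if (rte_timer_reset(tim, hz, PERIODICAL, rte_lcore_id(),
			    app_stats_cb, NULL) != 0)
		printf("could not arm timer\n");
}
#endif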
static int
__rte_timer_stop(struct rte_timer *tim, int local_is_locked,
		 struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* wait until the timer is in the correct status before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, stop, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	/* mark timer as stopped */
	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

	return 0;
}

/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
	return rte_timer_alt_stop(default_data_id, tim);
}

int
rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	return __rte_timer_stop(tim, 0, timer_data);
}

/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	while (rte_timer_stop(tim) != 0)
		rte_pause();
}

/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
	return __atomic_load_n(&tim->status.state,
				__ATOMIC_RELAXED) == RTE_TIMER_PENDING;
}
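
/*
 * A minimal usage sketch (illustrative only, excluded from the build):
 * tearing down a timer from a control path. rte_timer_stop() can fail with
 * -1 while another lcore is running or configuring the timer, so a shutdown
 * path that must guarantee the callback has finished typically uses the
 * blocking variant. app_disarm() is a hypothetical helper.
 */
#if 0
static void
app_disarm(struct rte_timer *tim)
{
	/* spins (rte_pause) until the timer is back in the STOP state */
	rte_timer_stop_sync(tim);

	/* after this point the timer is neither pending nor running */
	if (rte_timer_pending(tim))
		printf("unexpected: timer still pending\n");
}
#endif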
/* must be called periodically, run all timers that have expired */
static void
__rte_timer_manage(struct rte_timer_data *timer_data)
{
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim;
	struct rte_timer *run_first_tim, **pprev;
	unsigned lcore_id = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(lcore_id < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(priv_timer, manage, 1);
	/* optimize for the case where per-cpu list is empty */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
		return;
	cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
	/* on 64-bit the value cached in pending_head.expire will be
	 * updated atomically, so we can consult that for a quick check here
	 * outside the lock */
	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
		return;
#endif

	/* browse ordered list, add expired timers in 'expired' list */
	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

	/* if nothing to do just unlock and return */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		return;
	}

	/* save start of list of expired timers */
	tim = priv_timer[lcore_id].pending_head.sl_next[0];

	/* break the existing list at current time point */
	timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
	for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i] == &priv_timer[lcore_id].pending_head)
			continue;
		priv_timer[lcore_id].pending_head.sl_next[i] =
			prev[i]->sl_next[i];
		if (prev[i]->sl_next[i] == NULL)
			priv_timer[lcore_id].curr_skiplist_depth--;
		prev[i]->sl_next[i] = NULL;
	}

	/* transition run-list from PENDING to RUNNING */
	run_first_tim = tim;
	pprev = &run_first_tim;

	for ( ; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];

		ret = timer_set_running_state(tim);
		if (likely(ret == 0)) {
			pprev = &tim->sl_next[0];
		} else {
			/* another core is trying to re-config this one,
			 * remove it from local expired list
			 */
			*pprev = next_tim;
		}
	}

	/* update the next to expire timer value */
	priv_timer[lcore_id].pending_head.expire =
	    (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
		priv_timer[lcore_id].pending_head.sl_next[0]->expire;

	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	/* now scan expired list and call callbacks */
	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];
		priv_timer[lcore_id].updated = 0;
		priv_timer[lcore_id].running_tim = tim;

		/* execute callback function with list unlocked */
		tim->f(tim, tim->arg);

		__TIMER_STAT_ADD(priv_timer, pending, -1);
		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here */
		if (priv_timer[lcore_id].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(priv_timer, pending, 1);
			status.owner = (int16_t)lcore_id;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, lcore_id, tim->f, tim->arg, 1,
				timer_data);
			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		}
	}
	priv_timer[lcore_id].running_tim = NULL;
}

int
rte_timer_manage(void)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	__rte_timer_manage(timer_data);

	return 0;
}
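
/*
 * A minimal usage sketch (illustrative only, excluded from the build):
 * a worker loop that services its per-lcore timer list. Calling
 * rte_timer_manage() on every iteration can be costly, so the sketch
 * rate-limits it using the TSC; the names app_lcore_main_loop,
 * TIMER_RESOLUTION_CYCLES and app_quit are hypothetical application-side
 * details.
 */
#if 0
static volatile bool app_quit;

static int
app_lcore_main_loop(void *arg __rte_unused)
{
	uint64_t prev_tsc = 0, cur_tsc, diff_tsc;
	const uint64_t TIMER_RESOLUTION_CYCLES = rte_get_timer_hz() / 10000;

	while (!app_quit) {
		/* ... do packet-processing work here ... */

		cur_tsc = rte_rdtsc();
		diff_tsc = cur_tsc - prev_tsc;
		if (diff_tsc > TIMER_RESOLUTION_CYCLES) {
			/* run expired-timer callbacks on this lcore */
			rte_timer_manage();
			prev_tsc = cur_tsc;
		}
	}
	return 0;
}
#endif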
int
rte_timer_alt_manage(uint32_t timer_data_id,
		     unsigned int *poll_lcores,
		     int nb_poll_lcores,
		     rte_timer_alt_manage_cb_t f)
{
	unsigned int default_poll_lcores[] = {rte_lcore_id()};
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim, **pprev;
	struct rte_timer *run_first_tims[RTE_MAX_LCORE];
	unsigned int this_lcore = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, j, ret;
	int nb_runlists = 0;
	struct rte_timer_data *data;
	struct priv_timer *privp;
	uint32_t poll_lcore;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(this_lcore < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(data->priv_timer, manage, 1);

	if (poll_lcores == NULL) {
		poll_lcores = default_poll_lcores;
		nb_poll_lcores = RTE_DIM(default_poll_lcores);
	}

	for (i = 0; i < nb_poll_lcores; i++) {
		poll_lcore = poll_lcores[i];
		privp = &data->priv_timer[poll_lcore];

		/* optimize for the case where per-cpu list is empty */
		if (privp->pending_head.sl_next[0] == NULL)
			continue;
		cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
		/* on 64-bit the value cached in pending_head.expire will
		 * be updated atomically, so we can consult that for a quick
		 * check here outside the lock
		 */
		if (likely(privp->pending_head.expire > cur_time))
			continue;
#endif

		/* browse ordered list, add expired timers in 'expired' list */
		rte_spinlock_lock(&privp->list_lock);

		/* if nothing to do just unlock and continue */
		if (privp->pending_head.sl_next[0] == NULL ||
		    privp->pending_head.sl_next[0]->expire > cur_time) {
			rte_spinlock_unlock(&privp->list_lock);
			continue;
		}

		/* save start of list of expired timers */
		tim = privp->pending_head.sl_next[0];

		/* break the existing list at current time point */
		timer_get_prev_entries(cur_time, poll_lcore, prev,
				       data->priv_timer);
		for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
			if (prev[j] == &privp->pending_head)
				continue;
			privp->pending_head.sl_next[j] =
				prev[j]->sl_next[j];
			if (prev[j]->sl_next[j] == NULL)
				privp->curr_skiplist_depth--;

			prev[j]->sl_next[j] = NULL;
		}

		/* transition run-list from PENDING to RUNNING */
		run_first_tims[nb_runlists] = tim;
		pprev = &run_first_tims[nb_runlists];
		nb_runlists++;

		for ( ; tim != NULL; tim = next_tim) {
			next_tim = tim->sl_next[0];

			ret = timer_set_running_state(tim);
			if (likely(ret == 0)) {
				pprev = &tim->sl_next[0];
			} else {
				/* another core is trying to re-config this
				 * one, remove it from local expired list
				 */
				*pprev = next_tim;
			}
		}

		/* update the next to expire timer value */
		privp->pending_head.expire =
		    (privp->pending_head.sl_next[0] == NULL) ? 0 :
			privp->pending_head.sl_next[0]->expire;

		rte_spinlock_unlock(&privp->list_lock);
	}

	/* Now process the run lists */
	while (1) {
		bool done = true;
		uint64_t min_expire = UINT64_MAX;
		int min_idx = 0;

		/* Find the next oldest timer to process */
		for (i = 0; i < nb_runlists; i++) {
			tim = run_first_tims[i];

			if (tim != NULL && tim->expire < min_expire) {
				min_expire = tim->expire;
				min_idx = i;
				done = false;
			}
		}

		if (done)
			break;

		tim = run_first_tims[min_idx];

		/* Move down the runlist from which we picked a timer to
		 * execute
		 */
		run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];

		data->priv_timer[this_lcore].updated = 0;
		data->priv_timer[this_lcore].running_tim = tim;

		/* Call the provided callback function */
		f(tim);

		__TIMER_STAT_ADD(data->priv_timer, pending, -1);

		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here
		 */
		if (data->priv_timer[this_lcore].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(
				&data->priv_timer[this_lcore].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(data->priv_timer, pending, 1);
			status.owner = (int16_t)this_lcore;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, this_lcore, tim->f, tim->arg, 1,
				data);
			rte_spinlock_unlock(
				&data->priv_timer[this_lcore].list_lock);
		}

		data->priv_timer[this_lcore].running_tim = NULL;
	}

	return 0;
}
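
/*
 * A minimal usage sketch (illustrative only, excluded from the build):
 * one service lcore draining the pending lists of several other lcores
 * through a private timer data instance. The names app_timer_data_id,
 * app_expired_cb, app_service_timers and the poll_lcores contents are
 * hypothetical; note that the supplied callback receives each expired
 * timer instead of the timer's own tim->f being invoked directly.
 */
#if 0
extern uint32_t app_timer_data_id;	/* id from rte_timer_data_alloc() */

static void
app_expired_cb(struct rte_timer *tim)
{
	/* dispatch the expired timer; tim->arg still holds the user arg */
	tim->f(tim, tim->arg);
}

static void
app_service_timers(void)
{
	unsigned int poll_lcores[] = {1, 2, 3};	/* lcores whose lists we poll */

	rte_timer_alt_manage(app_timer_data_id, poll_lcores,
			     RTE_DIM(poll_lcores), app_expired_cb);
}
#endif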
/* Walk pending lists, stopping timers and calling user-specified function */
int
rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
		   int nb_walk_lcores,
		   rte_timer_stop_all_cb_t f, void *f_arg)
{
	int i;
	struct priv_timer *priv_timer;
	uint32_t walk_lcore;
	struct rte_timer *tim, *next_tim;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	for (i = 0; i < nb_walk_lcores; i++) {
		walk_lcore = walk_lcores[i];
		priv_timer = &timer_data->priv_timer[walk_lcore];

		rte_spinlock_lock(&priv_timer->list_lock);

		for (tim = priv_timer->pending_head.sl_next[0];
		     tim != NULL;
		     tim = next_tim) {
			next_tim = tim->sl_next[0];

			/* Call timer_stop with lock held */
			__rte_timer_stop(tim, 1, timer_data);

			if (f)
				f(tim, f_arg);
		}

		rte_spinlock_unlock(&priv_timer->list_lock);
	}

	return 0;
}
int64_t
rte_timer_next_ticks(void)
{
	unsigned int lcore_id = rte_lcore_id();
	struct rte_timer_data *timer_data;
	struct priv_timer *priv_timer;
	const struct rte_timer *tm;
	uint64_t cur_time;
	int64_t left = -ENOENT;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	priv_timer = timer_data->priv_timer;
	cur_time = rte_get_timer_cycles();

	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
	tm = priv_timer[lcore_id].pending_head.sl_next[0];
	if (tm) {
		left = tm->expire - cur_time;
		if (left < 0)
			left = 0;
	}
	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	return left;
}

/* dump statistics about timers */
static void
__rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats sum;
	unsigned lcore_id;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.reset += priv_timer[lcore_id].stats.reset;
		sum.stop += priv_timer[lcore_id].stats.stop;
		sum.manage += priv_timer[lcore_id].stats.manage;
		sum.pending += priv_timer[lcore_id].stats.pending;
	}
	fprintf(f, "Timer statistics:\n");
	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}

int
rte_timer_dump_stats(FILE *f)
{
	return rte_timer_alt_dump_stats(default_data_id, f);
}

int
rte_timer_alt_dump_stats(uint32_t timer_data_id __rte_unused, FILE *f)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	__rte_timer_dump_stats(timer_data, f);

	return 0;
}
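
/*
 * A minimal usage sketch (illustrative only, excluded from the build):
 * using rte_timer_next_ticks() to decide how long a mostly idle lcore can
 * sleep before it needs to call rte_timer_manage() again. The helper name
 * app_idle_wait, the usleep()-based nap and the 100 ms cap are hypothetical
 * choices.
 */
#if 0
#include <unistd.h>

static void
app_idle_wait(void)
{
	int64_t ticks = rte_timer_next_ticks();	/* -ENOENT if list is empty */
	uint64_t us = 100000;			/* default/maximum nap: 100 ms */

	if (ticks >= 0) {
		/* convert timer ticks to microseconds */
		uint64_t next_us = (uint64_t)ticks * 1000000 /
					rte_get_timer_hz();
		if (next_us < us)
			us = next_us;
	}

	usleep(us);
	rte_timer_manage();	/* service whatever expired while sleeping */
}
#endif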