/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <assert.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal_memconfig.h>
#include <rte_memory.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>
#include <rte_memzone.h>

#include "rte_timer.h"

/**
 * Per-lcore info for timers.
 */
struct priv_timer {
	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
	rte_spinlock_t list_lock;       /**< lock to protect list access */

	/** per-core variable that is true if a timer was updated on this
	 *  core since the last reset of the variable */
	int updated;

	/** track the current depth of the skiplist */
	unsigned curr_skiplist_depth;

	unsigned prev_lcore;            /**< used for lcore round robin */

	/** running timer on this lcore now */
	struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	/** per-lcore statistics */
	struct rte_timer_debug_stats stats;
#endif
} __rte_cache_aligned;

#define FL_ALLOCATED	(1 << 0)
struct rte_timer_data {
	struct priv_timer priv_timer[RTE_MAX_LCORE];
	uint8_t internal_flags;
};

#define RTE_MAX_DATA_ELS 64
static const struct rte_memzone *rte_timer_data_mz;
static int *volatile rte_timer_mz_refcnt;
static struct rte_timer_data *rte_timer_data_arr;
static const uint32_t default_data_id;
static uint32_t rte_timer_subsystem_initialized;

/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(priv_timer, name, n) do {			\
		unsigned __lcore_id = rte_lcore_id();			\
		if (__lcore_id < RTE_MAX_LCORE)				\
			priv_timer[__lcore_id].stats.name += (n);	\
	} while (0)
#else
#define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
#endif

static inline int
timer_data_valid(uint32_t id)
{
	return rte_timer_data_arr &&
		(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
}

/* validate ID and retrieve timer data pointer, or return error value */
#define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {	\
	if (id >= RTE_MAX_DATA_ELS || !timer_data_valid(id))		\
		return retval;						\
	timer_data = &rte_timer_data_arr[id];				\
} while (0)

int
rte_timer_data_alloc(uint32_t *id_ptr)
{
	int i;
	struct rte_timer_data *data;

	if (!rte_timer_subsystem_initialized)
		return -ENOMEM;

	for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
		data = &rte_timer_data_arr[i];
		if (!(data->internal_flags & FL_ALLOCATED)) {
			data->internal_flags |= FL_ALLOCATED;

			if (id_ptr)
				*id_ptr = i;

			return 0;
		}
	}

	return -ENOSPC;
}

int
rte_timer_data_dealloc(uint32_t id)
{
	struct rte_timer_data *timer_data;
	TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);

	timer_data->internal_flags &= ~(FL_ALLOCATED);

	return 0;
}
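/* Illustrative sketch (not part of the library) of the allocation flow for
 * the "alt" APIs, which operate on an explicitly allocated timer data
 * instance instead of the default one; error handling is omitted and
 * "my_cb" and "ticks" are placeholder names:
 *
 *	uint32_t timer_data_id;
 *	struct rte_timer tim;
 *
 *	rte_timer_subsystem_init();
 *	rte_timer_data_alloc(&timer_data_id);
 *	rte_timer_init(&tim);
 *	rte_timer_alt_reset(timer_data_id, &tim, ticks, PERIODICAL,
 *			    rte_lcore_id(), my_cb, NULL);
 *	...
 *	rte_timer_data_dealloc(timer_data_id);
 */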
/* Init the timer library. Allocate an array of timer data structs in shared
 * memory, and allocate the zeroth entry for use with original timer
 * APIs. Since the intersection of the sets of lcore ids in primary and
 * secondary processes should be empty, the zeroth entry can be shared by
 * multiple processes.
 */
int
rte_timer_subsystem_init(void)
{
	const struct rte_memzone *mz;
	struct rte_timer_data *data;
	int i, lcore_id;
	static const char *mz_name = "rte_timer_mz";
	const size_t data_arr_size =
			RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
	const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt);
	bool do_full_init = true;

	rte_mcfg_timer_lock();

	if (rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return -EALREADY;
	}

	mz = rte_memzone_lookup(mz_name);
	if (mz == NULL) {
		mz = rte_memzone_reserve_aligned(mz_name, mem_size,
				SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
		if (mz == NULL) {
			rte_mcfg_timer_unlock();
			return -ENOMEM;
		}
		do_full_init = true;
	} else
		do_full_init = false;

	rte_timer_data_mz = mz;
	rte_timer_data_arr = mz->addr;
	rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);

	if (do_full_init) {
		for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
			data = &rte_timer_data_arr[i];

			for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
			     lcore_id++) {
				rte_spinlock_init(
					&data->priv_timer[lcore_id].list_lock);
				data->priv_timer[lcore_id].prev_lcore =
					lcore_id;
			}
		}
	}

	rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
	(*rte_timer_mz_refcnt)++;

	rte_timer_subsystem_initialized = 1;

	rte_mcfg_timer_unlock();

	return 0;
}

void
rte_timer_subsystem_finalize(void)
{
	rte_mcfg_timer_lock();

	if (!rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return;
	}

	if (--(*rte_timer_mz_refcnt) == 0)
		rte_memzone_free(rte_timer_data_mz);

	rte_timer_subsystem_initialized = 0;

	rte_mcfg_timer_unlock();
}

/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
	union rte_timer_status status;

	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELAXED);
}
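/* Summary of the scheme implemented below: tim->status packs a state
 * (STOP, PENDING, RUNNING or CONFIG) and an owner lcore id into a single
 * 32-bit word. CONFIG and RUNNING act as lock-like states: they are entered
 * with a compare-and-swap in timer_set_config_state() and
 * timer_set_running_state(), and only the lcore that installed them may move
 * the timer out of them, which is why leaving those states needs only a
 * plain store with RELEASE ordering.
 */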
/*
 * if timer is pending or stopped (or running on the same core as
 * us), mark timer as configuring, and on success return the previous
 * status of the timer
 */
static int
timer_set_config_state(struct rte_timer *tim,
		       union rte_timer_status *ret_prev_status,
		       struct priv_timer *priv_timer)
{
	union rte_timer_status prev_status, status;
	int success = 0;
	unsigned lcore_id;

	lcore_id = rte_lcore_id();

	/* wait until the timer is in a correct state before updating it,
	 * and mark it as being configured */
	prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

	while (success == 0) {
		/* timer is running on another core
		 * or ready to run on local core, exit
		 */
		if (prev_status.state == RTE_TIMER_RUNNING &&
		    (prev_status.owner != (uint16_t)lcore_id ||
		     tim != priv_timer[lcore_id].running_tim))
			return -1;

		/* timer is being configured on another core */
		if (prev_status.state == RTE_TIMER_CONFIG)
			return -1;

		/* here, we know that timer is stopped or pending,
		 * mark it atomically as being configured */
		status.state = RTE_TIMER_CONFIG;
		status.owner = (int16_t)lcore_id;
		/* CONFIG states are acting as locked states. If the
		 * timer is in CONFIG state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = __atomic_compare_exchange_n(&tim->status.u32,
					      &prev_status.u32,
					      status.u32, 0,
					      __ATOMIC_ACQUIRE,
					      __ATOMIC_RELAXED);
	}

	ret_prev_status->u32 = prev_status.u32;
	return 0;
}

/*
 * if timer is pending, mark timer as running
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int success = 0;

	/* wait until the timer is in a correct state before updating it,
	 * and mark it as running */
	prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

	while (success == 0) {
		/* timer is not pending anymore */
		if (prev_status.state != RTE_TIMER_PENDING)
			return -1;

		/* we know that the timer will be pending at this point,
		 * mark it atomically as being running
		 */
		status.state = RTE_TIMER_RUNNING;
		status.owner = (int16_t)lcore_id;
		/* RUNNING states are acting as locked states. If the
		 * timer is in RUNNING state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = __atomic_compare_exchange_n(&tim->status.u32,
					      &prev_status.u32,
					      status.u32, 0,
					      __ATOMIC_ACQUIRE,
					      __ATOMIC_RELAXED);
	}

	return 0;
}

/*
 * Return a skiplist level for a new entry.
 * This probabilistically gives a level with p=1/4 that an entry at level n
 * will also appear at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
	 * bit position of a (pseudo)random number.
	 */
	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;

	/* limit the levels used to one above our current level, so we don't,
	 * for instance, have a level 0 and a level 7 without anything between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH-1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
	count++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i,
					(unsigned)levels[i]);
#endif
	return level;
}
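/* As a rough illustration of the distribution above: with 1024 pending
 * timers, all 1024 are linked at level 0, about 256 also appear at level 1,
 * about 64 at level 2 and about 16 at level 3, so a lookup only walks a few
 * nodes per level before dropping down.
 */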
/*
 * For a given time value, get the entries at each level which
 * are <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		       struct rte_timer **prev, struct priv_timer *priv_timer)
{
	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
	prev[lvl] = &priv_timer[tim_lcore].pending_head;
	while (lvl != 0) {
		lvl--;
		prev[lvl] = prev[lvl+1];
		while (prev[lvl]->sl_next[lvl] &&
		       prev[lvl]->sl_next[lvl]->expire <= time_val)
			prev[lvl] = prev[lvl]->sl_next[lvl];
	}
}

/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
				struct rte_timer **prev,
				struct priv_timer *priv_timer)
{
	int i;

	/* to get a specific entry in the list, look just below its expiry
	 * time, then advance on each level individually as necessary
	 */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
		while (prev[i]->sl_next[i] != NULL &&
		       prev[i]->sl_next[i] != tim &&
		       prev[i]->sl_next[i]->expire <= tim->expire)
			prev[i] = prev[i]->sl_next[i];
	}
}

/* call with lock held as necessary
 * add in list
 * timer must be in config state
 * timer must not be in a list
 */
static void
timer_add(struct rte_timer *tim, unsigned int tim_lcore,
	  struct priv_timer *priv_timer)
{
	unsigned lvl;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* find where exactly this element goes in the list of elements
	 * for each depth. */
	timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);

	/* now assign it a new level and add at that level */
	const unsigned tim_level = timer_get_skiplist_level(
			priv_timer[tim_lcore].curr_skiplist_depth);
	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
		priv_timer[tim_lcore].curr_skiplist_depth++;

	lvl = tim_level;
	while (lvl > 0) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
		lvl--;
	}
	tim->sl_next[0] = prev[0]->sl_next[0];
	prev[0]->sl_next[0] = tim;

	/* save the lowest list entry into the expire field of the dummy hdr
	 * NOTE: this is not atomic on 32-bit */
	priv_timer[tim_lcore].pending_head.expire =
			priv_timer[tim_lcore].pending_head.sl_next[0]->expire;
}
/*
 * del from list, lock if needed
 * timer must be in config state
 * timer must be in a list
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
	  int local_is_locked, struct priv_timer *priv_timer)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned prev_owner = prev_status.owner;
	int i;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if the timer is pending on another core, we need to lock the
	 * list; if it is on the local core, we need to lock unless we are
	 * called from rte_timer_manage() */
	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
		priv_timer[prev_owner].pending_head.expire =
				((tim->sl_next[0] == NULL) ?
				 0 : tim->sl_next[0]->expire);

	/* adjust pointers from previous entries to point past this */
	timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i]->sl_next[i] == tim)
			prev[i]->sl_next[i] = tim->sl_next[i];
	}

	/* in case we deleted last entry at a level, adjust down max level */
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
			priv_timer[prev_owner].curr_skiplist_depth--;
		else
			break;

	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}

/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked,
		  struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	int ret;
	unsigned lcore_id = rte_lcore_id();
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (lcore_id < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
					priv_timer[lcore_id].prev_lcore,
					0, 1);
			priv_timer[lcore_id].prev_lcore = tim_lcore;
		} else
			/* non-EAL threads do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore. */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
	}

	/* wait until the timer is in a correct state before updating it,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, reset, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	/* if the timer needs to be scheduled on another core, we need to
	 * lock the destination list; if it is on the local core, we need to
	 * lock unless we are called from rte_timer_manage()
	 */
	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	__TIMER_STAT_ADD(priv_timer, pending, 1);
	timer_add(tim, tim_lcore, priv_timer);

	/* update state: as we are in CONFIG state, only we can modify
	 * the state, so we don't need to use cmpset() here */
	status.state = RTE_TIMER_PENDING;
	status.owner = (int16_t)tim_lcore;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);

	return 0;
}
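/* Illustrative sketch (not part of the library) of arming a periodic timer
 * through the public wrapper below; "my_cb" and "my_timer" are placeholder
 * names and the timer fires roughly once per second on the calling lcore:
 *
 *	static void my_cb(struct rte_timer *tim, void *arg) { ... }
 *
 *	struct rte_timer my_timer;
 *
 *	rte_timer_init(&my_timer);
 *	rte_timer_reset(&my_timer, rte_get_timer_hz(), PERIODICAL,
 *			rte_lcore_id(), my_cb, NULL);
 */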
/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
		enum rte_timer_type type, unsigned int tim_lcore,
		rte_timer_cb_t fct, void *arg)
{
	return rte_timer_alt_reset(default_data_id, tim, ticks, type,
				   tim_lcore, fct, arg);
}

int
rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
		    uint64_t ticks, enum rte_timer_type type,
		    unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
{
	uint64_t cur_time = rte_get_timer_cycles();
	uint64_t period;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	if (type == PERIODICAL)
		period = ticks;
	else
		period = 0;

	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
				 fct, arg, 0, timer_data);
}

/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		     enum rte_timer_type type, unsigned tim_lcore,
		     rte_timer_cb_t fct, void *arg)
{
	while (rte_timer_reset(tim, ticks, type, tim_lcore,
			       fct, arg) != 0)
		rte_pause();
}

static int
__rte_timer_stop(struct rte_timer *tim, int local_is_locked,
		 struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* wait until the timer is in a correct state before updating it,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, stop, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	/* mark timer as stopped */
	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

	return 0;
}

/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
	return rte_timer_alt_stop(default_data_id, tim);
}

int
rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	return __rte_timer_stop(tim, 0, timer_data);
}

/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	while (rte_timer_stop(tim) != 0)
		rte_pause();
}

/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
	return __atomic_load_n(&tim->status.state,
			       __ATOMIC_RELAXED) == RTE_TIMER_PENDING;
}
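/* Illustrative sketch (not part of the library): stopping a timer from a
 * different lcore. rte_timer_stop() can fail transiently while the callback
 * is running elsewhere, so either retry or use the synchronous variant;
 * "my_timer" is a placeholder name:
 *
 *	rte_timer_stop_sync(&my_timer);
 *	assert(!rte_timer_pending(&my_timer));
 */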
/* must be called periodically; run all timers that have expired */
static void
__rte_timer_manage(struct rte_timer_data *timer_data)
{
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim;
	struct rte_timer *run_first_tim, **pprev;
	unsigned lcore_id = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(lcore_id < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(priv_timer, manage, 1);
	/* optimize for the case where per-cpu list is empty */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
		return;
	cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
	/* on 64-bit the value cached in pending_head.expire will be
	 * updated atomically, so we can consult that for a quick check here
	 * outside the lock */
	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
		return;
#endif

	/* browse ordered list, add expired timers in 'expired' list */
	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

	/* if nothing to do just unlock and return */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		return;
	}

	/* save start of list of expired timers */
	tim = priv_timer[lcore_id].pending_head.sl_next[0];

	/* break the existing list at current time point */
	timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
	for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i] == &priv_timer[lcore_id].pending_head)
			continue;
		priv_timer[lcore_id].pending_head.sl_next[i] =
			prev[i]->sl_next[i];
		if (prev[i]->sl_next[i] == NULL)
			priv_timer[lcore_id].curr_skiplist_depth--;
		prev[i]->sl_next[i] = NULL;
	}

	/* transition run-list from PENDING to RUNNING */
	run_first_tim = tim;
	pprev = &run_first_tim;

	for ( ; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];

		ret = timer_set_running_state(tim);
		if (likely(ret == 0)) {
			pprev = &tim->sl_next[0];
		} else {
			/* another core is trying to re-config this one,
			 * remove it from local expired list
			 */
			*pprev = next_tim;
		}
	}

	/* update the next to expire timer value */
	priv_timer[lcore_id].pending_head.expire =
		(priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
			priv_timer[lcore_id].pending_head.sl_next[0]->expire;

	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	/* now scan expired list and call callbacks */
	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];
		priv_timer[lcore_id].updated = 0;
		priv_timer[lcore_id].running_tim = tim;

		/* execute callback function with list unlocked */
		tim->f(tim, tim->arg);

		__TIMER_STAT_ADD(priv_timer, pending, -1);
		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here */
		if (priv_timer[lcore_id].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(priv_timer, pending, 1);
			status.owner = (int16_t)lcore_id;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, lcore_id, tim->f, tim->arg, 1,
				timer_data);
			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		}
	}
	priv_timer[lcore_id].running_tim = NULL;
}
int
rte_timer_manage(void)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	__rte_timer_manage(timer_data);

	return 0;
}
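/* Illustrative sketch (not part of the library) of a main loop driving the
 * default timer list; "quit" and "do_other_work()" are placeholders. Calling
 * rte_timer_manage() more often only tightens callback latency, since the
 * empty-list and next-expiry checks above keep idle calls cheap:
 *
 *	while (!quit) {
 *		rte_timer_manage();
 *		do_other_work();
 *	}
 */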
int
rte_timer_alt_manage(uint32_t timer_data_id,
		     unsigned int *poll_lcores,
		     int nb_poll_lcores,
		     rte_timer_alt_manage_cb_t f)
{
	unsigned int default_poll_lcores[] = {rte_lcore_id()};
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim, **pprev;
	struct rte_timer *run_first_tims[RTE_MAX_LCORE];
	unsigned int this_lcore = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, j, ret;
	int nb_runlists = 0;
	struct rte_timer_data *data;
	struct priv_timer *privp;
	uint32_t poll_lcore;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(this_lcore < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(data->priv_timer, manage, 1);

	if (poll_lcores == NULL) {
		poll_lcores = default_poll_lcores;
		nb_poll_lcores = RTE_DIM(default_poll_lcores);
	}

	for (i = 0; i < nb_poll_lcores; i++) {
		poll_lcore = poll_lcores[i];
		privp = &data->priv_timer[poll_lcore];

		/* optimize for the case where per-cpu list is empty */
		if (privp->pending_head.sl_next[0] == NULL)
			continue;
		cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
		/* on 64-bit the value cached in pending_head.expire will
		 * be updated atomically, so we can consult that for a quick
		 * check here outside the lock
		 */
		if (likely(privp->pending_head.expire > cur_time))
			continue;
#endif

		/* browse ordered list, add expired timers in 'expired' list */
		rte_spinlock_lock(&privp->list_lock);

		/* if nothing to do just unlock and continue */
		if (privp->pending_head.sl_next[0] == NULL ||
		    privp->pending_head.sl_next[0]->expire > cur_time) {
			rte_spinlock_unlock(&privp->list_lock);
			continue;
		}

		/* save start of list of expired timers */
		tim = privp->pending_head.sl_next[0];

		/* break the existing list at current time point */
		timer_get_prev_entries(cur_time, poll_lcore, prev,
				       data->priv_timer);
		for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
			if (prev[j] == &privp->pending_head)
				continue;
			privp->pending_head.sl_next[j] =
				prev[j]->sl_next[j];
			if (prev[j]->sl_next[j] == NULL)
				privp->curr_skiplist_depth--;

			prev[j]->sl_next[j] = NULL;
		}

		/* transition run-list from PENDING to RUNNING */
		run_first_tims[nb_runlists] = tim;
		pprev = &run_first_tims[nb_runlists];
		nb_runlists++;

		for ( ; tim != NULL; tim = next_tim) {
			next_tim = tim->sl_next[0];

			ret = timer_set_running_state(tim);
			if (likely(ret == 0)) {
				pprev = &tim->sl_next[0];
			} else {
				/* another core is trying to re-config this
				 * one, remove it from local expired list
				 */
				*pprev = next_tim;
			}
		}

		/* update the next to expire timer value */
		privp->pending_head.expire =
			(privp->pending_head.sl_next[0] == NULL) ? 0 :
				privp->pending_head.sl_next[0]->expire;

		rte_spinlock_unlock(&privp->list_lock);
	}

	/* Now process the run lists */
	while (1) {
		bool done = true;
		uint64_t min_expire = UINT64_MAX;
		int min_idx = 0;

		/* Find the next oldest timer to process */
		for (i = 0; i < nb_runlists; i++) {
			tim = run_first_tims[i];

			if (tim != NULL && tim->expire < min_expire) {
				min_expire = tim->expire;
				min_idx = i;
				done = false;
			}
		}

		if (done)
			break;

		tim = run_first_tims[min_idx];

		/* Move down the runlist from which we picked a timer to
		 * execute
		 */
		run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];

		data->priv_timer[this_lcore].updated = 0;
		data->priv_timer[this_lcore].running_tim = tim;

		/* Call the provided callback function */
		f(tim);

		__TIMER_STAT_ADD(data->priv_timer, pending, -1);

		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here
		 */
		if (data->priv_timer[this_lcore].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(
				&data->priv_timer[this_lcore].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(data->priv_timer, pending, 1);
			status.owner = (int16_t)this_lcore;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, this_lcore, tim->f, tim->arg, 1,
				data);
			rte_spinlock_unlock(
				&data->priv_timer[this_lcore].list_lock);
		}

		data->priv_timer[this_lcore].running_tim = NULL;
	}

	return 0;
}
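/* Illustrative sketch (not part of the library): one lcore draining the
 * pending lists of several worker lcores through a single callback;
 * "timer_data_id" and "handle_expiry" are placeholder names:
 *
 *	static void handle_expiry(struct rte_timer *tim) { ... }
 *
 *	unsigned int poll_lcores[] = {1, 2, 3};
 *
 *	rte_timer_alt_manage(timer_data_id, poll_lcores,
 *			     RTE_DIM(poll_lcores), handle_expiry);
 */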
/* Walk pending lists, stopping timers and calling user-specified function */
int
rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
		   int nb_walk_lcores,
		   rte_timer_stop_all_cb_t f, void *f_arg)
{
	int i;
	struct priv_timer *priv_timer;
	uint32_t walk_lcore;
	struct rte_timer *tim, *next_tim;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	for (i = 0; i < nb_walk_lcores; i++) {
		walk_lcore = walk_lcores[i];
		priv_timer = &timer_data->priv_timer[walk_lcore];

		rte_spinlock_lock(&priv_timer->list_lock);

		for (tim = priv_timer->pending_head.sl_next[0];
		     tim != NULL;
		     tim = next_tim) {
			next_tim = tim->sl_next[0];

			/* Call timer_stop with lock held */
			__rte_timer_stop(tim, 1, timer_data);

			if (f)
				f(tim, f_arg);
		}

		rte_spinlock_unlock(&priv_timer->list_lock);
	}

	return 0;
}

int64_t
rte_timer_next_ticks(void)
{
	unsigned int lcore_id = rte_lcore_id();
	struct rte_timer_data *timer_data;
	struct priv_timer *priv_timer;
	const struct rte_timer *tm;
	uint64_t cur_time;
	int64_t left = -ENOENT;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	priv_timer = timer_data->priv_timer;
	cur_time = rte_get_timer_cycles();

	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
	tm = priv_timer[lcore_id].pending_head.sl_next[0];
	if (tm) {
		left = tm->expire - cur_time;
		if (left < 0)
			left = 0;
	}
	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	return left;
}

/* dump statistics about timers */
static void
__rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats sum;
	unsigned lcore_id;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.reset += priv_timer[lcore_id].stats.reset;
		sum.stop += priv_timer[lcore_id].stats.stop;
		sum.manage += priv_timer[lcore_id].stats.manage;
		sum.pending += priv_timer[lcore_id].stats.pending;
	}
	fprintf(f, "Timer statistics:\n");
	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}

int
rte_timer_dump_stats(FILE *f)
{
	return rte_timer_alt_dump_stats(default_data_id, f);
}

int
rte_timer_alt_dump_stats(uint32_t timer_data_id __rte_unused, FILE *f)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	__rte_timer_dump_stats(timer_data, f);

	return 0;
}