/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <assert.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal_memconfig.h>
#include <rte_memory.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>
#include <rte_memzone.h>

#include "rte_timer.h"

/**
 * Per-lcore info for timers.
 */
struct __rte_cache_aligned priv_timer {
	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
	rte_spinlock_t list_lock;       /**< lock to protect list access */

	/** per-core variable that is true if a timer was updated on this
	 *  core since the last reset of the variable */
	int updated;

	/** track the current depth of the skiplist */
	unsigned curr_skiplist_depth;

	unsigned prev_lcore;            /**< used for lcore round robin */

	/** running timer on this lcore now */
	struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	/** per-lcore statistics */
	struct rte_timer_debug_stats stats;
#endif
};

#define FL_ALLOCATED	(1 << 0)
struct rte_timer_data {
	struct priv_timer priv_timer[RTE_MAX_LCORE];
	uint8_t internal_flags;
};

#define RTE_MAX_DATA_ELS 64
static const struct rte_memzone *rte_timer_data_mz;
static int *volatile rte_timer_mz_refcnt;
static struct rte_timer_data *rte_timer_data_arr;
static const uint32_t default_data_id;
static uint32_t rte_timer_subsystem_initialized;

/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(priv_timer, name, n) do {			\
		unsigned __lcore_id = rte_lcore_id();			\
		if (__lcore_id < RTE_MAX_LCORE)				\
			priv_timer[__lcore_id].stats.name += (n);	\
	} while (0)
#else
#define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
#endif

static inline int
timer_data_valid(uint32_t id)
{
	return rte_timer_data_arr &&
		(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
}

/* validate ID and retrieve timer data pointer, or return error value */
#define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {	\
	if (id >= RTE_MAX_DATA_ELS || !timer_data_valid(id))		\
		return retval;						\
	timer_data = &rte_timer_data_arr[id];				\
} while (0)

int
rte_timer_data_alloc(uint32_t *id_ptr)
{
	int i;
	struct rte_timer_data *data;

	if (!rte_timer_subsystem_initialized)
		return -ENOMEM;

	for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
		data = &rte_timer_data_arr[i];
		if (!(data->internal_flags & FL_ALLOCATED)) {
			data->internal_flags |= FL_ALLOCATED;

			if (id_ptr)
				*id_ptr = i;

			return 0;
		}
	}

	return -ENOSPC;
}

int
rte_timer_data_dealloc(uint32_t id)
{
	struct rte_timer_data *timer_data;
	TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);

	timer_data->internal_flags &= ~(FL_ALLOCATED);

	return 0;
}

/* Init the timer library. Allocate an array of timer data structs in shared
 * memory, and allocate the zeroth entry for use with original timer
 * APIs. Since the intersection of the sets of lcore ids in primary and
 * secondary processes should be empty, the zeroth entry can be shared by
 * multiple processes.
 */
int
rte_timer_subsystem_init(void)
{
	const struct rte_memzone *mz;
	struct rte_timer_data *data;
	int i, lcore_id;
	static const char *mz_name = "rte_timer_mz";
	const size_t data_arr_size =
			RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
	const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt);
	bool do_full_init = true;

	rte_mcfg_timer_lock();

	if (rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return -EALREADY;
	}

	mz = rte_memzone_lookup(mz_name);
	if (mz == NULL) {
		mz = rte_memzone_reserve_aligned(mz_name, mem_size,
				SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
		if (mz == NULL) {
			rte_mcfg_timer_unlock();
			return -ENOMEM;
		}
		do_full_init = true;
	} else
		do_full_init = false;

	rte_timer_data_mz = mz;
	rte_timer_data_arr = mz->addr;
	rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);

	if (do_full_init) {
		for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
			data = &rte_timer_data_arr[i];

			for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
			     lcore_id++) {
				rte_spinlock_init(
					&data->priv_timer[lcore_id].list_lock);
				data->priv_timer[lcore_id].prev_lcore =
					lcore_id;
			}
		}
	}

	rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
	(*rte_timer_mz_refcnt)++;

	rte_timer_subsystem_initialized = 1;

	rte_mcfg_timer_unlock();

	return 0;
}

void
rte_timer_subsystem_finalize(void)
{
	rte_mcfg_timer_lock();

	if (!rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return;
	}

	if (--(*rte_timer_mz_refcnt) == 0)
		rte_memzone_free(rte_timer_data_mz);

	rte_timer_subsystem_initialized = 0;

	rte_mcfg_timer_unlock();
}

/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
	union rte_timer_status status;

	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	rte_atomic_store_explicit(&tim->status.u32, status.u32, rte_memory_order_relaxed);
}
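
/* Illustrative usage sketch (comment only, not part of the library): a
 * minimal single-shot timer on the calling lcore, assuming an initialized
 * EAL application and a hypothetical callback my_cb(). Only the public API
 * implemented here or declared in rte_timer.h is used; SINGLE comes from
 * enum rte_timer_type.
 *
 *	static void my_cb(struct rte_timer *tim, void *arg) { ... }
 *
 *	static struct rte_timer my_tim;
 *
 *	rte_timer_subsystem_init();
 *	rte_timer_init(&my_tim);
 *	rte_timer_reset(&my_tim, rte_get_timer_hz(), SINGLE,
 *			rte_lcore_id(), my_cb, NULL);
 *	for (;;)
 *		rte_timer_manage();
 */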

/*
 * if timer is pending or stopped (or running on the same core as
 * us), mark timer as configuring, and on success return the previous
 * status of the timer
 */
static int
timer_set_config_state(struct rte_timer *tim,
		       union rte_timer_status *ret_prev_status,
		       struct priv_timer *priv_timer)
{
	union rte_timer_status prev_status, status;
	int success = 0;
	unsigned lcore_id;

	lcore_id = rte_lcore_id();

	/* wait until the timer is in a correct state before updating,
	 * and mark it as being configured */
	prev_status.u32 = rte_atomic_load_explicit(&tim->status.u32, rte_memory_order_relaxed);

	while (success == 0) {
		/* timer is running on another core
		 * or ready to run on local core, exit
		 */
		if (prev_status.state == RTE_TIMER_RUNNING &&
		    (prev_status.owner != (uint16_t)lcore_id ||
		     tim != priv_timer[lcore_id].running_tim))
			return -1;

		/* timer is being configured on another core */
		if (prev_status.state == RTE_TIMER_CONFIG)
			return -1;

		/* here, we know that timer is stopped or pending,
		 * mark it atomically as being configured */
		status.state = RTE_TIMER_CONFIG;
		status.owner = (int16_t)lcore_id;
		/* CONFIG states are acting as locked states. If the
		 * timer is in CONFIG state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = rte_atomic_compare_exchange_strong_explicit(&tim->status.u32,
				(uint32_t *)(uintptr_t)&prev_status.u32,
				status.u32,
				rte_memory_order_acquire,
				rte_memory_order_relaxed);
	}

	ret_prev_status->u32 = prev_status.u32;
	return 0;
}

/*
 * if timer is pending, mark timer as running
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int success = 0;

	/* wait until the timer is in a correct state before updating,
	 * and mark it as running */
	prev_status.u32 = rte_atomic_load_explicit(&tim->status.u32, rte_memory_order_relaxed);

	while (success == 0) {
		/* timer is not pending anymore */
		if (prev_status.state != RTE_TIMER_PENDING)
			return -1;

		/* we know that the timer will be pending at this point,
		 * mark it atomically as being running
		 */
		status.state = RTE_TIMER_RUNNING;
		status.owner = (int16_t)lcore_id;
		/* RUNNING states are acting as locked states. If the
		 * timer is in RUNNING state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = rte_atomic_compare_exchange_strong_explicit(&tim->status.u32,
				(uint32_t *)(uintptr_t)&prev_status.u32,
				status.u32,
				rte_memory_order_acquire,
				rte_memory_order_relaxed);
	}

	return 0;
}

/*
 * Return a skiplist level for a new entry.
 * This probabilistically gives a level with p=1/4 that an entry at level n
 * will also appear at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
	 * bit position of a (pseudo)random number.
	 */
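	/* Worked example of the mapping below: bit 0 of the random value is
	 * cleared by the mask, so its lowest set bit is at position b >= 1
	 * and level = (b - 1) / 2 (integer division). Level 0 corresponds to
	 * b in {1, 2} (probability 3/4), level 1 to b in {3, 4}
	 * (probability 3/16), and so on; each level is 1/4 as likely as the
	 * one below, which is the p = 1/4 stated above.
	 */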
	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;

	/* limit the levels used to one above our current level, so we don't,
	 * for instance, have a level 0 and a level 7 without anything between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH-1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
	count++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
	return level;
}

/*
 * For a given time value, get the entries at each level which
 * are <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		       struct rte_timer **prev, struct priv_timer *priv_timer)
{
	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
	prev[lvl] = &priv_timer[tim_lcore].pending_head;
	while (lvl != 0) {
		lvl--;
		prev[lvl] = prev[lvl+1];
		while (prev[lvl]->sl_next[lvl] &&
		       prev[lvl]->sl_next[lvl]->expire <= time_val)
			prev[lvl] = prev[lvl]->sl_next[lvl];
	}
}

/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
				struct rte_timer **prev,
				struct priv_timer *priv_timer)
{
	int i;

	/* to get a specific entry in the list, look for just lower than the time
	 * values, and then increment on each level individually if necessary
	 */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
		while (prev[i]->sl_next[i] != NULL &&
		       prev[i]->sl_next[i] != tim &&
		       prev[i]->sl_next[i]->expire <= tim->expire)
			prev[i] = prev[i]->sl_next[i];
	}
}

/* call with lock held as necessary
 * add in list
 * timer must be in config state
 * timer must not be in a list
 */
static void
timer_add(struct rte_timer *tim, unsigned int tim_lcore,
	  struct priv_timer *priv_timer)
{
	unsigned lvl;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* find where exactly this element goes in the list of elements
	 * for each depth. */
	timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);

	/* now assign it a new level and add at that level */
	const unsigned tim_level = timer_get_skiplist_level(
			priv_timer[tim_lcore].curr_skiplist_depth);
	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
		priv_timer[tim_lcore].curr_skiplist_depth++;

	lvl = tim_level;
	while (lvl > 0) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
		lvl--;
	}
	tim->sl_next[0] = prev[0]->sl_next[0];
	prev[0]->sl_next[0] = tim;

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	priv_timer[tim_lcore].pending_head.expire =
			priv_timer[tim_lcore].pending_head.sl_next[0]->expire;
}

/*
 * del from list, lock if needed
 * timer must be in config state
 * timer must be in a list
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
	  int local_is_locked, struct priv_timer *priv_timer)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned prev_owner = prev_status.owner;
	int i;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if the timer is pending on another core, we need to lock the
	 * list; if it is on the local core, we need to lock if we are not
	 * called from rte_timer_manage() */
	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
		priv_timer[prev_owner].pending_head.expire =
			((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);

	/* adjust pointers from previous entries to point past this */
	timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i]->sl_next[i] == tim)
			prev[i]->sl_next[i] = tim->sl_next[i];
	}

	/* in case we deleted last entry at a level, adjust down max level */
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
			priv_timer[prev_owner].curr_skiplist_depth--;
		else
			break;

	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}

/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked,
		  struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	int ret;
	unsigned lcore_id = rte_lcore_id();
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (lcore_id < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
				priv_timer[lcore_id].prev_lcore,
				0, 1);
			priv_timer[lcore_id].prev_lcore = tim_lcore;
		} else
			/* non-EAL threads do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore. */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
	}

	/* wait until the timer is in a correct state before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, reset, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	/* if the timer needs to be scheduled on another core, we need to
	 * lock the destination list; if it is on the local core, we need to
	 * lock if we are not called from rte_timer_manage()
	 */
	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	__TIMER_STAT_ADD(priv_timer, pending, 1);
	timer_add(tim, tim_lcore, priv_timer);

	/* update state: as we are in CONFIG state, only we can modify
	 * the state, so we don't need to use cmpset() here */
	status.state = RTE_TIMER_PENDING;
	status.owner = (int16_t)tim_lcore;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	rte_atomic_store_explicit(&tim->status.u32, status.u32, rte_memory_order_release);

	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);

	return 0;
}

/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
		enum rte_timer_type type, unsigned int tim_lcore,
		rte_timer_cb_t fct, void *arg)
{
	return rte_timer_alt_reset(default_data_id, tim, ticks, type,
				   tim_lcore, fct, arg);
}
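
/* Illustrative usage sketch (comment only): re-arm a timer as a periodic
 * timer firing roughly every 500 ms on lcore 1, assuming a hypothetical
 * callback my_cb(). The tick count is derived from the timer frequency
 * reported by rte_get_timer_hz().
 *
 *	rte_timer_reset(&my_tim, rte_get_timer_hz() / 2, PERIODICAL,
 *			1, my_cb, NULL);
 */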

int
rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
		    uint64_t ticks, enum rte_timer_type type,
		    unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
{
	uint64_t cur_time = rte_get_timer_cycles();
	uint64_t period;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	if (type == PERIODICAL)
		period = ticks;
	else
		period = 0;

	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
				 fct, arg, 0, timer_data);
}

/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		     enum rte_timer_type type, unsigned tim_lcore,
		     rte_timer_cb_t fct, void *arg)
{
	while (rte_timer_reset(tim, ticks, type, tim_lcore,
			       fct, arg) != 0)
		rte_pause();
}

static int
__rte_timer_stop(struct rte_timer *tim,
		 struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* wait until the timer is in a correct state before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, stop, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, 0, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	/* mark timer as stopped */
	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	rte_atomic_store_explicit(&tim->status.u32, status.u32, rte_memory_order_release);

	return 0;
}

/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
	return rte_timer_alt_stop(default_data_id, tim);
}

int
rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	return __rte_timer_stop(tim, timer_data);
}

/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	while (rte_timer_stop(tim) != 0)
		rte_pause();
}

/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
	return rte_atomic_load_explicit(&tim->status.state,
			rte_memory_order_relaxed) == RTE_TIMER_PENDING;
}

/* must be called periodically, run all timers that have expired */
static void
__rte_timer_manage(struct rte_timer_data *timer_data)
{
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim;
	struct rte_timer *run_first_tim, **pprev;
	unsigned lcore_id = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(lcore_id < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(priv_timer, manage, 1);
	/* optimize for the case where per-cpu list is empty */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
		return;
	cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
	/* on 64-bit the value cached in pending_head.expire will be
	 * updated atomically, so we can consult that for a quick check here
	 * outside the lock */
	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
		return;
#endif

	/* browse ordered list, add expired timers in 'expired' list */
	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

	/* if nothing to do just unlock and return */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		return;
	}

	/* save start of list of expired timers */
	tim = priv_timer[lcore_id].pending_head.sl_next[0];

	/* break the existing list at current time point */
	timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
	for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i] == &priv_timer[lcore_id].pending_head)
			continue;
		priv_timer[lcore_id].pending_head.sl_next[i] =
			prev[i]->sl_next[i];
		if (prev[i]->sl_next[i] == NULL)
			priv_timer[lcore_id].curr_skiplist_depth--;
		prev[i]->sl_next[i] = NULL;
	}

	/* transition run-list from PENDING to RUNNING */
	run_first_tim = tim;
	pprev = &run_first_tim;

	for ( ; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];

		ret = timer_set_running_state(tim);
		if (likely(ret == 0)) {
			pprev = &tim->sl_next[0];
		} else {
			/* another core is trying to re-config this one,
			 * remove it from local expired list
			 */
			*pprev = next_tim;
		}
	}

	/* update the next to expire timer value */
	priv_timer[lcore_id].pending_head.expire =
		(priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
			priv_timer[lcore_id].pending_head.sl_next[0]->expire;

	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	/* now scan expired list and call callbacks */
	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];
		priv_timer[lcore_id].updated = 0;
		priv_timer[lcore_id].running_tim = tim;

		/* execute callback function with list unlocked */
		tim->f(tim, tim->arg);

		__TIMER_STAT_ADD(priv_timer, pending, -1);
		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here */
		if (priv_timer[lcore_id].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(priv_timer, pending, 1);
			status.owner = (int16_t)lcore_id;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, lcore_id, tim->f, tim->arg, 1,
				timer_data);
			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		}
	}
	priv_timer[lcore_id].running_tim = NULL;
}

int
rte_timer_manage(void)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	__rte_timer_manage(timer_data);

	return 0;
}
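
/* Illustrative polling-loop sketch (comment only): call rte_timer_manage()
 * from each lcore's main loop at a reduced rate rather than on every
 * iteration. The ~10 us resolution and the loop/worker names (quit,
 * do_lcore_work) are assumptions, not requirements of the library.
 *
 *	uint64_t prev_tsc = 0, cur_tsc, diff_tsc;
 *	const uint64_t resolution = rte_get_timer_hz() / 100000;
 *
 *	while (!quit) {
 *		do_lcore_work();
 *		cur_tsc = rte_get_timer_cycles();
 *		diff_tsc = cur_tsc - prev_tsc;
 *		if (diff_tsc > resolution) {
 *			rte_timer_manage();
 *			prev_tsc = cur_tsc;
 *		}
 *	}
 */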

int
rte_timer_alt_manage(uint32_t timer_data_id,
		     unsigned int *poll_lcores,
		     int nb_poll_lcores,
		     rte_timer_alt_manage_cb_t f)
{
	unsigned int default_poll_lcores[] = {rte_lcore_id()};
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim, **pprev;
	struct rte_timer *run_first_tims[RTE_MAX_LCORE];
	unsigned int this_lcore = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, j, ret;
	int nb_runlists = 0;
	struct rte_timer_data *data;
	struct priv_timer *privp;
	uint32_t poll_lcore;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(this_lcore < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(data->priv_timer, manage, 1);

	if (poll_lcores == NULL) {
		poll_lcores = default_poll_lcores;
		nb_poll_lcores = RTE_DIM(default_poll_lcores);
	}

	for (i = 0; i < nb_poll_lcores; i++) {
		poll_lcore = poll_lcores[i];
		privp = &data->priv_timer[poll_lcore];

		/* optimize for the case where per-cpu list is empty */
		if (privp->pending_head.sl_next[0] == NULL)
			continue;
		cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
		/* on 64-bit the value cached in pending_head.expire will
		 * be updated atomically, so we can consult that for a quick
		 * check here outside the lock
		 */
		if (likely(privp->pending_head.expire > cur_time))
			continue;
#endif

		/* browse ordered list, add expired timers in 'expired' list */
		rte_spinlock_lock(&privp->list_lock);

		/* if nothing to do just unlock and continue */
		if (privp->pending_head.sl_next[0] == NULL ||
		    privp->pending_head.sl_next[0]->expire > cur_time) {
			rte_spinlock_unlock(&privp->list_lock);
			continue;
		}

		/* save start of list of expired timers */
		tim = privp->pending_head.sl_next[0];

		/* break the existing list at current time point */
		timer_get_prev_entries(cur_time, poll_lcore, prev,
				       data->priv_timer);
		for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
			if (prev[j] == &privp->pending_head)
				continue;
			privp->pending_head.sl_next[j] =
				prev[j]->sl_next[j];
			if (prev[j]->sl_next[j] == NULL)
				privp->curr_skiplist_depth--;

			prev[j]->sl_next[j] = NULL;
		}

		/* transition run-list from PENDING to RUNNING */
		run_first_tims[nb_runlists] = tim;
		pprev = &run_first_tims[nb_runlists];
		nb_runlists++;

		for ( ; tim != NULL; tim = next_tim) {
			next_tim = tim->sl_next[0];

			ret = timer_set_running_state(tim);
			if (likely(ret == 0)) {
				pprev = &tim->sl_next[0];
			} else {
				/* another core is trying to re-config this
				 * one, remove it from local expired list
				 */
				*pprev = next_tim;
			}
		}

		/* update the next to expire timer value */
		privp->pending_head.expire =
			(privp->pending_head.sl_next[0] == NULL) ? 0 :
				privp->pending_head.sl_next[0]->expire;

		rte_spinlock_unlock(&privp->list_lock);
	}

	/* Now process the run lists */
	while (1) {
		bool done = true;
		uint64_t min_expire = UINT64_MAX;
		int min_idx = 0;

		/* Find the next oldest timer to process */
		for (i = 0; i < nb_runlists; i++) {
			tim = run_first_tims[i];

			if (tim != NULL && tim->expire < min_expire) {
				min_expire = tim->expire;
				min_idx = i;
				done = false;
			}
		}

		if (done)
			break;

		tim = run_first_tims[min_idx];

		/* Move down the runlist from which we picked a timer to
		 * execute
		 */
		run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];

		data->priv_timer[this_lcore].updated = 0;
		data->priv_timer[this_lcore].running_tim = tim;

		/* Call the provided callback function */
		f(tim);

		__TIMER_STAT_ADD(data->priv_timer, pending, -1);

		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here
		 */
		if (data->priv_timer[this_lcore].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(
				&data->priv_timer[this_lcore].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(data->priv_timer, pending, 1);
			status.owner = (int16_t)this_lcore;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, this_lcore, tim->f, tim->arg, 1,
				data);
			rte_spinlock_unlock(
				&data->priv_timer[this_lcore].list_lock);
		}

		data->priv_timer[this_lcore].running_tim = NULL;
	}

	return 0;
}
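
/* Illustrative sketch (comment only): service the pending lists of several
 * lcores from one dedicated lcore. The timer-data id, the lcore set and
 * expiry_cb() are assumptions for the example; expiry_cb() simply invokes
 * the callback stored in the timer, mimicking what rte_timer_manage() does.
 *
 *	static void expiry_cb(struct rte_timer *tim)
 *	{
 *		tim->f(tim, tim->arg);
 *	}
 *
 *	unsigned int lcores[] = { 1, 2, 3 };
 *
 *	rte_timer_alt_manage(id, lcores, RTE_DIM(lcores), expiry_cb);
 */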

/* Walk pending lists, stopping timers and calling user-specified function */
int
rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
		   int nb_walk_lcores,
		   rte_timer_stop_all_cb_t f, void *f_arg)
{
	int i;
	struct priv_timer *priv_timer;
	uint32_t walk_lcore;
	struct rte_timer *tim, *next_tim;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	for (i = 0; i < nb_walk_lcores; i++) {
		walk_lcore = walk_lcores[i];
		priv_timer = &timer_data->priv_timer[walk_lcore];

		for (tim = priv_timer->pending_head.sl_next[0];
		     tim != NULL;
		     tim = next_tim) {
			next_tim = tim->sl_next[0];

			__rte_timer_stop(tim, timer_data);

			if (f)
				f(tim, f_arg);
		}
	}

	return 0;
}
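
/* Illustrative sketch (comment only): stop every timer pending on the
 * calling lcore at shutdown. The timer-data id and free_cb() (for example
 * releasing per-timer state) are assumptions for the example.
 *
 *	unsigned int lcore = rte_lcore_id();
 *
 *	rte_timer_stop_all(id, &lcore, 1, free_cb, NULL);
 */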

int64_t
rte_timer_next_ticks(void)
{
	unsigned int lcore_id = rte_lcore_id();
	struct rte_timer_data *timer_data;
	struct priv_timer *priv_timer;
	const struct rte_timer *tm;
	uint64_t cur_time;
	int64_t left = -ENOENT;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	priv_timer = timer_data->priv_timer;
	cur_time = rte_get_timer_cycles();

	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
	tm = priv_timer[lcore_id].pending_head.sl_next[0];
	if (tm) {
		left = tm->expire - cur_time;
		if (left < 0)
			left = 0;
	}
	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	return left;
}
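
/* Illustrative sketch (comment only): use rte_timer_next_ticks() to decide
 * whether the current lcore has timer work already due; the surrounding
 * polling loop is an assumption for the example. A negative return value
 * means no timer is pending on this lcore.
 *
 *	int64_t ticks = rte_timer_next_ticks();
 *
 *	if (ticks == 0)
 *		rte_timer_manage();
 */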

/* dump statistics about timers */
static void
__rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats sum;
	unsigned lcore_id;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.reset += priv_timer[lcore_id].stats.reset;
		sum.stop += priv_timer[lcore_id].stats.stop;
		sum.manage += priv_timer[lcore_id].stats.manage;
		sum.pending += priv_timer[lcore_id].stats.pending;
	}
	fprintf(f, "Timer statistics:\n");
	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}

int
rte_timer_dump_stats(FILE *f)
{
	return rte_timer_alt_dump_stats(default_data_id, f);
}

int
rte_timer_alt_dump_stats(uint32_t timer_data_id __rte_unused, FILE *f)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	__rte_timer_dump_stats(timer_data, f);

	return 0;
}