/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <assert.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal_memconfig.h>
#include <rte_memory.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>
#include <rte_memzone.h>

#include "rte_timer.h"

/**
 * Per-lcore info for timers.
 */
struct __rte_cache_aligned priv_timer {
	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
	rte_spinlock_t list_lock;       /**< lock to protect list access */

	/** per-core variable that is true if a timer was updated on this
	 *  core since the last reset of the variable */
	int updated;

	/** track the current depth of the skiplist */
	unsigned curr_skiplist_depth;

	unsigned prev_lcore;              /**< used for lcore round robin */

	/** running timer on this lcore now */
	struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	/** per-lcore statistics */
	struct rte_timer_debug_stats stats;
#endif
};

#define FL_ALLOCATED	(1 << 0)
struct rte_timer_data {
	struct priv_timer priv_timer[RTE_MAX_LCORE];
	uint8_t internal_flags;
};

#define RTE_MAX_DATA_ELS 64
static const struct rte_memzone *rte_timer_data_mz;
static int *volatile rte_timer_mz_refcnt;
static struct rte_timer_data *rte_timer_data_arr;
static const uint32_t default_data_id;
static uint32_t rte_timer_subsystem_initialized;

/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(priv_timer, name, n) do {			\
		unsigned __lcore_id = rte_lcore_id();			\
		if (__lcore_id < RTE_MAX_LCORE)				\
			priv_timer[__lcore_id].stats.name += (n);	\
	} while(0)
#else
#define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
#endif

static inline int
timer_data_valid(uint32_t id)
{
	return rte_timer_data_arr &&
		(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
}

/* validate ID and retrieve timer data pointer, or return error value */
#define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {	\
	if (id >= RTE_MAX_DATA_ELS || !timer_data_valid(id))		\
		return retval;						\
	timer_data = &rte_timer_data_arr[id];				\
} while (0)

int
rte_timer_data_alloc(uint32_t *id_ptr)
{
	int i;
	struct rte_timer_data *data;

	if (!rte_timer_subsystem_initialized)
		return -ENOMEM;

	for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
		data = &rte_timer_data_arr[i];
		if (!(data->internal_flags & FL_ALLOCATED)) {
			data->internal_flags |= FL_ALLOCATED;

			if (id_ptr)
				*id_ptr = i;

			return 0;
		}
	}

	return -ENOSPC;
}

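/*
 * Usage sketch (illustrative only, not part of the library): an application
 * that wants a timer list separate from the default one can allocate its own
 * timer data instance and pass the returned id to the rte_timer_alt_*() APIs.
 *
 *	uint32_t my_timer_data_id;	// name chosen for illustration
 *
 *	if (rte_timer_data_alloc(&my_timer_data_id) < 0)
 *		rte_exit(EXIT_FAILURE, "no free timer data slot\n");
 *	...
 *	rte_timer_data_dealloc(my_timer_data_id);
 */
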
int
rte_timer_data_dealloc(uint32_t id)
{
	struct rte_timer_data *timer_data;
	TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);

	timer_data->internal_flags &= ~(FL_ALLOCATED);

	return 0;
}

/* Init the timer library. Allocate an array of timer data structs in shared
 * memory, and allocate the zeroth entry for use with original timer
 * APIs. Since the intersection of the sets of lcore ids in primary and
 * secondary processes should be empty, the zeroth entry can be shared by
 * multiple processes.
 */
int
rte_timer_subsystem_init(void)
{
	const struct rte_memzone *mz;
	struct rte_timer_data *data;
	int i, lcore_id;
	static const char *mz_name = "rte_timer_mz";
	const size_t data_arr_size =
			RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
	const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt);
	bool do_full_init = true;

	rte_mcfg_timer_lock();

	if (rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return -EALREADY;
	}

	mz = rte_memzone_lookup(mz_name);
	if (mz == NULL) {
		mz = rte_memzone_reserve_aligned(mz_name, mem_size,
				SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
		if (mz == NULL) {
			rte_mcfg_timer_unlock();
			return -ENOMEM;
		}
		do_full_init = true;
	} else
		do_full_init = false;

	rte_timer_data_mz = mz;
	rte_timer_data_arr = mz->addr;
	rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);

	if (do_full_init) {
		for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
			data = &rte_timer_data_arr[i];

			for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
			     lcore_id++) {
				rte_spinlock_init(
					&data->priv_timer[lcore_id].list_lock);
				data->priv_timer[lcore_id].prev_lcore =
					lcore_id;
			}
		}
	}

	rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
	(*rte_timer_mz_refcnt)++;

	rte_timer_subsystem_initialized = 1;

	rte_mcfg_timer_unlock();

	return 0;
}

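/*
 * Usage sketch (illustrative only): the subsystem is initialized once per
 * process after rte_eal_init() and torn down before the process exits.
 *
 *	int ret = rte_timer_subsystem_init();
 *	if (ret < 0 && ret != -EALREADY)
 *		rte_exit(EXIT_FAILURE, "cannot init timer subsystem\n");
 *	...
 *	rte_timer_subsystem_finalize();
 */
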
void
rte_timer_subsystem_finalize(void)
{
	rte_mcfg_timer_lock();

	if (!rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return;
	}

	if (--(*rte_timer_mz_refcnt) == 0)
		rte_memzone_free(rte_timer_data_mz);

	rte_timer_subsystem_initialized = 0;

	rte_mcfg_timer_unlock();
}

/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
	union rte_timer_status status;

	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	rte_atomic_store_explicit(&tim->status.u32, status.u32, rte_memory_order_relaxed);
}

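/*
 * Usage sketch (illustrative only): a timer handle is typically embedded in
 * an application structure and must be initialized once before it is first
 * armed with rte_timer_reset(). The struct and field names are hypothetical.
 *
 *	struct my_flow {
 *		struct rte_timer expiry_tim;
 *		...
 *	};
 *
 *	rte_timer_init(&flow->expiry_tim);
 */
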
/*
 * if timer is pending or stopped (or running on the same core as
 * us), mark timer as configuring, and on success return the previous
 * status of the timer
 */
static int
timer_set_config_state(struct rte_timer *tim,
		       union rte_timer_status *ret_prev_status,
		       struct priv_timer *priv_timer)
{
	union rte_timer_status prev_status, status;
	int success = 0;
	unsigned lcore_id;

	lcore_id = rte_lcore_id();

	/* wait until the timer is in a valid state before updating,
	 * and mark it as being configured */
	prev_status.u32 = rte_atomic_load_explicit(&tim->status.u32, rte_memory_order_relaxed);

	while (success == 0) {
		/* timer is running on another core
		 * or ready to run on local core, exit
		 */
		if (prev_status.state == RTE_TIMER_RUNNING &&
		    (prev_status.owner != (uint16_t)lcore_id ||
		     tim != priv_timer[lcore_id].running_tim))
			return -1;

		/* timer is being configured on another core */
		if (prev_status.state == RTE_TIMER_CONFIG)
			return -1;

		/* here, we know that the timer is stopped or pending,
		 * mark it atomically as being configured */
		status.state = RTE_TIMER_CONFIG;
		status.owner = (int16_t)lcore_id;
		/* CONFIG states are acting as locked states. If the
		 * timer is in CONFIG state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = rte_atomic_compare_exchange_strong_explicit(&tim->status.u32,
					      (uint32_t *)(uintptr_t)&prev_status.u32,
					      status.u32,
					      rte_memory_order_acquire,
					      rte_memory_order_relaxed);
	}

	ret_prev_status->u32 = prev_status.u32;
	return 0;
}

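/*
 * Summary of the per-timer state machine driven by timer_set_config_state()
 * above and timer_set_running_state() below (descriptive comment derived
 * from the code in this file):
 *
 *	STOP/PENDING -> CONFIG   in the reset/stop paths
 *	RUNNING      -> CONFIG   only on the owning core, i.e. when a callback
 *	                         reconfigures its own timer
 *	PENDING      -> RUNNING  in the manage paths
 *	CONFIG       -> PENDING or STOP, by the core that set CONFIG
 *	RUNNING      -> PENDING or STOP, by the core running the callback
 *
 * CONFIG and RUNNING behave as per-timer locks: only the owning core can move
 * the timer out of them, which is why the compare-exchanges below use acquire
 * ordering and the stores that leave these states use release ordering.
 */
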
/*
 * if timer is pending, mark timer as running
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int success = 0;

	/* wait until the timer is in a valid state before updating,
	 * and mark it as running */
	prev_status.u32 = rte_atomic_load_explicit(&tim->status.u32, rte_memory_order_relaxed);

	while (success == 0) {
		/* timer is not pending anymore */
		if (prev_status.state != RTE_TIMER_PENDING)
			return -1;

		/* we know that the timer is pending at this point,
		 * mark it atomically as being running
		 */
		status.state = RTE_TIMER_RUNNING;
		status.owner = (int16_t)lcore_id;
		/* RUNNING states are acting as locked states. If the
		 * timer is in RUNNING state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = rte_atomic_compare_exchange_strong_explicit(&tim->status.u32,
					      (uint32_t *)(uintptr_t)&prev_status.u32,
					      status.u32,
					      rte_memory_order_acquire,
					      rte_memory_order_relaxed);
	}

	return 0;
}

/*
 * Return a skiplist level for a new entry.
 * This probabilistically gives a level with p=1/4 that an entry at level n
 * will also appear at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
	 * bit position of a (pseudo)random number.
	 */
	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;

	/* limit the levels used to one above our current level, so we don't,
	 * for instance, have a level 0 and a level 7 without anything between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH-1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
	count++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
	return level;
}

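/*
 * Worked example for the level computation above (descriptive comment, not
 * library code): the random value is masked with UINT32_MAX - 1 so bit 0 is
 * always clear, hence rte_bsf32() returns at least 1 for a non-zero value.
 * If the lowest set bit sits at zero-based position b, the level is (b - 1) / 2:
 *
 *	...0010 -> b = 1 -> level = 0    (probability 1/2)
 *	...0100 -> b = 2 -> level = 0    (probability 1/4)
 *	...1000 -> b = 3 -> level = 1    (probability 1/8)
 *	..10000 -> b = 4 -> level = 1    (probability 1/16)
 *
 * Each further level is therefore a factor of 4 less likely, giving the
 * intended p = 1/4 promotion probability.
 */
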
/*
 * For a given time value, get the entries at each level which
 * are <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		       struct rte_timer **prev, struct priv_timer *priv_timer)
{
	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
	prev[lvl] = &priv_timer[tim_lcore].pending_head;
	while (lvl != 0) {
		lvl--;
		prev[lvl] = prev[lvl+1];
		while (prev[lvl]->sl_next[lvl] &&
				prev[lvl]->sl_next[lvl]->expire <= time_val)
			prev[lvl] = prev[lvl]->sl_next[lvl];
	}
}

/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
				struct rte_timer **prev,
				struct priv_timer *priv_timer)
{
	int i;

	/* to get a specific entry in the list, look for entries with just
	 * lower time values, and then increment on each level individually
	 * if necessary
	 */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
		while (prev[i]->sl_next[i] != NULL &&
				prev[i]->sl_next[i] != tim &&
				prev[i]->sl_next[i]->expire <= tim->expire)
			prev[i] = prev[i]->sl_next[i];
	}
}

/* call with lock held as necessary
 * add in list
 * timer must be in config state
 * timer must not be in a list
 */
static void
timer_add(struct rte_timer *tim, unsigned int tim_lcore,
	  struct priv_timer *priv_timer)
{
	unsigned lvl;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* find where exactly this element goes in the list of elements
	 * for each depth. */
	timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);

	/* now assign it a new level and add at that level */
	const unsigned tim_level = timer_get_skiplist_level(
			priv_timer[tim_lcore].curr_skiplist_depth);
	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
		priv_timer[tim_lcore].curr_skiplist_depth++;

	lvl = tim_level;
	while (lvl > 0) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
		lvl--;
	}
	tim->sl_next[0] = prev[0]->sl_next[0];
	prev[0]->sl_next[0] = tim;

	/* save the lowest list entry into the expire field of the dummy hdr
	 * NOTE: this is not atomic on 32-bit */
	priv_timer[tim_lcore].pending_head.expire =
			priv_timer[tim_lcore].pending_head.sl_next[0]->expire;
}

/*
 * del from list, lock if needed
 * timer must be in config state
 * timer must be in a list
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
	  int local_is_locked, struct priv_timer *priv_timer)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned prev_owner = prev_status.owner;
	int i;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if the timer is pending on another core, we need to lock that
	 * core's list; if it is on the local core, we need to lock unless we
	 * are called from rte_timer_manage() */
	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
		priv_timer[prev_owner].pending_head.expire =
				((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);

	/* adjust pointers from previous entries to point past this */
	timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i]->sl_next[i] == tim)
			prev[i]->sl_next[i] = tim->sl_next[i];
	}

	/* in case we deleted the last entry at a level, adjust down max level */
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
			priv_timer[prev_owner].curr_skiplist_depth--;
		else
			break;

	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}

/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked,
		  struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	int ret;
	unsigned lcore_id = rte_lcore_id();
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (lcore_id < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
				priv_timer[lcore_id].prev_lcore,
				0, 1);
			priv_timer[lcore_id].prev_lcore = tim_lcore;
		} else
			/* non-EAL threads do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore. */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
	}

	/* wait until the timer is in a valid state before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, reset, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	/* if the timer needs to be scheduled on another core, we need to
	 * lock the destination list; if it is on the local core, we need to
	 * lock unless we are called from rte_timer_manage()
	 */
	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	__TIMER_STAT_ADD(priv_timer, pending, 1);
	timer_add(tim, tim_lcore, priv_timer);

	/* update state: as we are in CONFIG state, only we can modify
	 * the state, so we don't need to use cmpset() here */
	status.state = RTE_TIMER_PENDING;
	status.owner = (int16_t)tim_lcore;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	rte_atomic_store_explicit(&tim->status.u32, status.u32, rte_memory_order_release);

	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);

	return 0;
}

/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
		      enum rte_timer_type type, unsigned int tim_lcore,
		      rte_timer_cb_t fct, void *arg)
{
	return rte_timer_alt_reset(default_data_id, tim, ticks, type,
				   tim_lcore, fct, arg);
}

int
rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
		    uint64_t ticks, enum rte_timer_type type,
		    unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
{
	uint64_t cur_time = rte_get_timer_cycles();
	uint64_t period;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	if (type == PERIODICAL)
		period = ticks;
	else
		period = 0;

	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
				 fct, arg, 0, timer_data);
}

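/*
 * Usage sketch (illustrative only): arm a periodic one-second timer on the
 * calling lcore. The callback name and its body are hypothetical.
 *
 *	static void
 *	heartbeat_cb(struct rte_timer *tim __rte_unused, void *arg __rte_unused)
 *	{
 *		printf("tick on lcore %u\n", rte_lcore_id());
 *	}
 *
 *	struct rte_timer hb_tim;
 *
 *	rte_timer_init(&hb_tim);
 *	rte_timer_reset(&hb_tim, rte_get_timer_hz(), PERIODICAL,
 *			rte_lcore_id(), heartbeat_cb, NULL);
 */
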
/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		     enum rte_timer_type type, unsigned tim_lcore,
		     rte_timer_cb_t fct, void *arg)
{
	while (rte_timer_reset(tim, ticks, type, tim_lcore,
			       fct, arg) != 0)
		rte_pause();
}

static int
__rte_timer_stop(struct rte_timer *tim,
		 struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* wait until the timer is in a valid state before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, stop, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, 0, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	/* mark timer as stopped */
	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	rte_atomic_store_explicit(&tim->status.u32, status.u32, rte_memory_order_release);

	return 0;
}

/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
	return rte_timer_alt_stop(default_data_id, tim);
}

int
rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	return __rte_timer_stop(tim, timer_data);
}

/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	while (rte_timer_stop(tim) != 0)
		rte_pause();
}

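/*
 * Usage sketch (illustrative only): stop a timer from a control thread and
 * only then release the memory that backs it. rte_timer_stop() fails while
 * the callback is running on another lcore, so the _sync variant spins until
 * the timer is safely out of the RUNNING state.
 *
 *	rte_timer_stop_sync(&flow->expiry_tim);	// flow is hypothetical
 *	rte_free(flow);
 */
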
/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
	return rte_atomic_load_explicit(&tim->status.state,
				rte_memory_order_relaxed) == RTE_TIMER_PENDING;
}

/* must be called periodically, run all timers that have expired */
static void
__rte_timer_manage(struct rte_timer_data *timer_data)
{
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim;
	struct rte_timer *run_first_tim, **pprev;
	unsigned lcore_id = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(lcore_id < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(priv_timer, manage, 1);
	/* optimize for the case where per-cpu list is empty */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
		return;
	cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
	/* on 64-bit the value cached in pending_head.expire will be
	 * updated atomically, so we can consult that for a quick check here
	 * outside the lock */
	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
		return;
#endif

	/* browse ordered list, add expired timers in 'expired' list */
	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

	/* if nothing to do just unlock and return */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		return;
	}

	/* save start of list of expired timers */
	tim = priv_timer[lcore_id].pending_head.sl_next[0];

	/* break the existing list at current time point */
	timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
	for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i] == &priv_timer[lcore_id].pending_head)
			continue;
		priv_timer[lcore_id].pending_head.sl_next[i] =
		    prev[i]->sl_next[i];
		if (prev[i]->sl_next[i] == NULL)
			priv_timer[lcore_id].curr_skiplist_depth--;
		prev[i]->sl_next[i] = NULL;
	}

	/* transition run-list from PENDING to RUNNING */
	run_first_tim = tim;
	pprev = &run_first_tim;

	for ( ; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];

		ret = timer_set_running_state(tim);
		if (likely(ret == 0)) {
			pprev = &tim->sl_next[0];
		} else {
			/* another core is trying to re-config this one,
			 * remove it from local expired list
			 */
			*pprev = next_tim;
		}
	}

	/* update the next to expire timer value */
	priv_timer[lcore_id].pending_head.expire =
	    (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
		priv_timer[lcore_id].pending_head.sl_next[0]->expire;

	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	/* now scan expired list and call callbacks */
	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];
		priv_timer[lcore_id].updated = 0;
		priv_timer[lcore_id].running_tim = tim;

		/* execute callback function with list unlocked */
		tim->f(tim, tim->arg);

		__TIMER_STAT_ADD(priv_timer, pending, -1);
		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here */
		if (priv_timer[lcore_id].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(priv_timer, pending, 1);
			status.owner = (int16_t)lcore_id;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, lcore_id, tim->f, tim->arg, 1,
				timer_data);
			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		}
	}
	priv_timer[lcore_id].running_tim = NULL;
}

int
rte_timer_manage(void)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	__rte_timer_manage(timer_data);

	return 0;
}

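/*
 * Usage sketch (illustrative only): each lcore that owns timers services them
 * from its main processing loop. TIMER_RESOLUTION_CYCLES and force_quit are
 * hypothetical application-defined names; the constant throttles how often
 * the relatively expensive manage step runs.
 *
 *	uint64_t prev_tsc = 0, cur_tsc, diff_tsc;
 *
 *	while (!force_quit) {
 *		// ... poll RX queues, process packets ...
 *
 *		cur_tsc = rte_rdtsc();
 *		diff_tsc = cur_tsc - prev_tsc;
 *		if (diff_tsc > TIMER_RESOLUTION_CYCLES) {
 *			rte_timer_manage();
 *			prev_tsc = cur_tsc;
 *		}
 *	}
 */
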
int
rte_timer_alt_manage(uint32_t timer_data_id,
		     unsigned int *poll_lcores,
		     int nb_poll_lcores,
		     rte_timer_alt_manage_cb_t f)
{
	unsigned int default_poll_lcores[] = {rte_lcore_id()};
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim, **pprev;
	struct rte_timer *run_first_tims[RTE_MAX_LCORE];
	unsigned int this_lcore = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, j, ret;
	int nb_runlists = 0;
	struct rte_timer_data *data;
	struct priv_timer *privp;
	uint32_t poll_lcore;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(this_lcore < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(data->priv_timer, manage, 1);

	if (poll_lcores == NULL) {
		poll_lcores = default_poll_lcores;
		nb_poll_lcores = RTE_DIM(default_poll_lcores);
	}

	for (i = 0; i < nb_poll_lcores; i++) {
		poll_lcore = poll_lcores[i];
		privp = &data->priv_timer[poll_lcore];

		/* optimize for the case where per-cpu list is empty */
		if (privp->pending_head.sl_next[0] == NULL)
			continue;
		cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
		/* on 64-bit the value cached in pending_head.expire will
		 * be updated atomically, so we can consult that for a quick
		 * check here outside the lock
		 */
		if (likely(privp->pending_head.expire > cur_time))
			continue;
#endif

		/* browse ordered list, add expired timers in 'expired' list */
		rte_spinlock_lock(&privp->list_lock);

		/* if nothing to do just unlock and return */
		if (privp->pending_head.sl_next[0] == NULL ||
		    privp->pending_head.sl_next[0]->expire > cur_time) {
			rte_spinlock_unlock(&privp->list_lock);
			continue;
		}

		/* save start of list of expired timers */
		tim = privp->pending_head.sl_next[0];

		/* break the existing list at current time point */
		timer_get_prev_entries(cur_time, poll_lcore, prev,
				       data->priv_timer);
		for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
			if (prev[j] == &privp->pending_head)
				continue;
			privp->pending_head.sl_next[j] =
				prev[j]->sl_next[j];
			if (prev[j]->sl_next[j] == NULL)
				privp->curr_skiplist_depth--;

			prev[j]->sl_next[j] = NULL;
		}

		/* transition run-list from PENDING to RUNNING */
		run_first_tims[nb_runlists] = tim;
		pprev = &run_first_tims[nb_runlists];
		nb_runlists++;

		for ( ; tim != NULL; tim = next_tim) {
			next_tim = tim->sl_next[0];

			ret = timer_set_running_state(tim);
			if (likely(ret == 0)) {
				pprev = &tim->sl_next[0];
			} else {
				/* another core is trying to re-config this one,
				 * remove it from local expired list
				 */
				*pprev = next_tim;
			}
		}

		/* update the next to expire timer value */
		privp->pending_head.expire =
		    (privp->pending_head.sl_next[0] == NULL) ? 0 :
			privp->pending_head.sl_next[0]->expire;

		rte_spinlock_unlock(&privp->list_lock);
	}

	/* Now process the run lists */
	while (1) {
		bool done = true;
		uint64_t min_expire = UINT64_MAX;
		int min_idx = 0;

		/* Find the next oldest timer to process */
		for (i = 0; i < nb_runlists; i++) {
			tim = run_first_tims[i];

			if (tim != NULL && tim->expire < min_expire) {
				min_expire = tim->expire;
				min_idx = i;
				done = false;
			}
		}

		if (done)
			break;

		tim = run_first_tims[min_idx];

		/* Move down the runlist from which we picked a timer to
		 * execute
		 */
		run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];

		data->priv_timer[this_lcore].updated = 0;
		data->priv_timer[this_lcore].running_tim = tim;

		/* Call the provided callback function */
		f(tim);

		__TIMER_STAT_ADD(data->priv_timer, pending, -1);

		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here
		 */
		if (data->priv_timer[this_lcore].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(
				&data->priv_timer[this_lcore].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(data->priv_timer, pending, 1);
			status.owner = (int16_t)this_lcore;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			rte_atomic_store_explicit(&tim->status.u32, status.u32,
				rte_memory_order_release);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, this_lcore, tim->f, tim->arg, 1,
				data);
			rte_spinlock_unlock(
				&data->priv_timer[this_lcore].list_lock);
		}

		data->priv_timer[this_lcore].running_tim = NULL;
	}

	return 0;
}

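/*
 * Usage sketch (illustrative only): a dedicated service lcore can expire the
 * timer lists installed for several worker lcores in one call. The callback
 * name, the lcore array and my_timer_data_id (obtained earlier from
 * rte_timer_data_alloc()) are hypothetical.
 *
 *	static void
 *	expiry_cb(struct rte_timer *tim)
 *	{
 *		// runs on the servicing lcore, list locks not held
 *	}
 *
 *	unsigned int poll_lcores[] = {2, 3, 4};
 *
 *	rte_timer_alt_manage(my_timer_data_id, poll_lcores,
 *			     RTE_DIM(poll_lcores), expiry_cb);
 */
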
/* Walk pending lists, stopping timers and calling user-specified function */
int
rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
		   int nb_walk_lcores,
		   rte_timer_stop_all_cb_t f, void *f_arg)
{
	int i;
	struct priv_timer *priv_timer;
	uint32_t walk_lcore;
	struct rte_timer *tim, *next_tim;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	for (i = 0; i < nb_walk_lcores; i++) {
		walk_lcore = walk_lcores[i];
		priv_timer = &timer_data->priv_timer[walk_lcore];

		for (tim = priv_timer->pending_head.sl_next[0];
		     tim != NULL;
		     tim = next_tim) {
			next_tim = tim->sl_next[0];

			__rte_timer_stop(tim, timer_data);

			if (f)
				f(tim, f_arg);
		}
	}

	return 0;
}

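/*
 * Usage sketch (illustrative only): at shutdown, stop every timer still
 * pending on the worker lcores and let a hypothetical callback release the
 * per-timer application state.
 *
 *	static void
 *	release_cb(struct rte_timer *tim, void *arg __rte_unused)
 *	{
 *		rte_free(tim->arg);	// assumes arg was heap-allocated
 *	}
 *
 *	rte_timer_stop_all(my_timer_data_id, poll_lcores,
 *			   RTE_DIM(poll_lcores), release_cb, NULL);
 */
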
int64_t
rte_timer_next_ticks(void)
{
	unsigned int lcore_id = rte_lcore_id();
	struct rte_timer_data *timer_data;
	struct priv_timer *priv_timer;
	const struct rte_timer *tm;
	uint64_t cur_time;
	int64_t left = -ENOENT;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	priv_timer = timer_data->priv_timer;
	cur_time = rte_get_timer_cycles();

	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
	tm = priv_timer[lcore_id].pending_head.sl_next[0];
	if (tm) {
		left = tm->expire - cur_time;
		if (left < 0)
			left = 0;
	}
	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	return left;
}

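/*
 * Usage sketch (illustrative only): a power-aware main loop can use the time
 * to the next expiry to decide whether it is safe to pause briefly instead of
 * busy-polling.
 *
 *	int64_t ticks = rte_timer_next_ticks();
 *
 *	if (ticks < 0 || (uint64_t)ticks > rte_get_timer_hz() / 1000)
 *		rte_delay_us_sleep(100);	// nothing due within ~1 ms
 *	rte_timer_manage();
 */
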
/* dump statistics about timers */
static void
__rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats sum;
	unsigned lcore_id;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.reset += priv_timer[lcore_id].stats.reset;
		sum.stop += priv_timer[lcore_id].stats.stop;
		sum.manage += priv_timer[lcore_id].stats.manage;
		sum.pending += priv_timer[lcore_id].stats.pending;
	}
	fprintf(f, "Timer statistics:\n");
	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}

int
rte_timer_dump_stats(FILE *f)
{
	return rte_timer_alt_dump_stats(default_data_id, f);
}

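/*
 * Usage sketch (illustrative only): statistics are typically dumped from a
 * debug CLI or signal handler; with RTE_LIBRTE_TIMER_DEBUG disabled this
 * just prints a notice.
 *
 *	rte_timer_dump_stats(stdout);
 */
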
int
rte_timer_alt_dump_stats(uint32_t timer_data_id __rte_unused, FILE *f)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	__rte_timer_dump_stats(timer_data, f);

	return 0;
}