xref: /dpdk/lib/stack/rte_stack_lf_generic.h (revision b6a19ec1400c750288cc643a64eabd41b9342af8)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#ifndef _RTE_STACK_LF_GENERIC_H_
#define _RTE_STACK_LF_GENERIC_H_

#include <rte_branch_prediction.h>
#include <rte_prefetch.h>

static __rte_always_inline unsigned int
__rte_stack_lf_count(struct rte_stack *s)
{
	/* stack_lf_push() and stack_lf_pop() do not update the list's contents
	 * and stack_lf->len atomically, which can cause the list to appear
	 * shorter than it actually is if this function is called while other
	 * threads are modifying the list.
	 *
	 * However, given the inherently approximate nature of the get_count
	 * callback -- even if the list and its size were updated atomically,
	 * the size could change between when get_count executes and when the
	 * value is returned to the caller -- this is acceptable.
	 *
	 * The stack_lf->len updates are placed such that the list may appear to
	 * have fewer elements than it does, but will never appear to have more
	 * elements. If the mempool is near-empty to the point that this is a
	 * concern, the user should consider increasing the mempool size.
	 */
	/* NOTE: review for potential ordering optimization */
	return rte_atomic_load_explicit(&s->stack_lf.used.len, rte_memory_order_seq_cst);
}

static __rte_always_inline void
__rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
			  struct rte_stack_lf_elem *first,
			  struct rte_stack_lf_elem *last,
			  unsigned int num)
{
	struct rte_stack_lf_head old_head;
	int success;

	old_head = list->head;

	do {
		struct rte_stack_lf_head new_head;

		/* An acquire fence (or stronger) is needed for weak memory
		 * models to establish a synchronized-with relationship between
		 * the list->head load and store-release operations (as part of
		 * the rte_atomic128_cmp_exchange()).
		 */
		rte_smp_mb();

		/* Swing the top pointer to the first element in the list and
		 * make the last element point to the old top.
		 */
		new_head.top = first;
		new_head.cnt = old_head.cnt + 1;

		last->next = old_head.top;

		/* old_head is updated on failure */
		success = rte_atomic128_cmp_exchange(
				(rte_int128_t *)&list->head,
				(rte_int128_t *)&old_head,
				(rte_int128_t *)&new_head,
				1, rte_memory_order_release,
				rte_memory_order_relaxed);
	} while (success == 0);
	/* NOTE: review for potential ordering optimization */
	rte_atomic_fetch_add_explicit(&list->len, num, rte_memory_order_seq_cst);
}
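
/*
 * Illustrative sketch, not part of this file: one plausible way a caller
 * could link a caller-owned array of elements into a chain before handing
 * it to __rte_stack_lf_push_elems(). In the real rte_stack code the
 * elements come from the stack's own free list; the function name and the
 * element array below are assumptions for illustration only.
 */
static __rte_always_inline void
__rte_stack_lf_push_chain_sketch(struct rte_stack_lf_list *list,
				 struct rte_stack_lf_elem *elems,
				 void * const *obj_table,
				 unsigned int n)
{
	unsigned int i;

	if (n == 0)
		return;

	/* Chain the elements so that elems[0] becomes the new top */
	for (i = 0; i < n; i++) {
		elems[i].data = obj_table[i];
		elems[i].next = (i + 1 < n) ? &elems[i + 1] : NULL;
	}

	/* push_elems() overwrites last->next with the old top, so the NULL
	 * terminator above is only a placeholder.
	 */
	__rte_stack_lf_push_elems(list, &elems[0], &elems[n - 1], n);
}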

static __rte_always_inline struct rte_stack_lf_elem *
__rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
			 unsigned int num,
			 void **obj_table,
			 struct rte_stack_lf_elem **last)
{
	struct rte_stack_lf_head old_head;
	int success = 0;

	/* Reserve num elements, if available */
	while (1) {
		/* NOTE: review for potential ordering optimization */
		uint64_t len = rte_atomic_load_explicit(&list->len, rte_memory_order_seq_cst);

		/* Does the list contain enough elements? */
		if (unlikely(len < num))
			return NULL;

		/* NOTE: review for potential ordering optimization */
		if (rte_atomic_compare_exchange_strong_explicit(&list->len, &len, len - num,
				rte_memory_order_seq_cst, rte_memory_order_seq_cst))
			break;
	}

	old_head = list->head;

	/* Pop num elements */
	do {
		struct rte_stack_lf_head new_head;
		struct rte_stack_lf_elem *tmp;
		unsigned int i;

		/* An acquire fence (or stronger) is needed for weak memory
		 * models to ensure the LF LIFO element reads are properly
		 * ordered with respect to the head pointer read.
		 */
		rte_smp_mb();

		rte_prefetch0(old_head.top);

		tmp = old_head.top;

		/* Traverse the list to find the new head. A next pointer will
		 * either point to another element or NULL; if a thread
		 * encounters a pointer that has already been popped, the CAS
		 * will fail.
		 */
		for (i = 0; i < num && tmp != NULL; i++) {
			rte_prefetch0(tmp->next);
			if (obj_table)
				obj_table[i] = tmp->data;
			if (last)
				*last = tmp;
			tmp = tmp->next;
		}

		/* If NULL was encountered, the list was modified while
		 * traversing it. Retry.
		 */
		if (i != num) {
			old_head = list->head;
			continue;
		}

		new_head.top = tmp;
		new_head.cnt = old_head.cnt + 1;

		/* old_head is updated on failure */
		success = rte_atomic128_cmp_exchange(
				(rte_int128_t *)&list->head,
				(rte_int128_t *)&old_head,
				(rte_int128_t *)&new_head,
				1, rte_memory_order_release,
				rte_memory_order_relaxed);
	} while (success == 0);

	return old_head.top;
}
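
/*
 * Illustrative sketch, not part of this file: one way a caller could combine
 * the two helpers above to pop a burst of objects. Popping from the "used"
 * list yields the object pointers via obj_table; the emptied elements are
 * then returned to a "free" list. The s->stack_lf.free member is assumed
 * from the rte_stack_lf design (only s->stack_lf.used appears in this file),
 * and the wrapper name itself is hypothetical.
 */
static __rte_always_inline unsigned int
__rte_stack_lf_pop_burst_sketch(struct rte_stack *s, void **obj_table,
				unsigned int n)
{
	struct rte_stack_lf_elem *first, *last = NULL;

	if (unlikely(n == 0))
		return 0;

	/* Pop n used elements; their data pointers land in obj_table */
	first = __rte_stack_lf_pop_elems(&s->stack_lf.used, n, obj_table, &last);
	if (unlikely(first == NULL))
		return 0;

	/* Recycle the popped chain onto the free list (assumed member) */
	__rte_stack_lf_push_elems(&s->stack_lf.free, first, last, n);

	return n;
}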

#endif /* _RTE_STACK_LF_GENERIC_H_ */