/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#ifndef _RTE_STACK_LF_C11_H_
#define _RTE_STACK_LF_C11_H_

#include <rte_branch_prediction.h>
#include <rte_prefetch.h>

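/* Internal helpers for the lock-free stack, implemented with C11-style
 * atomics. __rte_stack_lf_count() below returns the approximate number of
 * elements on the used list via a single relaxed load; as the comment inside
 * explains, the result may under-count, but never over-count, the elements
 * actually present.
 */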
static __rte_always_inline unsigned int
__rte_stack_lf_count(struct rte_stack *s)
{
	/* stack_lf_push() and stack_lf_pop() do not update the list's contents
	 * and stack_lf->len atomically, which can cause the list to appear
	 * shorter than it actually is if this function is called while other
	 * threads are modifying the list.
	 *
	 * However, given the inherently approximate nature of the get_count
	 * callback -- even if the list and its size were updated atomically,
	 * the size could change between when get_count executes and when the
	 * value is returned to the caller -- this is acceptable.
	 *
	 * The stack_lf->len updates are placed such that the list may appear to
	 * have fewer elements than it does, but will never appear to have more
	 * elements. If the mempool is near-empty to the point that this is a
	 * concern, the user should consider increasing the mempool size.
	 */
	return (unsigned int)rte_atomic_load_explicit(&s->stack_lf.used.len,
					     rte_memory_order_relaxed);
}

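/* Push the chain of num elements linked from first through last onto the
 * list. The head is replaced with a 128-bit release CAS, and only afterwards
 * is the length counter incremented, which is why the count above may lag
 * behind, but never run ahead of, the list contents.
 */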
static __rte_always_inline void
__rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
			  struct rte_stack_lf_elem *first,
			  struct rte_stack_lf_elem *last,
			  unsigned int num)
{
	struct rte_stack_lf_head old_head;
	int success;

	old_head = list->head;

	do {
		struct rte_stack_lf_head new_head;

		/* Swing the top pointer to the first element in the list and
		 * make the last element point to the old top.
		 */
		new_head.top = first;
		new_head.cnt = old_head.cnt + 1;

		last->next = old_head.top;

		/* Use the release memmodel to ensure the writes to the LF LIFO
		 * elements are visible before the head pointer write.
		 */
		success = rte_atomic128_cmp_exchange(
				(rte_int128_t *)&list->head,
				(rte_int128_t *)&old_head,
				(rte_int128_t *)&new_head,
				1, rte_memory_order_release,
				rte_memory_order_relaxed);
	} while (success == 0);

	/* Ensure the stack modifications are not reordered with respect
	 * to the LIFO len update.
	 */
	rte_atomic_fetch_add_explicit(&list->len, num, rte_memory_order_release);
}

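/* Pop num elements from the list. The length counter is reserved first with
 * an acquire CAS loop, pairing with the release increment in the push above
 * so that elements counted by a successful reservation are visible to the
 * popping thread. If fewer than num elements are available, NULL is returned
 * and the counter is left unchanged. On success the detached chain is
 * returned via the old head, the objects are copied into obj_table and *last
 * is set to the final popped element (when those pointers are non-NULL).
 */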
static __rte_always_inline struct rte_stack_lf_elem *
__rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
			 unsigned int num,
			 void **obj_table,
			 struct rte_stack_lf_elem **last)
{
	struct rte_stack_lf_head old_head;
	uint64_t len;
	int success;

	/* Reserve num elements, if available */
	len = rte_atomic_load_explicit(&list->len, rte_memory_order_relaxed);

	while (1) {
		/* Does the list contain enough elements? */
		if (unlikely(len < num))
			return NULL;

		/* On failure, len is reloaded with the current value */
		if (rte_atomic_compare_exchange_weak_explicit(&list->len,
						&len, len - num,
						rte_memory_order_acquire,
						rte_memory_order_relaxed))
			break;
	}

	/* If a torn read occurs, the CAS will fail and set old_head to the
	 * correct/latest value.
	 */
	old_head = list->head;

	/* Pop num elements */
	do {
		struct rte_stack_lf_head new_head;
		struct rte_stack_lf_elem *tmp;
		unsigned int i;

		/* Use the acquire memmodel to ensure the reads to the LF LIFO
		 * elements are properly ordered with respect to the head
		 * pointer read.
		 */
		rte_atomic_thread_fence(rte_memory_order_acquire);

		rte_prefetch0(old_head.top);

		tmp = old_head.top;

		/* Traverse the list to find the new head. A next pointer will
		 * either point to another element or NULL; if a thread
		 * encounters a pointer that has already been popped, the CAS
		 * will fail.
		 */
		for (i = 0; i < num && tmp != NULL; i++) {
			rte_prefetch0(tmp->next);
			if (obj_table)
				obj_table[i] = tmp->data;
			if (last)
				*last = tmp;
			tmp = tmp->next;
		}

		/* If NULL was encountered, the list was modified while
		 * traversing it. Retry.
		 */
		if (i != num) {
			old_head = list->head;
			continue;
		}

		new_head.top = tmp;
		new_head.cnt = old_head.cnt + 1;

		/*
		 * The CAS should have release semantics to ensure that
		 * items are read before the head is updated.
		 * But this function is internal, and items are read
		 * only when __rte_stack_lf_pop calls this function to
		 * pop items from the used list.
		 * Then, those items are pushed to the free list.
		 * Push uses a CAS store-release on head, which makes
		 * sure that items are read before they are pushed to
		 * the free list, without the need for a CAS release here.
		 * This CAS could also be used to ensure that the new
		 * length is visible before the head update, but
		 * acquire semantics on the length update is enough.
		 */
		success = rte_atomic128_cmp_exchange(
				(rte_int128_t *)&list->head,
				(rte_int128_t *)&old_head,
				(rte_int128_t *)&new_head,
				0, rte_memory_order_relaxed,
				rte_memory_order_relaxed);
	} while (success == 0);

	return old_head.top;
}

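/* Illustrative sketch (not part of this header): the comment above the final
 * CAS in __rte_stack_lf_pop_elems() assumes a caller shaped like
 * __rte_stack_lf_pop(), which pops a chain from the used list and immediately
 * pushes it onto the free list, roughly:
 *
 *	struct rte_stack_lf_elem *first, *last = NULL;
 *
 *	first = __rte_stack_lf_pop_elems(&s->stack_lf.used, n, obj_table, &last);
 *	if (first == NULL)
 *		return 0;
 *	__rte_stack_lf_push_elems(&s->stack_lf.free, first, last, n);
 *
 * (The free-list field name is assumed by analogy with stack_lf.used above.)
 * The release CAS in the push is then what guarantees the element reads made
 * during the pop complete before those elements are recycled onto the free
 * list.
 */
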
#endif /* _RTE_STACK_LF_C11_H_ */