/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#ifndef _RTE_STACK_LF_C11_H_
#define _RTE_STACK_LF_C11_H_

#include <rte_branch_prediction.h>
#include <rte_prefetch.h>

static __rte_always_inline unsigned int
__rte_stack_lf_count(struct rte_stack *s)
{
	/* stack_lf_push() and stack_lf_pop() do not update the list's contents
	 * and stack_lf->len atomically, which can cause the list to appear
	 * shorter than it actually is if this function is called while other
	 * threads are modifying the list.
	 *
	 * However, given the inherently approximate nature of the get_count
	 * callback -- even if the list and its size were updated atomically,
	 * the size could change between when get_count executes and when the
	 * value is returned to the caller -- this is acceptable.
	 *
	 * The stack_lf->len updates are placed such that the list may appear to
	 * have fewer elements than it does, but will never appear to have more
	 * elements. If the mempool is near-empty to the point that this is a
	 * concern, the user should consider increasing the mempool size.
	 */
	return (unsigned int)__atomic_load_n(&s->stack_lf.used.len,
					     __ATOMIC_RELAXED);
}

static __rte_always_inline void
__rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
			  struct rte_stack_lf_elem *first,
			  struct rte_stack_lf_elem *last,
			  unsigned int num)
{
	struct rte_stack_lf_head old_head;
	int success;

	old_head = list->head;

	do {
		struct rte_stack_lf_head new_head;

		/* Swing the top pointer to the first element in the list and
		 * make the last element point to the old top.
		 */
		new_head.top = first;
		new_head.cnt = old_head.cnt + 1;

		last->next = old_head.top;

		/* Use the release memmodel to ensure the writes to the LF LIFO
		 * elements are visible before the head pointer write.
		 */
		success = rte_atomic128_cmp_exchange(
				(rte_int128_t *)&list->head,
				(rte_int128_t *)&old_head,
				(rte_int128_t *)&new_head,
				1, __ATOMIC_RELEASE,
				__ATOMIC_RELAXED);
	} while (success == 0);

	/* Ensure the stack modifications are not reordered with respect
	 * to the LIFO len update.
	 */
	__atomic_add_fetch(&list->len, num, __ATOMIC_RELEASE);
}
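
/* For context, a rough sketch of how this helper is meant to be driven.
 * The real caller is __rte_stack_lf_push() in rte_stack_lf.h; the snippet
 * below is illustrative only (variable names and error handling are
 * simplified, not copied from that header). Elements are first taken off
 * the stack's free list, their data pointers are filled in, and the
 * resulting chain is published onto the used list:
 *
 *	struct rte_stack_lf_elem *first, *last = NULL, *tmp;
 *	unsigned int i;
 *
 *	first = __rte_stack_lf_pop_elems(&s->stack_lf.free, n, NULL, &last);
 *	if (first == NULL)
 *		return -ENOBUFS;
 *	for (tmp = first, i = 0; i < n; i++, tmp = tmp->next)
 *		tmp->data = obj_table[n - i - 1];
 *	__rte_stack_lf_push_elems(&s->stack_lf.used, first, last, n);
 *
 * Because the chain is fully linked (last->next is written inside
 * __rte_stack_lf_push_elems()) before the release CAS publishes the new
 * head, a concurrent pop that acquires the new head also observes the
 * element contents.
 */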

static __rte_always_inline struct rte_stack_lf_elem *
__rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
			 unsigned int num,
			 void **obj_table,
			 struct rte_stack_lf_elem **last)
{
	struct rte_stack_lf_head old_head;
	uint64_t len;
	int success;

	/* Reserve num elements, if available */
	len = __atomic_load_n(&list->len, __ATOMIC_RELAXED);

	while (1) {
		/* Does the list contain enough elements? */
		if (unlikely(len < num))
			return NULL;

		/* len is updated on failure */
		if (__atomic_compare_exchange_n(&list->len,
						&len, len - num,
						1, __ATOMIC_ACQUIRE,
						__ATOMIC_RELAXED))
			break;
	}

	/* If a torn read occurs, the CAS will fail and set old_head to the
	 * correct/latest value.
	 */
	old_head = list->head;

	/* Pop num elements */
	do {
		struct rte_stack_lf_head new_head;
		struct rte_stack_lf_elem *tmp;
		unsigned int i;

		/* Use the acquire memmodel to ensure the reads of the LF LIFO
		 * elements are properly ordered with respect to the head
		 * pointer read.
		 */
		__atomic_thread_fence(__ATOMIC_ACQUIRE);

		rte_prefetch0(old_head.top);

		tmp = old_head.top;

		/* Traverse the list to find the new head. A next pointer will
		 * either point to another element or NULL; if a thread
		 * encounters a pointer that has already been popped, the CAS
		 * will fail.
		 */
		for (i = 0; i < num && tmp != NULL; i++) {
			rte_prefetch0(tmp->next);
			if (obj_table)
				obj_table[i] = tmp->data;
			if (last)
				*last = tmp;
			tmp = tmp->next;
		}

		/* If NULL was encountered, the list was modified while
		 * traversing it. Retry.
		 */
		if (i != num) {
			old_head = list->head;
			continue;
		}

		new_head.top = tmp;
		new_head.cnt = old_head.cnt + 1;

		/*
		 * The CAS should have release semantics to ensure that
		 * items are read before the head is updated.
		 * But this function is internal, and items are read
		 * only when __rte_stack_lf_pop calls this function to
		 * pop items from the used list.
		 * Then, those items are pushed to the free list.
		 * Push uses a CAS store-release on head, which makes
		 * sure that items are read before they are pushed to
		 * the free list, without the need for a CAS release here.
		 * This CAS could also be used to ensure that the new
		 * length is visible before the head update, but
		 * acquire semantics on the length update are enough.
		 */
		success = rte_atomic128_cmp_exchange(
				(rte_int128_t *)&list->head,
				(rte_int128_t *)&old_head,
				(rte_int128_t *)&new_head,
				0, __ATOMIC_RELAXED,
				__ATOMIC_RELAXED);
	} while (success == 0);

	return old_head.top;
}
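
/* Likewise, a sketch of the pop path referred to in the comment above the
 * relaxed head CAS (illustrative only, not copied from rte_stack_lf.h):
 * __rte_stack_lf_pop() pops a chain from the used list, which fills
 * obj_table and reports the last element, and then returns that same
 * chain to the free list:
 *
 *	struct rte_stack_lf_elem *first, *last = NULL;
 *
 *	first = __rte_stack_lf_pop_elems(&s->stack_lf.used, n,
 *					 obj_table, &last);
 *	if (first == NULL)
 *		return 0;
 *	__rte_stack_lf_push_elems(&s->stack_lf.free, first, last, n);
 *	return n;
 *
 * The store-release CAS in __rte_stack_lf_push_elems() orders the element
 * reads performed here before the chain becomes visible on the free list
 * again, which is why the pop-side head CAS can remain relaxed. The 64-bit
 * cnt field bumped on every successful head CAS acts as an ABA guard: even
 * if the same element pointer reappears at the top, the counter differs,
 * so a stale (top, cnt) pair cannot pass the 128-bit compare.
 */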

#endif /* _RTE_STACK_LF_C11_H_ */