/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#include <rte_string_fns.h>
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_atomic.h>

#include "opdl_ring.h"
#include "opdl_log.h"

#define LIB_NAME "opdl_ring"

#define OPDL_NAME_SIZE 64


#define OPDL_EVENT_MASK  (0x00000000000FFFFFULL)
#define OPDL_FLOWID_MASK (0xFFFFF)
#define OPDL_OPA_MASK    (0xFF)
#define OPDL_OPA_OFFSET  (0x38)
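
/*
 * Illustrative note (not part of the driver): with the masks above, the
 * flow ID lives in the low 20 bits of the 64-bit event word and the
 * originating queue ID (OPA) in the top 8 bits, e.g.:
 *
 *	uint64_t event = ...;
 *	uint32_t flow_id = OPDL_FLOWID_MASK & event;
 *	uint32_t opa_id  = OPDL_OPA_MASK & (event >> OPDL_OPA_OFFSET);
 *
 * OPDL_OPA_OFFSET is 0x38 (56), so the OPA field occupies bits 56-63.
 */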

/* Types of dependency between stages */
enum dep_type {
	DEP_NONE = 0,  /* no dependency */
	DEP_DIRECT,  /* stage has direct dependency */
	DEP_INDIRECT,  /* indirect dependency through other stage(s) */
	DEP_SELF,  /* stage dependency on itself, used to detect loops */
};

/* Shared section of stage state.
 * Care is needed when accessing this structure, and its layout is important,
 * especially to limit the impact of the adjacent cache-line HW prefetcher on
 * performance.
 */
struct __rte_cache_aligned shared_state {
	/* Last known minimum sequence number of dependencies, used for multi
	 * thread operation
	 */
	RTE_ATOMIC(uint32_t) available_seq;
	char _pad1[RTE_CACHE_LINE_SIZE * 3];
	RTE_ATOMIC(uint32_t) head;  /* Head sequence number (for multi thread operation) */
	char _pad2[RTE_CACHE_LINE_SIZE * 3];
	struct opdl_stage *stage;  /* back pointer */
	RTE_ATOMIC(uint32_t) tail;  /* Tail sequence number */
	char _pad3[RTE_CACHE_LINE_SIZE * 2];
};

/* A structure to keep track of "unfinished" claims. This is only used for
 * stages that are threadsafe. Each lcore accesses its own instance of this
 * structure to record the entries it has claimed. This allows one lcore to
 * make multiple claims without being blocked by another. When disclaiming,
 * the shared tail is moved forward once it matches the tail value recorded
 * here.
 */
struct __rte_cache_aligned claim_manager {
	uint32_t num_to_disclaim;
	uint32_t num_claimed;
	uint32_t mgr_head;
	uint32_t mgr_tail;
	struct {
		uint32_t head;
		uint32_t tail;
	} claims[OPDL_DISCLAIMS_PER_LCORE];
};

/* Context for each stage of opdl_ring.
 * Calculations on sequence numbers need to be done with other uint32_t values
 * so that results are modulo 2^32, and not undefined.
 */
struct __rte_cache_aligned opdl_stage {
	struct opdl_ring *t;  /* back pointer, set at init */
	uint32_t num_slots;  /* Number of slots for entries, set at init */
	uint32_t index;  /* ID for this stage, set at init */
	bool threadsafe;  /* Set to true if this stage supports threadsafe use */
	/* Last known min seq number of dependencies, used for single thread
	 * operation
	 */
	uint32_t available_seq;
	uint32_t head;  /* Current head for single-thread operation */
	uint32_t nb_instance;  /* Number of instances */
	uint32_t instance_id;  /* ID of this stage instance */
	uint16_t num_claimed;  /* Number of slots claimed */
	uint16_t num_event;  /* Number of events */
	uint32_t seq;  /* sequence number */
	uint32_t num_deps;  /* Number of direct dependencies */
	/* Keep track of all dependencies, used during init only */
	enum dep_type *dep_tracking;
	/* Direct dependencies of this stage */
	struct shared_state **deps;
	/* Other stages read this! */
	alignas(RTE_CACHE_LINE_SIZE) struct shared_state shared;
	/* For managing disclaims in multi-threaded processing stages */
	alignas(RTE_CACHE_LINE_SIZE) struct claim_manager pending_disclaims[RTE_MAX_LCORE];
	uint32_t shadow_head;  /* Shadow head for single-thread operation */
	uint32_t queue_id;     /* ID of Queue which is assigned to this stage */
	uint32_t pos;  /* Atomic scan position */
};
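
/*
 * Illustrative note (not part of the driver): because sequence numbers are
 * plain uint32_t values, differences stay correct across wrap-around. For
 * example, with head = 0x00000002 and tail = 0xFFFFFFFD, the in-flight
 * count is head - tail = 5, even though head < tail numerically. This is
 * why the comparisons below are written as subtractions rather than with
 * relational operators.
 */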

/* Context for opdl_ring */
struct opdl_ring {
	char name[OPDL_NAME_SIZE];  /* OPDL queue instance name */
	int socket;  /* NUMA socket that memory is allocated on */
	uint32_t num_slots;  /* Number of slots for entries */
	uint32_t mask;  /* Mask for sequence numbers (num_slots - 1) */
	uint32_t slot_size;  /* Size of each slot in bytes */
	uint32_t num_stages;  /* Number of stages that have been added */
	uint32_t max_num_stages;  /* Max number of stages */
	/* Stages indexed by ID */
	struct opdl_stage *stages;
	/* Memory for storing slot data */
	alignas(RTE_CACHE_LINE_SIZE) uint8_t slots[];
};


/* Return the input stage of an opdl_ring */
static __rte_always_inline struct opdl_stage *
input_stage(const struct opdl_ring *t)
{
	return &t->stages[0];
}

/* Check if a stage is the input stage */
static __rte_always_inline bool
is_input_stage(const struct opdl_stage *s)
{
	return s->index == 0;
}

/* Get slot pointer from sequence number */
static __rte_always_inline void *
get_slot(const struct opdl_ring *t, uint32_t n)
{
	return (void *)(uintptr_t)&t->slots[(n & t->mask) * t->slot_size];
}

/* Find how many entries are available for processing */
static __rte_always_inline uint32_t
available(const struct opdl_stage *s)
{
	if (s->threadsafe == true) {
		uint32_t n = rte_atomic_load_explicit(&s->shared.available_seq,
				rte_memory_order_acquire) -
				rte_atomic_load_explicit(&s->shared.head,
				rte_memory_order_acquire);

		/* Return 0 if available_seq needs to be updated */
		return (n <= s->num_slots) ? n : 0;
	}

	/* Single threaded */
	return s->available_seq - s->head;
}
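
/*
 * Illustrative note (not part of the driver): in the threadsafe path above,
 * if available_seq is stale (i.e. head has raced past it), the unsigned
 * subtraction wraps to a huge value. For example, available_seq = 10 and
 * head = 12 gives n = 0xFFFFFFFE, which fails the n <= num_slots check and
 * is reported as 0, prompting the caller to refresh via
 * update_available_seq().
 */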

/* Read sequence number of dependencies and find minimum */
static __rte_always_inline void
update_available_seq(struct opdl_stage *s)
{
	uint32_t i;
	uint32_t this_tail = s->shared.tail;
	uint32_t min_seq = rte_atomic_load_explicit(&s->deps[0]->tail, rte_memory_order_acquire);
	/* Input stage sequence numbers are greater than the sequence numbers
	 * of its dependencies, so an offset of t->num_slots is needed when
	 * calculating available slots, and the condition used to determine
	 * the dependencies' minimum sequence number must be reversed.
	 */
	uint32_t wrap;

	if (is_input_stage(s)) {
		wrap = s->num_slots;
		for (i = 1; i < s->num_deps; i++) {
			uint32_t seq = rte_atomic_load_explicit(&s->deps[i]->tail,
					rte_memory_order_acquire);
			if ((this_tail - seq) > (this_tail - min_seq))
				min_seq = seq;
		}
	} else {
		wrap = 0;
		for (i = 1; i < s->num_deps; i++) {
			uint32_t seq = rte_atomic_load_explicit(&s->deps[i]->tail,
					rte_memory_order_acquire);
			if ((seq - this_tail) < (min_seq - this_tail))
				min_seq = seq;
		}
	}

	if (s->threadsafe == false)
		s->available_seq = min_seq + wrap;
	else
		rte_atomic_store_explicit(&s->shared.available_seq, min_seq + wrap,
				rte_memory_order_release);
}
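
/*
 * Illustrative note (not part of the driver): for the input stage, the
 * slowest dependency tail bounds how far input may advance. With
 * num_slots = 8 and a minimum dependency tail of 100, the input stage may
 * produce up to sequence 108 (min_seq + wrap) before it would overwrite
 * slots that downstream stages have not yet released.
 */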

/* Wait until the number of available slots reaches number requested */
static __rte_always_inline void
wait_for_available(struct opdl_stage *s, uint32_t n)
{
	while (available(s) < n) {
		rte_pause();
		update_available_seq(s);
	}
}

/* Return number of slots to process based on number requested and mode */
static __rte_always_inline uint32_t
num_to_process(struct opdl_stage *s, uint32_t n, bool block)
{
	/* Don't read tail sequences of dependencies if not needed */
	if (available(s) >= n)
		return n;

	update_available_seq(s);

	if (block == false) {
		uint32_t avail = available(s);

		if (avail == 0) {
			rte_pause();
			return 0;
		}
		return (avail <= n) ? avail : n;
	}

	if (unlikely(n > s->num_slots)) {
		PMD_DRV_LOG(ERR, "%u entries is more than max (%u)",
				n, s->num_slots);
		return 0;  /* Avoid infinite loop */
	}
	/* blocking */
	wait_for_available(s, n);
	return n;
}

/* Copy entries in to slots with wrap-around */
static __rte_always_inline void
copy_entries_in(struct opdl_ring *t, uint32_t start, const void *entries,
		uint32_t num_entries)
{
	uint32_t slot_size = t->slot_size;
	uint32_t slot_index = start & t->mask;

	if (slot_index + num_entries <= t->num_slots) {
		rte_memcpy(get_slot(t, start), entries,
				num_entries * slot_size);
	} else {
		uint32_t split = t->num_slots - slot_index;

		rte_memcpy(get_slot(t, start), entries, split * slot_size);
		rte_memcpy(get_slot(t, 0),
				RTE_PTR_ADD(entries, split * slot_size),
				(num_entries - split) * slot_size);
	}
}
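
/*
 * Illustrative note (not part of the driver): with num_slots = 8, writing
 * 4 entries starting at sequence 6 lands in slot indices 6, 7, 0, 1. The
 * first memcpy above covers the split of 2 entries at the end of the
 * buffer, and the second covers the remaining 2 at the start.
 */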

/* Copy entries out from slots with wrap-around */
static __rte_always_inline void
copy_entries_out(struct opdl_ring *t, uint32_t start, void *entries,
		uint32_t num_entries)
{
	uint32_t slot_size = t->slot_size;
	uint32_t slot_index = start & t->mask;

	if (slot_index + num_entries <= t->num_slots) {
		rte_memcpy(entries, get_slot(t, start),
				num_entries * slot_size);
	} else {
		uint32_t split = t->num_slots - slot_index;

		rte_memcpy(entries, get_slot(t, start), split * slot_size);
		rte_memcpy(RTE_PTR_ADD(entries, split * slot_size),
				get_slot(t, 0),
				(num_entries - split) * slot_size);
	}
}

/* Input function optimised for single thread */
static __rte_always_inline uint32_t
opdl_ring_input_singlethread(struct opdl_ring *t, const void *entries,
		uint32_t num_entries, bool block)
{
	struct opdl_stage *s = input_stage(t);
	uint32_t head = s->head;

	num_entries = num_to_process(s, num_entries, block);
	if (num_entries == 0)
		return 0;

	copy_entries_in(t, head, entries, num_entries);

	s->head += num_entries;
	rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release);

	return num_entries;
}

/* Convert head and tail of claim_manager into valid index */
static __rte_always_inline uint32_t
claim_mgr_index(uint32_t n)
{
	return n & (OPDL_DISCLAIMS_PER_LCORE - 1);
}

/* Check if there are available slots in claim_manager */
static __rte_always_inline bool
claim_mgr_available(struct claim_manager *mgr)
{
	return (mgr->mgr_head < (mgr->mgr_tail + OPDL_DISCLAIMS_PER_LCORE)) ?
			true : false;
}

/* Record a new claim. Only use after first checking an entry is available */
static __rte_always_inline void
claim_mgr_add(struct claim_manager *mgr, uint32_t tail, uint32_t head)
{
	if ((mgr->mgr_head != mgr->mgr_tail) &&
			(mgr->claims[claim_mgr_index(mgr->mgr_head - 1)].head ==
			tail)) {
		/* Combine with previous claim */
		mgr->claims[claim_mgr_index(mgr->mgr_head - 1)].head = head;
	} else {
		mgr->claims[claim_mgr_index(mgr->mgr_head)].head = head;
		mgr->claims[claim_mgr_index(mgr->mgr_head)].tail = tail;
		mgr->mgr_head++;
	}

	mgr->num_claimed += (head - tail);
}
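
/*
 * Illustrative note (not part of the driver): consecutive claims coalesce.
 * If an lcore first claims [10, 14) and then [14, 20), claim_mgr_add()
 * sees that the previous record's head (14) equals the new tail and simply
 * extends that record to [10, 20) instead of consuming a second slot in
 * the claims[] array.
 */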

/* Read the oldest recorded claim */
static __rte_always_inline bool
claim_mgr_read(struct claim_manager *mgr, uint32_t *tail, uint32_t *head)
{
	if (mgr->mgr_head == mgr->mgr_tail)
		return false;

	*head = mgr->claims[claim_mgr_index(mgr->mgr_tail)].head;
	*tail = mgr->claims[claim_mgr_index(mgr->mgr_tail)].tail;
	return true;
}

/* Remove the oldest recorded claim. Only use after first reading the entry */
static __rte_always_inline void
claim_mgr_remove(struct claim_manager *mgr)
{
	mgr->num_claimed -= (mgr->claims[claim_mgr_index(mgr->mgr_tail)].head -
			mgr->claims[claim_mgr_index(mgr->mgr_tail)].tail);
	mgr->mgr_tail++;
}

/* Update tail in the oldest claim. Only use after first reading the entry */
static __rte_always_inline void
claim_mgr_move_tail(struct claim_manager *mgr, uint32_t num_entries)
{
	mgr->num_claimed -= num_entries;
	mgr->claims[claim_mgr_index(mgr->mgr_tail)].tail += num_entries;
}

static __rte_always_inline void
opdl_stage_disclaim_multithread_n(struct opdl_stage *s,
		uint32_t num_entries, bool block)
{
	struct claim_manager *disclaims = &s->pending_disclaims[rte_lcore_id()];
	uint32_t head;
	uint32_t tail;

	while (num_entries) {
		bool ret = claim_mgr_read(disclaims, &tail, &head);

		if (ret == false)
			break;  /* nothing is claimed */
		/* There should be no race condition here. If shared.tail
		 * matches, no other core can update it until this one does.
		 */
		if (rte_atomic_load_explicit(&s->shared.tail, rte_memory_order_acquire) ==
				tail) {
			if (num_entries >= (head - tail)) {
				claim_mgr_remove(disclaims);
				rte_atomic_store_explicit(&s->shared.tail, head,
						rte_memory_order_release);
				num_entries -= (head - tail);
			} else {
				claim_mgr_move_tail(disclaims, num_entries);
				rte_atomic_store_explicit(&s->shared.tail,
						num_entries + tail,
						rte_memory_order_release);
				num_entries = 0;
			}
		} else if (block == false)
			break;  /* blocked by other thread */
		/* Keep going until num_entries are disclaimed. */
		rte_pause();
	}

	disclaims->num_to_disclaim = num_entries;
}

/* Move head atomically, returning number of entries available to process and
 * the original value of head. For non-input stages, the claim is recorded
 * so that the tail can be updated later by opdl_stage_disclaim().
 */
static __rte_always_inline void
move_head_atomically(struct opdl_stage *s, uint32_t *num_entries,
		uint32_t *old_head, bool block, bool claim_func)
{
	uint32_t orig_num_entries = *num_entries;
	bool ret;
	struct claim_manager *disclaims = &s->pending_disclaims[rte_lcore_id()];

	/* Attempt to disclaim any outstanding claims */
	opdl_stage_disclaim_multithread_n(s, disclaims->num_to_disclaim,
			false);

	*old_head = rte_atomic_load_explicit(&s->shared.head, rte_memory_order_acquire);
	while (true) {
		bool success;
		/* If called by opdl_ring_input(), claim does not need to be
		 * recorded, as there will be no disclaim.
		 */
		if (claim_func) {
			/* Check that the claim can be recorded */
			ret = claim_mgr_available(disclaims);
			if (ret == false) {
				/* exit out if claim can't be recorded */
				*num_entries = 0;
				return;
			}
		}

		*num_entries = num_to_process(s, orig_num_entries, block);
		if (*num_entries == 0)
			return;

		success = rte_atomic_compare_exchange_weak_explicit(&s->shared.head, old_head,
				*old_head + *num_entries,
				rte_memory_order_release,  /* memory order on success */
				rte_memory_order_acquire);  /* memory order on fail */
		if (likely(success))
			break;
		rte_pause();
	}

	if (claim_func)
		/* Store the claim record */
		claim_mgr_add(disclaims, *old_head, *old_head + *num_entries);
}

/* Input function that supports multiple threads */
static __rte_always_inline uint32_t
opdl_ring_input_multithread(struct opdl_ring *t, const void *entries,
		uint32_t num_entries, bool block)
{
	struct opdl_stage *s = input_stage(t);
	uint32_t old_head;

	move_head_atomically(s, &num_entries, &old_head, block, false);
	if (num_entries == 0)
		return 0;

	copy_entries_in(t, old_head, entries, num_entries);

	/* If another thread started inputting before this one, but hasn't
	 * finished, we need to wait for it to complete to update the tail.
	 */
	rte_wait_until_equal_32((uint32_t *)(uintptr_t)&s->shared.tail, old_head,
			rte_memory_order_acquire);

	rte_atomic_store_explicit(&s->shared.tail, old_head + num_entries,
			rte_memory_order_release);

	return num_entries;
}

static __rte_always_inline uint32_t
opdl_first_entry_id(uint32_t start_seq, uint8_t nb_p_lcores,
		uint8_t this_lcore)
{
	return ((nb_p_lcores <= 1) ? 0 :
			(nb_p_lcores - (start_seq % nb_p_lcores) + this_lcore) %
			nb_p_lcores);
}
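
/*
 * Illustrative note (not part of the driver): opdl_first_entry_id() spreads
 * slots round-robin across stage instances. With nb_p_lcores = 3,
 * this_lcore = 1 and start_seq = 5, the result is
 * (3 - (5 % 3) + 1) % 3 = 2, so this instance's first slot is at offset 2,
 * and it then steps by nb_instance through the claimed range.
 */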

/* Claim slots to process, optimised for single-thread operation */
static __rte_always_inline uint32_t
opdl_stage_claim_singlethread(struct opdl_stage *s, void *entries,
		uint32_t num_entries, uint32_t *seq, bool block, bool atomic)
{
	uint32_t i = 0, j = 0, offset;
	uint32_t opa_id   = 0;
	uint32_t flow_id  = 0;
	uint64_t event    = 0;
	void *get_slots;
	struct rte_event *ev;
	RTE_SET_USED(seq);
	struct opdl_ring *t = s->t;
	uint8_t *entries_offset = (uint8_t *)entries;

	if (!atomic) {

		offset = opdl_first_entry_id(s->seq, s->nb_instance,
				s->instance_id);

		num_entries = s->nb_instance * num_entries;

		num_entries = num_to_process(s, num_entries, block);

		for (; offset < num_entries; offset += s->nb_instance) {
			get_slots = get_slot(t, s->head + offset);
			memcpy(entries_offset, get_slots, t->slot_size);
			entries_offset += t->slot_size;
			i++;
		}
	} else {
		num_entries = num_to_process(s, num_entries, block);

		for (j = 0; j < num_entries; j++) {
			ev = (struct rte_event *)get_slot(t, s->head + j);

			event  = rte_atomic_load_explicit((uint64_t __rte_atomic *)&ev->event,
					rte_memory_order_acquire);

			opa_id = OPDL_OPA_MASK & (event >> OPDL_OPA_OFFSET);
			flow_id  = OPDL_FLOWID_MASK & event;

			if (opa_id >= s->queue_id)
				continue;

			if ((flow_id % s->nb_instance) == s->instance_id) {
				memcpy(entries_offset, ev, t->slot_size);
				entries_offset += t->slot_size;
				i++;
			}
		}
	}
	s->shadow_head = s->head;
	s->head += num_entries;
	s->num_claimed = num_entries;
	s->num_event = i;
	s->pos = 0;

	/* automatically disclaim entries if number of rte_events is zero */
	if (unlikely(i == 0))
		opdl_stage_disclaim(s, 0, false);

	return i;
}

/* Thread-safe version of function to claim slots for processing */
static __rte_always_inline uint32_t
opdl_stage_claim_multithread(struct opdl_stage *s, void *entries,
		uint32_t num_entries, uint32_t *seq, bool block)
{
	uint32_t old_head;
	struct opdl_ring *t = s->t;
	uint32_t i = 0, offset;
	uint8_t *entries_offset = (uint8_t *)entries;

	if (seq == NULL) {
		PMD_DRV_LOG(ERR, "Invalid seq PTR");
		return 0;
	}
	offset = opdl_first_entry_id(*seq, s->nb_instance, s->instance_id);
	num_entries = offset + (s->nb_instance * num_entries);

	move_head_atomically(s, &num_entries, &old_head, block, true);

	for (; offset < num_entries; offset += s->nb_instance) {
		memcpy(entries_offset, get_slot(t, s->head + offset),
			t->slot_size);
		entries_offset += t->slot_size;
		i++;
	}

	*seq = old_head;

	return i;
}

/* Claim and copy slot pointers, optimised for single-thread operation */
static __rte_always_inline uint32_t
opdl_stage_claim_copy_singlethread(struct opdl_stage *s, void *entries,
		uint32_t num_entries, uint32_t *seq, bool block)
{
	num_entries = num_to_process(s, num_entries, block);
	if (num_entries == 0)
		return 0;
	copy_entries_out(s->t, s->head, entries, num_entries);
	if (seq != NULL)
		*seq = s->head;
	s->head += num_entries;
	return num_entries;
}

/* Thread-safe version of function to claim and copy pointers to slots */
static __rte_always_inline uint32_t
opdl_stage_claim_copy_multithread(struct opdl_stage *s, void *entries,
		uint32_t num_entries, uint32_t *seq, bool block)
{
	uint32_t old_head;

	move_head_atomically(s, &num_entries, &old_head, block, true);
	if (num_entries == 0)
		return 0;
	copy_entries_out(s->t, old_head, entries, num_entries);
	if (seq != NULL)
		*seq = old_head;
	return num_entries;
}

static __rte_always_inline void
opdl_stage_disclaim_singlethread_n(struct opdl_stage *s,
		uint32_t num_entries)
{
	uint32_t old_tail = s->shared.tail;

	if (unlikely(num_entries > (s->head - old_tail))) {
		PMD_DRV_LOG(WARNING, "Attempt to disclaim (%u) more than claimed (%u)",
				num_entries, s->head - old_tail);
		num_entries = s->head - old_tail;
	}
	rte_atomic_store_explicit(&s->shared.tail, num_entries + old_tail,
			rte_memory_order_release);
}

uint32_t
opdl_ring_input(struct opdl_ring *t, const void *entries, uint32_t num_entries,
		bool block)
{
	if (input_stage(t)->threadsafe == false)
		return opdl_ring_input_singlethread(t, entries, num_entries,
				block);
	else
		return opdl_ring_input_multithread(t, entries, num_entries,
				block);
}
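
/*
 * Illustrative usage sketch (not part of the driver), assuming a ring
 * created with slot_size == sizeof(struct rte_event) and BURST as a
 * caller-chosen burst size:
 *
 *	struct rte_event evs[BURST];
 *	...fill evs...
 *	uint32_t n = opdl_ring_input(ring, evs, BURST, false);
 *	if (n < BURST)
 *		...only n entries were enqueued; retry or drop the rest...
 *
 * In non-blocking mode the call copies in as many entries as there is
 * space for and returns that count.
 */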

uint32_t
opdl_ring_copy_from_burst(struct opdl_ring *t, struct opdl_stage *s,
		const void *entries, uint32_t num_entries, bool block)
{
	uint32_t head = s->head;

	num_entries = num_to_process(s, num_entries, block);

	if (num_entries == 0)
		return 0;

	copy_entries_in(t, head, entries, num_entries);

	s->head += num_entries;
	rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release);

	return num_entries;
}

uint32_t
opdl_ring_copy_to_burst(struct opdl_ring *t, struct opdl_stage *s,
		void *entries, uint32_t num_entries, bool block)
{
	uint32_t head = s->head;

	num_entries = num_to_process(s, num_entries, block);
	if (num_entries == 0)
		return 0;

	copy_entries_out(t, head, entries, num_entries);

	s->head += num_entries;
	rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release);

	return num_entries;
}

uint32_t
opdl_stage_find_num_available(struct opdl_stage *s, uint32_t num_entries)
{
	/* return (num_to_process(s, num_entries, false)); */

	if (available(s) >= num_entries)
		return num_entries;

	update_available_seq(s);

	uint32_t avail = available(s);

	if (avail == 0) {
		rte_pause();
		return 0;
	}
	return (avail <= num_entries) ? avail : num_entries;
}

uint32_t
opdl_stage_claim(struct opdl_stage *s, void *entries,
		uint32_t num_entries, uint32_t *seq, bool block, bool atomic)
{
	if (s->threadsafe == false)
		return opdl_stage_claim_singlethread(s, entries, num_entries,
				seq, block, atomic);
	else
		return opdl_stage_claim_multithread(s, entries, num_entries,
				seq, block);
}
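
/*
 * Illustrative usage sketch (not part of the driver): a worker on a
 * non-threadsafe stage typically claims a burst, processes it, then
 * disclaims the same number of events so downstream stages can advance:
 *
 *	struct rte_event evs[BURST];
 *	uint32_t seq;
 *	uint32_t n = opdl_stage_claim(stage, evs, BURST, &seq, false, atomic);
 *	...process n events...
 *	if (n != 0)
 *		opdl_stage_disclaim(stage, n, false);
 */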

uint32_t
opdl_stage_claim_copy(struct opdl_stage *s, void *entries,
		uint32_t num_entries, uint32_t *seq, bool block)
{
	if (s->threadsafe == false)
		return opdl_stage_claim_copy_singlethread(s, entries,
				num_entries, seq, block);
	else
		return opdl_stage_claim_copy_multithread(s, entries,
				num_entries, seq, block);
}

void
opdl_stage_disclaim_n(struct opdl_stage *s, uint32_t num_entries,
		bool block)
{
	if (s->threadsafe == false) {
		opdl_stage_disclaim_singlethread_n(s, s->num_claimed);
	} else {
		struct claim_manager *disclaims =
			&s->pending_disclaims[rte_lcore_id()];

		if (unlikely(num_entries > s->num_slots)) {
			PMD_DRV_LOG(WARNING, "Attempt to disclaim (%u) more than claimed (%u)",
					num_entries, disclaims->num_claimed);
			num_entries = disclaims->num_claimed;
		}

		num_entries = RTE_MIN(num_entries + disclaims->num_to_disclaim,
				disclaims->num_claimed);
		opdl_stage_disclaim_multithread_n(s, num_entries, block);
	}
}

int
opdl_stage_disclaim(struct opdl_stage *s, uint32_t num_entries, bool block)
{
	if (num_entries != s->num_event) {
		rte_errno = EINVAL;
		return 0;
	}
	if (s->threadsafe == false) {
		rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release);
		s->seq += s->num_claimed;
		s->shadow_head = s->head;
		s->num_claimed = 0;
	} else {
		struct claim_manager *disclaims =
				&s->pending_disclaims[rte_lcore_id()];
		opdl_stage_disclaim_multithread_n(s, disclaims->num_claimed,
				block);
	}
	return num_entries;
}

uint32_t
opdl_ring_available(struct opdl_ring *t)
{
	return opdl_stage_available(&t->stages[0]);
}

uint32_t
opdl_stage_available(struct opdl_stage *s)
{
	update_available_seq(s);
	return available(s);
}

void
opdl_ring_flush(struct opdl_ring *t)
{
	struct opdl_stage *s = input_stage(t);

	wait_for_available(s, s->num_slots);
}

/******************** Non performance sensitive functions ********************/

/* Initial setup of a new stage's context */
static int
init_stage(struct opdl_ring *t, struct opdl_stage *s, bool threadsafe,
		bool is_input)
{
	uint32_t available = (is_input) ? t->num_slots : 0;

	s->t = t;
	s->num_slots = t->num_slots;
	s->index = t->num_stages;
	s->threadsafe = threadsafe;
	s->shared.stage = s;

	/* Alloc memory for deps */
	s->dep_tracking = rte_zmalloc_socket(LIB_NAME,
			t->max_num_stages * sizeof(enum dep_type),
			0, t->socket);
	if (s->dep_tracking == NULL)
		return -ENOMEM;

	s->deps = rte_zmalloc_socket(LIB_NAME,
			t->max_num_stages * sizeof(struct shared_state *),
			0, t->socket);
	if (s->deps == NULL) {
		rte_free(s->dep_tracking);
		return -ENOMEM;
	}

	s->dep_tracking[s->index] = DEP_SELF;

	if (threadsafe == true)
		s->shared.available_seq = available;
	else
		s->available_seq = available;

	return 0;
}

/* Add direct or indirect dependencies between stages */
static int
add_dep(struct opdl_stage *dependent, const struct opdl_stage *dependency,
		enum dep_type type)
{
	struct opdl_ring *t = dependent->t;
	uint32_t i;

	/* Add new direct dependency */
	if ((type == DEP_DIRECT) &&
			(dependent->dep_tracking[dependency->index] ==
					DEP_NONE)) {
		PMD_DRV_LOG(DEBUG, "%s:%u direct dependency on %u",
				t->name, dependent->index, dependency->index);
		dependent->dep_tracking[dependency->index] = DEP_DIRECT;
	}

	/* Add new indirect dependency or change direct to indirect */
	if ((type == DEP_INDIRECT) &&
			((dependent->dep_tracking[dependency->index] ==
			DEP_NONE) ||
			(dependent->dep_tracking[dependency->index] ==
			DEP_DIRECT))) {
		PMD_DRV_LOG(DEBUG, "%s:%u indirect dependency on %u",
				t->name, dependent->index, dependency->index);
		dependent->dep_tracking[dependency->index] = DEP_INDIRECT;
	}

	/* Shouldn't happen... */
	if ((dependent->dep_tracking[dependency->index] == DEP_SELF) &&
			(dependent != input_stage(t))) {
		PMD_DRV_LOG(ERR, "Loop in dependency graph %s:%u",
				t->name, dependent->index);
		return -EINVAL;
	}

	/* Keep going to dependencies of the dependency, until input stage */
	if (dependency != input_stage(t))
		for (i = 0; i < dependency->num_deps; i++) {
			int ret = add_dep(dependent, dependency->deps[i]->stage,
					DEP_INDIRECT);

			if (ret < 0)
				return ret;
		}

	/* Make list of sequence numbers for direct dependencies only */
	if (type == DEP_DIRECT)
		for (i = 0, dependent->num_deps = 0; i < t->num_stages; i++)
			if (dependent->dep_tracking[i] == DEP_DIRECT) {
				if ((i == 0) && (dependent->num_deps > 1))
					rte_panic("%s:%u depends on > input",
							t->name,
							dependent->index);
				dependent->deps[dependent->num_deps++] =
						&t->stages[i].shared;
			}

	return 0;
}

struct opdl_ring *
opdl_ring_create(const char *name, uint32_t num_slots, uint32_t slot_size,
		uint32_t max_num_stages, int socket)
{
	struct opdl_ring *t;
	char mz_name[RTE_MEMZONE_NAMESIZE];
	int mz_flags = 0;
	struct opdl_stage *st = NULL;
	const struct rte_memzone *mz = NULL;
	size_t alloc_size = RTE_CACHE_LINE_ROUNDUP(sizeof(*t) +
			(num_slots * slot_size));

	/* Compile time checking */
	RTE_BUILD_BUG_ON((sizeof(struct shared_state) & RTE_CACHE_LINE_MASK) !=
			0);
	RTE_BUILD_BUG_ON((offsetof(struct opdl_stage, shared) &
			RTE_CACHE_LINE_MASK) != 0);
	RTE_BUILD_BUG_ON((offsetof(struct opdl_ring, slots) &
			RTE_CACHE_LINE_MASK) != 0);
	RTE_BUILD_BUG_ON(!RTE_IS_POWER_OF_2(OPDL_DISCLAIMS_PER_LCORE));

	/* Parameter checking */
	if (name == NULL) {
		PMD_DRV_LOG(ERR, "name param is NULL");
		return NULL;
	}
	if (!rte_is_power_of_2(num_slots)) {
		PMD_DRV_LOG(ERR, "num_slots (%u) for %s is not power of 2",
				num_slots, name);
		return NULL;
	}

	/* Alloc memory for stages */
	st = rte_zmalloc_socket(LIB_NAME,
		max_num_stages * sizeof(struct opdl_stage),
		RTE_CACHE_LINE_SIZE, socket);
	if (st == NULL)
		goto exit_fail;

	snprintf(mz_name, sizeof(mz_name), "%s%s", LIB_NAME, name);

	/* Alloc memory for memzone */
	mz = rte_memzone_reserve(mz_name, alloc_size, socket, mz_flags);
	if (mz == NULL)
		goto exit_fail;

	t = mz->addr;

	/* Initialise opdl_ring queue */
	memset(t, 0, sizeof(*t));
	strlcpy(t->name, name, sizeof(t->name));
	t->socket = socket;
	t->num_slots = num_slots;
	t->mask = num_slots - 1;
	t->slot_size = slot_size;
	t->max_num_stages = max_num_stages;
	t->stages = st;

	PMD_DRV_LOG(DEBUG, "Created %s at %p (num_slots=%u,socket=%i,slot_size=%u)",
			t->name, t, num_slots, socket, slot_size);

	return t;

exit_fail:
	PMD_DRV_LOG(ERR, "Cannot reserve memory");
	rte_free(st);
	rte_memzone_free(mz);

	return NULL;
}
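
/*
 * Illustrative usage sketch (not part of the driver): a minimal pipeline
 * setup might create a ring and chain two stages, making the second depend
 * on the first:
 *
 *	struct opdl_ring *r = opdl_ring_create("pipe", 1024,
 *			sizeof(struct rte_event), 4, rte_socket_id());
 *	struct opdl_stage *s0 = opdl_stage_add(r, false, true);
 *	struct opdl_stage *s1 = opdl_stage_add(r, false, false);
 *	struct opdl_stage *deps[] = { s0 };
 *	opdl_stage_set_deps(s1, deps, 1);
 *
 * num_slots must be a power of two, as checked above.
 */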

void *
opdl_ring_get_slot(const struct opdl_ring *t, uint32_t index)
{
	return get_slot(t, index);
}

bool
opdl_ring_cas_slot(struct opdl_stage *s, const struct rte_event *ev,
		uint32_t index, bool atomic)
{
	uint32_t i = 0, offset;
	struct opdl_ring *t = s->t;
	struct rte_event *ev_orig = NULL;
	bool ev_updated = false;
	uint64_t ev_temp    = 0;
	uint64_t ev_update  = 0;

	uint32_t opa_id   = 0;
	uint32_t flow_id  = 0;
	uint64_t event    = 0;

	if (index > s->num_event) {
		PMD_DRV_LOG(ERR, "index is out of range");
		return ev_updated;
	}

	ev_temp = ev->event & OPDL_EVENT_MASK;

	if (!atomic) {
		offset = opdl_first_entry_id(s->seq, s->nb_instance,
				s->instance_id);
		offset += index * s->nb_instance;
		ev_orig = get_slot(t, s->shadow_head + offset);
		if ((ev_orig->event & OPDL_EVENT_MASK) != ev_temp) {
			ev_orig->event = ev->event;
			ev_updated = true;
		}
		if (ev_orig->u64 != ev->u64) {
			ev_orig->u64 = ev->u64;
			ev_updated = true;
		}

	} else {
		for (i = s->pos; i < s->num_claimed; i++) {
			ev_orig = (struct rte_event *)
				get_slot(t, s->shadow_head + i);

			event  = rte_atomic_load_explicit((uint64_t __rte_atomic *)&ev_orig->event,
					rte_memory_order_acquire);

			opa_id = OPDL_OPA_MASK & (event >> OPDL_OPA_OFFSET);
			flow_id  = OPDL_FLOWID_MASK & event;

			if (opa_id >= s->queue_id)
				continue;

			if ((flow_id % s->nb_instance) == s->instance_id) {
				ev_update = s->queue_id;
				ev_update = (ev_update << OPDL_OPA_OFFSET)
					| ev->event;

				s->pos = i + 1;

				if ((event & OPDL_EVENT_MASK) !=
						ev_temp) {
					rte_atomic_store_explicit(
						(uint64_t __rte_atomic *)&ev_orig->event,
						ev_update, rte_memory_order_release);
					ev_updated = true;
				}
				if (ev_orig->u64 != ev->u64) {
					ev_orig->u64 = ev->u64;
					ev_updated = true;
				}

				break;
			}
		}

	}

	return ev_updated;
}

int
opdl_ring_get_socket(const struct opdl_ring *t)
{
	return t->socket;
}

uint32_t
opdl_ring_get_num_slots(const struct opdl_ring *t)
{
	return t->num_slots;
}

const char *
opdl_ring_get_name(const struct opdl_ring *t)
{
	return t->name;
}

/* Check dependency list is valid for a given opdl_ring */
static int
check_deps(struct opdl_ring *t, struct opdl_stage *deps[],
		uint32_t num_deps)
{
	unsigned int i;

	for (i = 0; i < num_deps; ++i) {
		if (!deps[i]) {
			PMD_DRV_LOG(ERR, "deps[%u] is NULL", i);
			return -EINVAL;
		}
		if (t != deps[i]->t) {
			PMD_DRV_LOG(ERR, "deps[%u] is in opdl_ring %s, not %s",
					i, deps[i]->t->name, t->name);
			return -EINVAL;
		}
	}

	return 0;
}

struct opdl_stage *
opdl_stage_add(struct opdl_ring *t, bool threadsafe, bool is_input)
{
	struct opdl_stage *s;

	/* Parameter checking */
	if (!t) {
		PMD_DRV_LOG(ERR, "opdl_ring is NULL");
		return NULL;
	}
	if (t->num_stages == t->max_num_stages) {
		PMD_DRV_LOG(ERR, "%s has max number of stages (%u)",
				t->name, t->max_num_stages);
		return NULL;
	}

	s = &t->stages[t->num_stages];

	if (((uintptr_t)&s->shared & RTE_CACHE_LINE_MASK) != 0)
		PMD_DRV_LOG(WARNING, "Tail seq num (%p) of %s stage not cache aligned",
				&s->shared, t->name);

	if (init_stage(t, s, threadsafe, is_input) < 0) {
		PMD_DRV_LOG(ERR, "Cannot reserve memory");
		return NULL;
	}
	t->num_stages++;

	return s;
}

uint32_t
opdl_stage_deps_add(struct opdl_ring *t, struct opdl_stage *s,
		uint32_t nb_instance, uint32_t instance_id,
		struct opdl_stage *deps[],
		uint32_t num_deps)
{
	uint32_t i;
	int ret = 0;

	if ((num_deps > 0) && (!deps)) {
		PMD_DRV_LOG(ERR, "%s stage has NULL dependencies", t->name);
		return -1;
	}
	ret = check_deps(t, deps, num_deps);
	if (ret < 0)
		return ret;

	for (i = 0; i < num_deps; i++) {
		ret = add_dep(s, deps[i], DEP_DIRECT);
		if (ret < 0)
			return ret;
	}

	s->nb_instance = nb_instance;
	s->instance_id = instance_id;

	return ret;
}

struct opdl_stage *
opdl_ring_get_input_stage(const struct opdl_ring *t)
{
	return input_stage(t);
}

int
opdl_stage_set_deps(struct opdl_stage *s, struct opdl_stage *deps[],
		uint32_t num_deps)
{
	unsigned int i;
	int ret;

	if ((num_deps == 0) || (!deps)) {
		PMD_DRV_LOG(ERR, "cannot set NULL dependencies");
		return -EINVAL;
	}

	ret = check_deps(s->t, deps, num_deps);
	if (ret < 0)
		return ret;

	/* Update deps */
	for (i = 0; i < num_deps; i++)
		s->deps[i] = &deps[i]->shared;
	s->num_deps = num_deps;

	return 0;
}

struct opdl_ring *
opdl_stage_get_opdl_ring(const struct opdl_stage *s)
{
	return s->t;
}

void
opdl_stage_set_queue_id(struct opdl_stage *s,
		uint32_t queue_id)
{
	s->queue_id = queue_id;
}

void
opdl_ring_dump(const struct opdl_ring *t, FILE *f)
{
	uint32_t i;

	if (t == NULL) {
		fprintf(f, "NULL OPDL!\n");
		return;
	}
	fprintf(f, "OPDL \"%s\": num_slots=%u; mask=%#x; slot_size=%u; num_stages=%u; socket=%i\n",
			t->name, t->num_slots, t->mask, t->slot_size,
			t->num_stages, t->socket);
	for (i = 0; i < t->num_stages; i++) {
		uint32_t j;
		const struct opdl_stage *s = &t->stages[i];

		fprintf(f, "  %s[%u]: threadsafe=%s; head=%u; available_seq=%u; tail=%u; deps=%u",
				t->name, i, (s->threadsafe) ? "true" : "false",
				(s->threadsafe) ? s->shared.head : s->head,
				(s->threadsafe) ? s->shared.available_seq :
				s->available_seq,
				s->shared.tail, (s->num_deps > 0) ?
				s->deps[0]->stage->index : 0);
		for (j = 1; j < s->num_deps; j++)
			fprintf(f, ",%u", s->deps[j]->stage->index);
		fprintf(f, "\n");
	}
	fflush(f);
}

void
opdl_ring_free(struct opdl_ring *t)
{
	uint32_t i;
	const struct rte_memzone *mz;
	char mz_name[RTE_MEMZONE_NAMESIZE];

	if (t == NULL) {
		PMD_DRV_LOG(DEBUG, "Freeing NULL OPDL Ring!");
		return;
	}

	PMD_DRV_LOG(DEBUG, "Freeing %s opdl_ring at %p", t->name, t);

	for (i = 0; i < t->num_stages; ++i) {
		rte_free(t->stages[i].deps);
		rte_free(t->stages[i].dep_tracking);
	}

	rte_free(t->stages);

	snprintf(mz_name, sizeof(mz_name), "%s%s", LIB_NAME, t->name);
	mz = rte_memzone_lookup(mz_name);
	if (rte_memzone_free(mz) != 0)
		PMD_DRV_LOG(ERR, "Cannot free memzone for %s", t->name);
}

/* Search for an opdl_ring by its name */
struct opdl_ring *
opdl_ring_lookup(const char *name)
{
	const struct rte_memzone *mz;
	char mz_name[RTE_MEMZONE_NAMESIZE];

	snprintf(mz_name, sizeof(mz_name), "%s%s", LIB_NAME, name);

	mz = rte_memzone_lookup(mz_name);
	if (mz == NULL)
		return NULL;

	return mz->addr;
}

void
opdl_ring_set_stage_threadsafe(struct opdl_stage *s, bool threadsafe)
{
	s->threadsafe = threadsafe;
}