199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause 299a2dd95SBruce Richardson * Copyright(c) 2017 Intel Corporation 399a2dd95SBruce Richardson */ 499a2dd95SBruce Richardson 599a2dd95SBruce Richardson #ifndef _DIST_PRIV_H_ 699a2dd95SBruce Richardson #define _DIST_PRIV_H_ 799a2dd95SBruce Richardson 8e9fd1ebfSTyler Retzlaff #include <stdalign.h> 9e9fd1ebfSTyler Retzlaff 1099a2dd95SBruce Richardson /** 1199a2dd95SBruce Richardson * @file 1299a2dd95SBruce Richardson * RTE distributor 1399a2dd95SBruce Richardson * 1499a2dd95SBruce Richardson * The distributor is a component which is designed to pass packets 1599a2dd95SBruce Richardson * one-at-a-time to workers, with dynamic load balancing. 1699a2dd95SBruce Richardson */ 1799a2dd95SBruce Richardson 1899a2dd95SBruce Richardson #define NO_FLAGS 0 1999a2dd95SBruce Richardson #define RTE_DISTRIB_PREFIX "DT_" 2099a2dd95SBruce Richardson 2199a2dd95SBruce Richardson /* 2299a2dd95SBruce Richardson * We will use the bottom four bits of pointer for flags, shifting out 2399a2dd95SBruce Richardson * the top four bits to make room (since a 64-bit pointer actually only uses 2499a2dd95SBruce Richardson * 48 bits). An arithmetic-right-shift will then appropriately restore the 2599a2dd95SBruce Richardson * original pointer value with proper sign extension into the top bits. 2699a2dd95SBruce Richardson */ 2799a2dd95SBruce Richardson #define RTE_DISTRIB_FLAG_BITS 4 2899a2dd95SBruce Richardson #define RTE_DISTRIB_FLAGS_MASK (0x0F) 2999a2dd95SBruce Richardson #define RTE_DISTRIB_NO_BUF 0 /**< empty flags: no buffer requested */ 3099a2dd95SBruce Richardson #define RTE_DISTRIB_GET_BUF (1) /**< worker requests a buffer, returns old */ 3199a2dd95SBruce Richardson #define RTE_DISTRIB_RETURN_BUF (2) /**< worker returns a buffer, no request */ 3299a2dd95SBruce Richardson #define RTE_DISTRIB_VALID_BUF (4) /**< set if bufptr contains ptr */ 3399a2dd95SBruce Richardson 3499a2dd95SBruce Richardson #define RTE_DISTRIB_BACKLOG_SIZE 8 3599a2dd95SBruce Richardson #define RTE_DISTRIB_BACKLOG_MASK (RTE_DISTRIB_BACKLOG_SIZE - 1) 3699a2dd95SBruce Richardson 3799a2dd95SBruce Richardson #define RTE_DISTRIB_MAX_RETURNS 128 3899a2dd95SBruce Richardson #define RTE_DISTRIB_RETURNS_MASK (RTE_DISTRIB_MAX_RETURNS - 1) 3999a2dd95SBruce Richardson 4099a2dd95SBruce Richardson /** 4199a2dd95SBruce Richardson * Maximum number of workers allowed. 4299a2dd95SBruce Richardson * Be aware of increasing the limit, because it is limited by how we track 4399a2dd95SBruce Richardson * in-flight tags. See in_flight_bitmask and rte_distributor_process 4499a2dd95SBruce Richardson */ 4599a2dd95SBruce Richardson #define RTE_DISTRIB_MAX_WORKERS 64 4699a2dd95SBruce Richardson 4799a2dd95SBruce Richardson #define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */ 4899a2dd95SBruce Richardson 4999a2dd95SBruce Richardson /** 5099a2dd95SBruce Richardson * Buffer structure used to pass the pointer data between cores. This is cache 5199a2dd95SBruce Richardson * line aligned, but to improve performance and prevent adjacent cache-line 5299a2dd95SBruce Richardson * prefetches of buffers for other workers, e.g. when worker 1's buffer is on 5399a2dd95SBruce Richardson * the next cache line to worker 0, we pad this out to three cache lines. 5499a2dd95SBruce Richardson * Only 64-bits of the memory is actually used though. 5599a2dd95SBruce Richardson */ 56*c6552d9aSTyler Retzlaff union __rte_cache_aligned rte_distributor_buffer_single { 57c2a363a3STyler Retzlaff volatile RTE_ATOMIC(int64_t) bufptr64; 5899a2dd95SBruce Richardson char pad[RTE_CACHE_LINE_SIZE*3]; 59*c6552d9aSTyler Retzlaff }; 6099a2dd95SBruce Richardson 6199a2dd95SBruce Richardson /* 6299a2dd95SBruce Richardson * Transfer up to 8 mbufs at a time to/from workers, and 6399a2dd95SBruce Richardson * flow matching algorithm optimized for 8 flow IDs at a time 6499a2dd95SBruce Richardson */ 6599a2dd95SBruce Richardson #define RTE_DIST_BURST_SIZE 8 6699a2dd95SBruce Richardson 67*c6552d9aSTyler Retzlaff struct __rte_cache_aligned rte_distributor_backlog { 6899a2dd95SBruce Richardson unsigned int start; 6999a2dd95SBruce Richardson unsigned int count; 70e9fd1ebfSTyler Retzlaff alignas(RTE_CACHE_LINE_SIZE) int64_t pkts[RTE_DIST_BURST_SIZE]; 7199a2dd95SBruce Richardson uint16_t *tags; /* will point to second cacheline of inflights */ 72*c6552d9aSTyler Retzlaff }; 7399a2dd95SBruce Richardson 7499a2dd95SBruce Richardson 7599a2dd95SBruce Richardson struct rte_distributor_returned_pkts { 7699a2dd95SBruce Richardson unsigned int start; 7799a2dd95SBruce Richardson unsigned int count; 7899a2dd95SBruce Richardson struct rte_mbuf *mbufs[RTE_DISTRIB_MAX_RETURNS]; 7999a2dd95SBruce Richardson }; 8099a2dd95SBruce Richardson 8199a2dd95SBruce Richardson struct rte_distributor_single { 8299a2dd95SBruce Richardson TAILQ_ENTRY(rte_distributor_single) next; /**< Next in list. */ 8399a2dd95SBruce Richardson 8499a2dd95SBruce Richardson char name[RTE_DISTRIBUTOR_NAMESIZE]; /**< Name of the ring. */ 8599a2dd95SBruce Richardson unsigned int num_workers; /**< Number of workers polling */ 8699a2dd95SBruce Richardson 8799a2dd95SBruce Richardson uint32_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS]; 8899a2dd95SBruce Richardson /**< Tracks the tag being processed per core */ 8999a2dd95SBruce Richardson uint64_t in_flight_bitmask; 9099a2dd95SBruce Richardson /**< on/off bits for in-flight tags. 9199a2dd95SBruce Richardson * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 then 9299a2dd95SBruce Richardson * the bitmask has to expand. 9399a2dd95SBruce Richardson */ 9499a2dd95SBruce Richardson 9599a2dd95SBruce Richardson struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS]; 9699a2dd95SBruce Richardson 9799a2dd95SBruce Richardson union rte_distributor_buffer_single bufs[RTE_DISTRIB_MAX_WORKERS]; 9899a2dd95SBruce Richardson 9999a2dd95SBruce Richardson struct rte_distributor_returned_pkts returns; 10099a2dd95SBruce Richardson }; 10199a2dd95SBruce Richardson 10299a2dd95SBruce Richardson /* All different signature compare functions */ 10399a2dd95SBruce Richardson enum rte_distributor_match_function { 10499a2dd95SBruce Richardson RTE_DIST_MATCH_SCALAR = 0, 10599a2dd95SBruce Richardson RTE_DIST_MATCH_VECTOR, 10699a2dd95SBruce Richardson RTE_DIST_NUM_MATCH_FNS 10799a2dd95SBruce Richardson }; 10899a2dd95SBruce Richardson 10999a2dd95SBruce Richardson /** 11099a2dd95SBruce Richardson * Buffer structure used to pass the pointer data between cores. This is cache 11199a2dd95SBruce Richardson * line aligned, but to improve performance and prevent adjacent cache-line 11299a2dd95SBruce Richardson * prefetches of buffers for other workers, e.g. when worker 1's buffer is on 11399a2dd95SBruce Richardson * the next cache line to worker 0, we pad this out to two cache lines. 11499a2dd95SBruce Richardson * We can pass up to 8 mbufs at a time in one cacheline. 11599a2dd95SBruce Richardson * There is a separate cacheline for returns in the burst API. 11699a2dd95SBruce Richardson */ 11799a2dd95SBruce Richardson struct rte_distributor_buffer { 118e9fd1ebfSTyler Retzlaff volatile alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int64_t) bufptr64[RTE_DIST_BURST_SIZE]; 119e9fd1ebfSTyler Retzlaff /* <= outgoing to worker */ 12099a2dd95SBruce Richardson 121e9fd1ebfSTyler Retzlaff alignas(RTE_CACHE_LINE_SIZE) int64_t pad1; /* <= one cache line */ 12299a2dd95SBruce Richardson 123e9fd1ebfSTyler Retzlaff volatile alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int64_t) retptr64[RTE_DIST_BURST_SIZE]; 124e9fd1ebfSTyler Retzlaff /* <= incoming from worker */ 12599a2dd95SBruce Richardson 126e9fd1ebfSTyler Retzlaff alignas(RTE_CACHE_LINE_SIZE) int64_t pad2; /* <= one cache line */ 12799a2dd95SBruce Richardson 128e9fd1ebfSTyler Retzlaff alignas(RTE_CACHE_LINE_SIZE) int count; /* <= number of current mbufs */ 12999a2dd95SBruce Richardson }; 13099a2dd95SBruce Richardson 13199a2dd95SBruce Richardson struct rte_distributor { 13299a2dd95SBruce Richardson TAILQ_ENTRY(rte_distributor) next; /**< Next in list. */ 13399a2dd95SBruce Richardson 13499a2dd95SBruce Richardson char name[RTE_DISTRIBUTOR_NAMESIZE]; /**< Name of the ring. */ 13599a2dd95SBruce Richardson unsigned int num_workers; /**< Number of workers polling */ 13699a2dd95SBruce Richardson unsigned int alg_type; /**< Number of alg types */ 13799a2dd95SBruce Richardson 13899a2dd95SBruce Richardson /**> 13999a2dd95SBruce Richardson * First cache line in the this array are the tags inflight 14099a2dd95SBruce Richardson * on the worker core. Second cache line are the backlog 14199a2dd95SBruce Richardson * that are going to go to the worker core. 14299a2dd95SBruce Richardson */ 143e9fd1ebfSTyler Retzlaff alignas(RTE_CACHE_LINE_SIZE) uint16_t 144e9fd1ebfSTyler Retzlaff in_flight_tags[RTE_DISTRIB_MAX_WORKERS][RTE_DIST_BURST_SIZE*2]; 14599a2dd95SBruce Richardson 146e9fd1ebfSTyler Retzlaff alignas(RTE_CACHE_LINE_SIZE) struct rte_distributor_backlog 147e9fd1ebfSTyler Retzlaff backlog[RTE_DISTRIB_MAX_WORKERS]; 14899a2dd95SBruce Richardson 14999a2dd95SBruce Richardson struct rte_distributor_buffer bufs[RTE_DISTRIB_MAX_WORKERS]; 15099a2dd95SBruce Richardson 15199a2dd95SBruce Richardson struct rte_distributor_returned_pkts returns; 15299a2dd95SBruce Richardson 15399a2dd95SBruce Richardson enum rte_distributor_match_function dist_match_fn; 15499a2dd95SBruce Richardson 15599a2dd95SBruce Richardson struct rte_distributor_single *d_single; 15699a2dd95SBruce Richardson 15799a2dd95SBruce Richardson uint8_t active[RTE_DISTRIB_MAX_WORKERS]; 15899a2dd95SBruce Richardson uint8_t activesum; 15999a2dd95SBruce Richardson }; 16099a2dd95SBruce Richardson 16199a2dd95SBruce Richardson void 16299a2dd95SBruce Richardson find_match_scalar(struct rte_distributor *d, 16399a2dd95SBruce Richardson uint16_t *data_ptr, 16499a2dd95SBruce Richardson uint16_t *output_ptr); 16599a2dd95SBruce Richardson 16699a2dd95SBruce Richardson void 16799a2dd95SBruce Richardson find_match_vec(struct rte_distributor *d, 16899a2dd95SBruce Richardson uint16_t *data_ptr, 16999a2dd95SBruce Richardson uint16_t *output_ptr); 17099a2dd95SBruce Richardson 17199a2dd95SBruce Richardson #endif /* _DIST_PRIV_H_ */ 172