1ab09499eSPavan Nikhilesh /* SPDX-License-Identifier: BSD-3-Clause 2ab09499eSPavan Nikhilesh * Copyright(C) 2023 Marvell. 3ab09499eSPavan Nikhilesh */ 4ab09499eSPavan Nikhilesh 5ab09499eSPavan Nikhilesh #include "cnxk_ep_rx.h" 6ab09499eSPavan Nikhilesh 7ab09499eSPavan Nikhilesh static __rte_always_inline uint32_t 8ab09499eSPavan Nikhilesh hadd(__m128i x) 9ab09499eSPavan Nikhilesh { 10ab09499eSPavan Nikhilesh __m128i hi64 = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)); 11ab09499eSPavan Nikhilesh __m128i sum64 = _mm_add_epi32(hi64, x); 12ab09499eSPavan Nikhilesh __m128i hi32 = _mm_shufflelo_epi16(sum64, _MM_SHUFFLE(1, 0, 3, 2)); 13ab09499eSPavan Nikhilesh __m128i sum32 = _mm_add_epi32(sum64, hi32); 14ab09499eSPavan Nikhilesh return _mm_cvtsi128_si32(sum32); 15ab09499eSPavan Nikhilesh } 16ab09499eSPavan Nikhilesh 17ab09499eSPavan Nikhilesh static __rte_always_inline void 18ab09499eSPavan Nikhilesh cnxk_ep_process_pkts_vec_sse(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts) 19ab09499eSPavan Nikhilesh { 20ab09499eSPavan Nikhilesh struct rte_mbuf **recv_buf_list = droq->recv_buf_list; 21bdfb48bfSPavan Nikhilesh uint32_t read_idx = droq->read_idx; 22ab09499eSPavan Nikhilesh struct rte_mbuf *m0, *m1, *m2, *m3; 23ab09499eSPavan Nikhilesh uint16_t nb_desc = droq->nb_desc; 24bdfb48bfSPavan Nikhilesh uint32_t idx0, idx1, idx2, idx3; 25ab09499eSPavan Nikhilesh uint16_t pkts = 0; 26bdfb48bfSPavan Nikhilesh __m128i bytes; 27ab09499eSPavan Nikhilesh 28ab09499eSPavan Nikhilesh idx0 = read_idx; 29bdfb48bfSPavan Nikhilesh bytes = _mm_setzero_si128(); 30ab09499eSPavan Nikhilesh while (pkts < new_pkts) { 31ab09499eSPavan Nikhilesh const __m128i bswap_mask = _mm_set_epi8(0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 8, 9, 0xFF, 32ab09499eSPavan Nikhilesh 0xFF, 4, 5, 0xFF, 0xFF, 0, 1); 33ab09499eSPavan Nikhilesh const __m128i cpy_mask = _mm_set_epi8(0xFF, 0xFF, 9, 8, 0xFF, 0xFF, 9, 8, 0xFF, 34ab09499eSPavan Nikhilesh 0xFF, 1, 0, 0xFF, 0xFF, 1, 0); 35ab09499eSPavan Nikhilesh __m128i s01, s23; 36ab09499eSPavan Nikhilesh 37ab09499eSPavan Nikhilesh idx1 = otx_ep_incr_index(idx0, 1, nb_desc); 38ab09499eSPavan Nikhilesh idx2 = otx_ep_incr_index(idx1, 1, nb_desc); 39ab09499eSPavan Nikhilesh idx3 = otx_ep_incr_index(idx2, 1, nb_desc); 40ab09499eSPavan Nikhilesh 41ab09499eSPavan Nikhilesh m0 = recv_buf_list[idx0]; 42ab09499eSPavan Nikhilesh m1 = recv_buf_list[idx1]; 43ab09499eSPavan Nikhilesh m2 = recv_buf_list[idx2]; 44ab09499eSPavan Nikhilesh m3 = recv_buf_list[idx3]; 45ab09499eSPavan Nikhilesh 46ab09499eSPavan Nikhilesh /* Load packet size big-endian. */ 47bdfb48bfSPavan Nikhilesh s01 = _mm_set_epi32(cnxk_pktmbuf_mtod(m3, struct otx_ep_droq_info *)->length >> 48, 48bdfb48bfSPavan Nikhilesh cnxk_pktmbuf_mtod(m1, struct otx_ep_droq_info *)->length >> 48, 49bdfb48bfSPavan Nikhilesh cnxk_pktmbuf_mtod(m2, struct otx_ep_droq_info *)->length >> 48, 50bdfb48bfSPavan Nikhilesh cnxk_pktmbuf_mtod(m0, struct otx_ep_droq_info *)->length >> 48); 51ab09499eSPavan Nikhilesh /* Convert to little-endian. */ 52ab09499eSPavan Nikhilesh s01 = _mm_shuffle_epi8(s01, bswap_mask); 53bdfb48bfSPavan Nikhilesh /* Vertical add, consolidate outside loop */ 54bdfb48bfSPavan Nikhilesh bytes = _mm_add_epi32(bytes, s01); 55*f2b1510fSStephen Hemminger /* Separate into packet length and data length. */ 56ab09499eSPavan Nikhilesh s23 = _mm_shuffle_epi32(s01, _MM_SHUFFLE(3, 3, 1, 1)); 57ab09499eSPavan Nikhilesh s01 = _mm_shuffle_epi8(s01, cpy_mask); 58ab09499eSPavan Nikhilesh s23 = _mm_shuffle_epi8(s23, cpy_mask); 59ab09499eSPavan Nikhilesh 60ab09499eSPavan Nikhilesh /* Store packet length and data length to mbuf. */ 61ab09499eSPavan Nikhilesh *(uint64_t *)&m0->pkt_len = ((rte_xmm_t)s01).u64[0]; 62ab09499eSPavan Nikhilesh *(uint64_t *)&m1->pkt_len = ((rte_xmm_t)s01).u64[1]; 63ab09499eSPavan Nikhilesh *(uint64_t *)&m2->pkt_len = ((rte_xmm_t)s23).u64[0]; 64ab09499eSPavan Nikhilesh *(uint64_t *)&m3->pkt_len = ((rte_xmm_t)s23).u64[1]; 65ab09499eSPavan Nikhilesh 66ab09499eSPavan Nikhilesh /* Reset rearm data. */ 67ab09499eSPavan Nikhilesh *(uint64_t *)&m0->rearm_data = droq->rearm_data; 68ab09499eSPavan Nikhilesh *(uint64_t *)&m1->rearm_data = droq->rearm_data; 69ab09499eSPavan Nikhilesh *(uint64_t *)&m2->rearm_data = droq->rearm_data; 70ab09499eSPavan Nikhilesh *(uint64_t *)&m3->rearm_data = droq->rearm_data; 71ab09499eSPavan Nikhilesh 72ab09499eSPavan Nikhilesh rx_pkts[pkts++] = m0; 73ab09499eSPavan Nikhilesh rx_pkts[pkts++] = m1; 74ab09499eSPavan Nikhilesh rx_pkts[pkts++] = m2; 75ab09499eSPavan Nikhilesh rx_pkts[pkts++] = m3; 76ab09499eSPavan Nikhilesh idx0 = otx_ep_incr_index(idx3, 1, nb_desc); 77ab09499eSPavan Nikhilesh } 78ab09499eSPavan Nikhilesh droq->read_idx = idx0; 79ab09499eSPavan Nikhilesh 80ab09499eSPavan Nikhilesh droq->refill_count += new_pkts; 81ab09499eSPavan Nikhilesh droq->pkts_pending -= new_pkts; 82ab09499eSPavan Nikhilesh /* Stats */ 83ab09499eSPavan Nikhilesh droq->stats.pkts_received += new_pkts; 84bdfb48bfSPavan Nikhilesh droq->stats.bytes_received += hadd(bytes); 85ab09499eSPavan Nikhilesh } 86ab09499eSPavan Nikhilesh 87ab09499eSPavan Nikhilesh uint16_t __rte_noinline __rte_hot 88ab09499eSPavan Nikhilesh cnxk_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 89ab09499eSPavan Nikhilesh { 90ab09499eSPavan Nikhilesh struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue; 91ab09499eSPavan Nikhilesh uint16_t new_pkts, vpkts; 92ab09499eSPavan Nikhilesh 93bdfb48bfSPavan Nikhilesh /* Refill RX buffers */ 94bdfb48bfSPavan Nikhilesh if (droq->refill_count >= DROQ_REFILL_THRESHOLD) 95bdfb48bfSPavan Nikhilesh cnxk_ep_rx_refill(droq); 96bdfb48bfSPavan Nikhilesh 97ab09499eSPavan Nikhilesh new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts); 98ab09499eSPavan Nikhilesh vpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_SSE); 99ab09499eSPavan Nikhilesh cnxk_ep_process_pkts_vec_sse(rx_pkts, droq, vpkts); 100ab09499eSPavan Nikhilesh cnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts); 101ab09499eSPavan Nikhilesh 102ab09499eSPavan Nikhilesh return new_pkts; 103ab09499eSPavan Nikhilesh } 104ab09499eSPavan Nikhilesh 105ab09499eSPavan Nikhilesh uint16_t __rte_noinline __rte_hot 106ab09499eSPavan Nikhilesh cn9k_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 107ab09499eSPavan Nikhilesh { 108ab09499eSPavan Nikhilesh struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue; 109ab09499eSPavan Nikhilesh uint16_t new_pkts, vpkts; 110ab09499eSPavan Nikhilesh 111ab09499eSPavan Nikhilesh /* Refill RX buffers */ 112ab09499eSPavan Nikhilesh if (droq->refill_count >= DROQ_REFILL_THRESHOLD) { 113ab09499eSPavan Nikhilesh cnxk_ep_rx_refill(droq); 114ab09499eSPavan Nikhilesh } else { 115ab09499eSPavan Nikhilesh /* SDP output goes into DROP state when output doorbell count 116ab09499eSPavan Nikhilesh * goes below drop count. When door bell count is written with 117ab09499eSPavan Nikhilesh * a value greater than drop count SDP output should come out 118ab09499eSPavan Nikhilesh * of DROP state. Due to a race condition this is not happening. 119ab09499eSPavan Nikhilesh * Writing doorbell register with 0 again may make SDP output 120ab09499eSPavan Nikhilesh * come out of this state. 121ab09499eSPavan Nikhilesh */ 122ab09499eSPavan Nikhilesh 123ab09499eSPavan Nikhilesh rte_write32(0, droq->pkts_credit_reg); 124ab09499eSPavan Nikhilesh } 125ab09499eSPavan Nikhilesh 126bdfb48bfSPavan Nikhilesh new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts); 127bdfb48bfSPavan Nikhilesh vpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_SSE); 128bdfb48bfSPavan Nikhilesh cnxk_ep_process_pkts_vec_sse(rx_pkts, droq, vpkts); 129bdfb48bfSPavan Nikhilesh cnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts); 130bdfb48bfSPavan Nikhilesh 131ab09499eSPavan Nikhilesh return new_pkts; 132ab09499eSPavan Nikhilesh } 133