1c9e4dc02SPavan Nikhilesh /* SPDX-License-Identifier: BSD-3-Clause 2c9e4dc02SPavan Nikhilesh * Copyright(C) 2023 Marvell. 3c9e4dc02SPavan Nikhilesh */ 4c9e4dc02SPavan Nikhilesh 5c9e4dc02SPavan Nikhilesh #include "cnxk_ep_rx.h" 6c9e4dc02SPavan Nikhilesh 7c9e4dc02SPavan Nikhilesh static __rte_always_inline void 8c9e4dc02SPavan Nikhilesh cnxk_ep_process_pkts_vec_neon(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, 9c9e4dc02SPavan Nikhilesh uint16_t new_pkts) 10c9e4dc02SPavan Nikhilesh { 11c9e4dc02SPavan Nikhilesh const uint8x16_t mask0 = {0, 1, 0xff, 0xff, 0, 1, 0xff, 0xff, 12c9e4dc02SPavan Nikhilesh 4, 5, 0xff, 0xff, 4, 5, 0xff, 0xff}; 13c9e4dc02SPavan Nikhilesh const uint8x16_t mask1 = {8, 9, 0xff, 0xff, 8, 9, 0xff, 0xff, 14c9e4dc02SPavan Nikhilesh 12, 13, 0xff, 0xff, 12, 13, 0xff, 0xff}; 15c9e4dc02SPavan Nikhilesh struct rte_mbuf **recv_buf_list = droq->recv_buf_list; 16c9e4dc02SPavan Nikhilesh uint32_t pidx0, pidx1, pidx2, pidx3; 17c9e4dc02SPavan Nikhilesh struct rte_mbuf *m0, *m1, *m2, *m3; 18c9e4dc02SPavan Nikhilesh uint32_t read_idx = droq->read_idx; 19c9e4dc02SPavan Nikhilesh uint16_t nb_desc = droq->nb_desc; 20c9e4dc02SPavan Nikhilesh uint32_t idx0, idx1, idx2, idx3; 21c9e4dc02SPavan Nikhilesh uint64x2_t s01, s23; 22c9e4dc02SPavan Nikhilesh uint32x4_t bytes; 23c9e4dc02SPavan Nikhilesh uint16_t pkts = 0; 24c9e4dc02SPavan Nikhilesh 25c9e4dc02SPavan Nikhilesh idx0 = read_idx; 26c9e4dc02SPavan Nikhilesh s01 = vdupq_n_u64(0); 27c9e4dc02SPavan Nikhilesh bytes = vdupq_n_u32(0); 28c9e4dc02SPavan Nikhilesh while (pkts < new_pkts) { 29c9e4dc02SPavan Nikhilesh idx1 = otx_ep_incr_index(idx0, 1, nb_desc); 30c9e4dc02SPavan Nikhilesh idx2 = otx_ep_incr_index(idx1, 1, nb_desc); 31c9e4dc02SPavan Nikhilesh idx3 = otx_ep_incr_index(idx2, 1, nb_desc); 32c9e4dc02SPavan Nikhilesh 33c9e4dc02SPavan Nikhilesh if (new_pkts - pkts > 4) { 34c9e4dc02SPavan Nikhilesh pidx0 = otx_ep_incr_index(idx3, 1, nb_desc); 35c9e4dc02SPavan Nikhilesh pidx1 = otx_ep_incr_index(pidx0, 1, nb_desc); 36c9e4dc02SPavan Nikhilesh pidx2 = otx_ep_incr_index(pidx1, 1, nb_desc); 37c9e4dc02SPavan Nikhilesh pidx3 = otx_ep_incr_index(pidx2, 1, nb_desc); 38c9e4dc02SPavan Nikhilesh 39c9e4dc02SPavan Nikhilesh rte_prefetch_non_temporal(cnxk_pktmbuf_mtod(recv_buf_list[pidx0], void *)); 40c9e4dc02SPavan Nikhilesh rte_prefetch_non_temporal(cnxk_pktmbuf_mtod(recv_buf_list[pidx1], void *)); 41c9e4dc02SPavan Nikhilesh rte_prefetch_non_temporal(cnxk_pktmbuf_mtod(recv_buf_list[pidx2], void *)); 42c9e4dc02SPavan Nikhilesh rte_prefetch_non_temporal(cnxk_pktmbuf_mtod(recv_buf_list[pidx3], void *)); 43c9e4dc02SPavan Nikhilesh } 44c9e4dc02SPavan Nikhilesh 45c9e4dc02SPavan Nikhilesh m0 = recv_buf_list[idx0]; 46c9e4dc02SPavan Nikhilesh m1 = recv_buf_list[idx1]; 47c9e4dc02SPavan Nikhilesh m2 = recv_buf_list[idx2]; 48c9e4dc02SPavan Nikhilesh m3 = recv_buf_list[idx3]; 49c9e4dc02SPavan Nikhilesh 50c9e4dc02SPavan Nikhilesh /* Load packet size big-endian. */ 51c9e4dc02SPavan Nikhilesh s01 = vsetq_lane_u32(cnxk_pktmbuf_mtod(m0, struct otx_ep_droq_info *)->length >> 48, 52c9e4dc02SPavan Nikhilesh s01, 0); 53c9e4dc02SPavan Nikhilesh s01 = vsetq_lane_u32(cnxk_pktmbuf_mtod(m1, struct otx_ep_droq_info *)->length >> 48, 54c9e4dc02SPavan Nikhilesh s01, 1); 55c9e4dc02SPavan Nikhilesh s01 = vsetq_lane_u32(cnxk_pktmbuf_mtod(m2, struct otx_ep_droq_info *)->length >> 48, 56c9e4dc02SPavan Nikhilesh s01, 2); 57c9e4dc02SPavan Nikhilesh s01 = vsetq_lane_u32(cnxk_pktmbuf_mtod(m3, struct otx_ep_droq_info *)->length >> 48, 58c9e4dc02SPavan Nikhilesh s01, 3); 59c9e4dc02SPavan Nikhilesh /* Convert to little-endian. */ 60c9e4dc02SPavan Nikhilesh s01 = vrev16q_u8(s01); 61c9e4dc02SPavan Nikhilesh 62c9e4dc02SPavan Nikhilesh /* Vertical add, consolidate outside the loop. */ 63c9e4dc02SPavan Nikhilesh bytes += vaddq_u32(bytes, s01); 64*f2b1510fSStephen Hemminger /* Separate into packet length and data length. */ 65c9e4dc02SPavan Nikhilesh s23 = vqtbl1q_u8(s01, mask1); 66c9e4dc02SPavan Nikhilesh s01 = vqtbl1q_u8(s01, mask0); 67c9e4dc02SPavan Nikhilesh 68c9e4dc02SPavan Nikhilesh /* Store packet length and data length to mbuf. */ 69c9e4dc02SPavan Nikhilesh *(uint64_t *)&m0->pkt_len = vgetq_lane_u64(s01, 0); 70c9e4dc02SPavan Nikhilesh *(uint64_t *)&m1->pkt_len = vgetq_lane_u64(s01, 1); 71c9e4dc02SPavan Nikhilesh *(uint64_t *)&m2->pkt_len = vgetq_lane_u64(s23, 0); 72c9e4dc02SPavan Nikhilesh *(uint64_t *)&m3->pkt_len = vgetq_lane_u64(s23, 1); 73c9e4dc02SPavan Nikhilesh 74c9e4dc02SPavan Nikhilesh /* Reset rearm data. */ 75c9e4dc02SPavan Nikhilesh *(uint64_t *)&m0->rearm_data = droq->rearm_data; 76c9e4dc02SPavan Nikhilesh *(uint64_t *)&m1->rearm_data = droq->rearm_data; 77c9e4dc02SPavan Nikhilesh *(uint64_t *)&m2->rearm_data = droq->rearm_data; 78c9e4dc02SPavan Nikhilesh *(uint64_t *)&m3->rearm_data = droq->rearm_data; 79c9e4dc02SPavan Nikhilesh 80c9e4dc02SPavan Nikhilesh rx_pkts[pkts++] = m0; 81c9e4dc02SPavan Nikhilesh rx_pkts[pkts++] = m1; 82c9e4dc02SPavan Nikhilesh rx_pkts[pkts++] = m2; 83c9e4dc02SPavan Nikhilesh rx_pkts[pkts++] = m3; 84c9e4dc02SPavan Nikhilesh idx0 = otx_ep_incr_index(idx3, 1, nb_desc); 85c9e4dc02SPavan Nikhilesh } 86c9e4dc02SPavan Nikhilesh droq->read_idx = idx0; 87c9e4dc02SPavan Nikhilesh 88c9e4dc02SPavan Nikhilesh droq->refill_count += new_pkts; 89c9e4dc02SPavan Nikhilesh droq->pkts_pending -= new_pkts; 90c9e4dc02SPavan Nikhilesh /* Stats */ 91c9e4dc02SPavan Nikhilesh droq->stats.pkts_received += new_pkts; 92c9e4dc02SPavan Nikhilesh #if defined(RTE_ARCH_32) 93c9e4dc02SPavan Nikhilesh droq->stats.bytes_received += vgetq_lane_u32(bytes, 0); 94c9e4dc02SPavan Nikhilesh droq->stats.bytes_received += vgetq_lane_u32(bytes, 1); 95c9e4dc02SPavan Nikhilesh droq->stats.bytes_received += vgetq_lane_u32(bytes, 2); 96c9e4dc02SPavan Nikhilesh droq->stats.bytes_received += vgetq_lane_u32(bytes, 3); 97c9e4dc02SPavan Nikhilesh #else 98c9e4dc02SPavan Nikhilesh droq->stats.bytes_received += vaddvq_u32(bytes); 99c9e4dc02SPavan Nikhilesh #endif 100c9e4dc02SPavan Nikhilesh } 101c9e4dc02SPavan Nikhilesh 102c9e4dc02SPavan Nikhilesh uint16_t __rte_noinline __rte_hot 103c9e4dc02SPavan Nikhilesh cnxk_ep_recv_pkts_neon(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 104c9e4dc02SPavan Nikhilesh { 105c9e4dc02SPavan Nikhilesh struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue; 106c9e4dc02SPavan Nikhilesh uint16_t new_pkts, vpkts; 107c9e4dc02SPavan Nikhilesh 108c9e4dc02SPavan Nikhilesh /* Refill RX buffers */ 109c9e4dc02SPavan Nikhilesh if (droq->refill_count >= DROQ_REFILL_THRESHOLD) 110c9e4dc02SPavan Nikhilesh cnxk_ep_rx_refill(droq); 111c9e4dc02SPavan Nikhilesh 112c9e4dc02SPavan Nikhilesh new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts); 113c9e4dc02SPavan Nikhilesh vpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_SSE); 114c9e4dc02SPavan Nikhilesh cnxk_ep_process_pkts_vec_neon(rx_pkts, droq, vpkts); 115c9e4dc02SPavan Nikhilesh cnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts); 116c9e4dc02SPavan Nikhilesh 117c9e4dc02SPavan Nikhilesh return new_pkts; 118c9e4dc02SPavan Nikhilesh } 119c9e4dc02SPavan Nikhilesh 120c9e4dc02SPavan Nikhilesh uint16_t __rte_noinline __rte_hot 121c9e4dc02SPavan Nikhilesh cn9k_ep_recv_pkts_neon(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 122c9e4dc02SPavan Nikhilesh { 123c9e4dc02SPavan Nikhilesh struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue; 124c9e4dc02SPavan Nikhilesh uint16_t new_pkts, vpkts; 125c9e4dc02SPavan Nikhilesh 126c9e4dc02SPavan Nikhilesh /* Refill RX buffers */ 127c9e4dc02SPavan Nikhilesh if (droq->refill_count >= DROQ_REFILL_THRESHOLD) { 128c9e4dc02SPavan Nikhilesh cnxk_ep_rx_refill(droq); 129c9e4dc02SPavan Nikhilesh } else { 130c9e4dc02SPavan Nikhilesh /* SDP output goes into DROP state when output doorbell count 131c9e4dc02SPavan Nikhilesh * goes below drop count. When door bell count is written with 132c9e4dc02SPavan Nikhilesh * a value greater than drop count SDP output should come out 133c9e4dc02SPavan Nikhilesh * of DROP state. Due to a race condition this is not happening. 134c9e4dc02SPavan Nikhilesh * Writing doorbell register with 0 again may make SDP output 135c9e4dc02SPavan Nikhilesh * come out of this state. 136c9e4dc02SPavan Nikhilesh */ 137c9e4dc02SPavan Nikhilesh 138c9e4dc02SPavan Nikhilesh rte_write32(0, droq->pkts_credit_reg); 139c9e4dc02SPavan Nikhilesh } 140c9e4dc02SPavan Nikhilesh 141c9e4dc02SPavan Nikhilesh new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts); 142c9e4dc02SPavan Nikhilesh vpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_SSE); 143c9e4dc02SPavan Nikhilesh cnxk_ep_process_pkts_vec_neon(rx_pkts, droq, vpkts); 144c9e4dc02SPavan Nikhilesh cnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts); 145c9e4dc02SPavan Nikhilesh 146c9e4dc02SPavan Nikhilesh return new_pkts; 147c9e4dc02SPavan Nikhilesh } 148