1a68b6168SQi Zhang /* SPDX-License-Identifier: BSD-3-Clause 2a68b6168SQi Zhang * Copyright(c) 2018 Intel Corporation 3a68b6168SQi Zhang */ 4a68b6168SQi Zhang 5a68b6168SQi Zhang #ifndef _MACSWAP_SSE_H_ 6a68b6168SQi Zhang #define _MACSWAP_SSE_H_ 7a68b6168SQi Zhang 8a68b6168SQi Zhang #include "macswap_common.h" 9a68b6168SQi Zhang 10a68b6168SQi Zhang static inline void 11a68b6168SQi Zhang do_macswap(struct rte_mbuf *pkts[], uint16_t nb, 12a68b6168SQi Zhang struct rte_port *txp) 13a68b6168SQi Zhang { 146d13ea8eSOlivier Matz struct rte_ether_hdr *eth_hdr[4]; 1562b52877SQi Zhang struct rte_mbuf *mb[4]; 16a68b6168SQi Zhang uint64_t ol_flags; 17a68b6168SQi Zhang int i; 1862b52877SQi Zhang int r; 198001e1c8SVipin Varghese register __m128i addr0, addr1, addr2, addr3; 20a68b6168SQi Zhang /** 21a68b6168SQi Zhang * shuffle mask be used to shuffle the 16 bytes. 22a68b6168SQi Zhang * byte 0-5 wills be swapped with byte 6-11. 23a68b6168SQi Zhang * byte 12-15 will keep unchanged. 24a68b6168SQi Zhang */ 258001e1c8SVipin Varghese register const __m128i shfl_msk = _mm_set_epi8(15, 14, 13, 12, 26a68b6168SQi Zhang 5, 4, 3, 2, 27a68b6168SQi Zhang 1, 0, 11, 10, 28a68b6168SQi Zhang 9, 8, 7, 6); 29a68b6168SQi Zhang 30a68b6168SQi Zhang ol_flags = ol_flags_init(txp->dev_conf.txmode.offloads); 31a68b6168SQi Zhang vlan_qinq_set(pkts, nb, ol_flags, 32a68b6168SQi Zhang txp->tx_vlan_id, txp->tx_vlan_id_outer); 33a68b6168SQi Zhang 3462b52877SQi Zhang i = 0; 3562b52877SQi Zhang r = nb; 36a68b6168SQi Zhang 3762b52877SQi Zhang while (r >= 4) { 3866fc1704SYongseok Koh if (r >= 8) { 3966fc1704SYongseok Koh rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 4], void *)); 4066fc1704SYongseok Koh rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 5], void *)); 4166fc1704SYongseok Koh rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 6], void *)); 4266fc1704SYongseok Koh rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 7], void *)); 4366fc1704SYongseok Koh } 4466fc1704SYongseok Koh 4562b52877SQi Zhang mb[0] = pkts[i++]; 466d13ea8eSOlivier Matz eth_hdr[0] = rte_pktmbuf_mtod(mb[0], struct rte_ether_hdr *); 4762b52877SQi Zhang addr0 = _mm_loadu_si128((__m128i *)eth_hdr[0]); 48222effc6SVipin Varghese mbuf_field_set(mb[0], ol_flags); 4962b52877SQi Zhang 5062b52877SQi Zhang mb[1] = pkts[i++]; 516d13ea8eSOlivier Matz eth_hdr[1] = rte_pktmbuf_mtod(mb[1], struct rte_ether_hdr *); 5262b52877SQi Zhang addr1 = _mm_loadu_si128((__m128i *)eth_hdr[1]); 53222effc6SVipin Varghese mbuf_field_set(mb[1], ol_flags); 5462b52877SQi Zhang 55*1b307e53SVipin Varghese addr0 = _mm_shuffle_epi8(addr0, shfl_msk); 56*1b307e53SVipin Varghese 5762b52877SQi Zhang mb[2] = pkts[i++]; 586d13ea8eSOlivier Matz eth_hdr[2] = rte_pktmbuf_mtod(mb[2], struct rte_ether_hdr *); 5962b52877SQi Zhang addr2 = _mm_loadu_si128((__m128i *)eth_hdr[2]); 60222effc6SVipin Varghese mbuf_field_set(mb[2], ol_flags); 6162b52877SQi Zhang 62*1b307e53SVipin Varghese addr1 = _mm_shuffle_epi8(addr1, shfl_msk); 63*1b307e53SVipin Varghese _mm_storeu_si128((__m128i *)eth_hdr[0], addr0); 64*1b307e53SVipin Varghese 6562b52877SQi Zhang mb[3] = pkts[i++]; 666d13ea8eSOlivier Matz eth_hdr[3] = rte_pktmbuf_mtod(mb[3], struct rte_ether_hdr *); 6762b52877SQi Zhang addr3 = _mm_loadu_si128((__m128i *)eth_hdr[3]); 68222effc6SVipin Varghese mbuf_field_set(mb[3], ol_flags); 6962b52877SQi Zhang 7062b52877SQi Zhang addr2 = _mm_shuffle_epi8(addr2, shfl_msk); 7162b52877SQi Zhang _mm_storeu_si128((__m128i *)eth_hdr[1], addr1); 72*1b307e53SVipin Varghese 73*1b307e53SVipin Varghese addr3 = _mm_shuffle_epi8(addr3, shfl_msk); 7462b52877SQi Zhang _mm_storeu_si128((__m128i *)eth_hdr[2], addr2); 7562b52877SQi Zhang _mm_storeu_si128((__m128i *)eth_hdr[3], addr3); 7662b52877SQi Zhang 7762b52877SQi Zhang r -= 4; 7862b52877SQi Zhang } 7962b52877SQi Zhang 8062b52877SQi Zhang for ( ; i < nb; i++) { 8162b52877SQi Zhang if (i < nb - 1) 8262b52877SQi Zhang rte_prefetch0(rte_pktmbuf_mtod(pkts[i+1], void *)); 8362b52877SQi Zhang mb[0] = pkts[i]; 846d13ea8eSOlivier Matz eth_hdr[0] = rte_pktmbuf_mtod(mb[0], struct rte_ether_hdr *); 85a68b6168SQi Zhang 86a68b6168SQi Zhang /* Swap dest and src mac addresses. */ 87cc28a690SYongseok Koh addr0 = _mm_loadu_si128((__m128i *)eth_hdr[0]); 88222effc6SVipin Varghese mbuf_field_set(mb[0], ol_flags); 89222effc6SVipin Varghese 9062b52877SQi Zhang addr0 = _mm_shuffle_epi8(addr0, shfl_msk); 9162b52877SQi Zhang _mm_storeu_si128((__m128i *)eth_hdr[0], addr0); 92a68b6168SQi Zhang } 93a68b6168SQi Zhang } 94a68b6168SQi Zhang 95a68b6168SQi Zhang #endif /* _MACSWAP_SSE_H_ */ 96