xref: /dpdk/app/test-pmd/macswap_sse.h (revision 1b307e535643c94be10f2dadca8de354bb2def6d)
1a68b6168SQi Zhang /* SPDX-License-Identifier: BSD-3-Clause
2a68b6168SQi Zhang  * Copyright(c) 2018 Intel Corporation
3a68b6168SQi Zhang  */
4a68b6168SQi Zhang 
5a68b6168SQi Zhang #ifndef _MACSWAP_SSE_H_
6a68b6168SQi Zhang #define _MACSWAP_SSE_H_
7a68b6168SQi Zhang 
8a68b6168SQi Zhang #include "macswap_common.h"
9a68b6168SQi Zhang 
10a68b6168SQi Zhang static inline void
11a68b6168SQi Zhang do_macswap(struct rte_mbuf *pkts[], uint16_t nb,
12a68b6168SQi Zhang 		struct rte_port *txp)
13a68b6168SQi Zhang {
146d13ea8eSOlivier Matz 	struct rte_ether_hdr *eth_hdr[4];
1562b52877SQi Zhang 	struct rte_mbuf *mb[4];
16a68b6168SQi Zhang 	uint64_t ol_flags;
17a68b6168SQi Zhang 	int i;
1862b52877SQi Zhang 	int r;
198001e1c8SVipin Varghese 	register __m128i addr0, addr1, addr2, addr3;
20a68b6168SQi Zhang 	/**
21a68b6168SQi Zhang 	 * shuffle mask be used to shuffle the 16 bytes.
22a68b6168SQi Zhang 	 * byte 0-5 wills be swapped with byte 6-11.
23a68b6168SQi Zhang 	 * byte 12-15 will keep unchanged.
24a68b6168SQi Zhang 	 */
258001e1c8SVipin Varghese 	register const __m128i shfl_msk = _mm_set_epi8(15, 14, 13, 12,
26a68b6168SQi Zhang 					5, 4, 3, 2,
27a68b6168SQi Zhang 					1, 0, 11, 10,
28a68b6168SQi Zhang 					9, 8, 7, 6);
29a68b6168SQi Zhang 
30a68b6168SQi Zhang 	ol_flags = ol_flags_init(txp->dev_conf.txmode.offloads);
31a68b6168SQi Zhang 	vlan_qinq_set(pkts, nb, ol_flags,
32a68b6168SQi Zhang 			txp->tx_vlan_id, txp->tx_vlan_id_outer);
33a68b6168SQi Zhang 
3462b52877SQi Zhang 	i = 0;
3562b52877SQi Zhang 	r = nb;
36a68b6168SQi Zhang 
3762b52877SQi Zhang 	while (r >= 4) {
3866fc1704SYongseok Koh 		if (r >= 8) {
3966fc1704SYongseok Koh 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 4], void *));
4066fc1704SYongseok Koh 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 5], void *));
4166fc1704SYongseok Koh 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 6], void *));
4266fc1704SYongseok Koh 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 7], void *));
4366fc1704SYongseok Koh 		}
4466fc1704SYongseok Koh 
4562b52877SQi Zhang 		mb[0] = pkts[i++];
466d13ea8eSOlivier Matz 		eth_hdr[0] = rte_pktmbuf_mtod(mb[0], struct rte_ether_hdr *);
4762b52877SQi Zhang 		addr0 = _mm_loadu_si128((__m128i *)eth_hdr[0]);
48222effc6SVipin Varghese 		mbuf_field_set(mb[0], ol_flags);
4962b52877SQi Zhang 
5062b52877SQi Zhang 		mb[1] = pkts[i++];
516d13ea8eSOlivier Matz 		eth_hdr[1] = rte_pktmbuf_mtod(mb[1], struct rte_ether_hdr *);
5262b52877SQi Zhang 		addr1 = _mm_loadu_si128((__m128i *)eth_hdr[1]);
53222effc6SVipin Varghese 		mbuf_field_set(mb[1], ol_flags);
5462b52877SQi Zhang 
55*1b307e53SVipin Varghese 		addr0 = _mm_shuffle_epi8(addr0, shfl_msk);
56*1b307e53SVipin Varghese 
5762b52877SQi Zhang 		mb[2] = pkts[i++];
586d13ea8eSOlivier Matz 		eth_hdr[2] = rte_pktmbuf_mtod(mb[2], struct rte_ether_hdr *);
5962b52877SQi Zhang 		addr2 = _mm_loadu_si128((__m128i *)eth_hdr[2]);
60222effc6SVipin Varghese 		mbuf_field_set(mb[2], ol_flags);
6162b52877SQi Zhang 
62*1b307e53SVipin Varghese 		addr1 = _mm_shuffle_epi8(addr1, shfl_msk);
63*1b307e53SVipin Varghese 		_mm_storeu_si128((__m128i *)eth_hdr[0], addr0);
64*1b307e53SVipin Varghese 
6562b52877SQi Zhang 		mb[3] = pkts[i++];
666d13ea8eSOlivier Matz 		eth_hdr[3] = rte_pktmbuf_mtod(mb[3], struct rte_ether_hdr *);
6762b52877SQi Zhang 		addr3 = _mm_loadu_si128((__m128i *)eth_hdr[3]);
68222effc6SVipin Varghese 		mbuf_field_set(mb[3], ol_flags);
6962b52877SQi Zhang 
7062b52877SQi Zhang 		addr2 = _mm_shuffle_epi8(addr2, shfl_msk);
7162b52877SQi Zhang 		_mm_storeu_si128((__m128i *)eth_hdr[1], addr1);
72*1b307e53SVipin Varghese 
73*1b307e53SVipin Varghese 		addr3 = _mm_shuffle_epi8(addr3, shfl_msk);
7462b52877SQi Zhang 		_mm_storeu_si128((__m128i *)eth_hdr[2], addr2);
7562b52877SQi Zhang 		_mm_storeu_si128((__m128i *)eth_hdr[3], addr3);
7662b52877SQi Zhang 
7762b52877SQi Zhang 		r -= 4;
7862b52877SQi Zhang 	}
7962b52877SQi Zhang 
8062b52877SQi Zhang 	for ( ; i < nb; i++) {
8162b52877SQi Zhang 		if (i < nb - 1)
8262b52877SQi Zhang 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i+1], void *));
8362b52877SQi Zhang 		mb[0] = pkts[i];
846d13ea8eSOlivier Matz 		eth_hdr[0] = rte_pktmbuf_mtod(mb[0], struct rte_ether_hdr *);
85a68b6168SQi Zhang 
86a68b6168SQi Zhang 		/* Swap dest and src mac addresses. */
87cc28a690SYongseok Koh 		addr0 = _mm_loadu_si128((__m128i *)eth_hdr[0]);
88222effc6SVipin Varghese 		mbuf_field_set(mb[0], ol_flags);
89222effc6SVipin Varghese 
9062b52877SQi Zhang 		addr0 = _mm_shuffle_epi8(addr0, shfl_msk);
9162b52877SQi Zhang 		_mm_storeu_si128((__m128i *)eth_hdr[0], addr0);
92a68b6168SQi Zhang 	}
93a68b6168SQi Zhang }
94a68b6168SQi Zhang 
95a68b6168SQi Zhang #endif /* _MACSWAP_SSE_H_ */
96