xref: /dpdk/app/test-pmd/macswap_neon.h (revision 6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019 Arm Limited
3  *
4  * Copyright(c) 2019 Intel Corporation
5  *
6  * Derived do_macswap implementation from app/test-pmd/macswap_sse.h
7  */
8 
9 #ifndef _MACSWAP_NEON_H_
10 #define _MACSWAP_NEON_H_
11 
12 #include "macswap_common.h"
13 #include "rte_vect.h"
14 
15 static inline void
do_macswap(struct rte_mbuf * pkts[],uint16_t nb,struct rte_port * txp)16 do_macswap(struct rte_mbuf *pkts[], uint16_t nb,
17 		struct rte_port *txp)
18 {
19 	struct rte_ether_hdr *eth_hdr[4];
20 	struct rte_mbuf *mb[4];
21 	uint64_t ol_flags;
22 	int i;
23 	int r;
24 	uint8x16_t v0, v1, v2, v3;
25 	/**
26 	 * Index map be used to shuffle the 16 bytes.
27 	 * byte 0-5 will be swapped with byte 6-11.
28 	 * byte 12-15 will keep unchanged.
29 	 */
30 	const uint8x16_t idx_map = {6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
31 				12, 13, 14, 15};
32 
33 	ol_flags = ol_flags_init(txp->dev_conf.txmode.offloads);
34 	vlan_qinq_set(pkts, nb, ol_flags,
35 			txp->tx_vlan_id, txp->tx_vlan_id_outer);
36 
37 	i = 0;
38 	r = nb;
39 
40 	while (r >= 4) {
41 		if (r >= 8) {
42 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 4], void *));
43 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 5], void *));
44 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 6], void *));
45 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 7], void *));
46 		}
47 
48 		mb[0] = pkts[i++];
49 		eth_hdr[0] = rte_pktmbuf_mtod(mb[0], struct rte_ether_hdr *);
50 
51 		mb[1] = pkts[i++];
52 		eth_hdr[1] = rte_pktmbuf_mtod(mb[1], struct rte_ether_hdr *);
53 
54 		mb[2] = pkts[i++];
55 		eth_hdr[2] = rte_pktmbuf_mtod(mb[2], struct rte_ether_hdr *);
56 
57 		mb[3] = pkts[i++];
58 		eth_hdr[3] = rte_pktmbuf_mtod(mb[3], struct rte_ether_hdr *);
59 
60 		v0 = vld1q_u8((uint8_t const *)eth_hdr[0]);
61 		v1 = vld1q_u8((uint8_t const *)eth_hdr[1]);
62 		v2 = vld1q_u8((uint8_t const *)eth_hdr[2]);
63 		v3 = vld1q_u8((uint8_t const *)eth_hdr[3]);
64 
65 		v0 = vqtbl1q_u8(v0, idx_map);
66 		v1 = vqtbl1q_u8(v1, idx_map);
67 		v2 = vqtbl1q_u8(v2, idx_map);
68 		v3 = vqtbl1q_u8(v3, idx_map);
69 
70 		vst1q_u8((uint8_t *)eth_hdr[0], v0);
71 		vst1q_u8((uint8_t *)eth_hdr[1], v1);
72 		vst1q_u8((uint8_t *)eth_hdr[2], v2);
73 		vst1q_u8((uint8_t *)eth_hdr[3], v3);
74 
75 		mbuf_field_set(mb[0], ol_flags);
76 		mbuf_field_set(mb[1], ol_flags);
77 		mbuf_field_set(mb[2], ol_flags);
78 		mbuf_field_set(mb[3], ol_flags);
79 		r -= 4;
80 	}
81 
82 	for ( ; i < nb; i++) {
83 		if (i < nb - 1)
84 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i+1], void *));
85 		mb[0] = pkts[i];
86 		eth_hdr[0] = rte_pktmbuf_mtod(mb[0], struct rte_ether_hdr *);
87 
88 		/* Swap dest and src mac addresses. */
89 		v0 = vld1q_u8((uint8_t const *)eth_hdr[0]);
90 		v0 = vqtbl1q_u8(v0, idx_map);
91 		vst1q_u8((uint8_t *)eth_hdr[0], v0);
92 
93 		mbuf_field_set(mb[0], ol_flags);
94 	}
95 }
96 
97 #endif /* _MACSWAP_NEON_H_ */
98