xref: /dpdk/app/test-pmd/macswap_neon.h (revision 6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1)
10ef246a7SRuifeng Wang /* SPDX-License-Identifier: BSD-3-Clause
20ef246a7SRuifeng Wang  * Copyright(c) 2019 Arm Limited
30ef246a7SRuifeng Wang  *
40ef246a7SRuifeng Wang  * Copyright(c) 2019 Intel Corporation
50ef246a7SRuifeng Wang  *
60ef246a7SRuifeng Wang  * Derived do_macswap implementation from app/test-pmd/macswap_sse.h
70ef246a7SRuifeng Wang  */
80ef246a7SRuifeng Wang 
90ef246a7SRuifeng Wang #ifndef _MACSWAP_NEON_H_
100ef246a7SRuifeng Wang #define _MACSWAP_NEON_H_
110ef246a7SRuifeng Wang 
120ef246a7SRuifeng Wang #include "macswap_common.h"
130ef246a7SRuifeng Wang #include "rte_vect.h"
140ef246a7SRuifeng Wang 
150ef246a7SRuifeng Wang static inline void
do_macswap(struct rte_mbuf * pkts[],uint16_t nb,struct rte_port * txp)160ef246a7SRuifeng Wang do_macswap(struct rte_mbuf *pkts[], uint16_t nb,
170ef246a7SRuifeng Wang 		struct rte_port *txp)
180ef246a7SRuifeng Wang {
19*6d13ea8eSOlivier Matz 	struct rte_ether_hdr *eth_hdr[4];
200ef246a7SRuifeng Wang 	struct rte_mbuf *mb[4];
210ef246a7SRuifeng Wang 	uint64_t ol_flags;
220ef246a7SRuifeng Wang 	int i;
230ef246a7SRuifeng Wang 	int r;
240ef246a7SRuifeng Wang 	uint8x16_t v0, v1, v2, v3;
250ef246a7SRuifeng Wang 	/**
260ef246a7SRuifeng Wang 	 * Index map be used to shuffle the 16 bytes.
270ef246a7SRuifeng Wang 	 * byte 0-5 will be swapped with byte 6-11.
280ef246a7SRuifeng Wang 	 * byte 12-15 will keep unchanged.
290ef246a7SRuifeng Wang 	 */
300ef246a7SRuifeng Wang 	const uint8x16_t idx_map = {6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
310ef246a7SRuifeng Wang 				12, 13, 14, 15};
320ef246a7SRuifeng Wang 
330ef246a7SRuifeng Wang 	ol_flags = ol_flags_init(txp->dev_conf.txmode.offloads);
340ef246a7SRuifeng Wang 	vlan_qinq_set(pkts, nb, ol_flags,
350ef246a7SRuifeng Wang 			txp->tx_vlan_id, txp->tx_vlan_id_outer);
360ef246a7SRuifeng Wang 
370ef246a7SRuifeng Wang 	i = 0;
380ef246a7SRuifeng Wang 	r = nb;
390ef246a7SRuifeng Wang 
400ef246a7SRuifeng Wang 	while (r >= 4) {
410ef246a7SRuifeng Wang 		if (r >= 8) {
420ef246a7SRuifeng Wang 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 4], void *));
430ef246a7SRuifeng Wang 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 5], void *));
440ef246a7SRuifeng Wang 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 6], void *));
450ef246a7SRuifeng Wang 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 7], void *));
460ef246a7SRuifeng Wang 		}
470ef246a7SRuifeng Wang 
480ef246a7SRuifeng Wang 		mb[0] = pkts[i++];
49*6d13ea8eSOlivier Matz 		eth_hdr[0] = rte_pktmbuf_mtod(mb[0], struct rte_ether_hdr *);
500ef246a7SRuifeng Wang 
510ef246a7SRuifeng Wang 		mb[1] = pkts[i++];
52*6d13ea8eSOlivier Matz 		eth_hdr[1] = rte_pktmbuf_mtod(mb[1], struct rte_ether_hdr *);
530ef246a7SRuifeng Wang 
540ef246a7SRuifeng Wang 		mb[2] = pkts[i++];
55*6d13ea8eSOlivier Matz 		eth_hdr[2] = rte_pktmbuf_mtod(mb[2], struct rte_ether_hdr *);
560ef246a7SRuifeng Wang 
570ef246a7SRuifeng Wang 		mb[3] = pkts[i++];
58*6d13ea8eSOlivier Matz 		eth_hdr[3] = rte_pktmbuf_mtod(mb[3], struct rte_ether_hdr *);
590ef246a7SRuifeng Wang 
600ef246a7SRuifeng Wang 		v0 = vld1q_u8((uint8_t const *)eth_hdr[0]);
610ef246a7SRuifeng Wang 		v1 = vld1q_u8((uint8_t const *)eth_hdr[1]);
620ef246a7SRuifeng Wang 		v2 = vld1q_u8((uint8_t const *)eth_hdr[2]);
630ef246a7SRuifeng Wang 		v3 = vld1q_u8((uint8_t const *)eth_hdr[3]);
640ef246a7SRuifeng Wang 
650ef246a7SRuifeng Wang 		v0 = vqtbl1q_u8(v0, idx_map);
660ef246a7SRuifeng Wang 		v1 = vqtbl1q_u8(v1, idx_map);
670ef246a7SRuifeng Wang 		v2 = vqtbl1q_u8(v2, idx_map);
680ef246a7SRuifeng Wang 		v3 = vqtbl1q_u8(v3, idx_map);
690ef246a7SRuifeng Wang 
700ef246a7SRuifeng Wang 		vst1q_u8((uint8_t *)eth_hdr[0], v0);
710ef246a7SRuifeng Wang 		vst1q_u8((uint8_t *)eth_hdr[1], v1);
720ef246a7SRuifeng Wang 		vst1q_u8((uint8_t *)eth_hdr[2], v2);
730ef246a7SRuifeng Wang 		vst1q_u8((uint8_t *)eth_hdr[3], v3);
740ef246a7SRuifeng Wang 
750ef246a7SRuifeng Wang 		mbuf_field_set(mb[0], ol_flags);
760ef246a7SRuifeng Wang 		mbuf_field_set(mb[1], ol_flags);
770ef246a7SRuifeng Wang 		mbuf_field_set(mb[2], ol_flags);
780ef246a7SRuifeng Wang 		mbuf_field_set(mb[3], ol_flags);
790ef246a7SRuifeng Wang 		r -= 4;
800ef246a7SRuifeng Wang 	}
810ef246a7SRuifeng Wang 
820ef246a7SRuifeng Wang 	for ( ; i < nb; i++) {
830ef246a7SRuifeng Wang 		if (i < nb - 1)
840ef246a7SRuifeng Wang 			rte_prefetch0(rte_pktmbuf_mtod(pkts[i+1], void *));
850ef246a7SRuifeng Wang 		mb[0] = pkts[i];
86*6d13ea8eSOlivier Matz 		eth_hdr[0] = rte_pktmbuf_mtod(mb[0], struct rte_ether_hdr *);
870ef246a7SRuifeng Wang 
880ef246a7SRuifeng Wang 		/* Swap dest and src mac addresses. */
890ef246a7SRuifeng Wang 		v0 = vld1q_u8((uint8_t const *)eth_hdr[0]);
900ef246a7SRuifeng Wang 		v0 = vqtbl1q_u8(v0, idx_map);
910ef246a7SRuifeng Wang 		vst1q_u8((uint8_t *)eth_hdr[0], v0);
920ef246a7SRuifeng Wang 
930ef246a7SRuifeng Wang 		mbuf_field_set(mb[0], ol_flags);
940ef246a7SRuifeng Wang 	}
950ef246a7SRuifeng Wang }
960ef246a7SRuifeng Wang 
970ef246a7SRuifeng Wang #endif /* _MACSWAP_NEON_H_ */
98