xref: /dpdk/examples/l3fwd/l3fwd_lpm_altivec.h (revision ebab0e8b2257aa049dd35dedc7efd230b0f45b88)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright(c) 2017 IBM Corporation.
4  * All rights reserved.
5  */
6 
7 #ifndef __L3FWD_LPM_ALTIVEC_H__
8 #define __L3FWD_LPM_ALTIVEC_H__
9 
10 #include "l3fwd_altivec.h"
11 
12 /*
13  * Read packet_type and destination IPV4 addresses from 4 mbufs.
14  */
15 static inline void
16 processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
17 		__vector unsigned int *dip,
18 		uint32_t *ipv4_flag)
19 {
20 	struct rte_ipv4_hdr *ipv4_hdr;
21 	struct rte_ether_hdr *eth_hdr;
22 	uint32_t x0, x1, x2, x3;
23 
24 	eth_hdr = rte_pktmbuf_mtod(pkt[0], struct rte_ether_hdr *);
25 	ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
26 	x0 = ipv4_hdr->dst_addr;
27 	ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4;
28 
29 	rte_compiler_barrier();
30 	eth_hdr = rte_pktmbuf_mtod(pkt[1], struct rte_ether_hdr *);
31 	ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
32 	x1 = ipv4_hdr->dst_addr;
33 	ipv4_flag[0] &= pkt[1]->packet_type;
34 
35 	rte_compiler_barrier();
36 	eth_hdr = rte_pktmbuf_mtod(pkt[2], struct rte_ether_hdr *);
37 	ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
38 	x2 = ipv4_hdr->dst_addr;
39 	ipv4_flag[0] &= pkt[2]->packet_type;
40 
41 	rte_compiler_barrier();
42 	eth_hdr = rte_pktmbuf_mtod(pkt[3], struct rte_ether_hdr *);
43 	ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
44 	x3 = ipv4_hdr->dst_addr;
45 	ipv4_flag[0] &= pkt[3]->packet_type;
46 
47 	rte_compiler_barrier();
48 	dip[0] = (__vector unsigned int){x0, x1, x2, x3};
49 }
50 
51 /*
52  * Lookup into LPM for destination port.
53  * If lookup fails, use incoming port (portid) as destination port.
54  */
55 static inline void
56 processx4_step2(const struct lcore_conf *qconf,
57 		__vector unsigned int dip,
58 		uint32_t ipv4_flag,
59 		uint8_t portid,
60 		struct rte_mbuf *pkt[FWDSTEP],
61 		uint16_t dprt[FWDSTEP])
62 {
63 	rte_xmm_t dst;
64 	const __vector unsigned char bswap_mask = (__vector unsigned char){
65 							3, 2, 1, 0,
66 							7, 6, 5, 4,
67 							11, 10, 9, 8,
68 							15, 14, 13, 12};
69 
70 	/* Byte swap 4 IPV4 addresses. */
71 	dip = (__vector unsigned int)vec_perm(*(__vector unsigned char *)&dip,
72 					(__vector unsigned char){}, bswap_mask);
73 
74 	/* if all 4 packets are IPV4. */
75 	if (likely(ipv4_flag)) {
76 		rte_lpm_lookupx4(qconf->ipv4_lookup_struct, (xmm_t)dip,
77 			(uint32_t *)&dst, portid);
78 		/* get rid of unused upper 16 bit for each dport. */
79 		dst.x = (xmm_t)vec_packs(dst.x, dst.x);
80 		*(uint64_t *)dprt = dst.u64[0];
81 	} else {
82 		dst.x = (xmm_t)dip;
83 		dprt[0] = lpm_get_dst_port_with_ipv4(qconf, pkt[0],
84 							dst.u32[0], portid);
85 		dprt[1] = lpm_get_dst_port_with_ipv4(qconf, pkt[1],
86 							dst.u32[1], portid);
87 		dprt[2] = lpm_get_dst_port_with_ipv4(qconf, pkt[2],
88 							dst.u32[2], portid);
89 		dprt[3] = lpm_get_dst_port_with_ipv4(qconf, pkt[3],
90 							dst.u32[3], portid);
91 	}
92 }
93 
94 /*
95  * Buffer optimized handling of packets, invoked
96  * from main_loop.
97  */
98 static inline void
99 l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
100 			  uint8_t portid, uint16_t *dst_port,
101 			  struct lcore_conf *qconf, const uint8_t do_step3)
102 {
103 	int32_t j;
104 	__vector unsigned int dip[MAX_PKT_BURST / FWDSTEP];
105 	uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
106 	const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
107 
108 	for (j = 0; j != k; j += FWDSTEP)
109 		processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP],
110 				&ipv4_flag[j / FWDSTEP]);
111 
112 	for (j = 0; j != k; j += FWDSTEP)
113 		processx4_step2(qconf, dip[j / FWDSTEP],
114 				ipv4_flag[j / FWDSTEP],
115 				portid, &pkts_burst[j], &dst_port[j]);
116 
117 	if (do_step3)
118 		for (j = 0; j != k; j += FWDSTEP)
119 			processx4_step3(&pkts_burst[j], &dst_port[j]);
120 
121 	/* Classify last up to 3 packets one by one */
122 	switch (nb_rx % FWDSTEP) {
123 	case 3:
124 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
125 		if (do_step3)
126 			process_packet(pkts_burst[j], &dst_port[j]);
127 		j++;
128 		/* fall-through */
129 	case 2:
130 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
131 		if (do_step3)
132 			process_packet(pkts_burst[j], &dst_port[j]);
133 		j++;
134 		/* fall-through */
135 	case 1:
136 		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
137 		if (do_step3)
138 			process_packet(pkts_burst[j], &dst_port[j]);
139 		j++;
140 		/* fall-through */
141 	}
142 }
143 
144 static inline void
145 l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, uint8_t portid,
146 		       struct lcore_conf *qconf)
147 {
148 	uint16_t dst_port[SENDM_PORT_OVERHEAD(MAX_PKT_BURST)];
149 
150 	l3fwd_lpm_process_packets(nb_rx, pkts_burst, portid, dst_port, qconf,
151 				  0);
152 	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
153 }
154 
155 #endif /* __L3FWD_LPM_ALTIVEC_H__ */
156