xref: /dpdk/examples/l3fwd/l3fwd_altivec.h (revision ebab0e8b2257aa049dd35dedc7efd230b0f45b88)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2016 Intel Corporation.
3  * Copyright(c) 2017 IBM Corporation.
4  * All rights reserved.
5  */
6 
7 #ifndef _L3FWD_ALTIVEC_H_
8 #define _L3FWD_ALTIVEC_H_
9 
10 #include "l3fwd.h"
11 #include "altivec/port_group.h"
12 #include "l3fwd_common.h"
13 
/*
 * Number of destination-port slots needed for x packets on this path:
 * x entries plus 2 * FWDSTEP spare ones. Any earlier definition of the
 * macro is discarded first.
 * NOTE(review): the spare slots presumably exist so that the 16-byte
 * vector loads on the dst_port array may run past the last valid
 * entry -- confirm against the callers that allocate the array.
 */
#undef SENDM_PORT_OVERHEAD
#define SENDM_PORT_OVERHEAD(x) ((x) + (2 * FWDSTEP))
16 
17 /*
18  * Update source and destination MAC addresses in the ethernet header.
19  * Perform RFC1812 checks and updates for IPV4 packets.
20  */
21 static inline void
22 processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
23 {
24 	__vector unsigned int te[FWDSTEP];
25 	__vector unsigned int ve[FWDSTEP];
26 	__vector unsigned int *p[FWDSTEP];
27 
28 	p[0] = rte_pktmbuf_mtod(pkt[0], __vector unsigned int *);
29 	p[1] = rte_pktmbuf_mtod(pkt[1], __vector unsigned int *);
30 	p[2] = rte_pktmbuf_mtod(pkt[2], __vector unsigned int *);
31 	p[3] = rte_pktmbuf_mtod(pkt[3], __vector unsigned int *);
32 
33 	ve[0] = (__vector unsigned int)val_eth[dst_port[0]];
34 	te[0] = *p[0];
35 
36 	ve[1] = (__vector unsigned int)val_eth[dst_port[1]];
37 	te[1] = *p[1];
38 
39 	ve[2] = (__vector unsigned int)val_eth[dst_port[2]];
40 	te[2] = *p[2];
41 
42 	ve[3] = (__vector unsigned int)val_eth[dst_port[3]];
43 	te[3] = *p[3];
44 
45 	/* Update first 12 bytes, keep rest bytes intact. */
46 	te[0] = (__vector unsigned int)vec_sel(
47 			(__vector unsigned short)ve[0],
48 			(__vector unsigned short)te[0],
49 			(__vector unsigned short) {0, 0, 0, 0,
50 						0, 0, 0xffff, 0xffff});
51 
52 	te[1] = (__vector unsigned int)vec_sel(
53 			(__vector unsigned short)ve[1],
54 			(__vector unsigned short)te[1],
55 			(__vector unsigned short) {0, 0, 0, 0,
56 						0, 0, 0xffff, 0xffff});
57 
58 	te[2] = (__vector unsigned int)vec_sel(
59 			(__vector unsigned short)ve[2],
60 			(__vector unsigned short)te[2],
61 			(__vector unsigned short) {0, 0, 0, 0, 0,
62 						0, 0xffff, 0xffff});
63 
64 	te[3] = (__vector unsigned int)vec_sel(
65 			(__vector unsigned short)ve[3],
66 			(__vector unsigned short)te[3],
67 			(__vector unsigned short) {0, 0, 0, 0,
68 						0, 0, 0xffff, 0xffff});
69 
70 	*p[0] = te[0];
71 	*p[1] = te[1];
72 	*p[2] = te[2];
73 	*p[3] = te[3];
74 
75 	rfc1812_process((struct rte_ipv4_hdr *)
76 			((struct rte_ether_hdr *)p[0] + 1),
77 			&dst_port[0], pkt[0]->packet_type);
78 	rfc1812_process((struct rte_ipv4_hdr *)
79 			((struct rte_ether_hdr *)p[1] + 1),
80 			&dst_port[1], pkt[1]->packet_type);
81 	rfc1812_process((struct rte_ipv4_hdr *)
82 			((struct rte_ether_hdr *)p[2] + 1),
83 			&dst_port[2], pkt[2]->packet_type);
84 	rfc1812_process((struct rte_ipv4_hdr *)
85 			((struct rte_ether_hdr *)p[3] + 1),
86 			&dst_port[3], pkt[3]->packet_type);
87 }
88 
89 /**
90  * Process one packet:
91  * Update source and destination MAC addresses in the ethernet header.
92  * Perform RFC1812 checks and updates for IPV4 packets.
93  */
94 static inline void
95 process_packet(struct rte_mbuf *pkt, uint16_t *dst_port)
96 {
97 	struct rte_ether_hdr *eth_hdr;
98 	__vector unsigned int te, ve;
99 
100 	eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
101 
102 	te = *(__vector unsigned int *)eth_hdr;
103 	ve = (__vector unsigned int)val_eth[dst_port[0]];
104 
105 	rfc1812_process((struct rte_ipv4_hdr *)(eth_hdr + 1), dst_port,
106 			pkt->packet_type);
107 
108 	/* dynamically vec_sel te and ve for MASK_ETH (0x3f) */
109 	te = (__vector unsigned int)vec_sel(
110 		(__vector unsigned short)ve,
111 		(__vector unsigned short)te,
112 		(__vector unsigned short){0, 0, 0, 0,
113 					0, 0, 0xffff, 0xffff});
114 
115 	*(__vector unsigned int *)eth_hdr = te;
116 }
117 
118 /**
119  * Send packets burst from pkts_burst to the ports in dst_port array
120  */
121 static __rte_always_inline void
122 send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
123 		uint16_t dst_port[SENDM_PORT_OVERHEAD(MAX_PKT_BURST)],
124 		int nb_rx)
125 {
126 	int32_t k;
127 	int j = 0;
128 	uint16_t dlp;
129 	uint16_t *lp;
130 	uint16_t pnum[MAX_PKT_BURST + 1];
131 
132 	/*
133 	 * Finish packet processing and group consecutive
134 	 * packets with the same destination port.
135 	 */
136 	k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
137 	if (k != 0) {
138 		__vector unsigned short dp1, dp2;
139 
140 		lp = pnum;
141 		lp[0] = 1;
142 
143 		processx4_step3(pkts_burst, dst_port);
144 
145 		/* dp1: <d[0], d[1], d[2], d[3], ... > */
146 		dp1 = *(__vector unsigned short *)dst_port;
147 
148 		for (j = FWDSTEP; j != k; j += FWDSTEP) {
149 			processx4_step3(&pkts_burst[j], &dst_port[j]);
150 
151 			/*
152 			 * dp2:
153 			 * <d[j-3], d[j-2], d[j-1], d[j], ... >
154 			 */
155 			dp2 = *((__vector unsigned short *)
156 					&dst_port[j - FWDSTEP + 1]);
157 			lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
158 
159 			/*
160 			 * dp1:
161 			 * <d[j], d[j+1], d[j+2], d[j+3], ... >
162 			 */
163 			dp1 = vec_sro(dp2, (__vector unsigned char) {
164 				0, 0, 0, 0, 0, 0, 0, 0,
165 				0, 0, 0, (FWDSTEP - 1) * sizeof(dst_port[0])});
166 		}
167 
168 		/*
169 		 * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
170 		 */
171 		dp2 = vec_perm(dp1, (__vector unsigned short){},
172 				(__vector unsigned char){0xf9});
173 		lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
174 
175 		/*
176 		 * remove values added by the last repeated
177 		 * dst port.
178 		 */
179 		lp[0]--;
180 		dlp = dst_port[j - 1];
181 	} else {
182 		/* set dlp and lp to the never used values. */
183 		dlp = BAD_PORT - 1;
184 		lp = pnum + MAX_PKT_BURST;
185 	}
186 
187 	/* Process up to last 3 packets one by one. */
188 	switch (nb_rx % FWDSTEP) {
189 	case 3:
190 		process_packet(pkts_burst[j], dst_port + j);
191 		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
192 		j++;
193 		/* fall-through */
194 	case 2:
195 		process_packet(pkts_burst[j], dst_port + j);
196 		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
197 		j++;
198 		/* fall-through */
199 	case 1:
200 		process_packet(pkts_burst[j], dst_port + j);
201 		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
202 		j++;
203 	}
204 
205 	/*
206 	 * Send packets out, through destination port.
207 	 * Consecutive packets with the same destination port
208 	 * are already grouped together.
209 	 * If destination port for the packet equals BAD_PORT,
210 	 * then free the packet without sending it out.
211 	 */
212 	for (j = 0; j < nb_rx; j += k) {
213 
214 		int32_t m;
215 		uint16_t pn;
216 
217 		pn = dst_port[j];
218 		k = pnum[j];
219 
220 		if (likely(pn != BAD_PORT))
221 			send_packetsx4(qconf, pn, pkts_burst + j, k);
222 		else
223 			for (m = j; m != j + k; m++)
224 				rte_pktmbuf_free(pkts_burst[m]);
225 
226 	}
227 }
228 
229 static __rte_always_inline uint16_t
230 process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
231 {
232 	uint16_t i = 0, res;
233 
234 	while (nb_elem > 7) {
235 		__vector unsigned short dp1;
236 		__vector unsigned short dp;
237 
238 		dp = (__vector unsigned short)vec_splats((short)dst_ports[0]);
239 		dp1 = *((__vector unsigned short *)&dst_ports[i]);
240 		res = vec_all_eq(dp1, dp);
241 		if (!res)
242 			return BAD_PORT;
243 
244 		nb_elem -= 8;
245 		i += 8;
246 	}
247 
248 	while (nb_elem) {
249 		if (dst_ports[i] != dst_ports[0])
250 			return BAD_PORT;
251 		nb_elem--;
252 		i++;
253 	}
254 
255 	return dst_ports[0];
256 }
257 
258 #endif /* _L3FWD_ALTIVEC_H_ */
259