1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016 Intel Corporation. 3 * Copyright(c) 2017 IBM Corporation. 4 * All rights reserved. 5 */ 6 7 #ifndef _L3FWD_ALTIVEC_H_ 8 #define _L3FWD_ALTIVEC_H_ 9 10 #include "l3fwd.h" 11 #include "altivec/port_group.h" 12 #include "l3fwd_common.h" 13 14 #undef SENDM_PORT_OVERHEAD 15 #define SENDM_PORT_OVERHEAD(x) ((x) + 2 * FWDSTEP) 16 17 /* 18 * Update source and destination MAC addresses in the ethernet header. 19 * Perform RFC1812 checks and updates for IPV4 packets. 20 */ 21 static inline void 22 processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) 23 { 24 __vector unsigned int te[FWDSTEP]; 25 __vector unsigned int ve[FWDSTEP]; 26 __vector unsigned int *p[FWDSTEP]; 27 28 p[0] = rte_pktmbuf_mtod(pkt[0], __vector unsigned int *); 29 p[1] = rte_pktmbuf_mtod(pkt[1], __vector unsigned int *); 30 p[2] = rte_pktmbuf_mtod(pkt[2], __vector unsigned int *); 31 p[3] = rte_pktmbuf_mtod(pkt[3], __vector unsigned int *); 32 33 ve[0] = (__vector unsigned int)val_eth[dst_port[0]]; 34 te[0] = *p[0]; 35 36 ve[1] = (__vector unsigned int)val_eth[dst_port[1]]; 37 te[1] = *p[1]; 38 39 ve[2] = (__vector unsigned int)val_eth[dst_port[2]]; 40 te[2] = *p[2]; 41 42 ve[3] = (__vector unsigned int)val_eth[dst_port[3]]; 43 te[3] = *p[3]; 44 45 /* Update first 12 bytes, keep rest bytes intact. */ 46 te[0] = (__vector unsigned int)vec_sel( 47 (__vector unsigned short)ve[0], 48 (__vector unsigned short)te[0], 49 (__vector unsigned short) {0, 0, 0, 0, 50 0, 0, 0xffff, 0xffff}); 51 52 te[1] = (__vector unsigned int)vec_sel( 53 (__vector unsigned short)ve[1], 54 (__vector unsigned short)te[1], 55 (__vector unsigned short) {0, 0, 0, 0, 56 0, 0, 0xffff, 0xffff}); 57 58 te[2] = (__vector unsigned int)vec_sel( 59 (__vector unsigned short)ve[2], 60 (__vector unsigned short)te[2], 61 (__vector unsigned short) {0, 0, 0, 0, 0, 62 0, 0xffff, 0xffff}); 63 64 te[3] = (__vector unsigned int)vec_sel( 65 (__vector unsigned short)ve[3], 66 (__vector unsigned short)te[3], 67 (__vector unsigned short) {0, 0, 0, 0, 68 0, 0, 0xffff, 0xffff}); 69 70 *p[0] = te[0]; 71 *p[1] = te[1]; 72 *p[2] = te[2]; 73 *p[3] = te[3]; 74 75 rfc1812_process((struct rte_ipv4_hdr *) 76 ((struct rte_ether_hdr *)p[0] + 1), 77 &dst_port[0], pkt[0]->packet_type); 78 rfc1812_process((struct rte_ipv4_hdr *) 79 ((struct rte_ether_hdr *)p[1] + 1), 80 &dst_port[1], pkt[1]->packet_type); 81 rfc1812_process((struct rte_ipv4_hdr *) 82 ((struct rte_ether_hdr *)p[2] + 1), 83 &dst_port[2], pkt[2]->packet_type); 84 rfc1812_process((struct rte_ipv4_hdr *) 85 ((struct rte_ether_hdr *)p[3] + 1), 86 &dst_port[3], pkt[3]->packet_type); 87 } 88 89 /** 90 * Process one packet: 91 * Update source and destination MAC addresses in the ethernet header. 92 * Perform RFC1812 checks and updates for IPV4 packets. 93 */ 94 static inline void 95 process_packet(struct rte_mbuf *pkt, uint16_t *dst_port) 96 { 97 struct rte_ether_hdr *eth_hdr; 98 __vector unsigned int te, ve; 99 100 eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); 101 102 te = *(__vector unsigned int *)eth_hdr; 103 ve = (__vector unsigned int)val_eth[dst_port[0]]; 104 105 rfc1812_process((struct rte_ipv4_hdr *)(eth_hdr + 1), dst_port, 106 pkt->packet_type); 107 108 /* dynamically vec_sel te and ve for MASK_ETH (0x3f) */ 109 te = (__vector unsigned int)vec_sel( 110 (__vector unsigned short)ve, 111 (__vector unsigned short)te, 112 (__vector unsigned short){0, 0, 0, 0, 113 0, 0, 0xffff, 0xffff}); 114 115 *(__vector unsigned int *)eth_hdr = te; 116 } 117 118 /** 119 * Send packets burst from pkts_burst to the ports in dst_port array 120 */ 121 static __rte_always_inline void 122 send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, 123 uint16_t dst_port[SENDM_PORT_OVERHEAD(MAX_PKT_BURST)], 124 int nb_rx) 125 { 126 int32_t k; 127 int j = 0; 128 uint16_t dlp; 129 uint16_t *lp; 130 uint16_t pnum[MAX_PKT_BURST + 1]; 131 132 /* 133 * Finish packet processing and group consecutive 134 * packets with the same destination port. 135 */ 136 k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); 137 if (k != 0) { 138 __vector unsigned short dp1, dp2; 139 140 lp = pnum; 141 lp[0] = 1; 142 143 processx4_step3(pkts_burst, dst_port); 144 145 /* dp1: <d[0], d[1], d[2], d[3], ... > */ 146 dp1 = *(__vector unsigned short *)dst_port; 147 148 for (j = FWDSTEP; j != k; j += FWDSTEP) { 149 processx4_step3(&pkts_burst[j], &dst_port[j]); 150 151 /* 152 * dp2: 153 * <d[j-3], d[j-2], d[j-1], d[j], ... > 154 */ 155 dp2 = *((__vector unsigned short *) 156 &dst_port[j - FWDSTEP + 1]); 157 lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); 158 159 /* 160 * dp1: 161 * <d[j], d[j+1], d[j+2], d[j+3], ... > 162 */ 163 dp1 = vec_sro(dp2, (__vector unsigned char) { 164 0, 0, 0, 0, 0, 0, 0, 0, 165 0, 0, 0, (FWDSTEP - 1) * sizeof(dst_port[0])}); 166 } 167 168 /* 169 * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... > 170 */ 171 dp2 = vec_perm(dp1, (__vector unsigned short){}, 172 (__vector unsigned char){0xf9}); 173 lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); 174 175 /* 176 * remove values added by the last repeated 177 * dst port. 178 */ 179 lp[0]--; 180 dlp = dst_port[j - 1]; 181 } else { 182 /* set dlp and lp to the never used values. */ 183 dlp = BAD_PORT - 1; 184 lp = pnum + MAX_PKT_BURST; 185 } 186 187 /* Process up to last 3 packets one by one. */ 188 switch (nb_rx % FWDSTEP) { 189 case 3: 190 process_packet(pkts_burst[j], dst_port + j); 191 GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); 192 j++; 193 /* fall-through */ 194 case 2: 195 process_packet(pkts_burst[j], dst_port + j); 196 GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); 197 j++; 198 /* fall-through */ 199 case 1: 200 process_packet(pkts_burst[j], dst_port + j); 201 GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); 202 j++; 203 } 204 205 /* 206 * Send packets out, through destination port. 207 * Consecutive packets with the same destination port 208 * are already grouped together. 209 * If destination port for the packet equals BAD_PORT, 210 * then free the packet without sending it out. 211 */ 212 for (j = 0; j < nb_rx; j += k) { 213 214 int32_t m; 215 uint16_t pn; 216 217 pn = dst_port[j]; 218 k = pnum[j]; 219 220 if (likely(pn != BAD_PORT)) 221 send_packetsx4(qconf, pn, pkts_burst + j, k); 222 else 223 for (m = j; m != j + k; m++) 224 rte_pktmbuf_free(pkts_burst[m]); 225 226 } 227 } 228 229 static __rte_always_inline uint16_t 230 process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) 231 { 232 uint16_t i = 0, res; 233 234 while (nb_elem > 7) { 235 __vector unsigned short dp1; 236 __vector unsigned short dp; 237 238 dp = (__vector unsigned short)vec_splats((short)dst_ports[0]); 239 dp1 = *((__vector unsigned short *)&dst_ports[i]); 240 res = vec_all_eq(dp1, dp); 241 if (!res) 242 return BAD_PORT; 243 244 nb_elem -= 8; 245 i += 8; 246 } 247 248 while (nb_elem) { 249 if (dst_ports[i] != dst_ports[0]) 250 return BAD_PORT; 251 nb_elem--; 252 i++; 253 } 254 255 return dst_ports[0]; 256 } 257 258 #endif /* _L3FWD_ALTIVEC_H_ */ 259