1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(C) 2023 Marvell. 3 */ 4 5 #include <arpa/inet.h> 6 #include <sys/socket.h> 7 8 #include <rte_ethdev.h> 9 #include <rte_ether.h> 10 #include <rte_graph.h> 11 #include <rte_graph_worker.h> 12 #include <rte_ip.h> 13 #include <rte_lpm6.h> 14 15 #include "rte_node_ip6_api.h" 16 17 #include "node_private.h" 18 19 #define IPV6_L3FWD_LPM_MAX_RULES 1024 20 #define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 8) 21 22 /* IP6 Lookup global data struct */ 23 struct ip6_lookup_node_main { 24 struct rte_lpm6 *lpm_tbl[RTE_MAX_NUMA_NODES]; 25 }; 26 27 struct ip6_lookup_node_ctx { 28 /* Socket's LPM table */ 29 struct rte_lpm6 *lpm6; 30 /* Dynamic offset to mbuf priv1 */ 31 int mbuf_priv1_off; 32 }; 33 34 static struct ip6_lookup_node_main ip6_lookup_nm; 35 36 #define IP6_LOOKUP_NODE_LPM(ctx) \ 37 (((struct ip6_lookup_node_ctx *)ctx)->lpm6) 38 39 #define IP6_LOOKUP_NODE_PRIV1_OFF(ctx) \ 40 (((struct ip6_lookup_node_ctx *)ctx)->mbuf_priv1_off) 41 42 static uint16_t 43 ip6_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node *node, 44 void **objs, uint16_t nb_objs) 45 { 46 struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts; 47 struct rte_lpm6 *lpm6 = IP6_LOOKUP_NODE_LPM(node->ctx); 48 const int dyn = IP6_LOOKUP_NODE_PRIV1_OFF(node->ctx); 49 struct rte_ipv6_hdr *ipv6_hdr; 50 void **to_next, **from; 51 uint16_t last_spec = 0; 52 rte_edge_t next_index; 53 uint16_t n_left_from; 54 uint16_t held = 0; 55 uint32_t drop_nh; 56 int i, rc; 57 58 /* Speculative next */ 59 next_index = RTE_NODE_IP6_LOOKUP_NEXT_REWRITE; 60 /* Drop node */ 61 drop_nh = ((uint32_t)RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP) << 16; 62 63 pkts = (struct rte_mbuf **)objs; 64 from = objs; 65 n_left_from = nb_objs; 66 67 for (i = OBJS_PER_CLINE; i < RTE_GRAPH_BURST_SIZE; i += OBJS_PER_CLINE) 68 rte_prefetch0(&objs[i]); 69 70 for (i = 0; i < 4 && i < n_left_from; i++) 71 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *, 72 sizeof(struct rte_ether_hdr))); 73 74 /* Get stream for the speculated next node */ 75 to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs); 76 while (n_left_from >= 4) { 77 struct rte_ipv6_addr ip_batch[4]; 78 int32_t next_hop[4]; 79 uint16_t next[4]; 80 81 #if RTE_GRAPH_BURST_SIZE > 64 82 /* Prefetch next-next mbufs */ 83 if (likely(n_left_from > 11)) { 84 rte_prefetch0(pkts[8]); 85 rte_prefetch0(pkts[9]); 86 rte_prefetch0(pkts[10]); 87 rte_prefetch0(pkts[11]); 88 } 89 #endif 90 /* Prefetch next mbuf data */ 91 if (likely(n_left_from > 7)) { 92 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *, 93 sizeof(struct rte_ether_hdr))); 94 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *, 95 sizeof(struct rte_ether_hdr))); 96 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *, 97 sizeof(struct rte_ether_hdr))); 98 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *, 99 sizeof(struct rte_ether_hdr))); 100 } 101 102 mbuf0 = pkts[0]; 103 mbuf1 = pkts[1]; 104 mbuf2 = pkts[2]; 105 mbuf3 = pkts[3]; 106 107 pkts += 4; 108 n_left_from -= 4; 109 110 /* Extract DIP of mbuf0 */ 111 ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv6_hdr *, 112 sizeof(struct rte_ether_hdr)); 113 /* Extract hop_limits as ipv6 hdr is in cache */ 114 node_mbuf_priv1(mbuf0, dyn)->ttl = ipv6_hdr->hop_limits; 115 ip_batch[0] = ipv6_hdr->dst_addr; 116 117 /* Extract DIP of mbuf1 */ 118 ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv6_hdr *, 119 sizeof(struct rte_ether_hdr)); 120 /* Extract hop_limits as ipv6 hdr is in cache */ 121 node_mbuf_priv1(mbuf1, dyn)->ttl = ipv6_hdr->hop_limits; 122 ip_batch[1] = ipv6_hdr->dst_addr; 123 124 /* Extract DIP of mbuf2 */ 125 ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv6_hdr *, 126 sizeof(struct rte_ether_hdr)); 127 /* Extract hop_limits as ipv6 hdr is in cache */ 128 node_mbuf_priv1(mbuf2, dyn)->ttl = ipv6_hdr->hop_limits; 129 ip_batch[2] = ipv6_hdr->dst_addr; 130 131 /* Extract DIP of mbuf3 */ 132 ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv6_hdr *, 133 sizeof(struct rte_ether_hdr)); 134 /* Extract hop_limits as ipv6 hdr is in cache */ 135 node_mbuf_priv1(mbuf3, dyn)->ttl = ipv6_hdr->hop_limits; 136 ip_batch[3] = ipv6_hdr->dst_addr; 137 138 rte_lpm6_lookup_bulk_func(lpm6, ip_batch, next_hop, 4); 139 140 next_hop[0] = (next_hop[0] < 0) ? (int32_t)drop_nh : next_hop[0]; 141 node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop[0]; 142 next[0] = (uint16_t)(next_hop[0] >> 16); 143 144 next_hop[1] = (next_hop[1] < 0) ? (int32_t)drop_nh : next_hop[1]; 145 node_mbuf_priv1(mbuf1, dyn)->nh = (uint16_t)next_hop[1]; 146 next[1] = (uint16_t)(next_hop[1] >> 16); 147 148 next_hop[2] = (next_hop[2] < 0) ? (int32_t)drop_nh : next_hop[2]; 149 node_mbuf_priv1(mbuf2, dyn)->nh = (uint16_t)next_hop[2]; 150 next[2] = (uint16_t)(next_hop[2] >> 16); 151 152 next_hop[3] = (next_hop[3] < 0) ? (int32_t)drop_nh : next_hop[3]; 153 node_mbuf_priv1(mbuf3, dyn)->nh = (uint16_t)next_hop[3]; 154 next[3] = (uint16_t)(next_hop[3] >> 16); 155 156 rte_edge_t fix_spec = ((next_index == next[0]) && 157 (next_index == next[1]) && 158 (next_index == next[2]) && 159 (next_index == next[3])); 160 161 if (unlikely(fix_spec == 0)) { 162 /* Copy things successfully speculated till now */ 163 rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 164 from += last_spec; 165 to_next += last_spec; 166 held += last_spec; 167 last_spec = 0; 168 169 /* Next0 */ 170 if (next_index == next[0]) { 171 to_next[0] = from[0]; 172 to_next++; 173 held++; 174 } else { 175 rte_node_enqueue_x1(graph, node, next[0], from[0]); 176 } 177 178 /* Next1 */ 179 if (next_index == next[1]) { 180 to_next[0] = from[1]; 181 to_next++; 182 held++; 183 } else { 184 rte_node_enqueue_x1(graph, node, next[1], from[1]); 185 } 186 187 /* Next2 */ 188 if (next_index == next[2]) { 189 to_next[0] = from[2]; 190 to_next++; 191 held++; 192 } else { 193 rte_node_enqueue_x1(graph, node, next[2], from[2]); 194 } 195 196 /* Next3 */ 197 if (next_index == next[3]) { 198 to_next[0] = from[3]; 199 to_next++; 200 held++; 201 } else { 202 rte_node_enqueue_x1(graph, node, next[3], from[3]); 203 } 204 205 from += 4; 206 } else { 207 last_spec += 4; 208 } 209 } 210 211 while (n_left_from > 0) { 212 uint32_t next_hop; 213 uint16_t next0; 214 215 mbuf0 = pkts[0]; 216 217 pkts += 1; 218 n_left_from -= 1; 219 220 /* Extract DIP of mbuf0 */ 221 ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv6_hdr *, 222 sizeof(struct rte_ether_hdr)); 223 /* Extract TTL as IPv6 hdr is in cache */ 224 node_mbuf_priv1(mbuf0, dyn)->ttl = ipv6_hdr->hop_limits; 225 226 rc = rte_lpm6_lookup(lpm6, &ipv6_hdr->dst_addr, &next_hop); 227 next_hop = (rc == 0) ? next_hop : drop_nh; 228 229 node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop; 230 next_hop = next_hop >> 16; 231 next0 = (uint16_t)next_hop; 232 233 if (unlikely(next_index ^ next0)) { 234 /* Copy things successfully speculated till now */ 235 rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 236 from += last_spec; 237 to_next += last_spec; 238 held += last_spec; 239 last_spec = 0; 240 241 rte_node_enqueue_x1(graph, node, next0, from[0]); 242 from += 1; 243 } else { 244 last_spec += 1; 245 } 246 } 247 248 /* !!! Home run !!! */ 249 if (likely(last_spec == nb_objs)) { 250 rte_node_next_stream_move(graph, node, next_index); 251 return nb_objs; 252 } 253 held += last_spec; 254 rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 255 rte_node_next_stream_put(graph, node, next_index, held); 256 257 return nb_objs; 258 } 259 260 int 261 rte_node_ip6_route_add(const struct rte_ipv6_addr *ip, uint8_t depth, uint16_t next_hop, 262 enum rte_node_ip6_lookup_next next_node) 263 { 264 char abuf[INET6_ADDRSTRLEN]; 265 uint8_t socket; 266 uint32_t val; 267 int ret; 268 269 inet_ntop(AF_INET6, ip, abuf, sizeof(abuf)); 270 /* Embedded next node id into 24 bit next hop */ 271 val = ((next_node << 16) | next_hop) & ((1ull << 24) - 1); 272 node_dbg("ip6_lookup", "LPM: Adding route %s / %d nh (0x%x)", abuf, 273 depth, val); 274 275 for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { 276 if (!ip6_lookup_nm.lpm_tbl[socket]) 277 continue; 278 279 ret = rte_lpm6_add(ip6_lookup_nm.lpm_tbl[socket], ip, depth, val); 280 if (ret < 0) { 281 node_err("ip6_lookup", 282 "Unable to add entry %s / %d nh (%x) to LPM " 283 "table on sock %d, rc=%d", 284 abuf, depth, val, socket, ret); 285 return ret; 286 } 287 } 288 289 return 0; 290 } 291 292 static int 293 setup_lpm6(struct ip6_lookup_node_main *nm, int socket) 294 { 295 struct rte_lpm6_config config_ipv6; 296 char s[RTE_LPM6_NAMESIZE]; 297 298 /* One LPM table per socket */ 299 if (nm->lpm_tbl[socket]) 300 return 0; 301 302 /* create the LPM table */ 303 config_ipv6.max_rules = IPV6_L3FWD_LPM_MAX_RULES; 304 config_ipv6.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S; 305 config_ipv6.flags = 0; 306 snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socket); 307 nm->lpm_tbl[socket] = rte_lpm6_create(s, socket, &config_ipv6); 308 if (nm->lpm_tbl[socket] == NULL) 309 return -rte_errno; 310 311 return 0; 312 } 313 314 static int 315 ip6_lookup_node_init(const struct rte_graph *graph, struct rte_node *node) 316 { 317 uint16_t socket, lcore_id; 318 static uint8_t init_once; 319 int rc; 320 321 RTE_SET_USED(graph); 322 RTE_BUILD_BUG_ON(sizeof(struct ip6_lookup_node_ctx) > RTE_NODE_CTX_SZ); 323 324 if (!init_once) { 325 node_mbuf_priv1_dynfield_offset = 326 rte_mbuf_dynfield_register( 327 &node_mbuf_priv1_dynfield_desc); 328 if (node_mbuf_priv1_dynfield_offset < 0) 329 return -rte_errno; 330 331 /* Setup LPM tables for all sockets */ 332 RTE_LCORE_FOREACH(lcore_id) 333 { 334 socket = rte_lcore_to_socket_id(lcore_id); 335 rc = setup_lpm6(&ip6_lookup_nm, socket); 336 if (rc) { 337 node_err("ip6_lookup", 338 "Failed to setup lpm6 tbl for " 339 "sock %u, rc=%d", socket, rc); 340 return rc; 341 } 342 } 343 init_once = 1; 344 } 345 346 /* Update socket's LPM and mbuf dyn priv1 offset in node ctx */ 347 IP6_LOOKUP_NODE_LPM(node->ctx) = ip6_lookup_nm.lpm_tbl[graph->socket]; 348 IP6_LOOKUP_NODE_PRIV1_OFF(node->ctx) = 349 node_mbuf_priv1_dynfield_offset; 350 351 node_dbg("ip6_lookup", "Initialized ip6_lookup node"); 352 353 return 0; 354 } 355 356 static struct rte_node_register ip6_lookup_node = { 357 .process = ip6_lookup_node_process_scalar, 358 .name = "ip6_lookup", 359 360 .init = ip6_lookup_node_init, 361 362 .nb_edges = RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP + 1, 363 .next_nodes = { 364 [RTE_NODE_IP6_LOOKUP_NEXT_REWRITE] = "ip6_rewrite", 365 [RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP] = "pkt_drop", 366 }, 367 }; 368 369 RTE_NODE_REGISTER(ip6_lookup_node); 370