199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause 299a2dd95SBruce Richardson * Copyright(C) 2020 Marvell International Ltd. 399a2dd95SBruce Richardson */ 499a2dd95SBruce Richardson 599a2dd95SBruce Richardson #ifndef __INCLUDE_IP4_LOOKUP_NEON_H__ 699a2dd95SBruce Richardson #define __INCLUDE_IP4_LOOKUP_NEON_H__ 799a2dd95SBruce Richardson 899a2dd95SBruce Richardson /* ARM64 NEON */ 999a2dd95SBruce Richardson static uint16_t 1099a2dd95SBruce Richardson ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node, 1199a2dd95SBruce Richardson void **objs, uint16_t nb_objs) 1299a2dd95SBruce Richardson { 1399a2dd95SBruce Richardson struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts; 1499a2dd95SBruce Richardson struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx); 1599a2dd95SBruce Richardson const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx); 1699a2dd95SBruce Richardson struct rte_ipv4_hdr *ipv4_hdr; 1799a2dd95SBruce Richardson void **to_next, **from; 1899a2dd95SBruce Richardson uint16_t last_spec = 0; 1999a2dd95SBruce Richardson rte_edge_t next_index; 2099a2dd95SBruce Richardson uint16_t n_left_from; 2199a2dd95SBruce Richardson uint16_t held = 0; 2299a2dd95SBruce Richardson uint32_t drop_nh; 2399a2dd95SBruce Richardson rte_xmm_t result; 2499a2dd95SBruce Richardson rte_xmm_t priv01; 2599a2dd95SBruce Richardson rte_xmm_t priv23; 2699a2dd95SBruce Richardson int32x4_t dip; 2799a2dd95SBruce Richardson int rc, i; 2899a2dd95SBruce Richardson 2999a2dd95SBruce Richardson /* Speculative next */ 3099a2dd95SBruce Richardson next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE; 3199a2dd95SBruce Richardson /* Drop node */ 3299a2dd95SBruce Richardson drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16; 3399a2dd95SBruce Richardson 3499a2dd95SBruce Richardson pkts = (struct rte_mbuf **)objs; 3599a2dd95SBruce Richardson from = objs; 3699a2dd95SBruce Richardson n_left_from = nb_objs; 3799a2dd95SBruce Richardson 3899a2dd95SBruce Richardson for (i = OBJS_PER_CLINE; i < RTE_GRAPH_BURST_SIZE; i += OBJS_PER_CLINE) 3999a2dd95SBruce Richardson rte_prefetch0(&objs[i]); 4099a2dd95SBruce Richardson 4199a2dd95SBruce Richardson for (i = 0; i < 4 && i < n_left_from; i++) 4299a2dd95SBruce Richardson rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *, 4399a2dd95SBruce Richardson sizeof(struct rte_ether_hdr))); 4499a2dd95SBruce Richardson 4599a2dd95SBruce Richardson dip = vdupq_n_s32(0); 4699a2dd95SBruce Richardson /* Get stream for the speculated next node */ 4799a2dd95SBruce Richardson to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs); 4899a2dd95SBruce Richardson while (n_left_from >= 4) { 4999a2dd95SBruce Richardson #if RTE_GRAPH_BURST_SIZE > 64 5099a2dd95SBruce Richardson /* Prefetch next-next mbufs */ 5199a2dd95SBruce Richardson if (likely(n_left_from > 11)) { 5299a2dd95SBruce Richardson rte_prefetch0(pkts[8]); 5399a2dd95SBruce Richardson rte_prefetch0(pkts[9]); 5499a2dd95SBruce Richardson rte_prefetch0(pkts[10]); 5599a2dd95SBruce Richardson rte_prefetch0(pkts[11]); 5699a2dd95SBruce Richardson } 5799a2dd95SBruce Richardson #endif 5899a2dd95SBruce Richardson /* Prefetch next mbuf data */ 5999a2dd95SBruce Richardson if (likely(n_left_from > 7)) { 6099a2dd95SBruce Richardson rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *, 6199a2dd95SBruce Richardson sizeof(struct rte_ether_hdr))); 6299a2dd95SBruce Richardson rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *, 6399a2dd95SBruce Richardson sizeof(struct rte_ether_hdr))); 6499a2dd95SBruce Richardson rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *, 6599a2dd95SBruce Richardson sizeof(struct rte_ether_hdr))); 6699a2dd95SBruce Richardson rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *, 6799a2dd95SBruce Richardson sizeof(struct rte_ether_hdr))); 6899a2dd95SBruce Richardson } 6999a2dd95SBruce Richardson 7099a2dd95SBruce Richardson mbuf0 = pkts[0]; 7199a2dd95SBruce Richardson mbuf1 = pkts[1]; 7299a2dd95SBruce Richardson mbuf2 = pkts[2]; 7399a2dd95SBruce Richardson mbuf3 = pkts[3]; 7499a2dd95SBruce Richardson 7599a2dd95SBruce Richardson pkts += 4; 7699a2dd95SBruce Richardson n_left_from -= 4; 7799a2dd95SBruce Richardson 7899a2dd95SBruce Richardson /* Extract DIP of mbuf0 */ 7999a2dd95SBruce Richardson ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *, 8099a2dd95SBruce Richardson sizeof(struct rte_ether_hdr)); 8199a2dd95SBruce Richardson dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 0); 8299a2dd95SBruce Richardson /* Extract cksum, ttl as ipv4 hdr is in cache */ 8399a2dd95SBruce Richardson priv01.u16[1] = ipv4_hdr->time_to_live; 8499a2dd95SBruce Richardson priv01.u32[1] = ipv4_hdr->hdr_checksum; 8599a2dd95SBruce Richardson 8699a2dd95SBruce Richardson /* Extract DIP of mbuf1 */ 8799a2dd95SBruce Richardson ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv4_hdr *, 8899a2dd95SBruce Richardson sizeof(struct rte_ether_hdr)); 8999a2dd95SBruce Richardson dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 1); 9099a2dd95SBruce Richardson /* Extract cksum, ttl as ipv4 hdr is in cache */ 9199a2dd95SBruce Richardson priv01.u16[5] = ipv4_hdr->time_to_live; 9299a2dd95SBruce Richardson priv01.u32[3] = ipv4_hdr->hdr_checksum; 9399a2dd95SBruce Richardson 9499a2dd95SBruce Richardson /* Extract DIP of mbuf2 */ 9599a2dd95SBruce Richardson ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv4_hdr *, 9699a2dd95SBruce Richardson sizeof(struct rte_ether_hdr)); 9799a2dd95SBruce Richardson dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 2); 9899a2dd95SBruce Richardson /* Extract cksum, ttl as ipv4 hdr is in cache */ 9999a2dd95SBruce Richardson priv23.u16[1] = ipv4_hdr->time_to_live; 10099a2dd95SBruce Richardson priv23.u32[1] = ipv4_hdr->hdr_checksum; 10199a2dd95SBruce Richardson 10299a2dd95SBruce Richardson /* Extract DIP of mbuf3 */ 10399a2dd95SBruce Richardson ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv4_hdr *, 10499a2dd95SBruce Richardson sizeof(struct rte_ether_hdr)); 10599a2dd95SBruce Richardson dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 3); 10699a2dd95SBruce Richardson 10799a2dd95SBruce Richardson dip = vreinterpretq_s32_u8( 10899a2dd95SBruce Richardson vrev32q_u8(vreinterpretq_u8_s32(dip))); 10999a2dd95SBruce Richardson /* Extract cksum, ttl as ipv4 hdr is in cache */ 11099a2dd95SBruce Richardson priv23.u16[5] = ipv4_hdr->time_to_live; 11199a2dd95SBruce Richardson priv23.u32[3] = ipv4_hdr->hdr_checksum; 11299a2dd95SBruce Richardson 11399a2dd95SBruce Richardson /* Perform LPM lookup to get NH and next node */ 11499a2dd95SBruce Richardson rte_lpm_lookupx4(lpm, dip, result.u32, drop_nh); 11599a2dd95SBruce Richardson priv01.u16[0] = result.u16[0]; 11699a2dd95SBruce Richardson priv01.u16[4] = result.u16[2]; 11799a2dd95SBruce Richardson priv23.u16[0] = result.u16[4]; 11899a2dd95SBruce Richardson priv23.u16[4] = result.u16[6]; 119*be4c0cb4SPavan Nikhilesh NODE_INCREMENT_XSTAT_ID(node, 0, result.u16[1] == (drop_nh >> 16), 1); 120*be4c0cb4SPavan Nikhilesh NODE_INCREMENT_XSTAT_ID(node, 0, result.u16[3] == (drop_nh >> 16), 1); 121*be4c0cb4SPavan Nikhilesh NODE_INCREMENT_XSTAT_ID(node, 0, result.u16[5] == (drop_nh >> 16), 1); 122*be4c0cb4SPavan Nikhilesh NODE_INCREMENT_XSTAT_ID(node, 0, result.u16[7] == (drop_nh >> 16), 1); 12399a2dd95SBruce Richardson 12499a2dd95SBruce Richardson node_mbuf_priv1(mbuf0, dyn)->u = priv01.u64[0]; 12599a2dd95SBruce Richardson node_mbuf_priv1(mbuf1, dyn)->u = priv01.u64[1]; 12699a2dd95SBruce Richardson node_mbuf_priv1(mbuf2, dyn)->u = priv23.u64[0]; 12799a2dd95SBruce Richardson node_mbuf_priv1(mbuf3, dyn)->u = priv23.u64[1]; 12899a2dd95SBruce Richardson 12999a2dd95SBruce Richardson /* Enqueue four to next node */ 13099a2dd95SBruce Richardson rte_edge_t fix_spec = ((next_index == result.u16[1]) && 13199a2dd95SBruce Richardson (result.u16[1] == result.u16[3]) && 13299a2dd95SBruce Richardson (result.u16[3] == result.u16[5]) && 13399a2dd95SBruce Richardson (result.u16[5] == result.u16[7])); 13499a2dd95SBruce Richardson 13599a2dd95SBruce Richardson if (unlikely(fix_spec == 0)) { 13699a2dd95SBruce Richardson /* Copy things successfully speculated till now */ 13799a2dd95SBruce Richardson rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 13899a2dd95SBruce Richardson from += last_spec; 13999a2dd95SBruce Richardson to_next += last_spec; 14099a2dd95SBruce Richardson held += last_spec; 14199a2dd95SBruce Richardson last_spec = 0; 14299a2dd95SBruce Richardson 14399a2dd95SBruce Richardson /* Next0 */ 14499a2dd95SBruce Richardson if (next_index == result.u16[1]) { 14599a2dd95SBruce Richardson to_next[0] = from[0]; 14699a2dd95SBruce Richardson to_next++; 14799a2dd95SBruce Richardson held++; 14899a2dd95SBruce Richardson } else { 14999a2dd95SBruce Richardson rte_node_enqueue_x1(graph, node, result.u16[1], 15099a2dd95SBruce Richardson from[0]); 15199a2dd95SBruce Richardson } 15299a2dd95SBruce Richardson 15399a2dd95SBruce Richardson /* Next1 */ 15499a2dd95SBruce Richardson if (next_index == result.u16[3]) { 15599a2dd95SBruce Richardson to_next[0] = from[1]; 15699a2dd95SBruce Richardson to_next++; 15799a2dd95SBruce Richardson held++; 15899a2dd95SBruce Richardson } else { 15999a2dd95SBruce Richardson rte_node_enqueue_x1(graph, node, result.u16[3], 16099a2dd95SBruce Richardson from[1]); 16199a2dd95SBruce Richardson } 16299a2dd95SBruce Richardson 16399a2dd95SBruce Richardson /* Next2 */ 16499a2dd95SBruce Richardson if (next_index == result.u16[5]) { 16599a2dd95SBruce Richardson to_next[0] = from[2]; 16699a2dd95SBruce Richardson to_next++; 16799a2dd95SBruce Richardson held++; 16899a2dd95SBruce Richardson } else { 16999a2dd95SBruce Richardson rte_node_enqueue_x1(graph, node, result.u16[5], 17099a2dd95SBruce Richardson from[2]); 17199a2dd95SBruce Richardson } 17299a2dd95SBruce Richardson 17399a2dd95SBruce Richardson /* Next3 */ 17499a2dd95SBruce Richardson if (next_index == result.u16[7]) { 17599a2dd95SBruce Richardson to_next[0] = from[3]; 17699a2dd95SBruce Richardson to_next++; 17799a2dd95SBruce Richardson held++; 17899a2dd95SBruce Richardson } else { 17999a2dd95SBruce Richardson rte_node_enqueue_x1(graph, node, result.u16[7], 18099a2dd95SBruce Richardson from[3]); 18199a2dd95SBruce Richardson } 18299a2dd95SBruce Richardson 18399a2dd95SBruce Richardson from += 4; 18499a2dd95SBruce Richardson } else { 18599a2dd95SBruce Richardson last_spec += 4; 18699a2dd95SBruce Richardson } 18799a2dd95SBruce Richardson } 18899a2dd95SBruce Richardson 18999a2dd95SBruce Richardson while (n_left_from > 0) { 19099a2dd95SBruce Richardson uint32_t next_hop; 19199a2dd95SBruce Richardson uint16_t next0; 19299a2dd95SBruce Richardson 19399a2dd95SBruce Richardson mbuf0 = pkts[0]; 19499a2dd95SBruce Richardson 19599a2dd95SBruce Richardson pkts += 1; 19699a2dd95SBruce Richardson n_left_from -= 1; 19799a2dd95SBruce Richardson 19899a2dd95SBruce Richardson /* Extract DIP of mbuf0 */ 19999a2dd95SBruce Richardson ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *, 20099a2dd95SBruce Richardson sizeof(struct rte_ether_hdr)); 20199a2dd95SBruce Richardson /* Extract cksum, ttl as ipv4 hdr is in cache */ 20299a2dd95SBruce Richardson node_mbuf_priv1(mbuf0, dyn)->cksum = ipv4_hdr->hdr_checksum; 20399a2dd95SBruce Richardson node_mbuf_priv1(mbuf0, dyn)->ttl = ipv4_hdr->time_to_live; 20499a2dd95SBruce Richardson 20599a2dd95SBruce Richardson rc = rte_lpm_lookup(lpm, rte_be_to_cpu_32(ipv4_hdr->dst_addr), 20699a2dd95SBruce Richardson &next_hop); 20799a2dd95SBruce Richardson next_hop = (rc == 0) ? next_hop : drop_nh; 20899a2dd95SBruce Richardson 209*be4c0cb4SPavan Nikhilesh NODE_INCREMENT_XSTAT_ID(node, 0, rc != 0, 1); 21099a2dd95SBruce Richardson node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop; 21199a2dd95SBruce Richardson next_hop = next_hop >> 16; 21299a2dd95SBruce Richardson next0 = (uint16_t)next_hop; 21399a2dd95SBruce Richardson 21499a2dd95SBruce Richardson if (unlikely(next_index ^ next0)) { 21599a2dd95SBruce Richardson /* Copy things successfully speculated till now */ 21699a2dd95SBruce Richardson rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 21799a2dd95SBruce Richardson from += last_spec; 21899a2dd95SBruce Richardson to_next += last_spec; 21999a2dd95SBruce Richardson held += last_spec; 22099a2dd95SBruce Richardson last_spec = 0; 22199a2dd95SBruce Richardson 22299a2dd95SBruce Richardson rte_node_enqueue_x1(graph, node, next0, from[0]); 22399a2dd95SBruce Richardson from += 1; 22499a2dd95SBruce Richardson } else { 22599a2dd95SBruce Richardson last_spec += 1; 22699a2dd95SBruce Richardson } 22799a2dd95SBruce Richardson } 22899a2dd95SBruce Richardson 22999a2dd95SBruce Richardson /* !!! Home run !!! */ 23099a2dd95SBruce Richardson if (likely(last_spec == nb_objs)) { 23199a2dd95SBruce Richardson rte_node_next_stream_move(graph, node, next_index); 23299a2dd95SBruce Richardson return nb_objs; 23399a2dd95SBruce Richardson } 23499a2dd95SBruce Richardson held += last_spec; 23599a2dd95SBruce Richardson rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 23699a2dd95SBruce Richardson rte_node_next_stream_put(graph, node, next_index, held); 23799a2dd95SBruce Richardson 23899a2dd95SBruce Richardson return nb_objs; 23999a2dd95SBruce Richardson } 24099a2dd95SBruce Richardson 24199a2dd95SBruce Richardson #endif /* __INCLUDE_IP4_LOOKUP_NEON_H__ */ 242