/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/bpf.h>

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#include "../tap_rss.h"

/*
 * This map provides configuration information about flows which need BPF RSS.
 *
 * The map is keyed by the skb mark.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct rss_key));
	__uint(max_entries, TAP_RSS_MAX);
} rss_map SEC(".maps");

#define IP_MF		0x2000	/** IP header Flags **/
#define IP_OFFSET	0x1FFF	/** IP header fragment offset **/

/*
 * Compute the Toeplitz hash over the input tuple.
 * This is the same as rte_softrss_be() in lib/hash,
 * but the loops are structured to satisfy BPF verifier restrictions.
 */
static __always_inline __u32
softrss_be(const __u32 *input_tuple, __u32 input_len, const __u32 *key)
{
	__u32 i, j, hash = 0;

#pragma unroll
	for (j = 0; j < input_len; j++) {
#pragma unroll
		for (i = 0; i < 32; i++) {
			if (input_tuple[j] & (1U << (31 - i)))
				hash ^= key[j] << i |
					(__u32)((__u64)key[j + 1] >> (32 - i));
		}
	}
	return hash;
}
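
/*
 * Illustration (not used by the datapath): for every bit set in the
 * input tuple, the 32-bit window of the RSS key starting at that bit
 * position is XORed into the hash. For instance, if only bit 31 of
 * input_tuple[0] is set (i == 0), the contribution is key[0]; if only
 * bit 30 is set (i == 1), it is (key[0] << 1) | (key[1] >> 31).
 */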

/*
 * Compute the RSS hash for an IPv4 packet.
 * Returns 0 if RSS is not specified or not possible.
 */
static __always_inline __u32
parse_ipv4(const struct __sk_buff *skb, __u32 hash_type, const __u32 *key)
{
	struct iphdr iph;
	__u32 off = 0;

	if (bpf_skb_load_bytes_relative(skb, off, &iph, sizeof(iph), BPF_HDR_START_NET))
		return 0;	/* no IP header present */

	struct {
		__u32 src_addr;
		__u32 dst_addr;
		__u16 dport;
		__u16 sport;
	} v4_tuple = {
		.src_addr = bpf_ntohl(iph.saddr),
		.dst_addr = bpf_ntohl(iph.daddr),
	};

	/* If only calculating L3 hash, do it now */
	if (hash_type & (1 << HASH_FIELD_IPV4_L3))
		return softrss_be((__u32 *)&v4_tuple, sizeof(v4_tuple) / sizeof(__u32) - 1, key);

	/* If packet is fragmented then no L4 hash is possible */
	if ((iph.frag_off & bpf_htons(IP_MF | IP_OFFSET)) != 0)
		return 0;

	/* Do RSS on UDP or TCP protocols */
	if (iph.protocol == IPPROTO_UDP || iph.protocol == IPPROTO_TCP) {
		__u16 src_dst_port[2];

		off += iph.ihl * 4;
		if (bpf_skb_load_bytes_relative(skb, off, &src_dst_port, sizeof(src_dst_port),
						BPF_HDR_START_NET))
			return 0;	/* TCP or UDP header missing */

		v4_tuple.sport = bpf_ntohs(src_dst_port[0]);
		v4_tuple.dport = bpf_ntohs(src_dst_port[1]);
		return softrss_be((__u32 *)&v4_tuple, sizeof(v4_tuple) / sizeof(__u32), key);
	}

	/* Other protocol */
	return 0;
}

/*
 * Parse IPv6 extension headers; update the offset and return the next protocol.
 * Returns the next protocol on success, -1 on a malformed header.
 */
static __always_inline int
skip_ip6_ext(__u16 proto, const struct __sk_buff *skb, __u32 *off, int *frag)
{
	struct ext_hdr {
		__u8 next_hdr;
		__u8 len;
	} xh;
	unsigned int i;

	*frag = 0;

#define MAX_EXT_HDRS 5
#pragma unroll
	for (i = 0; i < MAX_EXT_HDRS; i++) {
		switch (proto) {
		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (bpf_skb_load_bytes_relative(skb, *off, &xh, sizeof(xh),
							BPF_HDR_START_NET))
				return -1;

			*off += (xh.len + 1) * 8;
			proto = xh.next_hdr;
			break;
		case IPPROTO_FRAGMENT:
			if (bpf_skb_load_bytes_relative(skb, *off, &xh, sizeof(xh),
							BPF_HDR_START_NET))
				return -1;

			*off += 8;
			proto = xh.next_hdr;
			*frag = 1;
			return proto;	/* this is always the last ext hdr */
		default:
			return proto;
		}
	}

	/* too many extension headers, give up */
	return -1;
}

/*
 * Compute the RSS hash for an IPv6 packet.
 * Returns 0 if RSS is not specified or not possible.
 */
static __always_inline __u32
parse_ipv6(const struct __sk_buff *skb, __u32 hash_type, const __u32 *key)
{
	struct {
		__u32 src_addr[4];
		__u32 dst_addr[4];
		__u16 dport;
		__u16 sport;
	} v6_tuple = { };
	struct ipv6hdr ip6h;
	__u32 off = 0, j;
	int proto, frag;

	if (bpf_skb_load_bytes_relative(skb, off, &ip6h, sizeof(ip6h), BPF_HDR_START_NET))
		return 0;	/* missing IPv6 header */

#pragma unroll
	for (j = 0; j < 4; j++) {
		v6_tuple.src_addr[j] = bpf_ntohl(ip6h.saddr.in6_u.u6_addr32[j]);
		v6_tuple.dst_addr[j] = bpf_ntohl(ip6h.daddr.in6_u.u6_addr32[j]);
	}

	/* If only doing L3 hash, do it now */
	if (hash_type & (1 << HASH_FIELD_IPV6_L3))
		return softrss_be((__u32 *)&v6_tuple, sizeof(v6_tuple) / sizeof(__u32) - 1, key);

	/* Skip extension headers if present */
	off += sizeof(ip6h);
	proto = skip_ip6_ext(ip6h.nexthdr, skb, &off, &frag);
	if (proto < 0)
		return 0;

	/* If packet is a fragment then no L4 hash is possible */
	if (frag)
		return 0;

	/* Do RSS on UDP or TCP */
	if (proto == IPPROTO_UDP || proto == IPPROTO_TCP) {
		__u16 src_dst_port[2];

		if (bpf_skb_load_bytes_relative(skb, off, &src_dst_port, sizeof(src_dst_port),
						BPF_HDR_START_NET))
			return 0;

		v6_tuple.sport = bpf_ntohs(src_dst_port[0]);
		v6_tuple.dport = bpf_ntohs(src_dst_port[1]);

		return softrss_be((__u32 *)&v6_tuple, sizeof(v6_tuple) / sizeof(__u32), key);
	}

	return 0;
}

/*
 * Scale a value into the range [0, n).
 * Assumes val is large (i.e. the hash covers the whole u32 range).
 */
static __always_inline __u32
reciprocal_scale(__u32 val, __u32 n)
{
	return (__u32)(((__u64)val * n) >> 32);
}
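
/*
 * Example (illustrative): with n == 4, a uniformly distributed hash
 * maps evenly onto the queues:
 *	reciprocal_scale(0x00000000, 4) == 0
 *	reciprocal_scale(0x3fffffff, 4) == 0
 *	reciprocal_scale(0x40000000, 4) == 1
 *	reciprocal_scale(0xffffffff, 4) == 3
 */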

/*
 * When this BPF program is run by tc from the filter classifier,
 * it is able to read skb metadata and packet data.
 *
 * For packets where RSS is not possible, just return TC_ACT_OK.
 * When RSS is desired, set skb->queue_mapping and return TC_ACT_PIPE
 * to continue processing.
 *
 * This must be BPF_PROG_TYPE_SCHED_ACT, so the section name needs to be "action".
 */
SEC("action") int
rss_flow_action(struct __sk_buff *skb)
{
	const struct rss_key *rsskey;
	const __u32 *key;
	__be16 proto;
	__u32 mark;
	__u32 hash;
	__u16 queue;

	__builtin_preserve_access_index(({
		mark = skb->mark;
		proto = skb->protocol;
	}));

	/* Look up the RSS configuration for this flow's mark */
	rsskey = bpf_map_lookup_elem(&rss_map, &mark);
	if (rsskey == NULL)
		return TC_ACT_OK;

	key = (const __u32 *)rsskey->key;

	if (proto == bpf_htons(ETH_P_IP))
		hash = parse_ipv4(skb, rsskey->hash_fields, key);
	else if (proto == bpf_htons(ETH_P_IPV6))
		hash = parse_ipv6(skb, rsskey->hash_fields, key);
	else
		hash = 0;

	if (hash == 0)
		return TC_ACT_OK;

	/* Fold the hash onto the number of queues configured */
	queue = reciprocal_scale(hash, rsskey->nb_queues);

	__builtin_preserve_access_index(({
		skb->queue_mapping = queue;
	}));
	return TC_ACT_PIPE;
}

char _license[] SEC("license") = "Dual BSD/GPL";
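
/*
 * Usage note (illustrative sketch, not part of this program): the loader
 * is expected to populate "rss_map", keyed by the skb mark that the
 * matching classifier/flow rule sets, before packets arrive.  With libbpf
 * that might look like:
 *
 *	struct rss_key rk = {
 *		.hash_fields = 1 << HASH_FIELD_IPV4_L3,
 *		.nb_queues = nb_rx_queues,
 *	};
 *	memcpy(rk.key, rss_key_be, sizeof(rk.key));
 *	bpf_map_update_elem(map_fd, &mark, &rk, BPF_ANY);
 *
 * The struct rss_key layout is defined in ../tap_rss.h; "map_fd",
 * "rss_key_be", "nb_rx_queues" and "mark" are placeholders for the
 * application's map descriptor, byte-swapped RSS key, queue count and
 * flow mark.
 */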