1 /*- 2 * Copyright (c) 1998 The NetBSD Foundation, Inc. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to The NetBSD Foundation 6 * by the 3am Software Foundry ("3am"). It was developed by Matt Thomas. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the NetBSD 19 * Foundation, Inc. and its contributors. 20 * 4. Neither the name of The NetBSD Foundation nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 * 36 * $FreeBSD: src/sys/netinet/ip_flow.c,v 1.9.2.2 2001/11/04 17:35:31 luigi Exp $ 37 * $DragonFly: src/sys/netinet/ip_flow.c,v 1.7 2005/03/04 03:48:25 hsu Exp $ 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/malloc.h> 43 #include <sys/mbuf.h> 44 #include <sys/globaldata.h> 45 #include <sys/thread.h> 46 #include <sys/protosw.h> 47 #include <sys/socket.h> 48 #include <sys/kernel.h> 49 50 #include <sys/sysctl.h> 51 52 #include <net/if.h> 53 #include <net/route.h> 54 55 #include <netinet/in.h> 56 #include <netinet/in_systm.h> 57 #include <netinet/ip.h> 58 #include <netinet/in_var.h> 59 #include <netinet/ip_var.h> 60 #include <netinet/ip_flow.h> 61 62 #define IPFLOW_TIMER (5 * PR_SLOWHZ) 63 #define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */ 64 #define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS) 65 static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE]; 66 static int ipflow_inuse; 67 #define IPFLOW_MAX 256 68 69 static int ipflow_active = 0; 70 SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW, 71 &ipflow_active, 0, "Enable flow-based IP forwarding"); 72 73 static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow"); 74 75 static unsigned 76 ipflow_hash(struct in_addr dst, struct in_addr src, unsigned tos) 77 { 78 unsigned hash = tos; 79 int idx; 80 81 for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) 82 hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx); 83 return hash & (IPFLOW_HASHSIZE-1); 84 } 85 86 static struct ipflow * 87 ipflow_lookup(const struct ip *ip) 88 { 89 unsigned hash; 90 struct ipflow *ipf; 91 92 hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); 93 94 ipf = LIST_FIRST(&ipflows[hash]); 95 while (ipf != NULL) { 96 if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr && 97 ip->ip_src.s_addr == ipf->ipf_src.s_addr && 98 ip->ip_tos == ipf->ipf_tos) 99 break; 100 ipf = LIST_NEXT(ipf, ipf_next); 101 } 102 return ipf; 103 } 104 105 int 106 ipflow_fastforward(struct mbuf *m) 107 { 108 struct ip *ip; 109 struct ipflow *ipf; 110 struct rtentry *rt; 111 struct sockaddr *dst; 112 int error; 113 114 /* 115 * Are we forwarding packets? Big enough for an IP packet? 116 */ 117 if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip)) 118 return 0; 119 /* 120 * IP header with no option and valid version and length 121 */ 122 ip = mtod(m, struct ip *); 123 if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) || 124 ntohs(ip->ip_len) > m->m_pkthdr.len) 125 return 0; 126 /* 127 * Find a flow. 128 */ 129 if ((ipf = ipflow_lookup(ip)) == NULL) 130 return 0; 131 132 /* 133 * Route and interface still up? 134 */ 135 rt = ipf->ipf_ro.ro_rt; 136 if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) 137 return 0; 138 139 /* 140 * Packet size OK? TTL? 141 */ 142 if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) 143 return 0; 144 145 /* 146 * Everything checks out and so we can forward this packet. 147 * Modify the TTL and incrementally change the checksum. 148 */ 149 ip->ip_ttl -= IPTTLDEC; 150 if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) 151 ip->ip_sum += htons(IPTTLDEC << 8) + 1; 152 else 153 ip->ip_sum += htons(IPTTLDEC << 8); 154 155 /* 156 * Send the packet on its way. All we can get back is ENOBUFS 157 */ 158 ipf->ipf_uses++; 159 ipf->ipf_timer = IPFLOW_TIMER; 160 161 if (rt->rt_flags & RTF_GATEWAY) 162 dst = rt->rt_gateway; 163 else 164 dst = &ipf->ipf_ro.ro_dst; 165 if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) { 166 if (error == ENOBUFS) 167 ipf->ipf_dropped++; 168 else 169 ipf->ipf_errors++; 170 } 171 return 1; 172 } 173 174 static void 175 ipflow_addstats(struct ipflow *ipf) 176 { 177 ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; 178 ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped; 179 ipstat.ips_forward += ipf->ipf_uses; 180 ipstat.ips_fastforward += ipf->ipf_uses; 181 } 182 183 static void 184 ipflow_free(struct ipflow *ipf) 185 { 186 int s; 187 188 /* 189 * Remove the flow from the hash table (at elevated IPL). 190 * Once it's off the list, we can deal with it at normal 191 * network IPL. 192 */ 193 s = splimp(); 194 LIST_REMOVE(ipf, ipf_next); 195 splx(s); 196 ipflow_addstats(ipf); 197 RTFREE(ipf->ipf_ro.ro_rt); 198 ipflow_inuse--; 199 free(ipf, M_IPFLOW); 200 } 201 202 static struct ipflow * 203 ipflow_reap(void) 204 { 205 struct ipflow *ipf, *maybe_ipf = NULL; 206 int idx; 207 int s; 208 209 for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { 210 ipf = LIST_FIRST(&ipflows[idx]); 211 while (ipf != NULL) { 212 /* 213 * If this no longer points to a valid route 214 * reclaim it. 215 */ 216 if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0) 217 goto done; 218 /* 219 * choose the one that's been least recently used 220 * or has had the least uses in the last 1.5 221 * intervals. 222 */ 223 if (maybe_ipf == NULL || 224 ipf->ipf_timer < maybe_ipf->ipf_timer || 225 (ipf->ipf_timer == maybe_ipf->ipf_timer && 226 ipf->ipf_last_uses + ipf->ipf_uses < 227 maybe_ipf->ipf_last_uses + maybe_ipf->ipf_uses)) 228 maybe_ipf = ipf; 229 ipf = LIST_NEXT(ipf, ipf_next); 230 } 231 } 232 ipf = maybe_ipf; 233 done: 234 /* 235 * Remove the entry from the flow table. 236 */ 237 s = splimp(); 238 LIST_REMOVE(ipf, ipf_next); 239 splx(s); 240 ipflow_addstats(ipf); 241 RTFREE(ipf->ipf_ro.ro_rt); 242 return ipf; 243 } 244 245 void 246 ipflow_slowtimo(void) 247 { 248 struct ipflow *ipf; 249 int idx; 250 251 for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { 252 ipf = LIST_FIRST(&ipflows[idx]); 253 while (ipf != NULL) { 254 struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next); 255 if (--ipf->ipf_timer == 0) { 256 ipflow_free(ipf); 257 } else { 258 ipf->ipf_last_uses = ipf->ipf_uses; 259 ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; 260 ipstat.ips_forward += ipf->ipf_uses; 261 ipstat.ips_fastforward += ipf->ipf_uses; 262 ipf->ipf_uses = 0; 263 } 264 ipf = next_ipf; 265 } 266 } 267 } 268 269 void 270 ipflow_create(const struct route *ro, struct mbuf *m) 271 { 272 const struct ip *const ip = mtod(m, struct ip *); 273 struct ipflow *ipf; 274 unsigned hash; 275 int s; 276 277 /* 278 * Don't create cache entries for ICMP messages. 279 */ 280 if (!ipflow_active || ip->ip_p == IPPROTO_ICMP) 281 return; 282 /* 283 * See if an existing flow struct exists. If so remove it from it's 284 * list and free the old route. If not, try to malloc a new one 285 * (if we aren't at our limit). 286 */ 287 ipf = ipflow_lookup(ip); 288 if (ipf == NULL) { 289 if (ipflow_inuse == IPFLOW_MAX) { 290 ipf = ipflow_reap(); 291 } else { 292 ipf = malloc(sizeof *ipf, M_IPFLOW, 293 M_INTWAIT | M_NULLOK); 294 if (ipf == NULL) 295 return; 296 ipflow_inuse++; 297 } 298 bzero(ipf, sizeof *ipf); 299 } else { 300 s = splimp(); 301 LIST_REMOVE(ipf, ipf_next); 302 splx(s); 303 ipflow_addstats(ipf); 304 RTFREE(ipf->ipf_ro.ro_rt); 305 ipf->ipf_uses = ipf->ipf_last_uses = 0; 306 ipf->ipf_errors = ipf->ipf_dropped = 0; 307 } 308 309 /* 310 * Fill in the updated information. 311 */ 312 ipf->ipf_ro = *ro; 313 ro->ro_rt->rt_refcnt++; 314 ipf->ipf_dst = ip->ip_dst; 315 ipf->ipf_src = ip->ip_src; 316 ipf->ipf_tos = ip->ip_tos; 317 ipf->ipf_timer = IPFLOW_TIMER; 318 /* 319 * Insert into the approriate bucket of the flow table. 320 */ 321 hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); 322 s = splimp(); 323 LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next); 324 splx(s); 325 } 326