1*c5711800Sriastradh /* $NetBSD: ip6_flow.c,v 1.43 2024/06/29 13:00:44 riastradh Exp $ */
28aa640daSliamjfoy
36163badeSmaxv /*
48aa640daSliamjfoy * Copyright (c) 2007 The NetBSD Foundation, Inc.
58aa640daSliamjfoy * All rights reserved.
68aa640daSliamjfoy *
78aa640daSliamjfoy * This code is derived from software contributed to The NetBSD Foundation
88aa640daSliamjfoy * by the 3am Software Foundry ("3am"). It was developed by Liam J. Foy
98aa640daSliamjfoy * <liamjfoy@netbsd.org> and Matt Thomas <matt@netbsd.org>.
108aa640daSliamjfoy *
118aa640daSliamjfoy * Redistribution and use in source and binary forms, with or without
128aa640daSliamjfoy * modification, are permitted provided that the following conditions
138aa640daSliamjfoy * are met:
148aa640daSliamjfoy * 1. Redistributions of source code must retain the above copyright
158aa640daSliamjfoy * notice, this list of conditions and the following disclaimer.
168aa640daSliamjfoy * 2. Redistributions in binary form must reproduce the above copyright
178aa640daSliamjfoy * notice, this list of conditions and the following disclaimer in the
188aa640daSliamjfoy * documentation and/or other materials provided with the distribution.
198aa640daSliamjfoy *
208aa640daSliamjfoy * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
218aa640daSliamjfoy * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
228aa640daSliamjfoy * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
238aa640daSliamjfoy * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
248aa640daSliamjfoy * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
258aa640daSliamjfoy * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
268aa640daSliamjfoy * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
278aa640daSliamjfoy * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
288aa640daSliamjfoy * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
298aa640daSliamjfoy * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
308aa640daSliamjfoy * POSSIBILITY OF SUCH DAMAGE.
318aa640daSliamjfoy *
328aa640daSliamjfoy * IPv6 version was developed by Liam J. Foy. Original source existed in IPv4
338aa640daSliamjfoy * format developed by Matt Thomas. Thanks to Joerg Sonnenberger, Matt
348aa640daSliamjfoy * Thomas and Christos Zoulas.
358aa640daSliamjfoy *
368aa640daSliamjfoy * Thanks to Liverpool John Moores University, especially Dr. David Llewellyn-Jones
378aa640daSliamjfoy * for providing resources (to test) and Professor Madjid Merabti.
388aa640daSliamjfoy */
398aa640daSliamjfoy
408aa640daSliamjfoy #include <sys/cdefs.h>
41*c5711800Sriastradh __KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.43 2024/06/29 13:00:44 riastradh Exp $");
423be31428Sozaki-r
433be31428Sozaki-r #ifdef _KERNEL_OPT
443be31428Sozaki-r #include "opt_net_mpsafe.h"
453be31428Sozaki-r #endif
468aa640daSliamjfoy
478aa640daSliamjfoy #include <sys/param.h>
488aa640daSliamjfoy #include <sys/systm.h>
498aa640daSliamjfoy #include <sys/malloc.h>
508aa640daSliamjfoy #include <sys/mbuf.h>
518aa640daSliamjfoy #include <sys/socketvar.h>
528aa640daSliamjfoy #include <sys/time.h>
538aa640daSliamjfoy #include <sys/kernel.h>
548aa640daSliamjfoy #include <sys/pool.h>
558aa640daSliamjfoy #include <sys/sysctl.h>
56dca032f9Sozaki-r #include <sys/workqueue.h>
57e449cc85Sozaki-r #include <sys/atomic.h>
588aa640daSliamjfoy
598aa640daSliamjfoy #include <net/if.h>
608aa640daSliamjfoy #include <net/if_dl.h>
618aa640daSliamjfoy #include <net/route.h>
628aa640daSliamjfoy #include <net/pfil.h>
638aa640daSliamjfoy
648aa640daSliamjfoy #include <netinet/in.h>
658aa640daSliamjfoy #include <netinet6/in6_var.h>
668aa640daSliamjfoy #include <netinet/in_systm.h>
678aa640daSliamjfoy #include <netinet/ip6.h>
688aa640daSliamjfoy #include <netinet6/ip6_var.h>
690dd41b37Sthorpej #include <netinet6/ip6_private.h>
708aa640daSliamjfoy
718aa640daSliamjfoy /*
728aa640daSliamjfoy * IPv6 Fast Forward caches/hashes flows from one source to destination.
738aa640daSliamjfoy *
748aa640daSliamjfoy * Upon a successful forward IPv6FF caches and hashes details such as the
758aa640daSliamjfoy * route, source and destination. Once another packet is received matching
768aa640daSliamjfoy * the source and destination the packet is forwarded straight onto if_output
778aa640daSliamjfoy * using the cached details.
788aa640daSliamjfoy *
798aa640daSliamjfoy * Example:
80202952fbSchristos * ether/fddi_input -> ip6flow_fastforward -> if_output
818aa640daSliamjfoy */
828aa640daSliamjfoy
8329f89491Sliamjfoy static struct pool ip6flow_pool;
848aa640daSliamjfoy
8574c24413Sknakahara TAILQ_HEAD(ip6flowhead, ip6flow);
868aa640daSliamjfoy
878aa640daSliamjfoy /*
888aa640daSliamjfoy * We could use IPv4 defines (IPFLOW_HASHBITS) but we'll
898aa640daSliamjfoy * use our own (possibly for future expansion).
908aa640daSliamjfoy */
918aa640daSliamjfoy #define IP6FLOW_TIMER (5 * PR_SLOWHZ)
92a3580ff0Sliamjfoy #define IP6FLOW_DEFAULT_HASHSIZE (1 << IP6FLOW_HASHBITS)
938aa640daSliamjfoy
94e4ff09f0Sknakahara /*
95e4ff09f0Sknakahara * ip6_flow.c internal lock.
96e4ff09f0Sknakahara * If we use softnet_lock, it would cause recursive lock.
97e4ff09f0Sknakahara *
98e4ff09f0Sknakahara * This is a tentative workaround.
99e4ff09f0Sknakahara * We should make it scalable somehow in the future.
100e4ff09f0Sknakahara */
1016163badeSmaxv static kmutex_t ip6flow_lock __cacheline_aligned;
102a3580ff0Sliamjfoy static struct ip6flowhead *ip6flowtable = NULL;
1038aa640daSliamjfoy static struct ip6flowhead ip6flowlist;
1046163badeSmaxv static int ip6flow_inuse __cacheline_aligned;
1058aa640daSliamjfoy
106dca032f9Sozaki-r static void ip6flow_slowtimo_work(struct work *, void *);
107dca032f9Sozaki-r static struct workqueue *ip6flow_slowtimo_wq;
108dca032f9Sozaki-r static struct work ip6flow_slowtimo_wk;
109dca032f9Sozaki-r
11048235e82Sknakahara static int sysctl_net_inet6_ip6_hashsize(SYSCTLFN_PROTO);
11148235e82Sknakahara static int sysctl_net_inet6_ip6_maxflows(SYSCTLFN_PROTO);
11248235e82Sknakahara static void ip6flow_sysctl_init(struct sysctllog **);
11348235e82Sknakahara
1148aa640daSliamjfoy /*
1158aa640daSliamjfoy * Insert an ip6flow into the list.
1168aa640daSliamjfoy */
11774c24413Sknakahara #define IP6FLOW_INSERT(hashidx, ip6f) \
1188aa640daSliamjfoy do { \
11974c24413Sknakahara (ip6f)->ip6f_hashidx = (hashidx); \
12074c24413Sknakahara TAILQ_INSERT_HEAD(&ip6flowtable[(hashidx)], (ip6f), ip6f_hash); \
12174c24413Sknakahara TAILQ_INSERT_HEAD(&ip6flowlist, (ip6f), ip6f_list); \
1228aa640daSliamjfoy } while (/*CONSTCOND*/ 0)
1238aa640daSliamjfoy
1248aa640daSliamjfoy /*
1258aa640daSliamjfoy * Remove an ip6flow from the list.
1268aa640daSliamjfoy */
12774c24413Sknakahara #define IP6FLOW_REMOVE(hashidx, ip6f) \
1288aa640daSliamjfoy do { \
12974c24413Sknakahara TAILQ_REMOVE(&ip6flowtable[(hashidx)], (ip6f), ip6f_hash); \
13074c24413Sknakahara TAILQ_REMOVE(&ip6flowlist, (ip6f), ip6f_list); \
1318aa640daSliamjfoy } while (/*CONSTCOND*/ 0)
1328aa640daSliamjfoy
1338aa640daSliamjfoy #ifndef IP6FLOW_DEFAULT
1348aa640daSliamjfoy #define IP6FLOW_DEFAULT 256
1358aa640daSliamjfoy #endif
1368aa640daSliamjfoy
1378aa640daSliamjfoy int ip6_maxflows = IP6FLOW_DEFAULT;
138a3580ff0Sliamjfoy int ip6_hashsize = IP6FLOW_DEFAULT_HASHSIZE;
1398aa640daSliamjfoy
1408aa640daSliamjfoy /*
1418aa640daSliamjfoy * Calculate hash table position.
1428aa640daSliamjfoy */
1438aa640daSliamjfoy static size_t
ip6flow_hash(const struct ip6_hdr * ip6)1441c6cf449Sdyoung ip6flow_hash(const struct ip6_hdr *ip6)
1458aa640daSliamjfoy {
1468aa640daSliamjfoy size_t hash;
1478aa640daSliamjfoy uint32_t dst_sum, src_sum;
14872a3be8fSliamjfoy size_t idx;
1498aa640daSliamjfoy
1508aa640daSliamjfoy src_sum = ip6->ip6_src.s6_addr32[0] + ip6->ip6_src.s6_addr32[1]
1518aa640daSliamjfoy + ip6->ip6_src.s6_addr32[2] + ip6->ip6_src.s6_addr32[3];
1528aa640daSliamjfoy dst_sum = ip6->ip6_dst.s6_addr32[0] + ip6->ip6_dst.s6_addr32[1]
1538aa640daSliamjfoy + ip6->ip6_dst.s6_addr32[2] + ip6->ip6_dst.s6_addr32[3];
1548aa640daSliamjfoy
1558aa640daSliamjfoy hash = ip6->ip6_flow;
1568aa640daSliamjfoy
1578aa640daSliamjfoy for (idx = 0; idx < 32; idx += IP6FLOW_HASHBITS)
1588aa640daSliamjfoy hash += (dst_sum >> (32 - idx)) + (src_sum >> idx);
1598aa640daSliamjfoy
160a3580ff0Sliamjfoy return hash & (ip6_hashsize-1);
1618aa640daSliamjfoy }
1628aa640daSliamjfoy
1638aa640daSliamjfoy /*
1648aa640daSliamjfoy * Check to see if a flow already exists - if so return it.
1658aa640daSliamjfoy */
1668aa640daSliamjfoy static struct ip6flow *
ip6flow_lookup(const struct ip6_hdr * ip6)1671c6cf449Sdyoung ip6flow_lookup(const struct ip6_hdr *ip6)
1688aa640daSliamjfoy {
1698aa640daSliamjfoy size_t hash;
1708aa640daSliamjfoy struct ip6flow *ip6f;
1718aa640daSliamjfoy
172e4ff09f0Sknakahara KASSERT(mutex_owned(&ip6flow_lock));
173e4ff09f0Sknakahara
1748aa640daSliamjfoy hash = ip6flow_hash(ip6);
1758aa640daSliamjfoy
17674c24413Sknakahara TAILQ_FOREACH(ip6f, &ip6flowtable[hash], ip6f_hash) {
1778aa640daSliamjfoy if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6f->ip6f_dst)
1788aa640daSliamjfoy && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &ip6f->ip6f_src)
1798aa640daSliamjfoy && ip6f->ip6f_flow == ip6->ip6_flow) {
1808aa640daSliamjfoy /* A cached flow has been found. */
1818aa640daSliamjfoy return ip6f;
1828aa640daSliamjfoy }
1838aa640daSliamjfoy }
1848aa640daSliamjfoy
1858aa640daSliamjfoy return NULL;
1868aa640daSliamjfoy }
1878aa640daSliamjfoy
18829f89491Sliamjfoy void
ip6flow_poolinit(void)18929f89491Sliamjfoy ip6flow_poolinit(void)
19029f89491Sliamjfoy {
19129f89491Sliamjfoy
19229f89491Sliamjfoy pool_init(&ip6flow_pool, sizeof(struct ip6flow), 0, 0, 0, "ip6flowpl",
19329f89491Sliamjfoy NULL, IPL_NET);
19429f89491Sliamjfoy }
19529f89491Sliamjfoy
1968aa640daSliamjfoy /*
197a3580ff0Sliamjfoy * Allocate memory and initialise lists. This function is called
198a3580ff0Sliamjfoy * from ip6_init and called there after to resize the hash table.
199a3580ff0Sliamjfoy * If a newly sized table cannot be malloc'ed we just continue
200a3580ff0Sliamjfoy * to use the old one.
2018aa640daSliamjfoy */
202e4ff09f0Sknakahara static int
ip6flow_init_locked(int table_size)203e4ff09f0Sknakahara ip6flow_init_locked(int table_size)
2048aa640daSliamjfoy {
205a3580ff0Sliamjfoy struct ip6flowhead *new_table;
2068aa640daSliamjfoy size_t i;
2078aa640daSliamjfoy
208e4ff09f0Sknakahara KASSERT(mutex_owned(&ip6flow_lock));
209e4ff09f0Sknakahara
210a3580ff0Sliamjfoy new_table = (struct ip6flowhead *)malloc(sizeof(struct ip6flowhead) *
211a3580ff0Sliamjfoy table_size, M_RTABLE, M_NOWAIT);
212a3580ff0Sliamjfoy
213a3580ff0Sliamjfoy if (new_table == NULL)
214a3580ff0Sliamjfoy return 1;
215a3580ff0Sliamjfoy
216a3580ff0Sliamjfoy if (ip6flowtable != NULL)
217a3580ff0Sliamjfoy free(ip6flowtable, M_RTABLE);
218a3580ff0Sliamjfoy
219a3580ff0Sliamjfoy ip6flowtable = new_table;
220a3580ff0Sliamjfoy ip6_hashsize = table_size;
221a3580ff0Sliamjfoy
22274c24413Sknakahara TAILQ_INIT(&ip6flowlist);
223a3580ff0Sliamjfoy for (i = 0; i < ip6_hashsize; i++)
22474c24413Sknakahara TAILQ_INIT(&ip6flowtable[i]);
225a3580ff0Sliamjfoy
226a3580ff0Sliamjfoy return 0;
2278aa640daSliamjfoy }
2288aa640daSliamjfoy
229e4ff09f0Sknakahara int
ip6flow_init(int table_size)230e4ff09f0Sknakahara ip6flow_init(int table_size)
231e4ff09f0Sknakahara {
232dca032f9Sozaki-r int ret, error;
233dca032f9Sozaki-r
2342c87fbb8Sozaki-r error = workqueue_create(&ip6flow_slowtimo_wq, "ip6flow",
235dca032f9Sozaki-r ip6flow_slowtimo_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
236dca032f9Sozaki-r if (error != 0)
237dca032f9Sozaki-r panic("%s: workqueue_create failed (%d)\n", __func__, error);
238e4ff09f0Sknakahara
239e4ff09f0Sknakahara mutex_init(&ip6flow_lock, MUTEX_DEFAULT, IPL_NONE);
240e4ff09f0Sknakahara
241e4ff09f0Sknakahara mutex_enter(&ip6flow_lock);
242e4ff09f0Sknakahara ret = ip6flow_init_locked(table_size);
243e4ff09f0Sknakahara mutex_exit(&ip6flow_lock);
24448235e82Sknakahara ip6flow_sysctl_init(NULL);
245e4ff09f0Sknakahara
246e4ff09f0Sknakahara return ret;
247e4ff09f0Sknakahara }
248e4ff09f0Sknakahara
2498aa640daSliamjfoy /*
2508aa640daSliamjfoy * IPv6 Fast Forward routine. Attempt to forward the packet -
2518aa640daSliamjfoy * if any problems are found return to the main IPv6 input
2528aa640daSliamjfoy * routine to deal with.
2538aa640daSliamjfoy */
2548aa640daSliamjfoy int
ip6flow_fastforward(struct mbuf ** mp)255202952fbSchristos ip6flow_fastforward(struct mbuf **mp)
2568aa640daSliamjfoy {
2578aa640daSliamjfoy struct ip6flow *ip6f;
2588aa640daSliamjfoy struct ip6_hdr *ip6;
2594c25fb2fSozaki-r struct rtentry *rt = NULL;
260202952fbSchristos struct mbuf *m;
26172f0a6dfSdyoung const struct sockaddr *dst;
2628aa640daSliamjfoy int error;
263e4ff09f0Sknakahara int ret = 0;
264e4ff09f0Sknakahara
265e4ff09f0Sknakahara mutex_enter(&ip6flow_lock);
2668aa640daSliamjfoy
2678aa640daSliamjfoy /*
2688aa640daSliamjfoy * Are we forwarding packets and have flows?
2698aa640daSliamjfoy */
2708aa640daSliamjfoy if (!ip6_forwarding || ip6flow_inuse == 0)
271e4ff09f0Sknakahara goto out;
2728aa640daSliamjfoy
273202952fbSchristos m = *mp;
2748aa640daSliamjfoy /*
2758aa640daSliamjfoy * At least size of IPv6 Header?
2768aa640daSliamjfoy */
2778aa640daSliamjfoy if (m->m_len < sizeof(struct ip6_hdr))
278e4ff09f0Sknakahara goto out;
2798aa640daSliamjfoy /*
2808aa640daSliamjfoy * Was packet received as a link-level multicast or broadcast?
2818aa640daSliamjfoy * If so, don't try to fast forward.
2828aa640daSliamjfoy */
2838aa640daSliamjfoy if ((m->m_flags & (M_BCAST|M_MCAST)) != 0)
284e4ff09f0Sknakahara goto out;
2858aa640daSliamjfoy
2862143da87Schristos if (ACCESSIBLE_POINTER(mtod(m, const void *), struct ip6_hdr) == 0) {
2878aa640daSliamjfoy if ((m = m_copyup(m, sizeof(struct ip6_hdr),
2888aa640daSliamjfoy (max_linkhdr + 3) & ~3)) == NULL) {
289df4b74a9Smaxv ret = 1;
290e4ff09f0Sknakahara goto out;
2918aa640daSliamjfoy }
292202952fbSchristos *mp = m;
2938aa640daSliamjfoy }
2948aa640daSliamjfoy
2958aa640daSliamjfoy ip6 = mtod(m, struct ip6_hdr *);
2968aa640daSliamjfoy
2978aa640daSliamjfoy if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
2988aa640daSliamjfoy /* Bad version. */
299e4ff09f0Sknakahara goto out;
3008aa640daSliamjfoy }
3018aa640daSliamjfoy
3028aa640daSliamjfoy /*
3038aa640daSliamjfoy * If we have a hop-by-hop extension we must process it.
3048aa640daSliamjfoy * We just leave this up to ip6_input to deal with.
3058aa640daSliamjfoy */
3068aa640daSliamjfoy if (ip6->ip6_nxt == IPPROTO_HOPOPTS)
307e4ff09f0Sknakahara goto out;
3088aa640daSliamjfoy
3098aa640daSliamjfoy /*
3108aa640daSliamjfoy * Attempt to find a flow.
3118aa640daSliamjfoy */
3128aa640daSliamjfoy if ((ip6f = ip6flow_lookup(ip6)) == NULL) {
3138aa640daSliamjfoy /* No flow found. */
314e4ff09f0Sknakahara goto out;
3158aa640daSliamjfoy }
3168aa640daSliamjfoy
3178aa640daSliamjfoy /*
3188aa640daSliamjfoy * Route and interface still up?
3198aa640daSliamjfoy */
320a4455600Sdyoung if ((rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL ||
321a37502b2Sroy (rt->rt_ifp->if_flags & IFF_UP) == 0 ||
322a37502b2Sroy (rt->rt_flags & RTF_BLACKHOLE) != 0)
3234c25fb2fSozaki-r goto out_unref;
3248aa640daSliamjfoy
3258aa640daSliamjfoy /*
3268aa640daSliamjfoy * Packet size greater than MTU?
3278aa640daSliamjfoy */
3288aa640daSliamjfoy if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) {
3298aa640daSliamjfoy /* Return to main IPv6 input function. */
3304c25fb2fSozaki-r goto out_unref;
3318aa640daSliamjfoy }
3328aa640daSliamjfoy
333c259649fSmsaitoh /*
334c259649fSmsaitoh * Clear any in-bound checksum flags for this packet.
335c259649fSmsaitoh */
336c259649fSmsaitoh m->m_pkthdr.csum_flags = 0;
337c259649fSmsaitoh
3388aa640daSliamjfoy if (ip6->ip6_hlim <= IPV6_HLIMDEC)
3394c25fb2fSozaki-r goto out_unref;
3408aa640daSliamjfoy
3418aa640daSliamjfoy /* Decrement hop limit (same as TTL) */
3428aa640daSliamjfoy ip6->ip6_hlim -= IPV6_HLIMDEC;
3438aa640daSliamjfoy
3448aa640daSliamjfoy if (rt->rt_flags & RTF_GATEWAY)
34572f0a6dfSdyoung dst = rt->rt_gateway;
3468aa640daSliamjfoy else
34772f0a6dfSdyoung dst = rtcache_getdst(&ip6f->ip6f_ro);
3488aa640daSliamjfoy
3498aa640daSliamjfoy PRT_SLOW_ARM(ip6f->ip6f_timer, IP6FLOW_TIMER);
3508aa640daSliamjfoy
3518aa640daSliamjfoy ip6f->ip6f_uses++;
3528aa640daSliamjfoy
35374c24413Sknakahara #if 0
35474c24413Sknakahara /*
35574c24413Sknakahara * We use FIFO cache replacement instead of LRU the same ip_flow.c.
35674c24413Sknakahara */
35774c24413Sknakahara /* move to head (LRU) for ip6flowlist. ip6flowtable does not care LRU. */
35874c24413Sknakahara TAILQ_REMOVE(&ip6flowlist, ip6f, ip6f_list);
35974c24413Sknakahara TAILQ_INSERT_HEAD(&ip6flowlist, ip6f, ip6f_list);
36074c24413Sknakahara #endif
36174c24413Sknakahara
3628aa640daSliamjfoy /* Send on its way - straight to the interface output routine. */
36395fc1456Sknakahara if ((error = if_output_lock(rt->rt_ifp, rt->rt_ifp, m, dst, rt)) != 0) {
3648aa640daSliamjfoy ip6f->ip6f_dropped++;
3658aa640daSliamjfoy } else {
3668aa640daSliamjfoy ip6f->ip6f_forwarded++;
3678aa640daSliamjfoy }
368e4ff09f0Sknakahara ret = 1;
3694c25fb2fSozaki-r out_unref:
3704c25fb2fSozaki-r rtcache_unref(rt, &ip6f->ip6f_ro);
371e4ff09f0Sknakahara out:
372e4ff09f0Sknakahara mutex_exit(&ip6flow_lock);
373e4ff09f0Sknakahara return ret;
3748aa640daSliamjfoy }
3758aa640daSliamjfoy
3768aa640daSliamjfoy /*
3778aa640daSliamjfoy * Add the IPv6 flow statistics to the main IPv6 statistics.
3788aa640daSliamjfoy */
3798aa640daSliamjfoy static void
ip6flow_addstats_rt(struct rtentry * rt,struct ip6flow * ip6f)3804c25fb2fSozaki-r ip6flow_addstats_rt(struct rtentry *rt, struct ip6flow *ip6f)
3818aa640daSliamjfoy {
382*c5711800Sriastradh net_stat_ref_t ip6s;
38372fa642aSdyoung
3844c25fb2fSozaki-r if (rt != NULL)
38572fa642aSdyoung rt->rt_use += ip6f->ip6f_uses;
3860dd41b37Sthorpej ip6s = IP6_STAT_GETREF();
387*c5711800Sriastradh ip6s->nsr_stats[IP6_STAT_FASTFORWARDFLOWS-1] = ip6flow_inuse; /* XXX */
388*c5711800Sriastradh _NET_STATADD_REF(ip6s, IP6_STAT_CANTFORWARD, ip6f->ip6f_dropped);
389*c5711800Sriastradh _NET_STATADD_REF(ip6s, IP6_STAT_ODROPPED, ip6f->ip6f_dropped);
390*c5711800Sriastradh _NET_STATADD_REF(ip6s, IP6_STAT_TOTAL, ip6f->ip6f_uses);
391*c5711800Sriastradh _NET_STATADD_REF(ip6s, IP6_STAT_FORWARD, ip6f->ip6f_forwarded);
392*c5711800Sriastradh _NET_STATADD_REF(ip6s, IP6_STAT_FASTFORWARD, ip6f->ip6f_forwarded);
3930dd41b37Sthorpej IP6_STAT_PUTREF();
3948aa640daSliamjfoy }
3958aa640daSliamjfoy
3964c25fb2fSozaki-r static void
ip6flow_addstats(struct ip6flow * ip6f)3974c25fb2fSozaki-r ip6flow_addstats(struct ip6flow *ip6f)
3984c25fb2fSozaki-r {
3994c25fb2fSozaki-r struct rtentry *rt;
4004c25fb2fSozaki-r
4014c25fb2fSozaki-r rt = rtcache_validate(&ip6f->ip6f_ro);
4024c25fb2fSozaki-r ip6flow_addstats_rt(rt, ip6f);
4034c25fb2fSozaki-r rtcache_unref(rt, &ip6f->ip6f_ro);
4044c25fb2fSozaki-r }
4054c25fb2fSozaki-r
4068aa640daSliamjfoy /*
4078aa640daSliamjfoy * Add statistics and free the flow.
4088aa640daSliamjfoy */
4098aa640daSliamjfoy static void
ip6flow_free(struct ip6flow * ip6f)4108aa640daSliamjfoy ip6flow_free(struct ip6flow *ip6f)
4118aa640daSliamjfoy {
4128aa640daSliamjfoy
413e4ff09f0Sknakahara KASSERT(mutex_owned(&ip6flow_lock));
414e4ff09f0Sknakahara
4158aa640daSliamjfoy /*
4168aa640daSliamjfoy * Remove the flow from the hash table (at elevated IPL).
4178aa640daSliamjfoy * Once it's off the list, we can deal with it at normal
4188aa640daSliamjfoy * network IPL.
4198aa640daSliamjfoy */
42074c24413Sknakahara IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
421a6f4292eSknakahara
4228aa640daSliamjfoy ip6flow_inuse--;
4238aa640daSliamjfoy ip6flow_addstats(ip6f);
42472f0a6dfSdyoung rtcache_free(&ip6f->ip6f_ro);
4258aa640daSliamjfoy pool_put(&ip6flow_pool, ip6f);
4268aa640daSliamjfoy }
4278aa640daSliamjfoy
428e4ff09f0Sknakahara static struct ip6flow *
ip6flow_reap_locked(int just_one)429e4ff09f0Sknakahara ip6flow_reap_locked(int just_one)
4308aa640daSliamjfoy {
43174c24413Sknakahara struct ip6flow *ip6f;
432e4ff09f0Sknakahara
433e4ff09f0Sknakahara KASSERT(mutex_owned(&ip6flow_lock));
434e4ff09f0Sknakahara
43574c24413Sknakahara /*
43674c24413Sknakahara * This case must remove one ip6flow. Furthermore, this case is used in
43774c24413Sknakahara * fast path(packet processing path). So, simply remove TAILQ_LAST one.
43874c24413Sknakahara */
43974c24413Sknakahara if (just_one) {
44074c24413Sknakahara ip6f = TAILQ_LAST(&ip6flowlist, ip6flowhead);
44174c24413Sknakahara KASSERT(ip6f != NULL);
4428aa640daSliamjfoy
44374c24413Sknakahara IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
44474c24413Sknakahara
44574c24413Sknakahara ip6flow_addstats(ip6f);
44674c24413Sknakahara rtcache_free(&ip6f->ip6f_ro);
44774c24413Sknakahara return ip6f;
44874c24413Sknakahara }
44974c24413Sknakahara
45074c24413Sknakahara /*
45174c24413Sknakahara * This case is used in slow path(sysctl).
45274c24413Sknakahara * At first, remove invalid rtcache ip6flow, and then remove TAILQ_LAST
45374c24413Sknakahara * ip6flow if it is ensured least recently used by comparing last_uses.
45474c24413Sknakahara */
45574c24413Sknakahara while (ip6flow_inuse > ip6_maxflows) {
45674c24413Sknakahara struct ip6flow *maybe_ip6f = TAILQ_LAST(&ip6flowlist, ip6flowhead);
45774c24413Sknakahara
45874c24413Sknakahara TAILQ_FOREACH(ip6f, &ip6flowlist, ip6f_list) {
4594c25fb2fSozaki-r struct rtentry *rt;
4608aa640daSliamjfoy /*
4618aa640daSliamjfoy * If this no longer points to a valid route -
4628aa640daSliamjfoy * reclaim it.
4638aa640daSliamjfoy */
4644c25fb2fSozaki-r if ((rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL)
4658aa640daSliamjfoy goto done;
4664c25fb2fSozaki-r rtcache_unref(rt, &ip6f->ip6f_ro);
4678aa640daSliamjfoy /*
4688aa640daSliamjfoy * choose the one that's been least recently
4698aa640daSliamjfoy * used or has had the least uses in the
4708aa640daSliamjfoy * last 1.5 intervals.
4718aa640daSliamjfoy */
47274c24413Sknakahara if (ip6f->ip6f_timer < maybe_ip6f->ip6f_timer
47374c24413Sknakahara || ((ip6f->ip6f_timer == maybe_ip6f->ip6f_timer)
47474c24413Sknakahara && (ip6f->ip6f_last_uses + ip6f->ip6f_uses
47574c24413Sknakahara < maybe_ip6f->ip6f_last_uses + maybe_ip6f->ip6f_uses)))
4768aa640daSliamjfoy maybe_ip6f = ip6f;
4778aa640daSliamjfoy }
4788aa640daSliamjfoy ip6f = maybe_ip6f;
4798aa640daSliamjfoy done:
4808aa640daSliamjfoy /*
4818aa640daSliamjfoy * Remove the entry from the flow table
4828aa640daSliamjfoy */
48374c24413Sknakahara IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
484a6f4292eSknakahara
48572f0a6dfSdyoung rtcache_free(&ip6f->ip6f_ro);
4868aa640daSliamjfoy ip6flow_inuse--;
4878aa640daSliamjfoy ip6flow_addstats(ip6f);
4888aa640daSliamjfoy pool_put(&ip6flow_pool, ip6f);
4898aa640daSliamjfoy }
4908aa640daSliamjfoy return NULL;
4918aa640daSliamjfoy }
4928aa640daSliamjfoy
493e4ff09f0Sknakahara /*
494e4ff09f0Sknakahara * Reap one or more flows - ip6flow_reap may remove
495e4ff09f0Sknakahara * multiple flows if net.inet6.ip6.maxflows is reduced.
496e4ff09f0Sknakahara */
497e4ff09f0Sknakahara struct ip6flow *
ip6flow_reap(int just_one)498e4ff09f0Sknakahara ip6flow_reap(int just_one)
499e4ff09f0Sknakahara {
500e4ff09f0Sknakahara struct ip6flow *ip6f;
501e4ff09f0Sknakahara
502e4ff09f0Sknakahara mutex_enter(&ip6flow_lock);
503e4ff09f0Sknakahara ip6f = ip6flow_reap_locked(just_one);
504e4ff09f0Sknakahara mutex_exit(&ip6flow_lock);
505e4ff09f0Sknakahara return ip6f;
506e4ff09f0Sknakahara }
507e4ff09f0Sknakahara
508e449cc85Sozaki-r static unsigned int ip6flow_work_enqueued = 0;
509dca032f9Sozaki-r
5108aa640daSliamjfoy void
ip6flow_slowtimo_work(struct work * wk,void * arg)511dca032f9Sozaki-r ip6flow_slowtimo_work(struct work *wk, void *arg)
5128aa640daSliamjfoy {
5138aa640daSliamjfoy struct ip6flow *ip6f, *next_ip6f;
5148aa640daSliamjfoy
515e449cc85Sozaki-r /* We can allow enqueuing another work at this point */
516e449cc85Sozaki-r atomic_swap_uint(&ip6flow_work_enqueued, 0);
517e449cc85Sozaki-r
518cead3b88Sozaki-r SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
5193be31428Sozaki-r mutex_enter(&ip6flow_lock);
52015e29e98Sad
52174c24413Sknakahara for (ip6f = TAILQ_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) {
5224c25fb2fSozaki-r struct rtentry *rt = NULL;
52374c24413Sknakahara next_ip6f = TAILQ_NEXT(ip6f, ip6f_list);
5248aa640daSliamjfoy if (PRT_SLOW_ISEXPIRED(ip6f->ip6f_timer) ||
5254c25fb2fSozaki-r (rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL) {
5268aa640daSliamjfoy ip6flow_free(ip6f);
5278aa640daSliamjfoy } else {
5288aa640daSliamjfoy ip6f->ip6f_last_uses = ip6f->ip6f_uses;
5294c25fb2fSozaki-r ip6flow_addstats_rt(rt, ip6f);
5308aa640daSliamjfoy ip6f->ip6f_uses = 0;
5318aa640daSliamjfoy ip6f->ip6f_dropped = 0;
5328aa640daSliamjfoy ip6f->ip6f_forwarded = 0;
5338aa640daSliamjfoy }
5344c25fb2fSozaki-r rtcache_unref(rt, &ip6f->ip6f_ro);
5358aa640daSliamjfoy }
53615e29e98Sad
537e4ff09f0Sknakahara mutex_exit(&ip6flow_lock);
538cead3b88Sozaki-r SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
5398aa640daSliamjfoy }
5408aa640daSliamjfoy
541dca032f9Sozaki-r void
ip6flow_slowtimo(void)542dca032f9Sozaki-r ip6flow_slowtimo(void)
543dca032f9Sozaki-r {
544dca032f9Sozaki-r
545dca032f9Sozaki-r /* Avoid enqueuing another work when one is already enqueued */
546e449cc85Sozaki-r if (atomic_swap_uint(&ip6flow_work_enqueued, 1) == 1)
547dca032f9Sozaki-r return;
548dca032f9Sozaki-r
549dca032f9Sozaki-r workqueue_enqueue(ip6flow_slowtimo_wq, &ip6flow_slowtimo_wk, NULL);
550dca032f9Sozaki-r }
551dca032f9Sozaki-r
5528aa640daSliamjfoy /*
5538aa640daSliamjfoy * We have successfully forwarded a packet using the normal
5548aa640daSliamjfoy * IPv6 stack. Now create/update a flow.
5558aa640daSliamjfoy */
5568aa640daSliamjfoy void
ip6flow_create(struct route * ro,struct mbuf * m)5574c25fb2fSozaki-r ip6flow_create(struct route *ro, struct mbuf *m)
5588aa640daSliamjfoy {
5591c6cf449Sdyoung const struct ip6_hdr *ip6;
5608aa640daSliamjfoy struct ip6flow *ip6f;
5618aa640daSliamjfoy size_t hash;
5628aa640daSliamjfoy
5631c6cf449Sdyoung ip6 = mtod(m, const struct ip6_hdr *);
5648aa640daSliamjfoy
565cead3b88Sozaki-r KERNEL_LOCK_UNLESS_NET_MPSAFE();
5663be31428Sozaki-r mutex_enter(&ip6flow_lock);
5673be31428Sozaki-r
5688aa640daSliamjfoy /*
5698aa640daSliamjfoy * If IPv6 Fast Forward is disabled, don't create a flow.
5708aa640daSliamjfoy * It can be disabled by setting net.inet6.ip6.maxflows to 0.
5718aa640daSliamjfoy *
5728aa640daSliamjfoy * Don't create a flow for ICMPv6 messages.
5738aa640daSliamjfoy */
574d88bfb30Sknakahara if (ip6_maxflows == 0 || ip6->ip6_nxt == IPPROTO_IPV6_ICMP)
5753be31428Sozaki-r goto out;
576d6107915Spooka
5778aa640daSliamjfoy /*
5788aa640daSliamjfoy * See if an existing flow exists. If so:
5798aa640daSliamjfoy * - Remove the flow
5808aa640daSliamjfoy * - Add flow statistics
5818aa640daSliamjfoy * - Free the route
5828aa640daSliamjfoy * - Reset statistics
5838aa640daSliamjfoy *
5848aa640daSliamjfoy * If a flow doesn't exist allocate a new one if
5858aa640daSliamjfoy * ip6_maxflows hasn't reached its limit. If it has
5868aa640daSliamjfoy * been reached, reap some flows.
5878aa640daSliamjfoy */
5888aa640daSliamjfoy ip6f = ip6flow_lookup(ip6);
5898aa640daSliamjfoy if (ip6f == NULL) {
5908aa640daSliamjfoy if (ip6flow_inuse >= ip6_maxflows) {
591e4ff09f0Sknakahara ip6f = ip6flow_reap_locked(1);
5928aa640daSliamjfoy } else {
5938aa640daSliamjfoy ip6f = pool_get(&ip6flow_pool, PR_NOWAIT);
5948aa640daSliamjfoy if (ip6f == NULL)
595d6107915Spooka goto out;
5968aa640daSliamjfoy ip6flow_inuse++;
5978aa640daSliamjfoy }
5988aa640daSliamjfoy memset(ip6f, 0, sizeof(*ip6f));
5998aa640daSliamjfoy } else {
60074c24413Sknakahara IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
601a6f4292eSknakahara
6028aa640daSliamjfoy ip6flow_addstats(ip6f);
60372f0a6dfSdyoung rtcache_free(&ip6f->ip6f_ro);
6048aa640daSliamjfoy ip6f->ip6f_uses = 0;
6058aa640daSliamjfoy ip6f->ip6f_last_uses = 0;
6068aa640daSliamjfoy ip6f->ip6f_dropped = 0;
6078aa640daSliamjfoy ip6f->ip6f_forwarded = 0;
6088aa640daSliamjfoy }
6098aa640daSliamjfoy
6108aa640daSliamjfoy /*
6118aa640daSliamjfoy * Fill in the updated/new details.
6128aa640daSliamjfoy */
61372f0a6dfSdyoung rtcache_copy(&ip6f->ip6f_ro, ro);
6148aa640daSliamjfoy ip6f->ip6f_dst = ip6->ip6_dst;
6158aa640daSliamjfoy ip6f->ip6f_src = ip6->ip6_src;
6168aa640daSliamjfoy ip6f->ip6f_flow = ip6->ip6_flow;
6178aa640daSliamjfoy PRT_SLOW_ARM(ip6f->ip6f_timer, IP6FLOW_TIMER);
6188aa640daSliamjfoy
6198aa640daSliamjfoy /*
6206163badeSmaxv * Insert into the appropriate bucket of the flow table.
6218aa640daSliamjfoy */
6228aa640daSliamjfoy hash = ip6flow_hash(ip6);
62374c24413Sknakahara IP6FLOW_INSERT(hash, ip6f);
624d6107915Spooka
625d6107915Spooka out:
626e4ff09f0Sknakahara mutex_exit(&ip6flow_lock);
627cead3b88Sozaki-r KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
6288aa640daSliamjfoy }
6298aa640daSliamjfoy
6308aa640daSliamjfoy /*
631a3580ff0Sliamjfoy * Invalidate/remove all flows - if new_size is positive we
632a3580ff0Sliamjfoy * resize the hash table.
6338aa640daSliamjfoy */
634a3580ff0Sliamjfoy int
ip6flow_invalidate_all(int new_size)635a3580ff0Sliamjfoy ip6flow_invalidate_all(int new_size)
6368aa640daSliamjfoy {
6378aa640daSliamjfoy struct ip6flow *ip6f, *next_ip6f;
638a6f4292eSknakahara int error;
6398aa640daSliamjfoy
640a3580ff0Sliamjfoy error = 0;
641e4ff09f0Sknakahara
642e4ff09f0Sknakahara mutex_enter(&ip6flow_lock);
643e4ff09f0Sknakahara
64474c24413Sknakahara for (ip6f = TAILQ_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) {
64574c24413Sknakahara next_ip6f = TAILQ_NEXT(ip6f, ip6f_list);
6468aa640daSliamjfoy ip6flow_free(ip6f);
6478aa640daSliamjfoy }
648a3580ff0Sliamjfoy
649a3580ff0Sliamjfoy if (new_size)
650e4ff09f0Sknakahara error = ip6flow_init_locked(new_size);
651a3580ff0Sliamjfoy
652e4ff09f0Sknakahara mutex_exit(&ip6flow_lock);
653e4ff09f0Sknakahara
654a3580ff0Sliamjfoy return error;
6558aa640daSliamjfoy }
65648235e82Sknakahara
65748235e82Sknakahara /*
65848235e82Sknakahara * sysctl helper routine for net.inet.ip6.maxflows. Since
65948235e82Sknakahara * we could reduce this value, call ip6flow_reap();
66048235e82Sknakahara */
66148235e82Sknakahara static int
sysctl_net_inet6_ip6_maxflows(SYSCTLFN_ARGS)66248235e82Sknakahara sysctl_net_inet6_ip6_maxflows(SYSCTLFN_ARGS)
66348235e82Sknakahara {
66448235e82Sknakahara int error;
66548235e82Sknakahara
66648235e82Sknakahara error = sysctl_lookup(SYSCTLFN_CALL(rnode));
66748235e82Sknakahara if (error || newp == NULL)
66848235e82Sknakahara return (error);
66948235e82Sknakahara
670cead3b88Sozaki-r SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
67148235e82Sknakahara
67248235e82Sknakahara ip6flow_reap(0);
67348235e82Sknakahara
674cead3b88Sozaki-r SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
67548235e82Sknakahara
67648235e82Sknakahara return (0);
67748235e82Sknakahara }
67848235e82Sknakahara
67948235e82Sknakahara static int
sysctl_net_inet6_ip6_hashsize(SYSCTLFN_ARGS)68048235e82Sknakahara sysctl_net_inet6_ip6_hashsize(SYSCTLFN_ARGS)
68148235e82Sknakahara {
68248235e82Sknakahara int error, tmp;
68348235e82Sknakahara struct sysctlnode node;
68448235e82Sknakahara
68548235e82Sknakahara node = *rnode;
68648235e82Sknakahara tmp = ip6_hashsize;
68748235e82Sknakahara node.sysctl_data = &tmp;
68848235e82Sknakahara error = sysctl_lookup(SYSCTLFN_CALL(&node));
68948235e82Sknakahara if (error || newp == NULL)
69048235e82Sknakahara return (error);
69148235e82Sknakahara
69248235e82Sknakahara if ((tmp & (tmp - 1)) == 0 && tmp != 0) {
69348235e82Sknakahara /*
69448235e82Sknakahara * Can only fail due to malloc()
69548235e82Sknakahara */
696cead3b88Sozaki-r SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
69748235e82Sknakahara error = ip6flow_invalidate_all(tmp);
698cead3b88Sozaki-r SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
69948235e82Sknakahara } else {
70048235e82Sknakahara /*
70148235e82Sknakahara * EINVAL if not a power of 2
70248235e82Sknakahara */
70348235e82Sknakahara error = EINVAL;
70448235e82Sknakahara }
70548235e82Sknakahara
70648235e82Sknakahara return error;
70748235e82Sknakahara }
70848235e82Sknakahara
70948235e82Sknakahara static void
ip6flow_sysctl_init(struct sysctllog ** clog)71048235e82Sknakahara ip6flow_sysctl_init(struct sysctllog **clog)
71148235e82Sknakahara {
71248235e82Sknakahara
71348235e82Sknakahara sysctl_createv(clog, 0, NULL, NULL,
71448235e82Sknakahara CTLFLAG_PERMANENT,
71548235e82Sknakahara CTLTYPE_NODE, "inet6",
71648235e82Sknakahara SYSCTL_DESCR("PF_INET6 related settings"),
71748235e82Sknakahara NULL, 0, NULL, 0,
71848235e82Sknakahara CTL_NET, PF_INET6, CTL_EOL);
71948235e82Sknakahara sysctl_createv(clog, 0, NULL, NULL,
72048235e82Sknakahara CTLFLAG_PERMANENT,
72148235e82Sknakahara CTLTYPE_NODE, "ip6",
72248235e82Sknakahara SYSCTL_DESCR("IPv6 related settings"),
72348235e82Sknakahara NULL, 0, NULL, 0,
72448235e82Sknakahara CTL_NET, PF_INET6, IPPROTO_IPV6, CTL_EOL);
72548235e82Sknakahara
72648235e82Sknakahara sysctl_createv(clog, 0, NULL, NULL,
72748235e82Sknakahara CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
72848235e82Sknakahara CTLTYPE_INT, "maxflows",
72948235e82Sknakahara SYSCTL_DESCR("Number of flows for fast forwarding (IPv6)"),
73048235e82Sknakahara sysctl_net_inet6_ip6_maxflows, 0, &ip6_maxflows, 0,
73148235e82Sknakahara CTL_NET, PF_INET6, IPPROTO_IPV6,
73248235e82Sknakahara CTL_CREATE, CTL_EOL);
73348235e82Sknakahara sysctl_createv(clog, 0, NULL, NULL,
73448235e82Sknakahara CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
73548235e82Sknakahara CTLTYPE_INT, "hashsize",
73648235e82Sknakahara SYSCTL_DESCR("Size of hash table for fast forwarding (IPv6)"),
73748235e82Sknakahara sysctl_net_inet6_ip6_hashsize, 0, &ip6_hashsize, 0,
73848235e82Sknakahara CTL_NET, PF_INET6, IPPROTO_IPV6,
73948235e82Sknakahara CTL_CREATE, CTL_EOL);
74048235e82Sknakahara }
741