xref: /netbsd-src/sys/netinet6/ip6_flow.c (revision f21b7d7f2cbdd5c14b3882c4e8a3d43580d460a6)
1 /*	$NetBSD: ip6_flow.c,v 1.31 2016/08/23 09:59:20 knakahara Exp $	*/
2 
3 /*-
4  * Copyright (c) 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by the 3am Software Foundry ("3am").  It was developed by Liam J. Foy
9  * <liamjfoy@netbsd.org> and Matt Thomas <matt@netbsd.org>.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * IPv6 version was developed by Liam J. Foy. Original source existed in IPv4
33  * format developed by Matt Thomas. Thanks to Joerg Sonnenberger, Matt
34  * Thomas and Christos Zoulas.
35  *
36  * Thanks to Liverpool John Moores University, especially Dr. David Llewellyn-Jones
37  * for providing resources (to test) and Professor Madjid Merabti.
38  */
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.31 2016/08/23 09:59:20 knakahara Exp $");
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/domain.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/time.h>
52 #include <sys/kernel.h>
53 #include <sys/pool.h>
54 #include <sys/sysctl.h>
55 #include <sys/workqueue.h>
56 #include <sys/atomic.h>
57 
58 #include <net/if.h>
59 #include <net/if_dl.h>
60 #include <net/route.h>
61 #include <net/pfil.h>
62 
63 #include <netinet/in.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/ip6.h>
67 #include <netinet6/ip6_var.h>
68 #include <netinet6/ip6_private.h>
69 
70 /*
71  * IPv6 Fast Forward caches/hashes flows from one source to destination.
72  *
73  * Upon a successful forward IPv6FF caches and hashes details such as the
74  * route, source and destination. Once another packet is received matching
75  * the source and destination the packet is forwarded straight onto if_output
76  * using the cached details.
77  *
78  * Example:
79  * ether/fddi_input -> ip6flow_fastforward -> if_output
80  */
81 
/* Pool from which struct ip6flow entries are allocated (see ip6flow_poolinit). */
static struct pool ip6flow_pool;

/* Tail-queue head type; used for each hash bucket and for the global list. */
TAILQ_HEAD(ip6flowhead, ip6flow);

/*
 * We could use IPv4 defines (IPFLOW_HASHBITS) but we'll
 * use our own (possibly for future expansion).
 *
 * IP6FLOW_TIMER is the value a flow's timer is armed with whenever the
 * flow is created or used.  IP6FLOW_DEFAULT_HASHSIZE is the initial
 * number of hash buckets.
 */
#define	IP6FLOW_TIMER		(5 * PR_SLOWHZ)
#define	IP6FLOW_DEFAULT_HASHSIZE	(1 << IP6FLOW_HASHBITS)

/*
 * ip6_flow.c internal lock.
 * If we use softnet_lock, it would cause recursive lock.
 *
 * This is a tentative workaround.
 * We should make it scalable somehow in the future.
 */
static kmutex_t ip6flow_lock;
/* Array of ip6_hashsize buckets; allocated by ip6flow_init_locked(). */
static struct ip6flowhead *ip6flowtable = NULL;
/* Every cached flow, newest first (IP6FLOW_INSERT adds at the head). */
static struct ip6flowhead ip6flowlist;
/* Number of flows currently allocated. */
static int ip6flow_inuse;

/* Deferred execution of the periodic flow expiry (see ip6flow_slowtimo). */
static void ip6flow_slowtimo_work(struct work *, void *);
static struct workqueue	*ip6flow_slowtimo_wq;
static struct work	ip6flow_slowtimo_wk;

/* sysctl handlers and registration, defined at the end of this file. */
static int sysctl_net_inet6_ip6_hashsize(SYSCTLFN_PROTO);
static int sysctl_net_inet6_ip6_maxflows(SYSCTLFN_PROTO);
static void ip6flow_sysctl_init(struct sysctllog **);
112 
/*
 * Insert an ip6flow into the list.
 *
 * Records the bucket index in the flow itself (so it can be unlinked
 * later without rehashing), then links the flow at the head of both
 * its hash bucket and the global flow list.
 */
#define	IP6FLOW_INSERT(hashidx, ip6f) \
do { \
	(ip6f)->ip6f_hashidx = (hashidx); \
	TAILQ_INSERT_HEAD(&ip6flowtable[(hashidx)], (ip6f), ip6f_hash); \
	TAILQ_INSERT_HEAD(&ip6flowlist, (ip6f), ip6f_list); \
} while (/*CONSTCOND*/ 0)
122 
/*
 * Remove an ip6flow from the list.
 *
 * Unlinks the flow from hash bucket `hashidx' and from the global flow
 * list.  Callers in this file pass the index saved by IP6FLOW_INSERT
 * (ip6f->ip6f_hashidx).  The flow itself is not freed.
 */
#define	IP6FLOW_REMOVE(hashidx, ip6f) \
do { \
	TAILQ_REMOVE(&ip6flowtable[(hashidx)], (ip6f), ip6f_hash); \
	TAILQ_REMOVE(&ip6flowlist, (ip6f), ip6f_list); \
} while (/*CONSTCOND*/ 0)
131 
/* Default flow limit; may be overridden at build time. */
#ifndef IP6FLOW_DEFAULT
#define	IP6FLOW_DEFAULT		256
#endif

/* Maximum number of cached flows; exported as the "maxflows" sysctl. */
int ip6_maxflows = IP6FLOW_DEFAULT;
/* Current number of hash buckets; exported as the "hashsize" sysctl. */
int ip6_hashsize = IP6FLOW_DEFAULT_HASHSIZE;
138 
139 /*
140  * Calculate hash table position.
141  */
142 static size_t
143 ip6flow_hash(const struct ip6_hdr *ip6)
144 {
145 	size_t hash;
146 	uint32_t dst_sum, src_sum;
147 	size_t idx;
148 
149 	src_sum = ip6->ip6_src.s6_addr32[0] + ip6->ip6_src.s6_addr32[1]
150 	    + ip6->ip6_src.s6_addr32[2] + ip6->ip6_src.s6_addr32[3];
151 	dst_sum = ip6->ip6_dst.s6_addr32[0] + ip6->ip6_dst.s6_addr32[1]
152 	    + ip6->ip6_dst.s6_addr32[2] + ip6->ip6_dst.s6_addr32[3];
153 
154 	hash = ip6->ip6_flow;
155 
156 	for (idx = 0; idx < 32; idx += IP6FLOW_HASHBITS)
157 		hash += (dst_sum >> (32 - idx)) + (src_sum >> idx);
158 
159 	return hash & (ip6_hashsize-1);
160 }
161 
162 /*
163  * Check to see if a flow already exists - if so return it.
164  */
165 static struct ip6flow *
166 ip6flow_lookup(const struct ip6_hdr *ip6)
167 {
168 	size_t hash;
169 	struct ip6flow *ip6f;
170 
171 	KASSERT(mutex_owned(&ip6flow_lock));
172 
173 	hash = ip6flow_hash(ip6);
174 
175 	TAILQ_FOREACH(ip6f, &ip6flowtable[hash], ip6f_hash) {
176 		if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6f->ip6f_dst)
177 		    && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &ip6f->ip6f_src)
178 		    && ip6f->ip6f_flow == ip6->ip6_flow) {
179 		    	/* A cached flow has been found. */
180 			return ip6f;
181 		}
182 	}
183 
184 	return NULL;
185 }
186 
187 void
188 ip6flow_poolinit(void)
189 {
190 
191 	pool_init(&ip6flow_pool, sizeof(struct ip6flow), 0, 0, 0, "ip6flowpl",
192 			NULL, IPL_NET);
193 }
194 
195 /*
196  * Allocate memory and initialise lists. This function is called
197  * from ip6_init and called there after to resize the hash table.
198  * If a newly sized table cannot be malloc'ed we just continue
199  * to use the old one.
200  */
201 static int
202 ip6flow_init_locked(int table_size)
203 {
204 	struct ip6flowhead *new_table;
205 	size_t i;
206 
207 	KASSERT(mutex_owned(&ip6flow_lock));
208 
209 	new_table = (struct ip6flowhead *)malloc(sizeof(struct ip6flowhead) *
210 	    table_size, M_RTABLE, M_NOWAIT);
211 
212 	if (new_table == NULL)
213 		return 1;
214 
215 	if (ip6flowtable != NULL)
216 		free(ip6flowtable, M_RTABLE);
217 
218 	ip6flowtable = new_table;
219 	ip6_hashsize = table_size;
220 
221 	TAILQ_INIT(&ip6flowlist);
222 	for (i = 0; i < ip6_hashsize; i++)
223 		TAILQ_INIT(&ip6flowtable[i]);
224 
225 	return 0;
226 }
227 
228 int
229 ip6flow_init(int table_size)
230 {
231 	int ret, error;
232 
233 	error = workqueue_create(&ip6flow_slowtimo_wq, "ip6flow_slowtimo",
234 	    ip6flow_slowtimo_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
235 	if (error != 0)
236 		panic("%s: workqueue_create failed (%d)\n", __func__, error);
237 
238 	mutex_init(&ip6flow_lock, MUTEX_DEFAULT, IPL_NONE);
239 
240 	mutex_enter(&ip6flow_lock);
241 	ret = ip6flow_init_locked(table_size);
242 	mutex_exit(&ip6flow_lock);
243 	ip6flow_sysctl_init(NULL);
244 
245 	return ret;
246 }
247 
248 /*
249  * IPv6 Fast Forward routine. Attempt to forward the packet -
250  * if any problems are found return to the main IPv6 input
251  * routine to deal with.
252  */
253 int
254 ip6flow_fastforward(struct mbuf **mp)
255 {
256 	struct ip6flow *ip6f;
257 	struct ip6_hdr *ip6;
258 	struct rtentry *rt;
259 	struct mbuf *m;
260 	const struct sockaddr *dst;
261 	int error;
262 	int ret = 0;
263 
264 	mutex_enter(&ip6flow_lock);
265 
266 	/*
267 	 * Are we forwarding packets and have flows?
268 	 */
269 	if (!ip6_forwarding || ip6flow_inuse == 0)
270 		goto out;
271 
272 	m = *mp;
273 	/*
274 	 * At least size of IPv6 Header?
275 	 */
276 	if (m->m_len < sizeof(struct ip6_hdr))
277 		goto out;
278 	/*
279 	 * Was packet received as a link-level multicast or broadcast?
280 	 * If so, don't try to fast forward.
281 	 */
282 	if ((m->m_flags & (M_BCAST|M_MCAST)) != 0)
283 		goto out;
284 
285 	if (IP6_HDR_ALIGNED_P(mtod(m, const void *)) == 0) {
286 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
287 				(max_linkhdr + 3) & ~3)) == NULL) {
288 			goto out;
289 		}
290 		*mp = m;
291 	} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
292 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
293 			goto out;
294 		}
295 		*mp = m;
296 	}
297 
298 	ip6 = mtod(m, struct ip6_hdr *);
299 
300 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
301 		/* Bad version. */
302 		goto out;
303 	}
304 
305 	/*
306 	 * If we have a hop-by-hop extension we must process it.
307 	 * We just leave this up to ip6_input to deal with.
308 	 */
309 	if (ip6->ip6_nxt == IPPROTO_HOPOPTS)
310 		goto out;
311 
312 	/*
313 	 * Attempt to find a flow.
314 	 */
315 	if ((ip6f = ip6flow_lookup(ip6)) == NULL) {
316 		/* No flow found. */
317 		goto out;
318 	}
319 
320 	/*
321 	 * Route and interface still up?
322 	 */
323 	if ((rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL ||
324 	    (rt->rt_ifp->if_flags & IFF_UP) == 0 ||
325 	    (rt->rt_flags & RTF_BLACKHOLE) != 0)
326 		goto out;
327 
328 	/*
329 	 * Packet size greater than MTU?
330 	 */
331 	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) {
332 		/* Return to main IPv6 input function. */
333 		goto out;
334 	}
335 
336 	/*
337 	 * Clear any in-bound checksum flags for this packet.
338 	 */
339 	m->m_pkthdr.csum_flags = 0;
340 
341 	if (ip6->ip6_hlim <= IPV6_HLIMDEC)
342 		goto out;
343 
344 	/* Decrement hop limit (same as TTL) */
345 	ip6->ip6_hlim -= IPV6_HLIMDEC;
346 
347 	if (rt->rt_flags & RTF_GATEWAY)
348 		dst = rt->rt_gateway;
349 	else
350 		dst = rtcache_getdst(&ip6f->ip6f_ro);
351 
352 	PRT_SLOW_ARM(ip6f->ip6f_timer, IP6FLOW_TIMER);
353 
354 	ip6f->ip6f_uses++;
355 
356 #if 0
357 	/*
358 	 * We use FIFO cache replacement instead of LRU the same ip_flow.c.
359 	 */
360 	/* move to head (LRU) for ip6flowlist. ip6flowtable does not care LRU. */
361 	TAILQ_REMOVE(&ip6flowlist, ip6f, ip6f_list);
362 	TAILQ_INSERT_HEAD(&ip6flowlist, ip6f, ip6f_list);
363 #endif
364 
365 	/* Send on its way - straight to the interface output routine. */
366 	if ((error = if_output_lock(rt->rt_ifp, rt->rt_ifp, m, dst, rt)) != 0) {
367 		ip6f->ip6f_dropped++;
368 	} else {
369 		ip6f->ip6f_forwarded++;
370 	}
371 	ret = 1;
372  out:
373 	mutex_exit(&ip6flow_lock);
374 	return ret;
375 }
376 
377 /*
378  * Add the IPv6 flow statistics to the main IPv6 statistics.
379  */
380 static void
381 ip6flow_addstats(const struct ip6flow *ip6f)
382 {
383 	struct rtentry *rt;
384 	uint64_t *ip6s;
385 
386 	if ((rt = rtcache_validate(&ip6f->ip6f_ro)) != NULL)
387 		rt->rt_use += ip6f->ip6f_uses;
388 	ip6s = IP6_STAT_GETREF();
389 	ip6s[IP6_STAT_FASTFORWARDFLOWS] = ip6flow_inuse;
390 	ip6s[IP6_STAT_CANTFORWARD] += ip6f->ip6f_dropped;
391 	ip6s[IP6_STAT_ODROPPED] += ip6f->ip6f_dropped;
392 	ip6s[IP6_STAT_TOTAL] += ip6f->ip6f_uses;
393 	ip6s[IP6_STAT_FORWARD] += ip6f->ip6f_forwarded;
394 	ip6s[IP6_STAT_FASTFORWARD] += ip6f->ip6f_forwarded;
395 	IP6_STAT_PUTREF();
396 }
397 
398 /*
399  * Add statistics and free the flow.
400  */
401 static void
402 ip6flow_free(struct ip6flow *ip6f)
403 {
404 
405 	KASSERT(mutex_owned(&ip6flow_lock));
406 
407 	/*
408 	 * Remove the flow from the hash table (at elevated IPL).
409 	 * Once it's off the list, we can deal with it at normal
410 	 * network IPL.
411 	 */
412 	IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
413 
414 	ip6flow_inuse--;
415 	ip6flow_addstats(ip6f);
416 	rtcache_free(&ip6f->ip6f_ro);
417 	pool_put(&ip6flow_pool, ip6f);
418 }
419 
420 static struct ip6flow *
421 ip6flow_reap_locked(int just_one)
422 {
423 	struct ip6flow *ip6f;
424 
425 	KASSERT(mutex_owned(&ip6flow_lock));
426 
427 	/*
428 	 * This case must remove one ip6flow. Furthermore, this case is used in
429 	 * fast path(packet processing path). So, simply remove TAILQ_LAST one.
430 	 */
431 	if (just_one) {
432 		ip6f = TAILQ_LAST(&ip6flowlist, ip6flowhead);
433 		KASSERT(ip6f != NULL);
434 
435 		IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
436 
437 		ip6flow_addstats(ip6f);
438 		rtcache_free(&ip6f->ip6f_ro);
439 		return ip6f;
440 	}
441 
442 	/*
443 	 * This case is used in slow path(sysctl).
444 	 * At first, remove invalid rtcache ip6flow, and then remove TAILQ_LAST
445 	 * ip6flow if it is ensured least recently used by comparing last_uses.
446 	 */
447 	while (ip6flow_inuse > ip6_maxflows) {
448 		struct ip6flow *maybe_ip6f = TAILQ_LAST(&ip6flowlist, ip6flowhead);
449 
450 		TAILQ_FOREACH(ip6f, &ip6flowlist, ip6f_list) {
451 			/*
452 			 * If this no longer points to a valid route -
453 			 * reclaim it.
454 			 */
455 			if (rtcache_validate(&ip6f->ip6f_ro) == NULL)
456 				goto done;
457 			/*
458 			 * choose the one that's been least recently
459 			 * used or has had the least uses in the
460 			 * last 1.5 intervals.
461 			 */
462 			if (ip6f->ip6f_timer < maybe_ip6f->ip6f_timer
463 			    || ((ip6f->ip6f_timer == maybe_ip6f->ip6f_timer)
464 				&& (ip6f->ip6f_last_uses + ip6f->ip6f_uses
465 				    < maybe_ip6f->ip6f_last_uses + maybe_ip6f->ip6f_uses)))
466 				maybe_ip6f = ip6f;
467 		}
468 		ip6f = maybe_ip6f;
469 	    done:
470 		/*
471 		 * Remove the entry from the flow table
472 		 */
473 		IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
474 
475 		rtcache_free(&ip6f->ip6f_ro);
476 		ip6flow_inuse--;
477 		ip6flow_addstats(ip6f);
478 		pool_put(&ip6flow_pool, ip6f);
479 	}
480 	return NULL;
481 }
482 
483 /*
484  * Reap one or more flows - ip6flow_reap may remove
485  * multiple flows if net.inet6.ip6.maxflows is reduced.
486  */
487 struct ip6flow *
488 ip6flow_reap(int just_one)
489 {
490 	struct ip6flow *ip6f;
491 
492 	mutex_enter(&ip6flow_lock);
493 	ip6f = ip6flow_reap_locked(just_one);
494 	mutex_exit(&ip6flow_lock);
495 	return ip6f;
496 }
497 
/*
 * Non-zero while the expiry work item is queued.  Set atomically by
 * ip6flow_slowtimo() and cleared by ip6flow_slowtimo_work().
 */
static unsigned int ip6flow_work_enqueued = 0;
499 
500 void
501 ip6flow_slowtimo_work(struct work *wk, void *arg)
502 {
503 	struct ip6flow *ip6f, *next_ip6f;
504 
505 	/* We can allow enqueuing another work at this point */
506 	atomic_swap_uint(&ip6flow_work_enqueued, 0);
507 
508 	mutex_enter(softnet_lock);
509 	mutex_enter(&ip6flow_lock);
510 	KERNEL_LOCK(1, NULL);
511 
512 	for (ip6f = TAILQ_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) {
513 		next_ip6f = TAILQ_NEXT(ip6f, ip6f_list);
514 		if (PRT_SLOW_ISEXPIRED(ip6f->ip6f_timer) ||
515 		    rtcache_validate(&ip6f->ip6f_ro) == NULL) {
516 			ip6flow_free(ip6f);
517 		} else {
518 			ip6f->ip6f_last_uses = ip6f->ip6f_uses;
519 			ip6flow_addstats(ip6f);
520 			ip6f->ip6f_uses = 0;
521 			ip6f->ip6f_dropped = 0;
522 			ip6f->ip6f_forwarded = 0;
523 		}
524 	}
525 
526 	KERNEL_UNLOCK_ONE(NULL);
527 	mutex_exit(&ip6flow_lock);
528 	mutex_exit(softnet_lock);
529 }
530 
531 void
532 ip6flow_slowtimo(void)
533 {
534 
535 	/* Avoid enqueuing another work when one is already enqueued */
536 	if (atomic_swap_uint(&ip6flow_work_enqueued, 1) == 1)
537 		return;
538 
539 	workqueue_enqueue(ip6flow_slowtimo_wq, &ip6flow_slowtimo_wk, NULL);
540 }
541 
542 /*
543  * We have successfully forwarded a packet using the normal
544  * IPv6 stack. Now create/update a flow.
545  */
546 void
547 ip6flow_create(const struct route *ro, struct mbuf *m)
548 {
549 	const struct ip6_hdr *ip6;
550 	struct ip6flow *ip6f;
551 	size_t hash;
552 
553 	mutex_enter(&ip6flow_lock);
554 
555 	ip6 = mtod(m, const struct ip6_hdr *);
556 
557 	/*
558 	 * If IPv6 Fast Forward is disabled, don't create a flow.
559 	 * It can be disabled by setting net.inet6.ip6.maxflows to 0.
560 	 *
561 	 * Don't create a flow for ICMPv6 messages.
562 	 */
563 	if (ip6_maxflows == 0 || ip6->ip6_nxt == IPPROTO_IPV6_ICMP) {
564 		mutex_exit(&ip6flow_lock);
565 		return;
566 	}
567 
568 	KERNEL_LOCK(1, NULL);
569 
570 	/*
571 	 * See if an existing flow exists.  If so:
572 	 *	- Remove the flow
573 	 *	- Add flow statistics
574 	 *	- Free the route
575 	 *	- Reset statistics
576 	 *
577 	 * If a flow doesn't exist allocate a new one if
578 	 * ip6_maxflows hasn't reached its limit. If it has
579 	 * been reached, reap some flows.
580 	 */
581 	ip6f = ip6flow_lookup(ip6);
582 	if (ip6f == NULL) {
583 		if (ip6flow_inuse >= ip6_maxflows) {
584 			ip6f = ip6flow_reap_locked(1);
585 		} else {
586 			ip6f = pool_get(&ip6flow_pool, PR_NOWAIT);
587 			if (ip6f == NULL)
588 				goto out;
589 			ip6flow_inuse++;
590 		}
591 		memset(ip6f, 0, sizeof(*ip6f));
592 	} else {
593 		IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f);
594 
595 		ip6flow_addstats(ip6f);
596 		rtcache_free(&ip6f->ip6f_ro);
597 		ip6f->ip6f_uses = 0;
598 		ip6f->ip6f_last_uses = 0;
599 		ip6f->ip6f_dropped = 0;
600 		ip6f->ip6f_forwarded = 0;
601 	}
602 
603 	/*
604 	 * Fill in the updated/new details.
605 	 */
606 	rtcache_copy(&ip6f->ip6f_ro, ro);
607 	ip6f->ip6f_dst = ip6->ip6_dst;
608 	ip6f->ip6f_src = ip6->ip6_src;
609 	ip6f->ip6f_flow = ip6->ip6_flow;
610 	PRT_SLOW_ARM(ip6f->ip6f_timer, IP6FLOW_TIMER);
611 
612 	/*
613 	 * Insert into the approriate bucket of the flow table.
614 	 */
615 	hash = ip6flow_hash(ip6);
616 	IP6FLOW_INSERT(hash, ip6f);
617 
618  out:
619 	KERNEL_UNLOCK_ONE(NULL);
620 	mutex_exit(&ip6flow_lock);
621 }
622 
623 /*
624  * Invalidate/remove all flows - if new_size is positive we
625  * resize the hash table.
626  */
627 int
628 ip6flow_invalidate_all(int new_size)
629 {
630 	struct ip6flow *ip6f, *next_ip6f;
631 	int error;
632 
633 	error = 0;
634 
635 	mutex_enter(&ip6flow_lock);
636 
637 	for (ip6f = TAILQ_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) {
638 		next_ip6f = TAILQ_NEXT(ip6f, ip6f_list);
639 		ip6flow_free(ip6f);
640 	}
641 
642 	if (new_size)
643 		error = ip6flow_init_locked(new_size);
644 
645 	mutex_exit(&ip6flow_lock);
646 
647 	return error;
648 }
649 
650 /*
651  * sysctl helper routine for net.inet.ip6.maxflows. Since
652  * we could reduce this value, call ip6flow_reap();
653  */
654 static int
655 sysctl_net_inet6_ip6_maxflows(SYSCTLFN_ARGS)
656 {
657 	int error;
658 
659 	error = sysctl_lookup(SYSCTLFN_CALL(rnode));
660 	if (error || newp == NULL)
661 		return (error);
662 
663 	mutex_enter(softnet_lock);
664 	KERNEL_LOCK(1, NULL);
665 
666 	ip6flow_reap(0);
667 
668 	KERNEL_UNLOCK_ONE(NULL);
669 	mutex_exit(softnet_lock);
670 
671 	return (0);
672 }
673 
674 static int
675 sysctl_net_inet6_ip6_hashsize(SYSCTLFN_ARGS)
676 {
677 	int error, tmp;
678 	struct sysctlnode node;
679 
680 	node = *rnode;
681 	tmp = ip6_hashsize;
682 	node.sysctl_data = &tmp;
683 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
684 	if (error || newp == NULL)
685 		return (error);
686 
687 	if ((tmp & (tmp - 1)) == 0 && tmp != 0) {
688 		/*
689 		 * Can only fail due to malloc()
690 		 */
691 		mutex_enter(softnet_lock);
692 		KERNEL_LOCK(1, NULL);
693 
694 		error = ip6flow_invalidate_all(tmp);
695 
696 		KERNEL_UNLOCK_ONE(NULL);
697 		mutex_exit(softnet_lock);
698 	} else {
699 		/*
700 		 * EINVAL if not a power of 2
701 		 */
702 		error = EINVAL;
703 	}
704 
705 	return error;
706 }
707 
708 static void
709 ip6flow_sysctl_init(struct sysctllog **clog)
710 {
711 
712 	sysctl_createv(clog, 0, NULL, NULL,
713 		       CTLFLAG_PERMANENT,
714 		       CTLTYPE_NODE, "inet6",
715 		       SYSCTL_DESCR("PF_INET6 related settings"),
716 		       NULL, 0, NULL, 0,
717 		       CTL_NET, PF_INET6, CTL_EOL);
718 	sysctl_createv(clog, 0, NULL, NULL,
719 		       CTLFLAG_PERMANENT,
720 		       CTLTYPE_NODE, "ip6",
721 		       SYSCTL_DESCR("IPv6 related settings"),
722 		       NULL, 0, NULL, 0,
723 		       CTL_NET, PF_INET6, IPPROTO_IPV6, CTL_EOL);
724 
725 	sysctl_createv(clog, 0, NULL, NULL,
726 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
727 			CTLTYPE_INT, "maxflows",
728 			SYSCTL_DESCR("Number of flows for fast forwarding (IPv6)"),
729 			sysctl_net_inet6_ip6_maxflows, 0, &ip6_maxflows, 0,
730 			CTL_NET, PF_INET6, IPPROTO_IPV6,
731 			CTL_CREATE, CTL_EOL);
732 	sysctl_createv(clog, 0, NULL, NULL,
733 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
734 			CTLTYPE_INT, "hashsize",
735 			SYSCTL_DESCR("Size of hash table for fast forwarding (IPv6)"),
736 			sysctl_net_inet6_ip6_hashsize, 0, &ip6_hashsize, 0,
737 			CTL_NET, PF_INET6, IPPROTO_IPV6,
738 			CTL_CREATE, CTL_EOL);
739 }
740