xref: /netbsd-src/sys/altq/altq_subr.c (revision fad4c9f71477ae11cea2ee75ec82151ac770a534)
1 /*	$NetBSD: altq_subr.c,v 1.14 2006/04/23 06:46:40 christos Exp $	*/
2 /*	$KAME: altq_subr.c,v 1.11 2002/01/11 08:11:49 kjc Exp $	*/
3 
4 /*
5  * Copyright (C) 1997-2002
6  *	Sony Computer Science Laboratories Inc.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: altq_subr.c,v 1.14 2006/04/23 06:46:40 christos Exp $");
32 
33 #if defined(__FreeBSD__) || defined(__NetBSD__)
34 #include "opt_altq.h"
35 #if (__FreeBSD__ != 2)
36 #include "opt_inet.h"
37 #ifdef __FreeBSD__
38 #include "opt_inet6.h"
39 #endif
40 #endif
41 #endif /* __FreeBSD__ || __NetBSD__ */
42 
43 #include <sys/param.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/kernel.h>
51 #include <sys/errno.h>
52 #include <sys/syslog.h>
53 #include <sys/sysctl.h>
54 #include <sys/queue.h>
55 
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/ip.h>
63 #ifdef INET6
64 #include <netinet/ip6.h>
65 #endif
66 #include <netinet/tcp.h>
67 #include <netinet/udp.h>
68 
69 #include <altq/altq.h>
70 #include <altq/altq_conf.h>
71 
72 /* machine dependent clock related includes */
73 #ifdef __FreeBSD__
74 #include "opt_cpu.h"	/* for FreeBSD-2.2.8 to get i586_ctr_freq */
75 #include <machine/clock.h>
76 #endif
77 #if defined(__i386__)
78 #include <machine/specialreg.h>		/* for CPUID_TSC */
79 #ifdef __FreeBSD__
80 #include <machine/md_var.h>		/* for cpu_feature */
81 #elif defined(__NetBSD__) || defined(__OpenBSD__)
82 #include <machine/cpu.h>		/* for cpu_feature */
83 #endif
84 #endif /* __i386__ */
85 
86 /*
87  * internal function prototypes
88  */
89 static void	tbr_timeout __P((void *));
90 static int 	extract_ports4 __P((struct mbuf *, struct ip *,
91 				    struct flowinfo_in *));
92 #ifdef INET6
93 static int 	extract_ports6 __P((struct mbuf *, struct ip6_hdr *,
94 				    struct flowinfo_in6 *));
95 #endif
96 static int	apply_filter4 __P((u_int32_t, struct flow_filter *,
97 				   struct flowinfo_in *));
98 static int	apply_ppfilter4 __P((u_int32_t, struct flow_filter *,
99 				     struct flowinfo_in *));
100 #ifdef INET6
101 static int	apply_filter6 __P((u_int32_t, struct flow_filter6 *,
102 					   struct flowinfo_in6 *));
103 #endif
104 static int	apply_tosfilter4 __P((u_int32_t, struct flow_filter *,
105 					     struct flowinfo_in *));
106 static u_long	get_filt_handle __P((struct acc_classifier *, int));
107 static struct acc_filter *filth_to_filtp __P((struct acc_classifier *,
108 					      u_long));
109 static u_int32_t filt2fibmask __P((struct flow_filter *));
110 
111 static void 	ip4f_cache __P((struct ip *, struct flowinfo_in *));
112 static int 	ip4f_lookup __P((struct ip *, struct flowinfo_in *));
113 static int 	ip4f_init __P((void));
114 static struct ip4_frag	*ip4f_alloc __P((void));
115 static void 	ip4f_free __P((struct ip4_frag *));
116 
117 int (*altq_input) __P((struct mbuf *, int)) = NULL;
118 static int tbr_timer = 0;	/* token bucket regulator timer */
119 static struct callout tbr_callout = CALLOUT_INITIALIZER;
120 
121 /*
122  * alternate queueing support routines
123  */
124 
125 /* look up the queue state by the interface name and the queuing type. */
126 void *
127 altq_lookup(name, type)
128 	char *name;
129 	int type;
130 {
131 	struct ifnet *ifp;
132 
133 	if ((ifp = ifunit(name)) != NULL) {
134 		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
135 			return (ifp->if_snd.altq_disc);
136 	}
137 
138 	return NULL;
139 }
140 
141 int
142 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
143 	struct ifaltq *ifq;
144 	int type;
145 	void *discipline;
146 	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
147 	struct mbuf *(*dequeue)(struct ifaltq *, int);
148 	int (*request)(struct ifaltq *, int, void *);
149 	void *clfier;
150 	void *(*classify)(void *, struct mbuf *, int);
151 {
152 	if (!ALTQ_IS_READY(ifq))
153 		return ENXIO;
154 	if (ALTQ_IS_ENABLED(ifq))
155 		return EBUSY;
156 	if (ALTQ_IS_ATTACHED(ifq))
157 		return EEXIST;
158 	ifq->altq_type     = type;
159 	ifq->altq_disc     = discipline;
160 	ifq->altq_enqueue  = enqueue;
161 	ifq->altq_dequeue  = dequeue;
162 	ifq->altq_request  = request;
163 	ifq->altq_clfier   = clfier;
164 	ifq->altq_classify = classify;
165 	ifq->altq_flags &= ALTQF_CANTCHANGE;
166 #ifdef ALTQ_KLD
167 	altq_module_incref(type);
168 #endif
169 	return 0;
170 }
171 
172 int
173 altq_detach(ifq)
174 	struct ifaltq *ifq;
175 {
176 	if (!ALTQ_IS_READY(ifq))
177 		return ENXIO;
178 	if (ALTQ_IS_ENABLED(ifq))
179 		return EBUSY;
180 	if (!ALTQ_IS_ATTACHED(ifq))
181 		return (0);
182 
183 #ifdef ALTQ_KLD
184 	altq_module_declref(ifq->altq_type);
185 #endif
186 	ifq->altq_type     = ALTQT_NONE;
187 	ifq->altq_disc     = NULL;
188 	ifq->altq_enqueue  = NULL;
189 	ifq->altq_dequeue  = NULL;
190 	ifq->altq_request  = NULL;
191 	ifq->altq_clfier   = NULL;
192 	ifq->altq_classify = NULL;
193 	ifq->altq_flags &= ALTQF_CANTCHANGE;
194 	return 0;
195 }
196 
197 int
198 altq_enable(ifq)
199 	struct ifaltq *ifq;
200 {
201 	int s;
202 
203 	if (!ALTQ_IS_READY(ifq))
204 		return ENXIO;
205 	if (ALTQ_IS_ENABLED(ifq))
206 		return 0;
207 
208 	s = splnet();
209 	IFQ_PURGE(ifq);
210 	ASSERT(ifq->ifq_len == 0);
211 	ifq->altq_flags |= ALTQF_ENABLED;
212 	if (ifq->altq_clfier != NULL)
213 		ifq->altq_flags |= ALTQF_CLASSIFY;
214 	splx(s);
215 
216 	return 0;
217 }
218 
219 int
220 altq_disable(ifq)
221 	struct ifaltq *ifq;
222 {
223 	int s;
224 
225 	if (!ALTQ_IS_ENABLED(ifq))
226 		return 0;
227 
228 	s = splnet();
229 	IFQ_PURGE(ifq);
230 	ASSERT(ifq->ifq_len == 0);
231 	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
232 	splx(s);
233 	return 0;
234 }
235 
/*
 * Report a failed ALTQ assertion and panic.  Called from the ASSERT
 * macro with the source location and the text of the expression that
 * failed.  Never returns.
 */
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
		     failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
246 
247 /*
248  * internal representation of token bucket parameters
249  *	rate: 	byte_per_unittime << 32
250  *		(((bits_per_sec) / 8) << 32) / machclk_freq
251  *	depth:	byte << 32
252  *
253  */
254 #define	TBR_SHIFT	32
255 #define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
256 #define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
257 
/*
 * tbr_dequeue: dequeue a packet through the token bucket regulator.
 *
 * op is ALTDQ_POLL or ALTDQ_REMOVE.  A poll already consumed the token
 * check, so a remove that immediately follows a poll bypasses it
 * (tbr_lastop remembers the previous operation).  Tokens refill at
 * tbr_rate per machine-clock tick, capped at tbr_depth; while the
 * bucket is non-positive no packet may be dequeued.  On a successful
 * remove the (scaled) packet length is charged against the bucket.
 * Returns the dequeued mbuf, or NULL when rate-limited or the queue
 * is empty.
 */
struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				/* enough time has passed to fill the bucket */
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	/* hand off to the discipline when enabled, else use the plain ifq */
	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			IF_POLL(ifq, m);
		else
			IF_DEQUEUE(ifq, m);
	}

	/* charge the packet length (scaled by TBR_SHIFT) on actual removal */
	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}
304 
/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 *
 * returns 0 on success, ENXIO when no CPU clock is available,
 * ENOENT when asked to delete a regulator that does not exist,
 * or ENOMEM on allocation failure.
 */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr, *otbr;

	/* the rate conversion below divides by the machine clock frequency */
	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no CPU clock available!\n");
		return (ENXIO);
	}

	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL)
			return (ENOENT);
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_WAITOK|M_ZERO);
	if (tbr == NULL)
		return (ENOMEM);

	/* rate: bytes per machine-clock tick, scaled by TBR_SHIFT */
	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		/* zero rate: the bucket effectively never refills */
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	/* start with a full bucket */
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		/* first regulator installed: start the periodic kick timer */
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	return (0);
}
359 
/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 *
 * Runs once per clock tick while any interface has a token bucket
 * regulator enabled; re-arms itself until no regulator remains active,
 * at which point tbr_timer is cleared so tbr_set() can restart it.
 */
static void
tbr_timeout(arg)
	void *arg;
{
	struct ifnet *ifp;
	int active, s;

	active = 0;
	s = splnet();
	/* the ifnet list head/link names differ between BSD variants */
#ifdef __FreeBSD__
#if (__FreeBSD_version < 300000)
	for (ifp = ifnet; ifp; ifp = ifp->if_next)
#else
	for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_link.tqe_next)
#endif
#else /* !FreeBSD */
	for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_list.tqe_next)
#endif
	{
		if (!TBR_IS_ENABLED(&ifp->if_snd))
			continue;
		active++;
		/* restart output so rate-limited packets get another chance */
		if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL)
			(*ifp->if_start)(ifp);
	}
	splx(s);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
#if defined(__alpha__) && !defined(ALTQ_NOPCC)
	{
		/*
		 * XXX read out the machine dependent clock once a second
		 * to detect counter wrap-around.
		 */
		static u_int cnt;

		if (++cnt >= hz) {
			(void)read_machclk();
			cnt = 0;
		}
	}
#endif /* __alpha__ && !ALTQ_NOPCC */
}
409 
410 /*
411  * get token bucket regulator profile
412  */
413 int
414 tbr_get(ifq, profile)
415 	struct ifaltq *ifq;
416 	struct tb_profile *profile;
417 {
418 	struct tb_regulator *tbr;
419 
420 	if ((tbr = ifq->altq_tbr) == NULL) {
421 		profile->rate = 0;
422 		profile->depth = 0;
423 	} else {
424 		profile->rate =
425 		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
426 		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
427 	}
428 	return (0);
429 }
430 
431 
432 #ifndef IPPROTO_ESP
433 #define	IPPROTO_ESP	50		/* encapsulating security payload */
434 #endif
435 #ifndef IPPROTO_AH
436 #define	IPPROTO_AH	51		/* authentication header */
437 #endif
438 
/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 *
 * returns 1 when the flow info was filled in, 0 on failure (in which
 * case the flowinfo is marked AF_UNSPEC).
 */
int
altq_extractflow(m, af, flow, filt_bmask)
	struct mbuf *m;
	int af;
	struct flowinfo *flow;
	u_int32_t	filt_bmask;
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		/* traffic class: bits 20-27 of the host-order flow word */
		fin6->fi6_tclass   = (ntohl(ip6->ip6_flow) >> 20) & 0xff;

		/* low 20 bits of ip6_flow hold the flow label */
		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}
530 
531 /*
532  * helper routine to extract port numbers
533  */
/*
 * structure for ipsec and ipv6 option header template.
 * overlays the leading bytes common to AH, ESP and the v6 option
 * headers so the extract_ports* routines can walk the header chain.
 */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};
542 
/*
 * extract port numbers from a ipv4 packet.
 *
 * Fills in fi_sport/fi_dport/fi_gpi of *fin.  For non-first fragments
 * the transport header is absent, so the ports are taken from the
 * fragment cache (ip4f_lookup); a first fragment is cached on the way
 * out so later fragments can be matched.  Returns 1 on success, 0 when
 * the headers cannot be located in the mbuf chain.
 */
static int
extract_ports4(m, ip, fin)
	struct mbuf *m;
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int 	off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip >= m0->m_data) &&
		    ((caddr_t)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	/* offset of the transport header within m0 (ip_hl is in words) */
	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;

#ifdef ALTQ_IPSEC
 again:
#endif
	/* advance to the mbuf that holds the header at 'off' */
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);  /* bogus ip_hl! */
	}
	/* need at least 4 contiguous bytes for the port/spi fields */
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		/* TCP and UDP both start with the source/dest port pair */
		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
		}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0){
			u_int32_t *gpi;

			/* ESP: the SPI is the first 32-bit word */
			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi   = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* AH length field counts 32-bit words beyond 8 bytes */
			off += 8 + (opt6->opt6_hlen * 4);
			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
				fin->fi_gpi = opt6->ah_spi;
		}
		/* goto the next header */
		goto again;
#endif  /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}
642 
643 #ifdef INET6
/*
 * extract port (and protocol) numbers from an ipv6 packet, walking the
 * extension header chain until a transport or IPsec header is found.
 * Fills in fi6_sport/fi6_dport/fi6_gpi/fi6_proto of *fin6.  Returns 1
 * on success, 0 on failure (fragments are not supported).
 */
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		/* advance to the mbuf that holds the header at 'off' */
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		/* need at least 4 contiguous bytes for the fields we read */
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			/* TCP and UDP both start with the port pair */
			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				/* ESP: the SPI is the first 32-bit word */
				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi   = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			/* AH length field counts 32-bit words beyond 8 bytes */
			off += 8 + (opt6->opt6_hlen * 4);
			/* goto the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* option header length counts 8-byte units minus one */
			off += (opt6->opt6_hlen + 1) * 8;
			/* goto the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
739 #endif /* INET6 */
740 
741 /*
742  * altq common classifier
743  */
/*
 * acc_add_filter: add a flow filter to a classifier.
 *
 * Canonicalizes the filter's address masks (wildcard addresses get a
 * zero mask, unmasked addresses get a full mask), picks a hash bucket
 * (IPv4: by dest address, IPv6: by flow label; wildcards go to
 * ACC_WILDCARD_INDEX), and inserts the filter into that bucket's list
 * ordered by descending rule number.  The assigned handle is returned
 * through *phandle.  Returns 0, EINVAL for an unsupported address
 * family, or ENOMEM.
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void	*class;
	u_long	*phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK|M_ZERO);
	if (afp == NULL)
		return (ENOMEM);

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses  */
		   filter4->ff_flow.fi_dst.s_addr &=
		       filter4->ff_mask.mask_dst.s_addr;
		   filter4->ff_flow.fi_src.s_addr &=
		       filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		/* same wildcard/full-mask canonicalization as the v4 case */
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses  */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		/* v6 filters hash on the flow label; zero is a wildcard */
		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
	s = splnet();
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}
864 
865 int
866 acc_delete_filter(classifier, handle)
867 	struct acc_classifier *classifier;
868 	u_long handle;
869 {
870 	struct acc_filter *afp;
871 	int	s;
872 
873 	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
874 		return (EINVAL);
875 
876 	s = splnet();
877 	LIST_REMOVE(afp, f_chain);
878 	splx(s);
879 
880 	free(afp, M_DEVBUF);
881 
882 	/* todo: update filt_bmask */
883 
884 	return (0);
885 }
886 
/*
 * delete filters referencing to the specified class.
 * if the all flag is not 0, delete all the filters.
 *
 * Always returns 0.  When all filters are removed, the classifier's
 * combined field bitmask is reset.
 */
int
acc_discard_filters(classifier, class, all)
	struct acc_classifier *classifier;
	void	*class;
	int	all;
{
	struct acc_filter *afp;
	int	i, s;

	s = splnet();
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		/*
		 * LIST_FOREACH can't continue safely after its current
		 * entry is freed, so after each removal we break out and
		 * rescan the bucket from the head; the bucket is done when
		 * the scan completes with afp == NULL.
		 */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}
919 
/*
 * acc_classify: map a packet to a class by matching it against the
 * classifier's filter lists.
 *
 * Extracts the flow info once, then searches filters in rule order.
 * Two IPv4 fast paths avoid the hash lookup when the classifier only
 * uses TOS, or only protocol/ports.  Otherwise the hash bucket for the
 * packet (v4: dest address, v6: flow label) is searched first and the
 * wildcard bucket second.  Returns the matched filter's class, or NULL
 * when nothing matches.
 */
void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flow label can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}
1019 
1020 static int
1021 apply_filter4(fbmask, filt, pkt)
1022 	u_int32_t	fbmask;
1023 	struct flow_filter *filt;
1024 	struct flowinfo_in *pkt;
1025 {
1026 	if (filt->ff_flow.fi_family != AF_INET)
1027 		return (0);
1028 	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1029 		return (0);
1030 	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1031 		return (0);
1032 	if ((fbmask & FIMB4_DADDR) &&
1033 	    filt->ff_flow.fi_dst.s_addr !=
1034 	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
1035 		return (0);
1036 	if ((fbmask & FIMB4_SADDR) &&
1037 	    filt->ff_flow.fi_src.s_addr !=
1038 	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
1039 		return (0);
1040 	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1041 		return (0);
1042 	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1043 	    (pkt->fi_tos & filt->ff_mask.mask_tos))
1044 		return (0);
1045 	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
1046 		return (0);
1047 	/* match */
1048 	return (1);
1049 }
1050 
1051 /*
1052  * filter matching function optimized for a common case that checks
1053  * only protocol and port numbers
1054  */
1055 static int
1056 apply_ppfilter4(fbmask, filt, pkt)
1057 	u_int32_t	fbmask;
1058 	struct flow_filter *filt;
1059 	struct flowinfo_in *pkt;
1060 {
1061 	if (filt->ff_flow.fi_family != AF_INET)
1062 		return (0);
1063 	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1064 		return (0);
1065 	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1066 		return (0);
1067 	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1068 		return (0);
1069 	/* match */
1070 	return (1);
1071 }
1072 
1073 /*
1074  * filter matching function only for tos field.
1075  */
1076 static int
1077 apply_tosfilter4(fbmask, filt, pkt)
1078 	u_int32_t	fbmask;
1079 	struct flow_filter *filt;
1080 	struct flowinfo_in *pkt;
1081 {
1082 	if (filt->ff_flow.fi_family != AF_INET)
1083 		return (0);
1084 	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1085 	    (pkt->fi_tos & filt->ff_mask.mask_tos))
1086 		return (0);
1087 	/* match */
1088 	return (1);
1089 }
1090 
1091 #ifdef INET6
1092 static int
1093 apply_filter6(fbmask, filt, pkt)
1094 	u_int32_t	fbmask;
1095 	struct flow_filter6 *filt;
1096 	struct flowinfo_in6 *pkt;
1097 {
1098 	int i;
1099 
1100 	if (filt->ff_flow6.fi6_family != AF_INET6)
1101 		return (0);
1102 	if ((fbmask & FIMB6_FLABEL) &&
1103 	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
1104 		return (0);
1105 	if ((fbmask & FIMB6_PROTO) &&
1106 	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
1107 		return (0);
1108 	if ((fbmask & FIMB6_SPORT) &&
1109 	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
1110 		return (0);
1111 	if ((fbmask & FIMB6_DPORT) &&
1112 	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
1113 		return (0);
1114 	if (fbmask & FIMB6_SADDR) {
1115 		for (i = 0; i < 4; i++)
1116 			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
1117 			    (pkt->fi6_src.s6_addr32[i] &
1118 			     filt->ff_mask6.mask6_src.s6_addr32[i]))
1119 				return (0);
1120 	}
1121 	if (fbmask & FIMB6_DADDR) {
1122 		for (i = 0; i < 4; i++)
1123 			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
1124 			    (pkt->fi6_dst.s6_addr32[i] &
1125 			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
1126 				return (0);
1127 	}
1128 	if ((fbmask & FIMB6_TCLASS) &&
1129 	    filt->ff_flow6.fi6_tclass !=
1130 	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
1131 		return (0);
1132 	if ((fbmask & FIMB6_GPI) &&
1133 	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
1134 		return (0);
1135 	/* match */
1136 	return (1);
1137 }
1138 #endif /* INET6 */
1139 
1140 /*
1141  *  filter handle:
1142  *	bit 20-28: index to the filter hash table
1143  *	bit  0-19: unique id in the hash bucket.
1144  */
1145 static u_long
1146 get_filt_handle(classifier, i)
1147 	struct acc_classifier *classifier;
1148 	int	i;
1149 {
1150 	static u_long handle_number = 1;
1151 	u_long 	handle;
1152 	struct acc_filter *afp;
1153 
1154 	while (1) {
1155 		handle = handle_number++ & 0x000fffff;
1156 
1157 		if (LIST_EMPTY(&classifier->acc_filters[i]))
1158 			break;
1159 
1160 		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1161 			if ((afp->f_handle & 0x000fffff) == handle)
1162 				break;
1163 		if (afp == NULL)
1164 			break;
1165 		/* this handle is already used, try again */
1166 	}
1167 
1168 	return ((i << 20) | handle);
1169 }
1170 
1171 /* convert filter handle to filter pointer */
1172 static struct acc_filter *
1173 filth_to_filtp(classifier, handle)
1174 	struct acc_classifier *classifier;
1175 	u_long handle;
1176 {
1177 	struct acc_filter *afp;
1178 	int	i;
1179 
1180 	i = ACC_GET_HINDEX(handle);
1181 
1182 	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1183 		if (afp->f_handle == handle)
1184 			return (afp);
1185 
1186 	return (NULL);
1187 }
1188 
1189 /* create flowinfo bitmask */
1190 static u_int32_t
1191 filt2fibmask(filt)
1192 	struct flow_filter *filt;
1193 {
1194 	u_int32_t mask = 0;
1195 #ifdef INET6
1196 	struct flow_filter6 *filt6;
1197 #endif
1198 
1199 	switch (filt->ff_flow.fi_family) {
1200 	case AF_INET:
1201 		if (filt->ff_flow.fi_proto != 0)
1202 			mask |= FIMB4_PROTO;
1203 		if (filt->ff_flow.fi_tos != 0)
1204 			mask |= FIMB4_TOS;
1205 		if (filt->ff_flow.fi_dst.s_addr != 0)
1206 			mask |= FIMB4_DADDR;
1207 		if (filt->ff_flow.fi_src.s_addr != 0)
1208 			mask |= FIMB4_SADDR;
1209 		if (filt->ff_flow.fi_sport != 0)
1210 			mask |= FIMB4_SPORT;
1211 		if (filt->ff_flow.fi_dport != 0)
1212 			mask |= FIMB4_DPORT;
1213 		if (filt->ff_flow.fi_gpi != 0)
1214 			mask |= FIMB4_GPI;
1215 		break;
1216 #ifdef INET6
1217 	case AF_INET6:
1218 		filt6 = (struct flow_filter6 *)filt;
1219 
1220 		if (filt6->ff_flow6.fi6_proto != 0)
1221 			mask |= FIMB6_PROTO;
1222 		if (filt6->ff_flow6.fi6_tclass != 0)
1223 			mask |= FIMB6_TCLASS;
1224 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
1225 			mask |= FIMB6_DADDR;
1226 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
1227 			mask |= FIMB6_SADDR;
1228 		if (filt6->ff_flow6.fi6_sport != 0)
1229 			mask |= FIMB6_SPORT;
1230 		if (filt6->ff_flow6.fi6_dport != 0)
1231 			mask |= FIMB6_DPORT;
1232 		if (filt6->ff_flow6.fi6_gpi != 0)
1233 			mask |= FIMB6_GPI;
1234 		if (filt6->ff_flow6.fi6_flowlabel != 0)
1235 			mask |= FIMB6_FLABEL;
1236 		break;
1237 #endif /* INET6 */
1238 	}
1239 	return (mask);
1240 }
1241 
1242 
1243 /*
1244  * helper functions to handle IPv4 fragments.
1245  * currently only in-sequence fragments are handled.
1246  *	- fragment info is cached in a LRU list.
1247  *	- when a first fragment is found, cache its flow info.
1248  *	- when a non-first fragment is found, lookup the cache.
1249  */
1250 
/* one cached fragment-train record (kept on the ip4f_list LRU) */
struct ip4_frag {
    TAILQ_ENTRY(ip4_frag) ip4f_chain;	/* LRU list linkage */
    char    ip4f_valid;			/* non-zero when entry holds live data */
    u_short ip4f_id;			/* IP identification of the datagram */
    struct flowinfo_in ip4f_info;	/* flow info taken from the first fragment */
};
1257 
1258 static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
1259 
1260 #define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
1261 
1262 
1263 static void
1264 ip4f_cache(ip, fin)
1265 	struct ip *ip;
1266 	struct flowinfo_in *fin;
1267 {
1268 	struct ip4_frag *fp;
1269 
1270 	if (TAILQ_EMPTY(&ip4f_list)) {
1271 		/* first time call, allocate fragment cache entries. */
1272 		if (ip4f_init() < 0)
1273 			/* allocation failed! */
1274 			return;
1275 	}
1276 
1277 	fp = ip4f_alloc();
1278 	fp->ip4f_id = ip->ip_id;
1279 	fp->ip4f_info.fi_proto = ip->ip_p;
1280 	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
1281 	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
1282 
1283 	/* save port numbers */
1284 	fp->ip4f_info.fi_sport = fin->fi_sport;
1285 	fp->ip4f_info.fi_dport = fin->fi_dport;
1286 	fp->ip4f_info.fi_gpi   = fin->fi_gpi;
1287 }
1288 
1289 static int
1290 ip4f_lookup(ip, fin)
1291 	struct ip *ip;
1292 	struct flowinfo_in *fin;
1293 {
1294 	struct ip4_frag *fp;
1295 
1296 	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
1297 	     fp = TAILQ_NEXT(fp, ip4f_chain))
1298 		if (ip->ip_id == fp->ip4f_id &&
1299 		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
1300 		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
1301 		    ip->ip_p == fp->ip4f_info.fi_proto) {
1302 
1303 			/* found the matching entry */
1304 			fin->fi_sport = fp->ip4f_info.fi_sport;
1305 			fin->fi_dport = fp->ip4f_info.fi_dport;
1306 			fin->fi_gpi   = fp->ip4f_info.fi_gpi;
1307 
1308 			if ((ntohs(ip->ip_off) & IP_MF) == 0)
1309 				/* this is the last fragment,
1310 				   release the entry. */
1311 				ip4f_free(fp);
1312 
1313 			return (1);
1314 		}
1315 
1316 	/* no matching entry found */
1317 	return (0);
1318 }
1319 
1320 static int
1321 ip4f_init(void)
1322 {
1323 	struct ip4_frag *fp;
1324 	int i;
1325 
1326 	TAILQ_INIT(&ip4f_list);
1327 	for (i=0; i<IP4F_TABSIZE; i++) {
1328 		fp = malloc(sizeof(struct ip4_frag), M_DEVBUF, M_NOWAIT);
1329 		if (fp == NULL) {
1330 			printf("ip4f_init: can't alloc %dth entry!\n", i);
1331 			if (i == 0)
1332 				return (-1);
1333 			return (0);
1334 		}
1335 		fp->ip4f_valid = 0;
1336 		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1337 	}
1338 	return (0);
1339 }
1340 
1341 static struct ip4_frag *
1342 ip4f_alloc(void)
1343 {
1344 	struct ip4_frag *fp;
1345 
1346 	/* reclaim an entry at the tail, put it at the head */
1347 	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
1348 	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1349 	fp->ip4f_valid = 1;
1350 	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
1351 	return (fp);
1352 }
1353 
1354 static void
1355 ip4f_free(fp)
1356 	struct ip4_frag *fp;
1357 {
1358 	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1359 	fp->ip4f_valid = 0;
1360 	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1361 }
1362 
1363 /*
1364  * read and write diffserv field in IPv4 or IPv6 header
1365  */
1366 u_int8_t
1367 read_dsfield(m, pktattr)
1368 	struct mbuf *m;
1369 	struct altq_pktattr *pktattr;
1370 {
1371 	struct mbuf *m0;
1372 	u_int8_t ds_field = 0;
1373 
1374 	if (pktattr == NULL ||
1375 	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
1376 		return ((u_int8_t)0);
1377 
1378 	/* verify that pattr_hdr is within the mbuf data */
1379 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
1380 		if ((pktattr->pattr_hdr >= m0->m_data) &&
1381 		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
1382 			break;
1383 	if (m0 == NULL) {
1384 		/* ick, pattr_hdr is stale */
1385 		pktattr->pattr_af = AF_UNSPEC;
1386 #ifdef ALTQ_DEBUG
1387 		printf("read_dsfield: can't locate header!\n");
1388 #endif
1389 		return ((u_int8_t)0);
1390 	}
1391 
1392 	if (pktattr->pattr_af == AF_INET) {
1393 		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
1394 
1395 		if (ip->ip_v != 4)
1396 			return ((u_int8_t)0);	/* version mismatch! */
1397 		ds_field = ip->ip_tos;
1398 	}
1399 #ifdef INET6
1400 	else if (pktattr->pattr_af == AF_INET6) {
1401 		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1402 		u_int32_t flowlabel;
1403 
1404 		flowlabel = ntohl(ip6->ip6_flow);
1405 		if ((flowlabel >> 28) != 6)
1406 			return ((u_int8_t)0);	/* version mismatch! */
1407 		ds_field = (flowlabel >> 20) & 0xff;
1408 	}
1409 #endif
1410 	return (ds_field);
1411 }
1412 
/*
 * Write the diffserv field into the IPv4 or IPv6 header that
 * pktattr->pattr_hdr points into.  For IPv4 the two low (CU) bits of
 * the existing TOS are preserved and the header checksum is patched
 * incrementally; for IPv6 the traffic class bits of the flow word
 * are replaced.  Silently returns if pktattr is missing, the cached
 * header pointer is stale, or the IP version does not match.
 */
void
write_dsfield(m, pktattr, dsfield)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	u_int8_t dsfield;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale; invalidate the attribute */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;		/* nothing changed; checksum stays valid */
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 * where m/m' are the old/new 16-bit words holding the
		 * TOS byte.  The unchanged high byte (ip_v/ip_hl)
		 * contributes ~x + x == 0xff, hence the 0xff00 term.
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		/* replace the traffic class bits (20-27) of the flow word */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
1476 
1477 
1478 /*
1479  * high resolution clock support taking advantage of a machine dependent
1480  * high resolution time counter (e.g., timestamp counter of intel pentium).
1481  * we assume
1482  *  - 64-bit-long monotonically-increasing counter
1483  *  - frequency range is 100M-4GHz (CPU speed)
1484  */
u_int32_t machclk_freq = 0;		/* machine clock frequency in Hz; 0 until init_machclk() runs */
u_int32_t machclk_per_tick = 0;		/* machine clock cycles per kernel tick (machclk_freq / hz) */
1487 
1488 #if (defined(__i386__) || defined(__alpha__)) && !defined(ALTQ_NOPCC)
1489 
1490 #if defined(__FreeBSD__) && defined(SMP)
1491 #error SMP system!  use ALTQ_NOPCC option.
1492 #endif
1493 
1494 #ifdef __alpha__
1495 #ifdef __FreeBSD__
1496 extern u_int32_t cycles_per_sec;	/* alpha CPU clock frequency */
1497 #elif defined(__NetBSD__) || defined(__OpenBSD__)
1498 extern u_int64_t cycles_per_usec;	/* alpha CPU clock frequency */
1499 #endif
1500 #endif /* __alpha__ */
1501 
/*
 * Determine the machine clock (TSC/PCC) frequency and derive
 * machclk_per_tick from it.  The frequency is taken from the
 * platform where available; otherwise it is measured by sleeping
 * for about one second and comparing read_machclk() deltas against
 * microtime().  On i386 without a TSC this bails out, leaving
 * machclk_freq at 0.
 */
void
init_machclk(void)
{
	/* sanity check */
#ifdef __i386__
	/* check if TSC is available */
	if ((cpu_feature & CPUID_TSC) == 0) {
		printf("altq: TSC isn't available! use ALTQ_NOPCC option.\n");
		return;
	}
#endif

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#ifdef __i386__
#ifdef __FreeBSD__
#if (__FreeBSD_version > 300000)
	machclk_freq = tsc_freq;
#else
	machclk_freq = i586_ctr_freq;
#endif
#elif defined(__NetBSD__)
	machclk_freq = (u_int32_t)curcpu()->ci_tsc_freq;
#elif defined(__OpenBSD__)
	machclk_freq = pentium_mhz * 1000000;
#endif
#elif defined(__alpha__)
#ifdef __FreeBSD__
	machclk_freq = cycles_per_sec;
#elif defined(__NetBSD__) || defined(__OpenBSD__)
	machclk_freq = (u_int32_t)(cycles_per_usec * 1000000);
#endif
#endif /* __alpha__ */

	/*
	 * if we don't know the clock frequency, measure it by counting
	 * machine clock cycles over a roughly one second tsleep and
	 * dividing by the elapsed wall-clock time from microtime().
	 */
	if (machclk_freq == 0) {
		static int	wait;	/* dummy tsleep(9) wait channel */
		struct timeval	tv_start, tv_end;
		u_int64_t	start, end, diff;
		int		timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		/* elapsed time in microseconds */
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}
1565 
1566 #ifdef __alpha__
1567 /*
1568  * make a 64bit counter value out of the 32bit alpha processor cycle counter.
1569  * read_machclk must be called within a half of its wrap-around cycle
1570  * (about 5 sec for 400MHz CPU) to properly detect a counter wrap-around.
1571  * tbr_timeout calls read_machclk once a second.
1572  */
u_int64_t
read_machclk(void)
{
	static u_int32_t last_pcc, upper;	/* persist across calls */
	u_int32_t pcc;

	pcc = (u_int32_t)alpha_rpcc();
	if (pcc <= last_pcc)
		upper++;	/* 32-bit counter wrapped since the last call */
	last_pcc = pcc;
	/* splice the software-maintained high word onto the hardware count */
	return (((u_int64_t)upper << 32) + pcc);
}
1585 #endif /* __alpha__ */
1586 #else /* !i386  && !alpha */
1587 /* use microtime() for now */
/*
 * Fallback for platforms without a usable cycle counter: advertise a
 * clock of 1MHz scaled by MACHCLK_SHIFT (presumably matching a
 * microtime()-based read_machclk() elsewhere -- confirm against the
 * header defining MACHCLK_SHIFT).
 */
void
init_machclk(void)
{
	machclk_freq = 1000000 << MACHCLK_SHIFT;
	machclk_per_tick = machclk_freq / hz;
	printf("altq: emulate %uHz CPU clock\n", machclk_freq);
}
1595 #endif /* !i386 && !alpha */
1596