xref: /onnv-gate/usr/src/uts/common/ipp/flowacct/flowacct.c (revision 8778:b4169d2ab299)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
53902Svi117747  * Common Development and Distribution License (the "License").
63902Svi117747  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
213902Svi117747 
220Sstevel@tonic-gate /*
23*8778SErik.Nordmark@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <sys/types.h>
280Sstevel@tonic-gate #include <sys/kmem.h>
290Sstevel@tonic-gate #include <sys/conf.h>
300Sstevel@tonic-gate #include <sys/atomic.h>
310Sstevel@tonic-gate #include <netinet/in.h>
320Sstevel@tonic-gate #include <netinet/in_systm.h>
330Sstevel@tonic-gate #include <netinet/ip6.h>
340Sstevel@tonic-gate #include <sys/socket.h>
350Sstevel@tonic-gate #include <sys/acct.h>
360Sstevel@tonic-gate #include <sys/exacct.h>
370Sstevel@tonic-gate #include <inet/common.h>
380Sstevel@tonic-gate #include <inet/ip.h>
390Sstevel@tonic-gate #include <inet/ip6.h>
400Sstevel@tonic-gate #include <sys/ddi.h>
410Sstevel@tonic-gate #include <sys/strsun.h>
42*8778SErik.Nordmark@Sun.COM #include <sys/strsubr.h>
430Sstevel@tonic-gate #include <ipp/flowacct/flowacct_impl.h>
440Sstevel@tonic-gate 
/*
 * flowacct - IPQoS accounting module. The module maintains an array
 * of 256 hash buckets. When the action routine is invoked for a flow,
 * if the flow (identified by the 5-tuple: saddr, daddr, sport, dport, proto)
 * is already present in the flow table (indexed by the hash function
 * FLOWACCT_FLOW_HASH) then a check is made to see if an item for this flow
 * with the same dsfield, projid & user id is present. If it is, then the
 * number of packets and the bytes are incremented for that item. If the item
 * does not exist, a new item is added for the flow. If the flow is not
 * present, an entry is made for the flow.
 *
 * A timer runs through the table and writes all the flow items that have
 * timed out to the accounting file (via exacct PSARC/1999/119), if present.
 * Configuration commands to change the timer interval are provided. The
 * flow timeout value can also be configured. While the timeout is in nsec,
 * the flow timer interval is in usec.
 * Information for an active flow can be obtained by using kstats.
 */
630Sstevel@tonic-gate 
/*
 * Used in computing the hash index.
 *
 * FLOWACCT_ADDR_HASH(addr) XORs together bytes 8, 9, 10, 13, 14 and 15 of
 * the address (accessed through its s6_addr8 member).
 */
#define	FLOWACCT_ADDR_HASH(addr)			\
	((addr).s6_addr8[8] ^ (addr).s6_addr8[9] ^	\
	(addr).s6_addr8[10] ^ (addr).s6_addr8[13] ^	\
	(addr).s6_addr8[14] ^ (addr).s6_addr8[15])

/*
 * FLOWACCT_FLOW_HASH(f) takes a pointer to an object with saddr, daddr,
 * proto, sport and dport members and yields an index in the range
 * [0, FLOW_TBL_COUNT).  The argument is parenthesized at every use so that
 * any pointer-valued expression (e.g. "p + 1") can be passed safely.
 */
#define	FLOWACCT_FLOW_HASH(f)				\
	(((FLOWACCT_ADDR_HASH((f)->saddr)) +		\
	(FLOWACCT_ADDR_HASH((f)->daddr)) +		\
	((f)->proto) + ((f)->sport) + ((f)->dport))	\
	% FLOW_TBL_COUNT)
750Sstevel@tonic-gate 
/*
 * Store in delta the number of nanoseconds by which timestamp a is ahead of
 * timestamp b; both must expose tv_sec and tv_nsec members.  delta should be
 * a hrtime_t.  Derived from the equivalent macro in ip_mroute.c.
 *
 * The expansion is a brace-enclosed block that declares a local, so it must
 * be used as a statement of its own.
 */
#define	FLOWACCT_DELTA(a, b, delta) { \
	int flowacct_dsec; \
 \
	delta = (a).tv_nsec - (b).tv_nsec; \
	flowacct_dsec = (a).tv_sec - (b).tv_sec; \
	if (flowacct_dsec != 0) { \
		/* Fold the whole-second difference in as nanoseconds */ \
		delta += ((hrtime_t)NANOSEC * flowacct_dsec); \
	} \
}
970Sstevel@tonic-gate 
980Sstevel@tonic-gate /* Debug level */
990Sstevel@tonic-gate int flowacct_debug = 0;
1000Sstevel@tonic-gate 
1010Sstevel@tonic-gate /* Collect timed out flows to be written to the accounting file */
1020Sstevel@tonic-gate typedef struct flow_records_s {
1030Sstevel@tonic-gate 	flow_usage_t *fl_use;
1040Sstevel@tonic-gate 	struct flow_records_s *next;
1050Sstevel@tonic-gate }flow_records_t;
1060Sstevel@tonic-gate 
1070Sstevel@tonic-gate /* Get port information from the packet. Ignore fragments. */
1080Sstevel@tonic-gate static void
flowacct_port_info(header_t * header,void * iph,int af,mblk_t * mp)1090Sstevel@tonic-gate flowacct_port_info(header_t *header, void *iph, int af, mblk_t *mp)
1100Sstevel@tonic-gate {
1110Sstevel@tonic-gate 	uint16_t *up;
1120Sstevel@tonic-gate 
1130Sstevel@tonic-gate 	if (af == AF_INET) {
1140Sstevel@tonic-gate 		ipha_t *ipha = (ipha_t *)iph;
1150Sstevel@tonic-gate 		uint32_t u2, u1;
1160Sstevel@tonic-gate 		uint_t iplen;
1170Sstevel@tonic-gate 
1180Sstevel@tonic-gate 		u2 = ntohs(ipha->ipha_fragment_offset_and_flags);
1190Sstevel@tonic-gate 		u1 = u2 & (IPH_MF | IPH_OFFSET);
1200Sstevel@tonic-gate 		if (u1 != 0) {
1210Sstevel@tonic-gate 			return;
1220Sstevel@tonic-gate 		}
1230Sstevel@tonic-gate 		iplen = (ipha->ipha_version_and_hdr_length & 0xF) << 2;
1240Sstevel@tonic-gate 		up = (uint16_t *)(mp->b_rptr + iplen);
1250Sstevel@tonic-gate 		header->sport = (uint16_t)*up++;
1260Sstevel@tonic-gate 		header->dport = (uint16_t)*up;
1270Sstevel@tonic-gate 	} else {
1280Sstevel@tonic-gate 		ip6_t *ip6h = (ip6_t *)iph;
1290Sstevel@tonic-gate 		uint_t  length = IPV6_HDR_LEN;
1300Sstevel@tonic-gate 		uint_t  ehdrlen;
1310Sstevel@tonic-gate 		uint8_t *nexthdrp, *whereptr, *endptr;
1320Sstevel@tonic-gate 		ip6_dest_t *desthdr;
1330Sstevel@tonic-gate 		ip6_rthdr_t *rthdr;
1340Sstevel@tonic-gate 		ip6_hbh_t *hbhhdr;
1350Sstevel@tonic-gate 
1360Sstevel@tonic-gate 		whereptr = ((uint8_t *)&ip6h[1]);
1370Sstevel@tonic-gate 		endptr = mp->b_wptr;
1380Sstevel@tonic-gate 		nexthdrp = &ip6h->ip6_nxt;
1390Sstevel@tonic-gate 		while (whereptr < endptr) {
1400Sstevel@tonic-gate 			switch (*nexthdrp) {
1410Sstevel@tonic-gate 			case IPPROTO_HOPOPTS:
1420Sstevel@tonic-gate 				hbhhdr = (ip6_hbh_t *)whereptr;
1430Sstevel@tonic-gate 				ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
1440Sstevel@tonic-gate 				if ((uchar_t *)hbhhdr +  ehdrlen > endptr)
1450Sstevel@tonic-gate 					return;
1460Sstevel@tonic-gate 				nexthdrp = &hbhhdr->ip6h_nxt;
1470Sstevel@tonic-gate 				break;
1480Sstevel@tonic-gate 			case IPPROTO_DSTOPTS:
1490Sstevel@tonic-gate 				desthdr = (ip6_dest_t *)whereptr;
1500Sstevel@tonic-gate 				ehdrlen = 8 * (desthdr->ip6d_len + 1);
1510Sstevel@tonic-gate 				if ((uchar_t *)desthdr +  ehdrlen > endptr)
1520Sstevel@tonic-gate 					return;
1530Sstevel@tonic-gate 				nexthdrp = &desthdr->ip6d_nxt;
1540Sstevel@tonic-gate 				break;
1550Sstevel@tonic-gate 			case IPPROTO_ROUTING:
1560Sstevel@tonic-gate 				rthdr = (ip6_rthdr_t *)whereptr;
1570Sstevel@tonic-gate 				ehdrlen =  8 * (rthdr->ip6r_len + 1);
1580Sstevel@tonic-gate 				if ((uchar_t *)rthdr +  ehdrlen > endptr)
1590Sstevel@tonic-gate 					return;
1600Sstevel@tonic-gate 				nexthdrp = &rthdr->ip6r_nxt;
1610Sstevel@tonic-gate 				break;
1620Sstevel@tonic-gate 			case IPPROTO_FRAGMENT:
1630Sstevel@tonic-gate 				return;
1640Sstevel@tonic-gate 			case IPPROTO_TCP:
1650Sstevel@tonic-gate 			case IPPROTO_UDP:
1660Sstevel@tonic-gate 			case IPPROTO_SCTP:
1670Sstevel@tonic-gate 				/*
1680Sstevel@tonic-gate 				 * Verify we have at least ICMP_MIN_TP_HDR_LEN
1690Sstevel@tonic-gate 				 * bytes of the ULP's header to get the port
1700Sstevel@tonic-gate 				 * info.
1710Sstevel@tonic-gate 				 */
1720Sstevel@tonic-gate 				if (((uchar_t *)ip6h + length +
1730Sstevel@tonic-gate 				    ICMP_MIN_TP_HDR_LEN)  > endptr) {
1740Sstevel@tonic-gate 					return;
1750Sstevel@tonic-gate 				}
1760Sstevel@tonic-gate 				/* Get the protocol & ports */
1770Sstevel@tonic-gate 				header->proto = *nexthdrp;
1780Sstevel@tonic-gate 				up = (uint16_t *)((uchar_t *)ip6h + length);
1790Sstevel@tonic-gate 				header->sport = (uint16_t)*up++;
1800Sstevel@tonic-gate 				header->dport = (uint16_t)*up;
1810Sstevel@tonic-gate 				return;
1820Sstevel@tonic-gate 			case IPPROTO_ICMPV6:
1830Sstevel@tonic-gate 			case IPPROTO_ENCAP:
1840Sstevel@tonic-gate 			case IPPROTO_IPV6:
1850Sstevel@tonic-gate 			case IPPROTO_ESP:
1860Sstevel@tonic-gate 			case IPPROTO_AH:
1870Sstevel@tonic-gate 				header->proto = *nexthdrp;
1880Sstevel@tonic-gate 				return;
1890Sstevel@tonic-gate 			case IPPROTO_NONE:
1900Sstevel@tonic-gate 			default:
1910Sstevel@tonic-gate 				return;
1920Sstevel@tonic-gate 			}
1930Sstevel@tonic-gate 			length += ehdrlen;
1940Sstevel@tonic-gate 			whereptr += ehdrlen;
1950Sstevel@tonic-gate 		}
1960Sstevel@tonic-gate 	}
1970Sstevel@tonic-gate }
1980Sstevel@tonic-gate 
1990Sstevel@tonic-gate /*
2000Sstevel@tonic-gate  * flowacct_find_ids(mp, header)
2010Sstevel@tonic-gate  *
2020Sstevel@tonic-gate  * attempt to discern the uid and projid of the originator of a packet by
2030Sstevel@tonic-gate  * looking at the dblks making up the packet - yeuch!
2040Sstevel@tonic-gate  *
2050Sstevel@tonic-gate  * We do it by skipping any fragments with a credp of NULL (originated in
2060Sstevel@tonic-gate  * kernel), taking the first value that isn't NULL to be the cred_t for the
2070Sstevel@tonic-gate  * whole packet.
2080Sstevel@tonic-gate  */
2090Sstevel@tonic-gate static void
flowacct_find_ids(mblk_t * mp,header_t * header)2100Sstevel@tonic-gate flowacct_find_ids(mblk_t *mp, header_t *header)
2110Sstevel@tonic-gate {
2120Sstevel@tonic-gate 	cred_t *cr;
2130Sstevel@tonic-gate 
214*8778SErik.Nordmark@Sun.COM 	cr = msg_getcred(mp, NULL);
215*8778SErik.Nordmark@Sun.COM 	if (cr != NULL) {
2160Sstevel@tonic-gate 		header->uid = crgetuid(cr);
2170Sstevel@tonic-gate 		header->projid = crgetprojid(cr);
2180Sstevel@tonic-gate 	} else {
2194321Scasper 		header->uid = (uid_t)-1;
2200Sstevel@tonic-gate 		header->projid = -1;
2210Sstevel@tonic-gate 	}
2220Sstevel@tonic-gate }
2230Sstevel@tonic-gate 
2240Sstevel@tonic-gate /*
2250Sstevel@tonic-gate  * Extract header information in a header_t structure so that we don't have
2260Sstevel@tonic-gate  * have to parse the packet everytime.
2270Sstevel@tonic-gate  */
2280Sstevel@tonic-gate static int
flowacct_extract_header(mblk_t * mp,header_t * header)2290Sstevel@tonic-gate flowacct_extract_header(mblk_t *mp, header_t *header)
2300Sstevel@tonic-gate {
2310Sstevel@tonic-gate 	ipha_t *ipha;
2320Sstevel@tonic-gate 	ip6_t *ip6h;
2330Sstevel@tonic-gate #define	rptr	((uchar_t *)ipha)
2340Sstevel@tonic-gate 
2350Sstevel@tonic-gate 	/* 0 means no port extracted. */
2360Sstevel@tonic-gate 	header->sport = 0;
2370Sstevel@tonic-gate 	header->dport = 0;
2380Sstevel@tonic-gate 	flowacct_find_ids(mp, header);
2390Sstevel@tonic-gate 
2400Sstevel@tonic-gate 	V6_SET_ZERO(header->saddr);
2410Sstevel@tonic-gate 	V6_SET_ZERO(header->daddr);
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate 	ipha = (ipha_t *)mp->b_rptr;
2440Sstevel@tonic-gate 	header->isv4 = IPH_HDR_VERSION(ipha) == IPV4_VERSION;
2450Sstevel@tonic-gate 	if (header->isv4) {
2460Sstevel@tonic-gate 		ipha = (ipha_t *)mp->b_rptr;
2470Sstevel@tonic-gate 		V4_PART_OF_V6(header->saddr) = (int32_t)ipha->ipha_src;
2480Sstevel@tonic-gate 		V4_PART_OF_V6(header->daddr) = (int32_t)ipha->ipha_dst;
2490Sstevel@tonic-gate 		header->dsfield = ipha->ipha_type_of_service;
2500Sstevel@tonic-gate 		header->proto = ipha->ipha_protocol;
2510Sstevel@tonic-gate 		header->pktlen = ntohs(ipha->ipha_length);
2520Sstevel@tonic-gate 		if ((header->proto == IPPROTO_TCP) ||
2530Sstevel@tonic-gate 		    (header->proto == IPPROTO_UDP) ||
2540Sstevel@tonic-gate 		    (header->proto == IPPROTO_SCTP)) {
2550Sstevel@tonic-gate 			flowacct_port_info(header, ipha, AF_INET, mp);
2560Sstevel@tonic-gate 		}
2570Sstevel@tonic-gate 	} else {
2580Sstevel@tonic-gate 		/*
2590Sstevel@tonic-gate 		 * Need to pullup everything.
2600Sstevel@tonic-gate 		 */
2610Sstevel@tonic-gate 		if (mp->b_cont != NULL) {
2620Sstevel@tonic-gate 			if (!pullupmsg(mp, -1)) {
2630Sstevel@tonic-gate 				flowacct0dbg(("flowacct_extract_header: "\
2640Sstevel@tonic-gate 				    "pullup error"));
2650Sstevel@tonic-gate 				return (-1);
2660Sstevel@tonic-gate 			}
2670Sstevel@tonic-gate 		}
2680Sstevel@tonic-gate 		ip6h = (ip6_t *)mp->b_rptr;
2690Sstevel@tonic-gate 		bcopy(ip6h->ip6_src.s6_addr32, header->saddr.s6_addr32,
2700Sstevel@tonic-gate 		    sizeof (ip6h->ip6_src.s6_addr32));
2710Sstevel@tonic-gate 		bcopy(ip6h->ip6_dst.s6_addr32, header->daddr.s6_addr32,
2720Sstevel@tonic-gate 		    sizeof (ip6h->ip6_dst.s6_addr32));
2730Sstevel@tonic-gate 		header->dsfield = __IPV6_TCLASS_FROM_FLOW(ip6h->ip6_vcf);
2740Sstevel@tonic-gate 		header->proto = ip6h->ip6_nxt;
2750Sstevel@tonic-gate 		header->pktlen = ntohs(ip6h->ip6_plen) +
2760Sstevel@tonic-gate 		    ip_hdr_length_v6(mp, ip6h);
2770Sstevel@tonic-gate 		flowacct_port_info(header, ip6h, AF_INET6, mp);
2780Sstevel@tonic-gate 
2790Sstevel@tonic-gate 	}
2800Sstevel@tonic-gate #undef	rptr
2810Sstevel@tonic-gate 	return (0);
2820Sstevel@tonic-gate }
2830Sstevel@tonic-gate 
2840Sstevel@tonic-gate /* Check if the flow (identified by the 5-tuple) exists in the hash table */
2850Sstevel@tonic-gate static flow_t *
flowacct_flow_present(header_t * header,int index,flowacct_data_t * flowacct_data)2860Sstevel@tonic-gate flowacct_flow_present(header_t *header, int index,
2870Sstevel@tonic-gate     flowacct_data_t *flowacct_data)
2880Sstevel@tonic-gate {
2890Sstevel@tonic-gate 	list_hdr_t *hdr = flowacct_data->flows_tbl[index].head;
2900Sstevel@tonic-gate 	flow_t *flow;
2910Sstevel@tonic-gate 
2920Sstevel@tonic-gate 	while (hdr != NULL) {
2930Sstevel@tonic-gate 		flow = (flow_t *)hdr->objp;
2940Sstevel@tonic-gate 		if ((flow != NULL) &&
2950Sstevel@tonic-gate 		    (IN6_ARE_ADDR_EQUAL(&flow->saddr, &header->saddr)) &&
2960Sstevel@tonic-gate 		    (IN6_ARE_ADDR_EQUAL(&flow->daddr, &header->daddr)) &&
2970Sstevel@tonic-gate 		    (flow->proto == header->proto) &&
2980Sstevel@tonic-gate 		    (flow->sport == header->sport) &&
2990Sstevel@tonic-gate 		    (flow->dport == header->dport)) {
3000Sstevel@tonic-gate 			return (flow);
3010Sstevel@tonic-gate 		}
3020Sstevel@tonic-gate 		hdr = hdr->next;
3030Sstevel@tonic-gate 	}
3040Sstevel@tonic-gate 	return ((flow_t *)NULL);
3050Sstevel@tonic-gate }
3060Sstevel@tonic-gate 
3070Sstevel@tonic-gate /*
3080Sstevel@tonic-gate  * Add an object to the list at insert_point. This could be a flow item or
3090Sstevel@tonic-gate  * a flow itself.
3100Sstevel@tonic-gate  */
3110Sstevel@tonic-gate static list_hdr_t *
flowacct_add_obj(list_head_t * tophdr,list_hdr_t * insert_point,void * obj)3120Sstevel@tonic-gate flowacct_add_obj(list_head_t *tophdr, list_hdr_t *insert_point, void *obj)
3130Sstevel@tonic-gate {
3140Sstevel@tonic-gate 	list_hdr_t *new_hdr;
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate 	if (tophdr == NULL) {
3170Sstevel@tonic-gate 		return ((list_hdr_t *)NULL);
3180Sstevel@tonic-gate 	}
3190Sstevel@tonic-gate 
3200Sstevel@tonic-gate 	new_hdr = (list_hdr_t *)kmem_zalloc(FLOWACCT_HDR_SZ, KM_NOSLEEP);
3210Sstevel@tonic-gate 	if (new_hdr == NULL) {
3220Sstevel@tonic-gate 		flowacct0dbg(("flowacct_add_obj: error allocating mem"));
3230Sstevel@tonic-gate 		return ((list_hdr_t *)NULL);
3240Sstevel@tonic-gate 	}
3250Sstevel@tonic-gate 	gethrestime(&new_hdr->last_seen);
3260Sstevel@tonic-gate 	new_hdr->objp = obj;
3270Sstevel@tonic-gate 	tophdr->nbr_items++;
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	if (insert_point == NULL) {
3300Sstevel@tonic-gate 		if (tophdr->head == NULL) {
3310Sstevel@tonic-gate 			tophdr->head = new_hdr;
3320Sstevel@tonic-gate 			tophdr->tail = new_hdr;
3330Sstevel@tonic-gate 			return (new_hdr);
3340Sstevel@tonic-gate 		}
3350Sstevel@tonic-gate 
3360Sstevel@tonic-gate 		new_hdr->next = tophdr->head;
3370Sstevel@tonic-gate 		tophdr->head->prev = new_hdr;
3380Sstevel@tonic-gate 		tophdr->head = new_hdr;
3390Sstevel@tonic-gate 		return (new_hdr);
3400Sstevel@tonic-gate 	}
3410Sstevel@tonic-gate 
3420Sstevel@tonic-gate 	if (insert_point == tophdr->tail) {
3430Sstevel@tonic-gate 		tophdr->tail->next = new_hdr;
3440Sstevel@tonic-gate 		new_hdr->prev = tophdr->tail;
3450Sstevel@tonic-gate 		tophdr->tail = new_hdr;
3460Sstevel@tonic-gate 		return (new_hdr);
3470Sstevel@tonic-gate 	}
3480Sstevel@tonic-gate 
3490Sstevel@tonic-gate 	new_hdr->next = insert_point->next;
3500Sstevel@tonic-gate 	new_hdr->prev = insert_point;
3510Sstevel@tonic-gate 	insert_point->next->prev = new_hdr;
3520Sstevel@tonic-gate 	insert_point->next = new_hdr;
3530Sstevel@tonic-gate 	return (new_hdr);
3540Sstevel@tonic-gate }
3550Sstevel@tonic-gate 
3560Sstevel@tonic-gate /* Delete an obj from the list. This could be a flow item or the flow itself */
3570Sstevel@tonic-gate static void
flowacct_del_obj(list_head_t * tophdr,list_hdr_t * hdr,uint_t mode)3580Sstevel@tonic-gate flowacct_del_obj(list_head_t *tophdr, list_hdr_t *hdr, uint_t mode)
3590Sstevel@tonic-gate {
3600Sstevel@tonic-gate 	size_t	length;
3610Sstevel@tonic-gate 	uint_t	type;
3620Sstevel@tonic-gate 
3630Sstevel@tonic-gate 	if ((tophdr == NULL) || (hdr == NULL)) {
3640Sstevel@tonic-gate 		return;
3650Sstevel@tonic-gate 	}
3660Sstevel@tonic-gate 
3670Sstevel@tonic-gate 	type = ((flow_t *)hdr->objp)->type;
3680Sstevel@tonic-gate 
3690Sstevel@tonic-gate 	tophdr->nbr_items--;
3700Sstevel@tonic-gate 
3710Sstevel@tonic-gate 	if (hdr->next != NULL) {
3720Sstevel@tonic-gate 		hdr->next->prev = hdr->prev;
3730Sstevel@tonic-gate 	}
3740Sstevel@tonic-gate 	if (hdr->prev != NULL) {
3750Sstevel@tonic-gate 		hdr->prev->next = hdr->next;
3760Sstevel@tonic-gate 	}
3770Sstevel@tonic-gate 	if (tophdr->head == hdr) {
3780Sstevel@tonic-gate 		tophdr->head = hdr->next;
3790Sstevel@tonic-gate 	}
3800Sstevel@tonic-gate 	if (tophdr->tail == hdr) {
3810Sstevel@tonic-gate 		tophdr->tail = hdr->prev;
3820Sstevel@tonic-gate 	}
3830Sstevel@tonic-gate 
3840Sstevel@tonic-gate 	if (mode == FLOWACCT_DEL_OBJ) {
3850Sstevel@tonic-gate 		switch (type) {
3860Sstevel@tonic-gate 		case FLOWACCT_FLOW:
3870Sstevel@tonic-gate 			length = FLOWACCT_FLOW_SZ;
3880Sstevel@tonic-gate 			break;
3890Sstevel@tonic-gate 		case FLOWACCT_ITEM:
3900Sstevel@tonic-gate 			length = FLOWACCT_ITEM_SZ;
3910Sstevel@tonic-gate 			break;
3920Sstevel@tonic-gate 		}
3930Sstevel@tonic-gate 		kmem_free(hdr->objp, length);
3943902Svi117747 		hdr->objp = NULL;
3950Sstevel@tonic-gate 	}
3960Sstevel@tonic-gate 
3970Sstevel@tonic-gate 	kmem_free((void *)hdr, FLOWACCT_HDR_SZ);
3980Sstevel@tonic-gate }
3990Sstevel@tonic-gate 
4000Sstevel@tonic-gate /*
4010Sstevel@tonic-gate  * Checks if the given item (identified by dsfield, project id and uid)
4020Sstevel@tonic-gate  * is already present for the flow.
4030Sstevel@tonic-gate  */
4040Sstevel@tonic-gate static flow_item_t *
flowacct_item_present(flow_t * flow,uint8_t dsfield,pid_t proj_id,uint_t uid)4050Sstevel@tonic-gate flowacct_item_present(flow_t *flow, uint8_t dsfield, pid_t proj_id, uint_t uid)
4060Sstevel@tonic-gate {
4070Sstevel@tonic-gate 	list_hdr_t	*itemhdr;
4080Sstevel@tonic-gate 	flow_item_t	*item;
4090Sstevel@tonic-gate 
4100Sstevel@tonic-gate 	itemhdr = flow->items.head;
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate 	while (itemhdr != NULL) {
4130Sstevel@tonic-gate 		item = (flow_item_t *)itemhdr->objp;
4140Sstevel@tonic-gate 
4150Sstevel@tonic-gate 		if ((item->dsfield != dsfield) || (item->projid != proj_id) ||
4160Sstevel@tonic-gate 		    (item->uid != uid)) {
4170Sstevel@tonic-gate 			itemhdr = itemhdr->next;
4180Sstevel@tonic-gate 			continue;
4190Sstevel@tonic-gate 		}
4200Sstevel@tonic-gate 		return (item);
4210Sstevel@tonic-gate 	}
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 	return ((flow_item_t *)NULL);
4240Sstevel@tonic-gate }
4250Sstevel@tonic-gate 
4260Sstevel@tonic-gate /*
4270Sstevel@tonic-gate  * Add the flow to the table, if not already present. If the flow is
4280Sstevel@tonic-gate  * present in the table, add the item. Also, update the flow stats.
4290Sstevel@tonic-gate  * Additionally, re-adjust the timout list as well.
4300Sstevel@tonic-gate  */
4310Sstevel@tonic-gate static int
flowacct_update_flows_tbl(header_t * header,flowacct_data_t * flowacct_data)4320Sstevel@tonic-gate flowacct_update_flows_tbl(header_t *header, flowacct_data_t *flowacct_data)
4330Sstevel@tonic-gate {
4340Sstevel@tonic-gate 	int index;
4350Sstevel@tonic-gate 	list_head_t *fhead;
4360Sstevel@tonic-gate 	list_head_t *thead;
4370Sstevel@tonic-gate 	list_head_t *ihead;
4380Sstevel@tonic-gate 	boolean_t added_flow = B_FALSE;
4390Sstevel@tonic-gate 	timespec_t  now;
4400Sstevel@tonic-gate 	flow_item_t *item;
4410Sstevel@tonic-gate 	flow_t *flow;
4420Sstevel@tonic-gate 
4430Sstevel@tonic-gate 	index = FLOWACCT_FLOW_HASH(header);
4440Sstevel@tonic-gate 	fhead = &flowacct_data->flows_tbl[index];
4450Sstevel@tonic-gate 
4460Sstevel@tonic-gate 	/* The timeout list */
4470Sstevel@tonic-gate 	thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT];
4480Sstevel@tonic-gate 
4490Sstevel@tonic-gate 	mutex_enter(&fhead->lock);
4500Sstevel@tonic-gate 	flow = flowacct_flow_present(header, index, flowacct_data);
4510Sstevel@tonic-gate 	if (flow == NULL) {
4520Sstevel@tonic-gate 		flow = (flow_t *)kmem_zalloc(FLOWACCT_FLOW_SZ, KM_NOSLEEP);
4530Sstevel@tonic-gate 		if (flow == NULL) {
4545566Sudpa 			mutex_exit(&fhead->lock);
4550Sstevel@tonic-gate 			flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
4560Sstevel@tonic-gate 			    "error"));
4570Sstevel@tonic-gate 			return (-1);
4580Sstevel@tonic-gate 		}
4590Sstevel@tonic-gate 		flow->hdr = flowacct_add_obj(fhead, fhead->tail, (void *)flow);
4600Sstevel@tonic-gate 		if (flow->hdr == NULL) {
4615566Sudpa 			mutex_exit(&fhead->lock);
4625566Sudpa 			kmem_free(flow, FLOWACCT_FLOW_SZ);
4630Sstevel@tonic-gate 			flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
4640Sstevel@tonic-gate 			    "error"));
4650Sstevel@tonic-gate 			return (-1);
4660Sstevel@tonic-gate 		}
4670Sstevel@tonic-gate 
4680Sstevel@tonic-gate 		flow->type = FLOWACCT_FLOW;
4690Sstevel@tonic-gate 		flow->isv4 = header->isv4;
4700Sstevel@tonic-gate 		bcopy(header->saddr.s6_addr32, flow->saddr.s6_addr32,
4710Sstevel@tonic-gate 		    sizeof (header->saddr.s6_addr32));
4720Sstevel@tonic-gate 		bcopy(header->daddr.s6_addr32, flow->daddr.s6_addr32,
4730Sstevel@tonic-gate 		    sizeof (header->daddr.s6_addr32));
4740Sstevel@tonic-gate 		flow->proto = header->proto;
4750Sstevel@tonic-gate 		flow->sport = header->sport;
4760Sstevel@tonic-gate 		flow->dport = header->dport;
4770Sstevel@tonic-gate 		flow->back_ptr = fhead;
4780Sstevel@tonic-gate 		added_flow = B_TRUE;
4793902Svi117747 	} else {
4803902Svi117747 		/*
4813902Svi117747 		 * We need to make sure that this 'flow' is not deleted
4823902Svi117747 		 * either by a scheduled timeout or an explict call
4833902Svi117747 		 * to flowacct_timer() below.
4843902Svi117747 		 */
4853902Svi117747 		flow->inuse = B_TRUE;
4860Sstevel@tonic-gate 	}
4870Sstevel@tonic-gate 
4880Sstevel@tonic-gate 	ihead = &flow->items;
4890Sstevel@tonic-gate 	item = flowacct_item_present(flow, header->dsfield, header->projid,
4900Sstevel@tonic-gate 	    header->uid);
4910Sstevel@tonic-gate 	if (item == NULL) {
4920Sstevel@tonic-gate 		boolean_t just_once = B_TRUE;
4930Sstevel@tonic-gate 		/*
4940Sstevel@tonic-gate 		 * For all practical purposes, we limit the no. of entries in
4950Sstevel@tonic-gate 		 * the flow table - i.e. the max_limt that a user specifies is
4960Sstevel@tonic-gate 		 * the maximum no. of flow items in the table.
4970Sstevel@tonic-gate 		 */
4980Sstevel@tonic-gate 	try_again:
4990Sstevel@tonic-gate 		atomic_add_32(&flowacct_data->nflows, 1);
5000Sstevel@tonic-gate 		if (flowacct_data->nflows > flowacct_data->max_limit) {
5010Sstevel@tonic-gate 			atomic_add_32(&flowacct_data->nflows, -1);
5020Sstevel@tonic-gate 
5030Sstevel@tonic-gate 			/* Try timing out once */
5040Sstevel@tonic-gate 			if (just_once) {
5050Sstevel@tonic-gate 				/*
5060Sstevel@tonic-gate 				 * Need to release the lock, as this entry
5070Sstevel@tonic-gate 				 * could contain a flow that can be timed
5080Sstevel@tonic-gate 				 * out.
5090Sstevel@tonic-gate 				 */
5100Sstevel@tonic-gate 				mutex_exit(&fhead->lock);
5110Sstevel@tonic-gate 				flowacct_timer(FLOWACCT_JUST_ONE,
5120Sstevel@tonic-gate 				    flowacct_data);
5130Sstevel@tonic-gate 				mutex_enter(&fhead->lock);
5140Sstevel@tonic-gate 				/* Lets check again */
5150Sstevel@tonic-gate 				just_once = B_FALSE;
5160Sstevel@tonic-gate 				goto try_again;
5170Sstevel@tonic-gate 			} else {
5185115Svi117747 				flow->inuse = B_FALSE;
5195566Sudpa 				/* Need to remove the flow, if one was added */
5200Sstevel@tonic-gate 				if (added_flow) {
5210Sstevel@tonic-gate 					flowacct_del_obj(fhead, flow->hdr,
5220Sstevel@tonic-gate 					    FLOWACCT_DEL_OBJ);
5230Sstevel@tonic-gate 				}
5245566Sudpa 				mutex_exit(&fhead->lock);
5255566Sudpa 				flowacct1dbg(("flowacct_update_flows_tbl: "\
5265566Sudpa 				    "maximum active flows exceeded\n"));
5270Sstevel@tonic-gate 				return (-1);
5280Sstevel@tonic-gate 			}
5290Sstevel@tonic-gate 		}
5300Sstevel@tonic-gate 		item = (flow_item_t *)kmem_zalloc(FLOWACCT_ITEM_SZ, KM_NOSLEEP);
5310Sstevel@tonic-gate 		if (item == NULL) {
5325566Sudpa 			flow->inuse = B_FALSE;
5330Sstevel@tonic-gate 			/* Need to remove the flow, if one was added */
5340Sstevel@tonic-gate 			if (added_flow) {
5350Sstevel@tonic-gate 				flowacct_del_obj(fhead, flow->hdr,
5360Sstevel@tonic-gate 				    FLOWACCT_DEL_OBJ);
5370Sstevel@tonic-gate 			}
5385566Sudpa 			mutex_exit(&fhead->lock);
5390Sstevel@tonic-gate 			atomic_add_32(&flowacct_data->nflows, -1);
5405566Sudpa 			flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
5415566Sudpa 			    "error"));
5420Sstevel@tonic-gate 			return (-1);
5430Sstevel@tonic-gate 		}
5440Sstevel@tonic-gate 		item->hdr = flowacct_add_obj(ihead, ihead->tail, (void *)item);
5450Sstevel@tonic-gate 		if (item->hdr == NULL) {
5465566Sudpa 			flow->inuse = B_FALSE;
5470Sstevel@tonic-gate 			/* Need to remove the flow, if one was added */
5480Sstevel@tonic-gate 			if (added_flow) {
5490Sstevel@tonic-gate 				flowacct_del_obj(fhead, flow->hdr,
5500Sstevel@tonic-gate 				    FLOWACCT_DEL_OBJ);
5510Sstevel@tonic-gate 			}
5525566Sudpa 			mutex_exit(&fhead->lock);
5530Sstevel@tonic-gate 			atomic_add_32(&flowacct_data->nflows, -1);
5545566Sudpa 			kmem_free(item, FLOWACCT_ITEM_SZ);
5555566Sudpa 			flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
5565566Sudpa 			    "error\n"));
5570Sstevel@tonic-gate 			return (-1);
5580Sstevel@tonic-gate 		}
5590Sstevel@tonic-gate 		/* If a flow was added, add it too */
5600Sstevel@tonic-gate 		if (added_flow) {
5610Sstevel@tonic-gate 			atomic_add_64(&flowacct_data->usedmem,
5620Sstevel@tonic-gate 			    FLOWACCT_FLOW_RECORD_SZ);
5630Sstevel@tonic-gate 		}
5640Sstevel@tonic-gate 		atomic_add_64(&flowacct_data->usedmem, FLOWACCT_ITEM_RECORD_SZ);
5650Sstevel@tonic-gate 
5660Sstevel@tonic-gate 		item->type = FLOWACCT_ITEM;
5670Sstevel@tonic-gate 		item->dsfield = header->dsfield;
5680Sstevel@tonic-gate 		item->projid = header->projid;
5690Sstevel@tonic-gate 		item->uid = header->uid;
5700Sstevel@tonic-gate 		item->npackets = 1;
5710Sstevel@tonic-gate 		item->nbytes = header->pktlen;
5720Sstevel@tonic-gate 		item->creation_time = item->hdr->last_seen;
5730Sstevel@tonic-gate 	} else {
5740Sstevel@tonic-gate 		item->npackets++;
5750Sstevel@tonic-gate 		item->nbytes += header->pktlen;
5760Sstevel@tonic-gate 	}
5770Sstevel@tonic-gate 	gethrestime(&now);
5780Sstevel@tonic-gate 	flow->hdr->last_seen = item->hdr->last_seen = now;
5790Sstevel@tonic-gate 	mutex_exit(&fhead->lock);
5800Sstevel@tonic-gate 
5813902Svi117747 	/*
5823902Svi117747 	 * Re-adjust the timeout list. The timer takes the thead lock
5833902Svi117747 	 * follwed by fhead lock(s), so we release fhead, take thead
5843902Svi117747 	 * and re-take fhead.
5853902Svi117747 	 */
5860Sstevel@tonic-gate 	mutex_enter(&thead->lock);
5873902Svi117747 	mutex_enter(&fhead->lock);
5880Sstevel@tonic-gate 	/* If the flow was added, append it to the tail of the timeout list */
5890Sstevel@tonic-gate 	if (added_flow) {
5900Sstevel@tonic-gate 		if (thead->head == NULL) {
5910Sstevel@tonic-gate 			thead->head = flow->hdr;
5920Sstevel@tonic-gate 			thead->tail = flow->hdr;
5930Sstevel@tonic-gate 		} else {
5940Sstevel@tonic-gate 			thead->tail->timeout_next = flow->hdr;
5950Sstevel@tonic-gate 			flow->hdr->timeout_prev = thead->tail;
5960Sstevel@tonic-gate 			thead->tail = flow->hdr;
5970Sstevel@tonic-gate 		}
5980Sstevel@tonic-gate 	/*
5990Sstevel@tonic-gate 	 * Else, move this flow to the tail of the timeout list, if it is not
6000Sstevel@tonic-gate 	 * already.
6015566Sudpa 	 * flow->hdr in the timeout list :-
6025566Sudpa 	 * timeout_next = NULL, timeout_prev != NULL, at the tail end.
6035566Sudpa 	 * timeout_next != NULL, timeout_prev = NULL, at the head.
6045566Sudpa 	 * timeout_next != NULL, timeout_prev != NULL, in the middle.
6055566Sudpa 	 * timeout_next = NULL, timeout_prev = NULL, not in the timeout list,
6065566Sudpa 	 * ignore such flow.
6070Sstevel@tonic-gate 	 */
6085566Sudpa 	} else if ((flow->hdr->timeout_next != NULL) ||
6095566Sudpa 	    (flow->hdr->timeout_prev != NULL)) {
6105566Sudpa 		if (flow->hdr != thead->tail) {
6115566Sudpa 			if (flow->hdr == thead->head) {
6125566Sudpa 				thead->head->timeout_next->timeout_prev = NULL;
6135566Sudpa 				thead->head = thead->head->timeout_next;
6145566Sudpa 				flow->hdr->timeout_next = NULL;
6155566Sudpa 				thead->tail->timeout_next = flow->hdr;
6165566Sudpa 				flow->hdr->timeout_prev = thead->tail;
6175566Sudpa 				thead->tail = flow->hdr;
6185566Sudpa 			} else {
6195566Sudpa 				flow->hdr->timeout_prev->timeout_next =
6205566Sudpa 				    flow->hdr->timeout_next;
6215566Sudpa 				flow->hdr->timeout_next->timeout_prev =
6225566Sudpa 				    flow->hdr->timeout_prev;
6235566Sudpa 				flow->hdr->timeout_next = NULL;
6245566Sudpa 				thead->tail->timeout_next = flow->hdr;
6255566Sudpa 				flow->hdr->timeout_prev = thead->tail;
6265566Sudpa 				thead->tail = flow->hdr;
6275566Sudpa 			}
6280Sstevel@tonic-gate 		}
6290Sstevel@tonic-gate 	}
6305566Sudpa 	/*
6315566Sudpa 	 * Unset this variable, now it is fine even if this
6325566Sudpa 	 * flow gets deleted (i.e. after timing out its
6335566Sudpa 	 * flow items) since we are done using it.
6345566Sudpa 	 */
6355566Sudpa 	flow->inuse = B_FALSE;
6363902Svi117747 	mutex_exit(&fhead->lock);
6370Sstevel@tonic-gate 	mutex_exit(&thead->lock);
6380Sstevel@tonic-gate 	atomic_add_64(&flowacct_data->tbytes, header->pktlen);
6390Sstevel@tonic-gate 	return (0);
6400Sstevel@tonic-gate }
6410Sstevel@tonic-gate 
6420Sstevel@tonic-gate /* Timer for timing out flows/items from the flow table */
6430Sstevel@tonic-gate void
flowacct_timeout_flows(void * args)6440Sstevel@tonic-gate flowacct_timeout_flows(void *args)
6450Sstevel@tonic-gate {
6460Sstevel@tonic-gate 	flowacct_data_t *flowacct_data = (flowacct_data_t *)args;
6470Sstevel@tonic-gate 	flowacct_timer(FLOWACCT_FLOW_TIMER, flowacct_data);
6480Sstevel@tonic-gate 	flowacct_data->flow_tid = timeout(flowacct_timeout_flows, flowacct_data,
6490Sstevel@tonic-gate 	    drv_usectohz(flowacct_data->timer));
6500Sstevel@tonic-gate }
6510Sstevel@tonic-gate 
6520Sstevel@tonic-gate 
6530Sstevel@tonic-gate /* Delete the item from the flow in the flow table */
6540Sstevel@tonic-gate static void
flowacct_timeout_item(flow_t ** flow,list_hdr_t ** item_hdr)6550Sstevel@tonic-gate flowacct_timeout_item(flow_t **flow, list_hdr_t **item_hdr)
6560Sstevel@tonic-gate {
6570Sstevel@tonic-gate 	list_hdr_t *next_it_hdr;
6580Sstevel@tonic-gate 
6590Sstevel@tonic-gate 	next_it_hdr = (*item_hdr)->next;
6600Sstevel@tonic-gate 	flowacct_del_obj(&(*flow)->items, *item_hdr, FLOWACCT_DEL_OBJ);
6610Sstevel@tonic-gate 	*item_hdr = next_it_hdr;
6620Sstevel@tonic-gate }
6630Sstevel@tonic-gate 
6640Sstevel@tonic-gate /* Create a flow record for this timed out item */
6650Sstevel@tonic-gate static flow_records_t *
flowacct_create_record(flow_t * flow,list_hdr_t * ithdr)6660Sstevel@tonic-gate flowacct_create_record(flow_t *flow, list_hdr_t *ithdr)
6670Sstevel@tonic-gate {
6680Sstevel@tonic-gate 	int count;
6690Sstevel@tonic-gate 	flow_item_t *item = (flow_item_t *)ithdr->objp;
6700Sstevel@tonic-gate 	flow_records_t *tmp_frec = NULL;
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate 	/* Record to be written into the accounting file */
6730Sstevel@tonic-gate 	tmp_frec = kmem_zalloc(sizeof (flow_records_t), KM_NOSLEEP);
6740Sstevel@tonic-gate 	if (tmp_frec == NULL) {
6750Sstevel@tonic-gate 		flowacct0dbg(("flowacct_create_record: mem alloc error.\n"));
6760Sstevel@tonic-gate 		return (NULL);
6770Sstevel@tonic-gate 	}
6780Sstevel@tonic-gate 	tmp_frec->fl_use = kmem_zalloc(sizeof (flow_usage_t), KM_NOSLEEP);
6790Sstevel@tonic-gate 	if (tmp_frec->fl_use == NULL) {
6800Sstevel@tonic-gate 		flowacct0dbg(("flowacct_create_record: mem alloc error\n"));
6810Sstevel@tonic-gate 		kmem_free(tmp_frec, sizeof (flow_records_t));
6820Sstevel@tonic-gate 		return (NULL);
6830Sstevel@tonic-gate 	}
6840Sstevel@tonic-gate 
6850Sstevel@tonic-gate 	/* Copy the IP address */
6860Sstevel@tonic-gate 	for (count = 0; count < 4; count++) {
6870Sstevel@tonic-gate 		tmp_frec->fl_use->fu_saddr[count] =
6880Sstevel@tonic-gate 		    htonl(flow->saddr.s6_addr32[count]);
6890Sstevel@tonic-gate 		tmp_frec->fl_use->fu_daddr[count] =
6900Sstevel@tonic-gate 		    htonl(flow->daddr.s6_addr32[count]);
6910Sstevel@tonic-gate 	}
6920Sstevel@tonic-gate 
6930Sstevel@tonic-gate 	/*
6940Sstevel@tonic-gate 	 * Ports, protocol, version, dsfield, project id, uid, nbytes, npackets
6950Sstevel@tonic-gate 	 * creation time and last seen.
6960Sstevel@tonic-gate 	 */
6970Sstevel@tonic-gate 	tmp_frec->fl_use->fu_sport = htons(flow->sport);
6980Sstevel@tonic-gate 	tmp_frec->fl_use->fu_dport = htons(flow->dport);
6990Sstevel@tonic-gate 	tmp_frec->fl_use->fu_protocol = flow->proto;
7000Sstevel@tonic-gate 	tmp_frec->fl_use->fu_isv4 = flow->isv4;
7010Sstevel@tonic-gate 	tmp_frec->fl_use->fu_dsfield = item->dsfield;
7020Sstevel@tonic-gate 	tmp_frec->fl_use->fu_projid = item->projid;
7030Sstevel@tonic-gate 	tmp_frec->fl_use->fu_userid = item->uid;
7040Sstevel@tonic-gate 	tmp_frec->fl_use->fu_nbytes = item->nbytes;
7050Sstevel@tonic-gate 	tmp_frec->fl_use->fu_npackets = item->npackets;
7060Sstevel@tonic-gate 	tmp_frec->fl_use->fu_lseen =
7070Sstevel@tonic-gate 	    (uint64_t)(ulong_t)ithdr->last_seen.tv_sec;
7080Sstevel@tonic-gate 	tmp_frec->fl_use->fu_ctime =
7090Sstevel@tonic-gate 	    (uint64_t)(ulong_t)item->creation_time.tv_sec;
7100Sstevel@tonic-gate 
7110Sstevel@tonic-gate 	return (tmp_frec);
7120Sstevel@tonic-gate }
7130Sstevel@tonic-gate 
7140Sstevel@tonic-gate /*
7150Sstevel@tonic-gate  * Scan thru the timeout list and write the records to the accounting file, if
7160Sstevel@tonic-gate  * possible. Basically step thru the timeout list maintained in the last
7170Sstevel@tonic-gate  * hash bucket, FLOW_COUNT_TBL + 1, and timeout flows. This could be called
7180Sstevel@tonic-gate  * from the timer, FLOWACCT_TIMER - delete only timed out flows or when this
7190Sstevel@tonic-gate  * instance is deleted, FLOWACCT_PURGE_FLOW - delete all the flows from the
7200Sstevel@tonic-gate  * table or as FLOWACCT_JUST_ONE - delete the first timed out flow. Since the
7210Sstevel@tonic-gate  * flows are cronologically arranged in the timeout list,  when called as
7220Sstevel@tonic-gate  * FLOWACCT_TIMER and FLOWACCT_JUST_ONE, we can stop when we come across
7230Sstevel@tonic-gate  * the first flow that has not timed out (which means none of the following
7240Sstevel@tonic-gate  * flows would have timed out).
7250Sstevel@tonic-gate  */
7260Sstevel@tonic-gate void
flowacct_timer(int type,flowacct_data_t * flowacct_data)7270Sstevel@tonic-gate flowacct_timer(int type, flowacct_data_t *flowacct_data)
7280Sstevel@tonic-gate {
7290Sstevel@tonic-gate 	hrtime_t diff;
7300Sstevel@tonic-gate 	timespec_t now;
7310Sstevel@tonic-gate 	list_head_t *head, *thead;
7320Sstevel@tonic-gate 	flow_t *flow;
7330Sstevel@tonic-gate 	flow_item_t *item;
7340Sstevel@tonic-gate 	list_hdr_t *fl_hdr, *next_fl_hdr;
7350Sstevel@tonic-gate 	list_hdr_t *ithdr = (list_hdr_t *)NULL;
7360Sstevel@tonic-gate 	flow_records_t *frec = NULL, *tmp_frec, *tail;
7370Sstevel@tonic-gate 	uint64_t flow_size;
7380Sstevel@tonic-gate 	uint64_t item_size;
7390Sstevel@tonic-gate 
7400Sstevel@tonic-gate 	ASSERT(flowacct_data != NULL);
7410Sstevel@tonic-gate 
7420Sstevel@tonic-gate 	/* 2s-complement for subtraction */
7430Sstevel@tonic-gate 	flow_size = ~FLOWACCT_FLOW_RECORD_SZ + 1;
7440Sstevel@tonic-gate 	item_size = ~FLOWACCT_ITEM_RECORD_SZ + 1;
7450Sstevel@tonic-gate 
7460Sstevel@tonic-gate 	/* Get the current time */
7470Sstevel@tonic-gate 	gethrestime(&now);
7480Sstevel@tonic-gate 
7490Sstevel@tonic-gate 	/*
7500Sstevel@tonic-gate 	 * For each flow in the table, scan thru all the items and delete
7510Sstevel@tonic-gate 	 * those that have exceeded the timeout. If all the items in a
7520Sstevel@tonic-gate 	 * flow have timed out, delete the flow entry as well. Finally,
7530Sstevel@tonic-gate 	 * write all the delted items to the accounting file.
7540Sstevel@tonic-gate 	 */
7550Sstevel@tonic-gate 	thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT];
7560Sstevel@tonic-gate 
7570Sstevel@tonic-gate 	mutex_enter(&thead->lock);
7580Sstevel@tonic-gate 	fl_hdr = thead->head;
7590Sstevel@tonic-gate 	while (fl_hdr != NULL) {
7603902Svi117747 		uint32_t	items_deleted = 0;
7613902Svi117747 
7620Sstevel@tonic-gate 		next_fl_hdr = fl_hdr->timeout_next;
7630Sstevel@tonic-gate 		flow = (flow_t *)fl_hdr->objp;
7640Sstevel@tonic-gate 		head = flow->back_ptr;
7650Sstevel@tonic-gate 		mutex_enter(&head->lock);
7660Sstevel@tonic-gate 
7670Sstevel@tonic-gate 		/*LINTED*/
7680Sstevel@tonic-gate 		FLOWACCT_DELTA(now, fl_hdr->last_seen, diff);
7690Sstevel@tonic-gate 
7700Sstevel@tonic-gate 		/*
7710Sstevel@tonic-gate 		 * If type is FLOW_TIMER, then check if the item has timed out.
7720Sstevel@tonic-gate 		 * If type is FLOW_PURGE delete the entry anyways.
7730Sstevel@tonic-gate 		 */
7740Sstevel@tonic-gate 		if ((type != FLOWACCT_PURGE_FLOW) &&
7750Sstevel@tonic-gate 		    (diff < flowacct_data->timeout)) {
7760Sstevel@tonic-gate 			mutex_exit(&head->lock);
7770Sstevel@tonic-gate 			mutex_exit(&thead->lock);
7780Sstevel@tonic-gate 			goto write_records;
7790Sstevel@tonic-gate 		}
7800Sstevel@tonic-gate 
7810Sstevel@tonic-gate 		ithdr = flow->items.head;
7820Sstevel@tonic-gate 		while (ithdr != NULL) {
7830Sstevel@tonic-gate 			item = (flow_item_t *)ithdr->objp;
7840Sstevel@tonic-gate 			/*
7850Sstevel@tonic-gate 			 * Fill in the flow record to be
7860Sstevel@tonic-gate 			 * written to the accounting file.
7870Sstevel@tonic-gate 			 */
7880Sstevel@tonic-gate 			tmp_frec = flowacct_create_record(flow, ithdr);
7890Sstevel@tonic-gate 			/*
7900Sstevel@tonic-gate 			 * If we don't have memory for records,
7910Sstevel@tonic-gate 			 * we will come back in case this is
7920Sstevel@tonic-gate 			 * called as FLOW_TIMER, else we will
7930Sstevel@tonic-gate 			 * go ahead and delete the item from
7940Sstevel@tonic-gate 			 * the table (when asked to PURGE the
7950Sstevel@tonic-gate 			 * table), so there could be some
7960Sstevel@tonic-gate 			 * entries not written to the file
7970Sstevel@tonic-gate 			 * when this action instance is
7980Sstevel@tonic-gate 			 * deleted.
7990Sstevel@tonic-gate 			 */
8000Sstevel@tonic-gate 			if (tmp_frec != NULL) {
8010Sstevel@tonic-gate 				tmp_frec->fl_use->fu_aname =
8020Sstevel@tonic-gate 				    flowacct_data->act_name;
8030Sstevel@tonic-gate 				if (frec == NULL) {
8040Sstevel@tonic-gate 					frec = tmp_frec;
8050Sstevel@tonic-gate 					tail = frec;
8060Sstevel@tonic-gate 				} else {
8070Sstevel@tonic-gate 					tail->next = tmp_frec;
8080Sstevel@tonic-gate 					tail = tmp_frec;
8090Sstevel@tonic-gate 				}
8100Sstevel@tonic-gate 			} else if (type != FLOWACCT_PURGE_FLOW) {
8110Sstevel@tonic-gate 				mutex_exit(&head->lock);
8120Sstevel@tonic-gate 				mutex_exit(&thead->lock);
8130Sstevel@tonic-gate 				atomic_add_32(&flowacct_data->nflows,
8140Sstevel@tonic-gate 				    (~items_deleted + 1));
8150Sstevel@tonic-gate 				goto write_records;
8160Sstevel@tonic-gate 			}
8170Sstevel@tonic-gate 
8180Sstevel@tonic-gate 			/* Update stats */
8190Sstevel@tonic-gate 			atomic_add_64(&flowacct_data->tbytes, (~item->nbytes +
8200Sstevel@tonic-gate 			    1));
8210Sstevel@tonic-gate 
8220Sstevel@tonic-gate 			/* Delete the item */
8230Sstevel@tonic-gate 			flowacct_timeout_item(&flow, &ithdr);
8240Sstevel@tonic-gate 			items_deleted++;
8250Sstevel@tonic-gate 			atomic_add_64(&flowacct_data->usedmem, item_size);
8260Sstevel@tonic-gate 		}
8270Sstevel@tonic-gate 		ASSERT(flow->items.nbr_items == 0);
8280Sstevel@tonic-gate 		atomic_add_32(&flowacct_data->nflows, (~items_deleted + 1));
8290Sstevel@tonic-gate 
8303902Svi117747 		/*
8313902Svi117747 		 * Don't delete this flow if we are making place for
8323902Svi117747 		 * a new item for this flow.
8333902Svi117747 		 */
8343902Svi117747 		if (!flow->inuse) {
8355115Svi117747 			if (fl_hdr->timeout_prev != NULL) {
8365115Svi117747 				fl_hdr->timeout_prev->timeout_next =
8375115Svi117747 				    fl_hdr->timeout_next;
8383902Svi117747 			} else {
8393902Svi117747 				thead->head = fl_hdr->timeout_next;
8403902Svi117747 			}
8415115Svi117747 			if (fl_hdr->timeout_next != NULL) {
8425115Svi117747 				fl_hdr->timeout_next->timeout_prev =
8435115Svi117747 				    fl_hdr->timeout_prev;
8445115Svi117747 			} else {
8455115Svi117747 				thead->tail = fl_hdr->timeout_prev;
8465115Svi117747 			}
8475115Svi117747 			fl_hdr->timeout_prev = NULL;
8485115Svi117747 			fl_hdr->timeout_next = NULL;
8493902Svi117747 			flowacct_del_obj(head, fl_hdr, FLOWACCT_DEL_OBJ);
8503902Svi117747 			atomic_add_64(&flowacct_data->usedmem, flow_size);
8510Sstevel@tonic-gate 		}
8520Sstevel@tonic-gate 		mutex_exit(&head->lock);
8530Sstevel@tonic-gate 		if (type == FLOWACCT_JUST_ONE) {
8540Sstevel@tonic-gate 			mutex_exit(&thead->lock);
8550Sstevel@tonic-gate 			goto write_records;
8560Sstevel@tonic-gate 		}
8570Sstevel@tonic-gate 		fl_hdr = next_fl_hdr;
8580Sstevel@tonic-gate 	}
8590Sstevel@tonic-gate 	mutex_exit(&thead->lock);
8600Sstevel@tonic-gate write_records:
8610Sstevel@tonic-gate 	/* Write all the timed out flows to the accounting file */
8620Sstevel@tonic-gate 	while (frec != NULL) {
8630Sstevel@tonic-gate 		tmp_frec = frec->next;
8640Sstevel@tonic-gate 		exacct_commit_flow(frec->fl_use);
8650Sstevel@tonic-gate 		kmem_free(frec->fl_use, sizeof (flow_usage_t));
8660Sstevel@tonic-gate 		kmem_free(frec, sizeof (flow_records_t));
8670Sstevel@tonic-gate 		frec = tmp_frec;
8680Sstevel@tonic-gate 	}
8690Sstevel@tonic-gate }
8700Sstevel@tonic-gate 
8710Sstevel@tonic-gate /*
8720Sstevel@tonic-gate  * Get the IP header contents from the packet, update the flow table with
8730Sstevel@tonic-gate  * this item and return.
8740Sstevel@tonic-gate  */
8750Sstevel@tonic-gate int
flowacct_process(mblk_t ** mpp,flowacct_data_t * flowacct_data)8760Sstevel@tonic-gate flowacct_process(mblk_t **mpp, flowacct_data_t *flowacct_data)
8770Sstevel@tonic-gate {
8780Sstevel@tonic-gate 	header_t *header;
8790Sstevel@tonic-gate 	mblk_t *mp = *mpp;
8800Sstevel@tonic-gate 
8810Sstevel@tonic-gate 	ASSERT(mp != NULL);
8820Sstevel@tonic-gate 
8830Sstevel@tonic-gate 	/* If we don't find an M_DATA, return error */
8840Sstevel@tonic-gate 	if (mp->b_datap->db_type != M_DATA) {
8850Sstevel@tonic-gate 		if ((mp->b_cont != NULL) &&
8860Sstevel@tonic-gate 		    (mp->b_cont->b_datap->db_type == M_DATA)) {
8870Sstevel@tonic-gate 			mp = mp->b_cont;
8880Sstevel@tonic-gate 		} else {
8890Sstevel@tonic-gate 			flowacct0dbg(("flowacct_process: no data\n"));
8900Sstevel@tonic-gate 			atomic_add_64(&flowacct_data->epackets, 1);
8910Sstevel@tonic-gate 			return (EINVAL);
8920Sstevel@tonic-gate 		}
8930Sstevel@tonic-gate 	}
8940Sstevel@tonic-gate 
8950Sstevel@tonic-gate 	header = kmem_zalloc(FLOWACCT_HEADER_SZ, KM_NOSLEEP);
8960Sstevel@tonic-gate 	if (header == NULL) {
8970Sstevel@tonic-gate 		flowacct0dbg(("flowacct_process: error allocing mem"));
8980Sstevel@tonic-gate 		atomic_add_64(&flowacct_data->epackets, 1);
8990Sstevel@tonic-gate 		return (ENOMEM);
9000Sstevel@tonic-gate 	}
9010Sstevel@tonic-gate 
9020Sstevel@tonic-gate 	/* Get all the required information into header. */
9030Sstevel@tonic-gate 	if (flowacct_extract_header(mp, header) != 0) {
9040Sstevel@tonic-gate 		kmem_free(header, FLOWACCT_HEADER_SZ);
9050Sstevel@tonic-gate 		atomic_add_64(&flowacct_data->epackets, 1);
9060Sstevel@tonic-gate 		return (EINVAL);
9070Sstevel@tonic-gate 	}
9080Sstevel@tonic-gate 
9090Sstevel@tonic-gate 	/* Updated the flow table with this entry */
9100Sstevel@tonic-gate 	if (flowacct_update_flows_tbl(header, flowacct_data) != 0) {
9110Sstevel@tonic-gate 		kmem_free(header, FLOWACCT_HEADER_SZ);
9120Sstevel@tonic-gate 		atomic_add_64(&flowacct_data->epackets, 1);
9130Sstevel@tonic-gate 		return (ENOMEM);
9140Sstevel@tonic-gate 	}
9150Sstevel@tonic-gate 
9160Sstevel@tonic-gate 	/* Update global stats */
9170Sstevel@tonic-gate 	atomic_add_64(&flowacct_data->npackets, 1);
9180Sstevel@tonic-gate 	atomic_add_64(&flowacct_data->nbytes, header->pktlen);
9190Sstevel@tonic-gate 
9200Sstevel@tonic-gate 	kmem_free(header, FLOWACCT_HEADER_SZ);
9210Sstevel@tonic-gate 	if (flowacct_data->flow_tid == 0) {
9220Sstevel@tonic-gate 		flowacct_data->flow_tid = timeout(flowacct_timeout_flows,
9230Sstevel@tonic-gate 		    flowacct_data, drv_usectohz(flowacct_data->timer));
9240Sstevel@tonic-gate 	}
9250Sstevel@tonic-gate 	return (0);
9260Sstevel@tonic-gate }
927