10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance
70Sstevel@tonic-gate * with the License.
80Sstevel@tonic-gate *
90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate * See the License for the specific language governing permissions
120Sstevel@tonic-gate * and limitations under the License.
130Sstevel@tonic-gate *
140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate *
200Sstevel@tonic-gate * CDDL HEADER END
210Sstevel@tonic-gate */
220Sstevel@tonic-gate /*
23*852Svi117747 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
240Sstevel@tonic-gate * Use is subject to license terms.
250Sstevel@tonic-gate */
260Sstevel@tonic-gate
270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
280Sstevel@tonic-gate
290Sstevel@tonic-gate #include <sys/types.h>
300Sstevel@tonic-gate #include <sys/atomic.h>
31*852Svi117747 #include <sys/pattr.h>
320Sstevel@tonic-gate #include <netinet/in.h>
330Sstevel@tonic-gate #include <netinet/ip6.h>
340Sstevel@tonic-gate #include <inet/common.h>
350Sstevel@tonic-gate #include <inet/ip.h>
360Sstevel@tonic-gate #include <inet/ip6.h>
370Sstevel@tonic-gate #include <ipp/dscpmk/dscpmk_impl.h>
380Sstevel@tonic-gate
390Sstevel@tonic-gate /* Module to mark the ToS/DS field for a given packet */
400Sstevel@tonic-gate
410Sstevel@tonic-gate /*
 * Debug level for this module; defaults to 0.
 * NOTE(review): presumably consulted by the dscpmk0dbg()/dscpmk2dbg()
 * macros used in dscpmk_process(); their definitions are not in this
 * file -- confirm against dscpmk_impl.h.
 */
420Sstevel@tonic-gate int dscpmk_debug = 0;
430Sstevel@tonic-gate
440Sstevel@tonic-gate /*
450Sstevel@tonic-gate * Given a packet, this routine marks the ToS or DSCP for IPv4 and IPv6 resp.
460Sstevel@tonic-gate * using the configured dscp_map.
470Sstevel@tonic-gate * Note that this module does not change the ECN bits.
480Sstevel@tonic-gate */
490Sstevel@tonic-gate int
dscpmk_process(mblk_t ** mpp,dscpmk_data_t * dscpmk_data,ip_proc_t proc)500Sstevel@tonic-gate dscpmk_process(mblk_t **mpp, dscpmk_data_t *dscpmk_data, ip_proc_t proc)
510Sstevel@tonic-gate {
520Sstevel@tonic-gate ipha_t *ipha;
530Sstevel@tonic-gate ip6_t *ip6_hdr;
540Sstevel@tonic-gate boolean_t is_v4;
550Sstevel@tonic-gate uint8_t dscp, new_dscp;
560Sstevel@tonic-gate mblk_t *mp;
570Sstevel@tonic-gate
580Sstevel@tonic-gate ASSERT((mpp != NULL) && (*mpp != NULL));
590Sstevel@tonic-gate mp = *mpp;
600Sstevel@tonic-gate
610Sstevel@tonic-gate /*
620Sstevel@tonic-gate * The action module will receive an M_DATA or an M_CTL followed
630Sstevel@tonic-gate * by an M_DATA. In the latter case skip the M_CTL.
640Sstevel@tonic-gate */
650Sstevel@tonic-gate if (mp->b_datap->db_type != M_DATA) {
660Sstevel@tonic-gate if ((mp->b_cont != NULL) &&
670Sstevel@tonic-gate (mp->b_cont->b_datap->db_type == M_DATA)) {
680Sstevel@tonic-gate mp = mp->b_cont;
690Sstevel@tonic-gate } else {
700Sstevel@tonic-gate dscpmk0dbg(("dscpmk_process: no data\n"));
710Sstevel@tonic-gate atomic_add_64(&dscpmk_data->epackets, 1);
720Sstevel@tonic-gate return (EINVAL);
730Sstevel@tonic-gate }
740Sstevel@tonic-gate }
750Sstevel@tonic-gate
760Sstevel@tonic-gate /* Pull-up needed? */
770Sstevel@tonic-gate if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) {
780Sstevel@tonic-gate if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
790Sstevel@tonic-gate dscpmk0dbg(("dscpmk_process: pullup failed\n"));
800Sstevel@tonic-gate atomic_add_64(&dscpmk_data->epackets, 1);
810Sstevel@tonic-gate return (EINVAL);
820Sstevel@tonic-gate }
830Sstevel@tonic-gate }
840Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr;
850Sstevel@tonic-gate
860Sstevel@tonic-gate /* Update global stats */
870Sstevel@tonic-gate atomic_add_64(&dscpmk_data->npackets, 1);
880Sstevel@tonic-gate
890Sstevel@tonic-gate /*
900Sstevel@tonic-gate * This should only be called for outgoing packets. For inbound packets
910Sstevel@tonic-gate * proceed with the next action.
920Sstevel@tonic-gate */
930Sstevel@tonic-gate if ((proc == IPP_LOCAL_IN) || (proc == IPP_FWD_IN)) {
940Sstevel@tonic-gate dscpmk2dbg(("dscpmk_process: cannot mark incoming packets\n"));
950Sstevel@tonic-gate atomic_add_64(&dscpmk_data->ipackets, 1);
960Sstevel@tonic-gate return (0);
970Sstevel@tonic-gate }
980Sstevel@tonic-gate
990Sstevel@tonic-gate /* Figure out the ToS or the Traffic Class from the message */
1000Sstevel@tonic-gate if (IPH_HDR_VERSION(ipha) == IPV4_VERSION) {
1010Sstevel@tonic-gate dscp = ipha->ipha_type_of_service;
1020Sstevel@tonic-gate is_v4 = B_TRUE;
1030Sstevel@tonic-gate } else {
1040Sstevel@tonic-gate ip6_hdr = (ip6_t *)mp->b_rptr;
1050Sstevel@tonic-gate dscp = __IPV6_TCLASS_FROM_FLOW(ip6_hdr->ip6_vcf);
1060Sstevel@tonic-gate is_v4 = B_FALSE;
1070Sstevel@tonic-gate }
1080Sstevel@tonic-gate
1090Sstevel@tonic-gate /*
1100Sstevel@tonic-gate * Select the new dscp from the dscp_map after ignoring the
1110Sstevel@tonic-gate * ECN/CU from dscp (hence dscp >> 2). new_dscp will be the
1120Sstevel@tonic-gate * 6-bit DSCP value.
1130Sstevel@tonic-gate */
1140Sstevel@tonic-gate new_dscp = dscpmk_data->dscp_map[dscp >> 2];
1150Sstevel@tonic-gate
1160Sstevel@tonic-gate /* Update stats for this new_dscp */
1170Sstevel@tonic-gate atomic_add_64(&dscpmk_data->dscp_stats[new_dscp].npackets, 1);
1180Sstevel@tonic-gate
1190Sstevel@tonic-gate /*
1200Sstevel@tonic-gate * if new_dscp is same as the original, update stats and
1210Sstevel@tonic-gate * return.
1220Sstevel@tonic-gate */
1230Sstevel@tonic-gate if (new_dscp == (dscp >> 2)) {
1240Sstevel@tonic-gate atomic_add_64(&dscpmk_data->unchanged, 1);
1250Sstevel@tonic-gate return (0);
1260Sstevel@tonic-gate }
1270Sstevel@tonic-gate
1280Sstevel@tonic-gate /* Get back the ECN/CU value from the original dscp */
1290Sstevel@tonic-gate new_dscp = (new_dscp << 2) | (dscp & 0x3);
1300Sstevel@tonic-gate
1310Sstevel@tonic-gate atomic_add_64(&dscpmk_data->changed, 1);
1320Sstevel@tonic-gate /*
1330Sstevel@tonic-gate * IPv4 : ToS structure -- RFC 791
1340Sstevel@tonic-gate *
1350Sstevel@tonic-gate * 0 1 2 3 4 5 6 7
1360Sstevel@tonic-gate * +---+---+---+---+---+---+---+---+
1370Sstevel@tonic-gate * | IP Precd | D | T | R | 0 | 0 |
1380Sstevel@tonic-gate * | | | | | | |
1390Sstevel@tonic-gate * +---+---+---+---+---+---+---+---+
1400Sstevel@tonic-gate *
1410Sstevel@tonic-gate * For Backward Compatability the diff serv DSCP will be mapped
1420Sstevel@tonic-gate * to the 3-bits Precedence field. DTR is not supported. Thus,
1430Sstevel@tonic-gate * the following Class Seletor CodePoints are reserved from this
1440Sstevel@tonic-gate * purpose : xxx000; where x is 0 or 1 (note the last 2 bits are
1450Sstevel@tonic-gate * 00) -- see RFC 2474.
1460Sstevel@tonic-gate */
1470Sstevel@tonic-gate
1480Sstevel@tonic-gate if (is_v4) {
1490Sstevel@tonic-gate ipha->ipha_type_of_service = new_dscp;
150*852Svi117747 /*
151*852Svi117747 * If the hardware supports checksumming, we don't need
152*852Svi117747 * to do anything.
153*852Svi117747 */
154*852Svi117747 if (!(mp->b_datap->db_struioun.cksum.flags &
155*852Svi117747 HCK_IPV4_HDRCKSUM)) {
156*852Svi117747 ipha->ipha_hdr_checksum = 0;
157*852Svi117747 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
158*852Svi117747 }
159*852Svi117747 } else {
1600Sstevel@tonic-gate
1610Sstevel@tonic-gate /*
1620Sstevel@tonic-gate * IPv6 : DSCP field structure is as given -- RFC 2474
1630Sstevel@tonic-gate *
1640Sstevel@tonic-gate * 0 1 2 3 4 5 6 7
1650Sstevel@tonic-gate * +---+---+---+---+---+---+---+---+
1660Sstevel@tonic-gate * | DSCP | CU |
1670Sstevel@tonic-gate * | | |
1680Sstevel@tonic-gate * +---+---+---+---+---+---+---+---+
1690Sstevel@tonic-gate *
1700Sstevel@tonic-gate * CU -- Currently Unused
171*852Svi117747 *
1720Sstevel@tonic-gate * the 32 bit vcf consists of version (4 bits), Traffic class (8 bits)
1730Sstevel@tonic-gate * and flow id (20 bits). Need to take care of Big/Little-Endianess.
1740Sstevel@tonic-gate */
1750Sstevel@tonic-gate #ifdef _BIG_ENDIAN
1760Sstevel@tonic-gate ip6_hdr->ip6_vcf = (ip6_hdr->ip6_vcf & TCLASS_MASK) |
1770Sstevel@tonic-gate (new_dscp << 20);
1780Sstevel@tonic-gate #else
1790Sstevel@tonic-gate ip6_hdr->ip6_vcf = (ip6_hdr->ip6_vcf & TCLASS_MASK) |
1800Sstevel@tonic-gate ((new_dscp >> 4) | ((new_dscp << 12) & 0xF000));
1810Sstevel@tonic-gate #endif
1820Sstevel@tonic-gate }
1830Sstevel@tonic-gate
1840Sstevel@tonic-gate return (0);
1850Sstevel@tonic-gate }
186