xref: /onnv-gate/usr/src/uts/common/io/nxge/nxge_fflp_hash.c (revision 6929:a596171cbf16)
13859Sml29623 /*
23859Sml29623  * CDDL HEADER START
33859Sml29623  *
43859Sml29623  * The contents of this file are subject to the terms of the
53859Sml29623  * Common Development and Distribution License (the "License").
63859Sml29623  * You may not use this file except in compliance with the License.
73859Sml29623  *
83859Sml29623  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93859Sml29623  * or http://www.opensolaris.org/os/licensing.
103859Sml29623  * See the License for the specific language governing permissions
113859Sml29623  * and limitations under the License.
123859Sml29623  *
133859Sml29623  * When distributing Covered Code, include this CDDL HEADER in each
143859Sml29623  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153859Sml29623  * If applicable, add the following below this CDDL HEADER, with the
163859Sml29623  * fields enclosed by brackets "[]" replaced with your own identifying
173859Sml29623  * information: Portions Copyright [yyyy] [name of copyright owner]
183859Sml29623  *
193859Sml29623  * CDDL HEADER END
203859Sml29623  */
213859Sml29623 /*
22*6929Smisaki  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
233859Sml29623  * Use is subject to license terms.
243859Sml29623  */
253859Sml29623 #pragma ident	"%Z%%M%	%I%	%E% SMI"
263859Sml29623 
273859Sml29623 #include <sys/types.h>
283859Sml29623 #include <nxge_fflp_hash.h>
293859Sml29623 
303859Sml29623 static void nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len);
313859Sml29623 
323859Sml29623 /*
333859Sml29623  * The crc32c algorithms are taken from sctp_crc32 implementation
343859Sml29623  * common/inet/sctp_crc32.{c,h}
353859Sml29623  *
363859Sml29623  */
373859Sml29623 
383859Sml29623 /*
393859Sml29623  * Fast CRC32C calculation algorithm.  The basic idea is to look at it
403859Sml29623  * four bytes (one word) at a time, using four tables.  The
413859Sml29623  * standard algorithm in RFC 3309 uses one table.
423859Sml29623  */
433859Sml29623 
443859Sml29623 /*
453859Sml29623  * SCTP uses reflected/reverse polynomial CRC32 with generating
463859Sml29623  * polynomial 0x1EDC6F41L
473859Sml29623  */
#define	SCTP_POLY 0x1EDC6F41L

/* CRC-CCITT generator polynomial. */
#define	CRC_CCITT_POLY 0x1021

/* The four CRC32c tables, filled in by nxge_crc32c_init(). */
static uint32_t crc32c_tab[4][256];

/* The four CRC-CCITT tables, filled in by nxge_crc_ccitt_init(). */
static uint16_t crc_ccitt_tab[4][256];

/* The four tables for H1 computation, filled in by nxge_init_h1_table(). */
static uint32_t h1table[4][256];

/* Generator polynomial used for the H1 tables (same value as SCTP_POLY). */
#define	CRC_32C_POLY 0x1EDC6F41L

/*
 * Fold one input byte `data' into the running H1 value `crc' using the
 * first H1 table.  `crc' must be a modifiable 32-bit lvalue: it is updated
 * in place, and the new value is also the value of the expression.
 *
 * Both arguments are fully parenthesized so that an argument which is
 * itself an expression (e.g. `a | b') is not regrouped by operator
 * precedence inside the expansion.  Note that `crc' is evaluated more
 * than once, so it must not have side effects.
 */
#define	COMPUTE_H1_BYTE(crc, data) \
	((crc) = ((crc) << 8) ^ h1table[0][(((crc) >> 24) ^ (data)) & 0xff])
663859Sml29623 
/*
 * Return `b' with its 32 bits in reverse order: bit 0 of the input
 * becomes bit 31 of the result, bit 1 becomes bit 30, and so on.
 */
static uint32_t
reflect_32(uint32_t b)
{
	int i;
	uint32_t rw = 0;

	for (i = 0; i < 32; i++) {
		if (b & 1) {
			/*
			 * The shifted operand must be unsigned: shifting a
			 * signed 1 into bit 31 (i == 0) is undefined.
			 */
			rw |= (uint32_t)1 << (31 - i);
		}
		b >>= 1;
	}
	return (rw);
}
813859Sml29623 
/*
 * Return `w' with the order of its four bytes reversed.
 */
static uint32_t
flip32(uint32_t w)
{
	uint32_t top = w << 24;			/* byte 0 -> byte 3 */
	uint32_t upper = (w << 8) & 0xff0000;	/* byte 1 -> byte 2 */
	uint32_t lower = (w >> 8) & 0xff00;	/* byte 2 -> byte 1 */
	uint32_t bottom = w >> 24;		/* byte 3 -> byte 0 */

	return (bottom | lower | upper | top);
}
883859Sml29623 
893859Sml29623 /*
903859Sml29623  * reference crc-ccitt implementation
913859Sml29623  */
923859Sml29623 
933859Sml29623 uint16_t
crc_ccitt(uint16_t crcin,uint8_t data)943859Sml29623 crc_ccitt(uint16_t crcin, uint8_t data)
953859Sml29623 {
963859Sml29623 	uint16_t mcrc, crc = 0, bits = 0;
973859Sml29623 
983859Sml29623 	mcrc = (((crcin >> 8) ^ data) & 0xff) << 8;
993859Sml29623 	for (bits = 0; bits < 8; bits++) {
1003859Sml29623 		crc = ((crc ^ mcrc) & 0x8000) ?
101*6929Smisaki 		    (crc << 1) ^ CRC_CCITT_POLY :
102*6929Smisaki 		    crc << 1;
1033859Sml29623 		mcrc <<= 1;
1043859Sml29623 	}
1053859Sml29623 	return ((crcin << 8) ^ crc);
1063859Sml29623 }
1073859Sml29623 
1083859Sml29623 /*
1093859Sml29623  * Initialize the crc32c tables.
1103859Sml29623  */
1113859Sml29623 
1123859Sml29623 void
nxge_crc32c_init(void)1133859Sml29623 nxge_crc32c_init(void)
1143859Sml29623 {
1153859Sml29623 	uint32_t index, bit, byte, crc;
1163859Sml29623 
1173859Sml29623 	for (index = 0; index < 256; index++) {
1183859Sml29623 		crc = reflect_32(index);
1193859Sml29623 		for (byte = 0; byte < 4; byte++) {
1203859Sml29623 			for (bit = 0; bit < 8; bit++) {
1213859Sml29623 				crc = (crc & 0x80000000) ?
122*6929Smisaki 				    (crc << 1) ^ SCTP_POLY : crc << 1;
1233859Sml29623 			}
1243859Sml29623 #ifdef _BIG_ENDIAN
1253859Sml29623 			crc32c_tab[3 - byte][index] = flip32(reflect_32(crc));
1263859Sml29623 #else
1273859Sml29623 			crc32c_tab[byte][index] = reflect_32(crc);
1283859Sml29623 #endif
1293859Sml29623 		}
1303859Sml29623 	}
1313859Sml29623 }
1323859Sml29623 
1333859Sml29623 /*
1343859Sml29623  * Initialize the crc-ccitt tables.
1353859Sml29623  */
1363859Sml29623 
1373859Sml29623 void
nxge_crc_ccitt_init(void)1383859Sml29623 nxge_crc_ccitt_init(void)
1393859Sml29623 {
1403859Sml29623 	uint16_t crc;
1413859Sml29623 	uint16_t index, bit, byte;
1423859Sml29623 
1433859Sml29623 	for (index = 0; index < 256; index++) {
1443859Sml29623 		crc = index << 8;
1453859Sml29623 		for (byte = 0; byte < 4; byte++) {
1463859Sml29623 			for (bit = 0; bit < 8; bit++) {
1473859Sml29623 				crc = (crc & 0x8000) ?
148*6929Smisaki 				    (crc << 1) ^ CRC_CCITT_POLY : crc << 1;
1493859Sml29623 			}
1503859Sml29623 #ifdef _BIG_ENDIAN
1513859Sml29623 			crc_ccitt_tab[3 - byte][index] = crc;
1523859Sml29623 #else
1533859Sml29623 			crc_ccitt_tab[byte][index] = crc;
1543859Sml29623 #endif
1553859Sml29623 		}
1563859Sml29623 	}
1573859Sml29623 }
1583859Sml29623 
1593859Sml29623 /*
1603859Sml29623  * Lookup  the crc32c for a byte stream
1613859Sml29623  */
1623859Sml29623 
1633859Sml29623 static void
nxge_crc32c_byte(uint32_t * crcptr,const uint8_t * buf,int len)1643859Sml29623 nxge_crc32c_byte(uint32_t *crcptr, const uint8_t *buf, int len)
1653859Sml29623 {
1663859Sml29623 	uint32_t crc;
1673859Sml29623 	int i;
1683859Sml29623 
1693859Sml29623 	crc = *crcptr;
1703859Sml29623 	for (i = 0; i < len; i++) {
1713859Sml29623 #ifdef _BIG_ENDIAN
1723859Sml29623 		crc = (crc << 8) ^ crc32c_tab[3][buf[i] ^ (crc >> 24)];
1733859Sml29623 #else
1743859Sml29623 		crc = (crc >> 8) ^ crc32c_tab[0][buf[i] ^ (crc & 0xff)];
1753859Sml29623 #endif
1763859Sml29623 	}
1773859Sml29623 	*crcptr = crc;
1783859Sml29623 }
1793859Sml29623 
1803859Sml29623 /*
1813859Sml29623  * Lookup  the crc-ccitt for a byte stream
1823859Sml29623  */
1833859Sml29623 
1843859Sml29623 static void
nxge_crc_ccitt_byte(uint16_t * crcptr,const uint8_t * buf,int len)1853859Sml29623 nxge_crc_ccitt_byte(uint16_t *crcptr, const uint8_t *buf, int len)
1863859Sml29623 {
1873859Sml29623 	uint16_t crc;
1883859Sml29623 	int i;
1893859Sml29623 
1903859Sml29623 	crc = *crcptr;
1913859Sml29623 	for (i = 0; i < len; i++) {
1923859Sml29623 
1933859Sml29623 #ifdef _BIG_ENDIAN
1943859Sml29623 		crc = (crc << 8) ^ crc_ccitt_tab[3][buf[i] ^ (crc >> 8)];
1953859Sml29623 #else
1963859Sml29623 		crc = (crc << 8) ^ crc_ccitt_tab[0][buf[i] ^ (crc >> 8)];
1973859Sml29623 #endif
1983859Sml29623 	}
1993859Sml29623 	*crcptr = crc;
2003859Sml29623 }
2013859Sml29623 
2023859Sml29623 /*
2033859Sml29623  * Lookup  the crc32c for a 32 bit word stream
2043859Sml29623  * Lookup is done fro the 4 bytes in parallel
2053859Sml29623  * from the tables computed earlier
2063859Sml29623  *
2073859Sml29623  */
2083859Sml29623 
2093859Sml29623 static void
nxge_crc32c_word(uint32_t * crcptr,const uint32_t * buf,int len)2103859Sml29623 nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len)
2113859Sml29623 {
2123859Sml29623 	uint32_t w, crc;
2133859Sml29623 	int i;
2143859Sml29623 
2153859Sml29623 	crc = *crcptr;
2163859Sml29623 	for (i = 0; i < len; i++) {
2173859Sml29623 		w = crc ^ buf[i];
2183859Sml29623 		crc = crc32c_tab[0][w >> 24] ^
219*6929Smisaki 		    crc32c_tab[1][(w >> 16) & 0xff] ^
220*6929Smisaki 		    crc32c_tab[2][(w >> 8) & 0xff] ^
221*6929Smisaki 		    crc32c_tab[3][w & 0xff];
2223859Sml29623 	}
2233859Sml29623 	*crcptr = crc;
2243859Sml29623 }
2253859Sml29623 
/*
 * Look up the crc-ccitt for a stream of bytes.
 *
 * Since the parallel lookup version doesn't work yet, use the byte
 * stream version (look up the crc one byte at a time).
 *
 * Returns the value of `crc16' after `len' bytes at `buf' have been
 * folded into it.
 */

uint16_t
nxge_crc_ccitt(uint16_t crc16, const uint8_t *buf, int len)
{
	uint16_t result = crc16;

	nxge_crc_ccitt_byte(&result, buf, len);
	return (result);
}
2413859Sml29623 
/*
 * Look up the crc32c for a stream of bytes.
 *
 * Tries to look up the CRC on 4-byte words.  If the buffer is not 4-byte
 * aligned, first compute with the byte lookup until aligned, then compute
 * the crc for each 4 bytes.  If there are bytes left at the end of the
 * buffer, perform a byte lookup for the remaining bytes.
 *
 * crc32 is the running CRC value, buf the data and len its size in
 * bytes.  Returns the updated running value; when len is zero or
 * negative, crc32 is returned unchanged.
 */

uint32_t
nxge_crc32c(uint32_t crc32, const uint8_t *buf, int len)
{
	int rem;

	/*
	 * Nothing to fold in.  Rejecting a negative len here also matters
	 * for safety: with an aligned buffer (len & 3) could be non-zero
	 * and the tail lookup would read before the start of the buffer.
	 */
	if (len <= 0) {
		return (crc32);
	}

	/*
	 * Number of leading bytes (0..3) before the next 4-byte boundary.
	 * The outer mask maps an already aligned pointer to 0 rather
	 * than 4.
	 */
	rem = (int)((4 - ((uintptr_t)buf & 3)) & 3);
	if (rem != 0) {
		if (len < rem) {
			rem = len;
		}
		nxge_crc32c_byte(&crc32, buf, rem);
		buf += rem;
		len -= rem;
	}

	/* Aligned middle section, one 32-bit word at a time. */
	if (len > 3) {
		nxge_crc32c_word(&crc32, (const uint32_t *)buf, len / 4);
	}

	/* Trailing bytes that do not make up a whole word. */
	rem = len & 3;
	if (rem != 0) {
		nxge_crc32c_byte(&crc32, buf + len - rem, rem);
	}
	return (crc32);
}
2773859Sml29623 
2783859Sml29623 void
nxge_init_h1_table()2793859Sml29623 nxge_init_h1_table()
2803859Sml29623 {
2813859Sml29623 	uint32_t crc, bit, byte, index;
2823859Sml29623 
2833859Sml29623 	for (index = 0; index < 256; index++) {
2843859Sml29623 		crc = index << 24;
2853859Sml29623 		for (byte = 0; byte < 4; byte++) {
2863859Sml29623 			for (bit = 0; bit < 8; bit++) {
2873859Sml29623 				crc = ((crc & 0x80000000)) ?
288*6929Smisaki 				    (crc << 1) ^ CRC_32C_POLY : crc << 1;
2893859Sml29623 			}
2903859Sml29623 			h1table[byte][index] = crc;
2913859Sml29623 		}
2923859Sml29623 	}
2933859Sml29623 }
2943859Sml29623 
2953859Sml29623 /*
2963859Sml29623  * Reference Neptune H1 computation function
2973859Sml29623  *
2983859Sml29623  * It is a slightly modified implementation of
2993859Sml29623  * CRC-32C implementation
3003859Sml29623  */
3013859Sml29623 
3023859Sml29623 uint32_t
nxge_compute_h1_serial(uint32_t init_value,uint32_t * flow,uint32_t len)3033859Sml29623 nxge_compute_h1_serial(uint32_t init_value, uint32_t *flow, uint32_t len)
3043859Sml29623 {
3053859Sml29623 	int bit, byte;
3063859Sml29623 	uint32_t crc_h1 = init_value;
3073859Sml29623 	uint8_t *buf;
3083859Sml29623 
3093859Sml29623 	buf = (uint8_t *)flow;
3103859Sml29623 	for (byte = 0; byte < len; byte++) {
3113859Sml29623 		for (bit = 0; bit < 8; bit++) {
3123859Sml29623 			crc_h1 = (((crc_h1 >> 24) & 0x80) ^
313*6929Smisaki 			    ((buf[byte] << bit) & 0x80)) ?
314*6929Smisaki 			    (crc_h1 << 1) ^ CRC_32C_POLY : crc_h1 << 1;
3153859Sml29623 		}
3163859Sml29623 	}
3173859Sml29623 
3183859Sml29623 	return (crc_h1);
3193859Sml29623 }
3203859Sml29623 
3213859Sml29623 /*
3223859Sml29623  * table based implementation
3233859Sml29623  * uses 4 four tables in parallel
3243859Sml29623  * 1 for each byte of a 32 bit word
3253859Sml29623  *
3263859Sml29623  * This is the default h1 computing function
3273859Sml29623  *
3283859Sml29623  */
3293859Sml29623 
3303859Sml29623 uint32_t
nxge_compute_h1_table4(uint32_t crcin,uint32_t * flow,uint32_t length)3313859Sml29623 nxge_compute_h1_table4(uint32_t crcin, uint32_t *flow, uint32_t length)
3323859Sml29623 {
3333859Sml29623 	uint32_t w, fw, i, crch1 = crcin;
3343859Sml29623 	uint32_t *buf;
3353859Sml29623 
3363859Sml29623 	buf = (uint32_t *)flow;
3373859Sml29623 
3383859Sml29623 	for (i = 0; i < length / 4; i++) {
3393859Sml29623 #ifdef _BIG_ENDIAN
3403859Sml29623 		fw = buf[i];
3413859Sml29623 #else
3423859Sml29623 		fw = flip32(buf[i]);
3433859Sml29623 		fw = buf[i];
3443859Sml29623 #endif
3453859Sml29623 		w = crch1 ^ fw;
3463859Sml29623 		crch1 = h1table[3][w >> 24] ^ h1table[2][(w >> 16) & 0xff] ^
347*6929Smisaki 		    h1table[1][(w >> 8) & 0xff] ^ h1table[0][w & 0xff];
3483859Sml29623 	}
3493859Sml29623 	return (crch1);
3503859Sml29623 }
3513859Sml29623 
/*
 * Table based implementation.
 * Uses a single table and computes h1 for a byte at a time.
 *
 * crcin is the starting H1 value, flow the input data (read as a
 * sequence of bytes) and length its size in bytes.  Returns the H1
 * value after all bytes have been folded in; when length is zero,
 * crcin is returned unchanged.
 */

uint32_t
nxge_compute_h1_table1(uint32_t crcin, uint32_t *flow, uint32_t length)
{
	/*
	 * Start from crcin so that the result is well defined even when
	 * the loop body never runs.
	 */
	uint32_t i, crch1 = crcin;
	uint8_t *buf;

	buf = (uint8_t *)flow;

	for (i = 0; i < length; i++) {
		/* The macro updates crch1 in place. */
		COMPUTE_H1_BYTE(crch1, buf[i]);
	}

	return (crch1);
}
376