xref: /netbsd-src/crypto/external/bsd/openssl/dist/crypto/whrlpool/wp_block.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /**
2  * The Whirlpool hashing function.
3  *
4  * <P>
5  * <b>References</b>
6  *
7  * <P>
8  * The Whirlpool algorithm was developed by
9  * <a href="mailto:pbarreto@scopus.com.br">Paulo S. L. M. Barreto</a> and
10  * <a href="mailto:vincent.rijmen@cryptomathic.com">Vincent Rijmen</a>.
11  *
12  * See
13  *      P.S.L.M. Barreto, V. Rijmen,
14  *      ``The Whirlpool hashing function,''
15  *      NESSIE submission, 2000 (tweaked version, 2001),
16  *      <https://www.cosic.esat.kuleuven.ac.be/nessie/workshop/submissions/whirlpool.zip>
17  *
18  * Based on "@version 3.0 (2003.03.12)" by Paulo S.L.M. Barreto and
19  * Vincent Rijmen. Look up "reference implementations" on
20  * <http://planeta.terra.com.br/informatica/paulobarreto/>
21  *
22  * =============================================================================
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
25  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
32  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
33  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
34  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  *
36  */
37 
38 #include "wp_locl.h"
39 #include <string.h>
40 
41 typedef unsigned char		u8;
42 #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
43 typedef unsigned __int64	u64;
44 #elif defined(__arch64__)
45 typedef unsigned long		u64;
46 #else
47 typedef unsigned long long	u64;
48 #endif
49 
50 #define ROUNDS	10
51 
52 #define STRICT_ALIGNMENT
53 #if defined(__i386) || defined(__i386__) || \
54     defined(__x86_64) || defined(__x86_64__) || \
55     defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)
56 /* Well, formally there are a couple of other architectures which permit
57  * unaligned loads, at least ones not crossing cache lines, namely
58  * IA-64 and PowerPC... */
59 #  undef STRICT_ALIGNMENT
60 #endif
61 
62 #undef SMALL_REGISTER_BANK
63 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
64 #  define SMALL_REGISTER_BANK
65 #  if defined(WHIRLPOOL_ASM)
66 #    ifndef OPENSSL_SMALL_FOOTPRINT
67 #      define OPENSSL_SMALL_FOOTPRINT	/* it appears that for older non-MMX
68 					   CPUs this is actually faster! */
69 #    endif
70 #    define GO_FOR_MMX(ctx,inp,num)	do {			\
71 	extern unsigned int OPENSSL_ia32cap_P[];		\
72 	void whirlpool_block_mmx(void *,const void *,size_t);	\
73 	if (!(OPENSSL_ia32cap_P[0] & (1<<23)))	break;		\
74         whirlpool_block_mmx(ctx->H.c,inp,num);	return;		\
75 					} while (0)
76 #  endif
77 #endif
78 
79 #undef ROTATE
80 #if defined(_MSC_VER)
81 #  if defined(_WIN64)	/* applies to both IA-64 and AMD64 */
82 #    pragma intrinsic(_rotl64)
83 #    define ROTATE(a,n)	_rotl64((a),n)
84 #  endif
85 #elif defined(__GNUC__) && __GNUC__>=2
86 #  if defined(__x86_64) || defined(__x86_64__)
87 #    if defined(L_ENDIAN)
88 #      define ROTATE(a,n)	({ u64 ret; asm ("rolq %1,%0"	\
89 				   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
90 #    elif defined(B_ENDIAN)
91        /* Most will argue that x86_64 is always little-endian. Well,
92         * yes, but then we have stratus.com, who has modified gcc to
93         * "emulate" big-endian on x86. Is there evidence that they
94         * [or somebody else] won't do the same for x86_64? Naturally not.
95         * And this line is waiting, ready for that brave soul:-) */
96 #      define ROTATE(a,n)	({ u64 ret; asm ("rorq %1,%0"	\
97 				   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
98 #    endif
99 #  elif defined(__ia64) || defined(__ia64__)
100 #    if defined(L_ENDIAN)
101 #      define ROTATE(a,n)	({ u64 ret; asm ("shrp %0=%1,%1,%2"	\
102 				   : "=r"(ret) : "r"(a),"M"(64-(n))); ret; })
103 #    elif defined(B_ENDIAN)
104 #      define ROTATE(a,n)	({ u64 ret; asm ("shrp %0=%1,%1,%2"	\
105 				   : "=r"(ret) : "r"(a),"M"(n)); ret; })
106 #    endif
107 #  endif
108 #endif
109 
110 #if defined(OPENSSL_SMALL_FOOTPRINT)
111 #  if !defined(ROTATE)
112 #    if defined(L_ENDIAN)	/* little-endians have to rotate left */
113 #      define ROTATE(i,n)	((i)<<(n) ^ (i)>>(64-n))
114 #    elif defined(B_ENDIAN)	/* big-endians have to rotate right */
115 #      define ROTATE(i,n)	((i)>>(n) ^ (i)<<(64-n))
116 #    endif
117 #  endif
118 #  if defined(ROTATE) && !defined(STRICT_ALIGNMENT)
119 #    define STRICT_ALIGNMENT	/* ensure smallest table size */
120 #  endif
121 #endif
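
/*
 * For reference, a stand-alone sketch of what the generic ROTATE
 * fallback above computes (assuming 0 < n < 64, which holds for every
 * rotation the Cn macros below request). The little-endian build
 * rotates left and the big-endian one right, so that either way the
 * table row is rotated by whole byte positions in memory order.
 * Not compiled; uses the u64 typedef from the top of this file.
 */
#if 0
static u64 rotl64(u64 x, unsigned n)
	{
	return (x << n) ^ (x >> (64 - n));	/* L_ENDIAN flavour */
	}

static u64 rotr64(u64 x, unsigned n)
	{
	return (x >> n) ^ (x << (64 - n));	/* B_ENDIAN flavour */
	}
#endif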
122 
123 /*
124  * Table size depends on STRICT_ALIGNMENT and on whether or not an
125  * endian-specific ROTATE macro is defined. If STRICT_ALIGNMENT is not
126  * defined, which is normally the case on x86[_64] CPUs, the table is
127  * unconditionally 4KB. Otherwise, if ROTATE is defined, the table is
128  * 2KB, and if not, 16KB. The 2KB table requires a whole bunch of
129  * additional rotations, but I'm willing to "trade," because the 16KB
130  * table certainly trashes the L1 cache. I wish all CPUs could handle
131  * unaligned loads, as the 4KB table neither trashes the cache nor
132  * requires additional rotations.
133  */
134 /*
135  * Note that every Cn macro expands to two loads: one byte load and
136  * one quadword load. One can argue that so many single-byte loads
137  * are excessive, as one could instead load a quadword and "milk" it
138  * for eight 8-bit values. Well, yes, but in order to do so *and*
139  * avoid excessive loads you have to accommodate a handful of 64-bit
140  * values in the register bank and issue a bunch of shifts and masks.
141  * It's a tradeoff: loads vs. shifts and masks in a big register bank[!].
142  * On most CPUs eight single-byte loads are faster, and on the others
143  * I rely on a smart compiler to fold byte loads where beneficial.
144  * Hand-coded assembler would be another alternative:-)
145  */
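
/*
 * Hypothetical, uncompiled sketch of the lookup pattern the Cn macros
 * implement in the 2KB-table (STRICT_ALIGNMENT with ROTATE) case: one
 * byte of the state indexes the shared 256-entry quadword table and the
 * result is rotated into its column position. The helper name and its
 * row/col parameters are illustrative only; the real code inlines this
 * through C0..C7 below.
 */
#if 0
static u64 Cn_lookup(const u64 tbl[256], const u8 state[64], int row, int col)
	{
	u64 v = tbl[state[row*8 + col]];	/* one byte load + one quadword load */
	return col ? ROTATE(v, 8*col) : v;	/* rotate by col byte positions */
	}
#endif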
146 #ifdef STRICT_ALIGNMENT
147 #  if defined(ROTATE)
148 #    define N	1
149 #    define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7
150 #    define C0(K,i)	(Cx.q[K.c[(i)*8+0]])
151 #    define C1(K,i)	ROTATE(Cx.q[K.c[(i)*8+1]],8)
152 #    define C2(K,i)	ROTATE(Cx.q[K.c[(i)*8+2]],16)
153 #    define C3(K,i)	ROTATE(Cx.q[K.c[(i)*8+3]],24)
154 #    define C4(K,i)	ROTATE(Cx.q[K.c[(i)*8+4]],32)
155 #    define C5(K,i)	ROTATE(Cx.q[K.c[(i)*8+5]],40)
156 #    define C6(K,i)	ROTATE(Cx.q[K.c[(i)*8+6]],48)
157 #    define C7(K,i)	ROTATE(Cx.q[K.c[(i)*8+7]],56)
158 #  else
159 #    define N	8
160 #    define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7, \
161 					c7,c0,c1,c2,c3,c4,c5,c6, \
162 					c6,c7,c0,c1,c2,c3,c4,c5, \
163 					c5,c6,c7,c0,c1,c2,c3,c4, \
164 					c4,c5,c6,c7,c0,c1,c2,c3, \
165 					c3,c4,c5,c6,c7,c0,c1,c2, \
166 					c2,c3,c4,c5,c6,c7,c0,c1, \
167 					c1,c2,c3,c4,c5,c6,c7,c0
168 #    define C0(K,i)	(Cx.q[0+8*K.c[(i)*8+0]])
169 #    define C1(K,i)	(Cx.q[1+8*K.c[(i)*8+1]])
170 #    define C2(K,i)	(Cx.q[2+8*K.c[(i)*8+2]])
171 #    define C3(K,i)	(Cx.q[3+8*K.c[(i)*8+3]])
172 #    define C4(K,i)	(Cx.q[4+8*K.c[(i)*8+4]])
173 #    define C5(K,i)	(Cx.q[5+8*K.c[(i)*8+5]])
174 #    define C6(K,i)	(Cx.q[6+8*K.c[(i)*8+6]])
175 #    define C7(K,i)	(Cx.q[7+8*K.c[(i)*8+7]])
176 #  endif
177 #else
178 #  define N	2
179 #  define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7, \
180 					c0,c1,c2,c3,c4,c5,c6,c7
181 #  define C0(K,i)	(((u64*)(Cx.c+0))[2*K.c[(i)*8+0]])
182 #  define C1(K,i)	(((u64*)(Cx.c+7))[2*K.c[(i)*8+1]])
183 #  define C2(K,i)	(((u64*)(Cx.c+6))[2*K.c[(i)*8+2]])
184 #  define C3(K,i)	(((u64*)(Cx.c+5))[2*K.c[(i)*8+3]])
185 #  define C4(K,i)	(((u64*)(Cx.c+4))[2*K.c[(i)*8+4]])
186 #  define C5(K,i)	(((u64*)(Cx.c+3))[2*K.c[(i)*8+5]])
187 #  define C6(K,i)	(((u64*)(Cx.c+2))[2*K.c[(i)*8+6]])
188 #  define C7(K,i)	(((u64*)(Cx.c+1))[2*K.c[(i)*8+7]])
189 #endif
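
/*
 * Why the 4KB variant above needs neither ROTATE nor aligned data: each
 * table row is stored twice back to back, so a quadword read at byte
 * offset (8-j)&7 into the doubled entry yields the row rotated by j
 * byte positions "for free", on either endianness. A hypothetical,
 * uncompiled illustration (memcpy stands in for the misaligned load
 * that the real macros perform directly):
 */
#if 0
static u64 rotated_row(const u64 pair[2], int j)	/* pair[0] == pair[1] */
	{
	u64 v;
	memcpy(&v, (const u8 *)pair + ((8 - j) & 7), sizeof(v));
	return v;
	}
#endif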
190 
191 static const
192 union	{
193 	u8	c[(256*N+ROUNDS)*sizeof(u64)];
194 	u64	q[(256*N+ROUNDS)];
195 	} Cx = { {
196 	/* Note endian-neutral representation:-) */
197 	LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8),
198 	LL(0x23,0x23,0x8c,0x23,0x05,0xaf,0x46,0x26),
199 	LL(0xc6,0xc6,0x3f,0xc6,0x7e,0xf9,0x91,0xb8),
200 	LL(0xe8,0xe8,0x87,0xe8,0x13,0x6f,0xcd,0xfb),
201 	LL(0x87,0x87,0x26,0x87,0x4c,0xa1,0x13,0xcb),
202 	LL(0xb8,0xb8,0xda,0xb8,0xa9,0x62,0x6d,0x11),
203 	LL(0x01,0x01,0x04,0x01,0x08,0x05,0x02,0x09),
204 	LL(0x4f,0x4f,0x21,0x4f,0x42,0x6e,0x9e,0x0d),
205 	LL(0x36,0x36,0xd8,0x36,0xad,0xee,0x6c,0x9b),
206 	LL(0xa6,0xa6,0xa2,0xa6,0x59,0x04,0x51,0xff),
207 	LL(0xd2,0xd2,0x6f,0xd2,0xde,0xbd,0xb9,0x0c),
208 	LL(0xf5,0xf5,0xf3,0xf5,0xfb,0x06,0xf7,0x0e),
209 	LL(0x79,0x79,0xf9,0x79,0xef,0x80,0xf2,0x96),
210 	LL(0x6f,0x6f,0xa1,0x6f,0x5f,0xce,0xde,0x30),
211 	LL(0x91,0x91,0x7e,0x91,0xfc,0xef,0x3f,0x6d),
212 	LL(0x52,0x52,0x55,0x52,0xaa,0x07,0xa4,0xf8),
213 	LL(0x60,0x60,0x9d,0x60,0x27,0xfd,0xc0,0x47),
214 	LL(0xbc,0xbc,0xca,0xbc,0x89,0x76,0x65,0x35),
215 	LL(0x9b,0x9b,0x56,0x9b,0xac,0xcd,0x2b,0x37),
216 	LL(0x8e,0x8e,0x02,0x8e,0x04,0x8c,0x01,0x8a),
217 	LL(0xa3,0xa3,0xb6,0xa3,0x71,0x15,0x5b,0xd2),
218 	LL(0x0c,0x0c,0x30,0x0c,0x60,0x3c,0x18,0x6c),
219 	LL(0x7b,0x7b,0xf1,0x7b,0xff,0x8a,0xf6,0x84),
220 	LL(0x35,0x35,0xd4,0x35,0xb5,0xe1,0x6a,0x80),
221 	LL(0x1d,0x1d,0x74,0x1d,0xe8,0x69,0x3a,0xf5),
222 	LL(0xe0,0xe0,0xa7,0xe0,0x53,0x47,0xdd,0xb3),
223 	LL(0xd7,0xd7,0x7b,0xd7,0xf6,0xac,0xb3,0x21),
224 	LL(0xc2,0xc2,0x2f,0xc2,0x5e,0xed,0x99,0x9c),
225 	LL(0x2e,0x2e,0xb8,0x2e,0x6d,0x96,0x5c,0x43),
226 	LL(0x4b,0x4b,0x31,0x4b,0x62,0x7a,0x96,0x29),
227 	LL(0xfe,0xfe,0xdf,0xfe,0xa3,0x21,0xe1,0x5d),
228 	LL(0x57,0x57,0x41,0x57,0x82,0x16,0xae,0xd5),
229 	LL(0x15,0x15,0x54,0x15,0xa8,0x41,0x2a,0xbd),
230 	LL(0x77,0x77,0xc1,0x77,0x9f,0xb6,0xee,0xe8),
231 	LL(0x37,0x37,0xdc,0x37,0xa5,0xeb,0x6e,0x92),
232 	LL(0xe5,0xe5,0xb3,0xe5,0x7b,0x56,0xd7,0x9e),
233 	LL(0x9f,0x9f,0x46,0x9f,0x8c,0xd9,0x23,0x13),
234 	LL(0xf0,0xf0,0xe7,0xf0,0xd3,0x17,0xfd,0x23),
235 	LL(0x4a,0x4a,0x35,0x4a,0x6a,0x7f,0x94,0x20),
236 	LL(0xda,0xda,0x4f,0xda,0x9e,0x95,0xa9,0x44),
237 	LL(0x58,0x58,0x7d,0x58,0xfa,0x25,0xb0,0xa2),
238 	LL(0xc9,0xc9,0x03,0xc9,0x06,0xca,0x8f,0xcf),
239 	LL(0x29,0x29,0xa4,0x29,0x55,0x8d,0x52,0x7c),
240 	LL(0x0a,0x0a,0x28,0x0a,0x50,0x22,0x14,0x5a),
241 	LL(0xb1,0xb1,0xfe,0xb1,0xe1,0x4f,0x7f,0x50),
242 	LL(0xa0,0xa0,0xba,0xa0,0x69,0x1a,0x5d,0xc9),
243 	LL(0x6b,0x6b,0xb1,0x6b,0x7f,0xda,0xd6,0x14),
244 	LL(0x85,0x85,0x2e,0x85,0x5c,0xab,0x17,0xd9),
245 	LL(0xbd,0xbd,0xce,0xbd,0x81,0x73,0x67,0x3c),
246 	LL(0x5d,0x5d,0x69,0x5d,0xd2,0x34,0xba,0x8f),
247 	LL(0x10,0x10,0x40,0x10,0x80,0x50,0x20,0x90),
248 	LL(0xf4,0xf4,0xf7,0xf4,0xf3,0x03,0xf5,0x07),
249 	LL(0xcb,0xcb,0x0b,0xcb,0x16,0xc0,0x8b,0xdd),
250 	LL(0x3e,0x3e,0xf8,0x3e,0xed,0xc6,0x7c,0xd3),
251 	LL(0x05,0x05,0x14,0x05,0x28,0x11,0x0a,0x2d),
252 	LL(0x67,0x67,0x81,0x67,0x1f,0xe6,0xce,0x78),
253 	LL(0xe4,0xe4,0xb7,0xe4,0x73,0x53,0xd5,0x97),
254 	LL(0x27,0x27,0x9c,0x27,0x25,0xbb,0x4e,0x02),
255 	LL(0x41,0x41,0x19,0x41,0x32,0x58,0x82,0x73),
256 	LL(0x8b,0x8b,0x16,0x8b,0x2c,0x9d,0x0b,0xa7),
257 	LL(0xa7,0xa7,0xa6,0xa7,0x51,0x01,0x53,0xf6),
258 	LL(0x7d,0x7d,0xe9,0x7d,0xcf,0x94,0xfa,0xb2),
259 	LL(0x95,0x95,0x6e,0x95,0xdc,0xfb,0x37,0x49),
260 	LL(0xd8,0xd8,0x47,0xd8,0x8e,0x9f,0xad,0x56),
261 	LL(0xfb,0xfb,0xcb,0xfb,0x8b,0x30,0xeb,0x70),
262 	LL(0xee,0xee,0x9f,0xee,0x23,0x71,0xc1,0xcd),
263 	LL(0x7c,0x7c,0xed,0x7c,0xc7,0x91,0xf8,0xbb),
264 	LL(0x66,0x66,0x85,0x66,0x17,0xe3,0xcc,0x71),
265 	LL(0xdd,0xdd,0x53,0xdd,0xa6,0x8e,0xa7,0x7b),
266 	LL(0x17,0x17,0x5c,0x17,0xb8,0x4b,0x2e,0xaf),
267 	LL(0x47,0x47,0x01,0x47,0x02,0x46,0x8e,0x45),
268 	LL(0x9e,0x9e,0x42,0x9e,0x84,0xdc,0x21,0x1a),
269 	LL(0xca,0xca,0x0f,0xca,0x1e,0xc5,0x89,0xd4),
270 	LL(0x2d,0x2d,0xb4,0x2d,0x75,0x99,0x5a,0x58),
271 	LL(0xbf,0xbf,0xc6,0xbf,0x91,0x79,0x63,0x2e),
272 	LL(0x07,0x07,0x1c,0x07,0x38,0x1b,0x0e,0x3f),
273 	LL(0xad,0xad,0x8e,0xad,0x01,0x23,0x47,0xac),
274 	LL(0x5a,0x5a,0x75,0x5a,0xea,0x2f,0xb4,0xb0),
275 	LL(0x83,0x83,0x36,0x83,0x6c,0xb5,0x1b,0xef),
276 	LL(0x33,0x33,0xcc,0x33,0x85,0xff,0x66,0xb6),
277 	LL(0x63,0x63,0x91,0x63,0x3f,0xf2,0xc6,0x5c),
278 	LL(0x02,0x02,0x08,0x02,0x10,0x0a,0x04,0x12),
279 	LL(0xaa,0xaa,0x92,0xaa,0x39,0x38,0x49,0x93),
280 	LL(0x71,0x71,0xd9,0x71,0xaf,0xa8,0xe2,0xde),
281 	LL(0xc8,0xc8,0x07,0xc8,0x0e,0xcf,0x8d,0xc6),
282 	LL(0x19,0x19,0x64,0x19,0xc8,0x7d,0x32,0xd1),
283 	LL(0x49,0x49,0x39,0x49,0x72,0x70,0x92,0x3b),
284 	LL(0xd9,0xd9,0x43,0xd9,0x86,0x9a,0xaf,0x5f),
285 	LL(0xf2,0xf2,0xef,0xf2,0xc3,0x1d,0xf9,0x31),
286 	LL(0xe3,0xe3,0xab,0xe3,0x4b,0x48,0xdb,0xa8),
287 	LL(0x5b,0x5b,0x71,0x5b,0xe2,0x2a,0xb6,0xb9),
288 	LL(0x88,0x88,0x1a,0x88,0x34,0x92,0x0d,0xbc),
289 	LL(0x9a,0x9a,0x52,0x9a,0xa4,0xc8,0x29,0x3e),
290 	LL(0x26,0x26,0x98,0x26,0x2d,0xbe,0x4c,0x0b),
291 	LL(0x32,0x32,0xc8,0x32,0x8d,0xfa,0x64,0xbf),
292 	LL(0xb0,0xb0,0xfa,0xb0,0xe9,0x4a,0x7d,0x59),
293 	LL(0xe9,0xe9,0x83,0xe9,0x1b,0x6a,0xcf,0xf2),
294 	LL(0x0f,0x0f,0x3c,0x0f,0x78,0x33,0x1e,0x77),
295 	LL(0xd5,0xd5,0x73,0xd5,0xe6,0xa6,0xb7,0x33),
296 	LL(0x80,0x80,0x3a,0x80,0x74,0xba,0x1d,0xf4),
297 	LL(0xbe,0xbe,0xc2,0xbe,0x99,0x7c,0x61,0x27),
298 	LL(0xcd,0xcd,0x13,0xcd,0x26,0xde,0x87,0xeb),
299 	LL(0x34,0x34,0xd0,0x34,0xbd,0xe4,0x68,0x89),
300 	LL(0x48,0x48,0x3d,0x48,0x7a,0x75,0x90,0x32),
301 	LL(0xff,0xff,0xdb,0xff,0xab,0x24,0xe3,0x54),
302 	LL(0x7a,0x7a,0xf5,0x7a,0xf7,0x8f,0xf4,0x8d),
303 	LL(0x90,0x90,0x7a,0x90,0xf4,0xea,0x3d,0x64),
304 	LL(0x5f,0x5f,0x61,0x5f,0xc2,0x3e,0xbe,0x9d),
305 	LL(0x20,0x20,0x80,0x20,0x1d,0xa0,0x40,0x3d),
306 	LL(0x68,0x68,0xbd,0x68,0x67,0xd5,0xd0,0x0f),
307 	LL(0x1a,0x1a,0x68,0x1a,0xd0,0x72,0x34,0xca),
308 	LL(0xae,0xae,0x82,0xae,0x19,0x2c,0x41,0xb7),
309 	LL(0xb4,0xb4,0xea,0xb4,0xc9,0x5e,0x75,0x7d),
310 	LL(0x54,0x54,0x4d,0x54,0x9a,0x19,0xa8,0xce),
311 	LL(0x93,0x93,0x76,0x93,0xec,0xe5,0x3b,0x7f),
312 	LL(0x22,0x22,0x88,0x22,0x0d,0xaa,0x44,0x2f),
313 	LL(0x64,0x64,0x8d,0x64,0x07,0xe9,0xc8,0x63),
314 	LL(0xf1,0xf1,0xe3,0xf1,0xdb,0x12,0xff,0x2a),
315 	LL(0x73,0x73,0xd1,0x73,0xbf,0xa2,0xe6,0xcc),
316 	LL(0x12,0x12,0x48,0x12,0x90,0x5a,0x24,0x82),
317 	LL(0x40,0x40,0x1d,0x40,0x3a,0x5d,0x80,0x7a),
318 	LL(0x08,0x08,0x20,0x08,0x40,0x28,0x10,0x48),
319 	LL(0xc3,0xc3,0x2b,0xc3,0x56,0xe8,0x9b,0x95),
320 	LL(0xec,0xec,0x97,0xec,0x33,0x7b,0xc5,0xdf),
321 	LL(0xdb,0xdb,0x4b,0xdb,0x96,0x90,0xab,0x4d),
322 	LL(0xa1,0xa1,0xbe,0xa1,0x61,0x1f,0x5f,0xc0),
323 	LL(0x8d,0x8d,0x0e,0x8d,0x1c,0x83,0x07,0x91),
324 	LL(0x3d,0x3d,0xf4,0x3d,0xf5,0xc9,0x7a,0xc8),
325 	LL(0x97,0x97,0x66,0x97,0xcc,0xf1,0x33,0x5b),
326 	LL(0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00),
327 	LL(0xcf,0xcf,0x1b,0xcf,0x36,0xd4,0x83,0xf9),
328 	LL(0x2b,0x2b,0xac,0x2b,0x45,0x87,0x56,0x6e),
329 	LL(0x76,0x76,0xc5,0x76,0x97,0xb3,0xec,0xe1),
330 	LL(0x82,0x82,0x32,0x82,0x64,0xb0,0x19,0xe6),
331 	LL(0xd6,0xd6,0x7f,0xd6,0xfe,0xa9,0xb1,0x28),
332 	LL(0x1b,0x1b,0x6c,0x1b,0xd8,0x77,0x36,0xc3),
333 	LL(0xb5,0xb5,0xee,0xb5,0xc1,0x5b,0x77,0x74),
334 	LL(0xaf,0xaf,0x86,0xaf,0x11,0x29,0x43,0xbe),
335 	LL(0x6a,0x6a,0xb5,0x6a,0x77,0xdf,0xd4,0x1d),
336 	LL(0x50,0x50,0x5d,0x50,0xba,0x0d,0xa0,0xea),
337 	LL(0x45,0x45,0x09,0x45,0x12,0x4c,0x8a,0x57),
338 	LL(0xf3,0xf3,0xeb,0xf3,0xcb,0x18,0xfb,0x38),
339 	LL(0x30,0x30,0xc0,0x30,0x9d,0xf0,0x60,0xad),
340 	LL(0xef,0xef,0x9b,0xef,0x2b,0x74,0xc3,0xc4),
341 	LL(0x3f,0x3f,0xfc,0x3f,0xe5,0xc3,0x7e,0xda),
342 	LL(0x55,0x55,0x49,0x55,0x92,0x1c,0xaa,0xc7),
343 	LL(0xa2,0xa2,0xb2,0xa2,0x79,0x10,0x59,0xdb),
344 	LL(0xea,0xea,0x8f,0xea,0x03,0x65,0xc9,0xe9),
345 	LL(0x65,0x65,0x89,0x65,0x0f,0xec,0xca,0x6a),
346 	LL(0xba,0xba,0xd2,0xba,0xb9,0x68,0x69,0x03),
347 	LL(0x2f,0x2f,0xbc,0x2f,0x65,0x93,0x5e,0x4a),
348 	LL(0xc0,0xc0,0x27,0xc0,0x4e,0xe7,0x9d,0x8e),
349 	LL(0xde,0xde,0x5f,0xde,0xbe,0x81,0xa1,0x60),
350 	LL(0x1c,0x1c,0x70,0x1c,0xe0,0x6c,0x38,0xfc),
351 	LL(0xfd,0xfd,0xd3,0xfd,0xbb,0x2e,0xe7,0x46),
352 	LL(0x4d,0x4d,0x29,0x4d,0x52,0x64,0x9a,0x1f),
353 	LL(0x92,0x92,0x72,0x92,0xe4,0xe0,0x39,0x76),
354 	LL(0x75,0x75,0xc9,0x75,0x8f,0xbc,0xea,0xfa),
355 	LL(0x06,0x06,0x18,0x06,0x30,0x1e,0x0c,0x36),
356 	LL(0x8a,0x8a,0x12,0x8a,0x24,0x98,0x09,0xae),
357 	LL(0xb2,0xb2,0xf2,0xb2,0xf9,0x40,0x79,0x4b),
358 	LL(0xe6,0xe6,0xbf,0xe6,0x63,0x59,0xd1,0x85),
359 	LL(0x0e,0x0e,0x38,0x0e,0x70,0x36,0x1c,0x7e),
360 	LL(0x1f,0x1f,0x7c,0x1f,0xf8,0x63,0x3e,0xe7),
361 	LL(0x62,0x62,0x95,0x62,0x37,0xf7,0xc4,0x55),
362 	LL(0xd4,0xd4,0x77,0xd4,0xee,0xa3,0xb5,0x3a),
363 	LL(0xa8,0xa8,0x9a,0xa8,0x29,0x32,0x4d,0x81),
364 	LL(0x96,0x96,0x62,0x96,0xc4,0xf4,0x31,0x52),
365 	LL(0xf9,0xf9,0xc3,0xf9,0x9b,0x3a,0xef,0x62),
366 	LL(0xc5,0xc5,0x33,0xc5,0x66,0xf6,0x97,0xa3),
367 	LL(0x25,0x25,0x94,0x25,0x35,0xb1,0x4a,0x10),
368 	LL(0x59,0x59,0x79,0x59,0xf2,0x20,0xb2,0xab),
369 	LL(0x84,0x84,0x2a,0x84,0x54,0xae,0x15,0xd0),
370 	LL(0x72,0x72,0xd5,0x72,0xb7,0xa7,0xe4,0xc5),
371 	LL(0x39,0x39,0xe4,0x39,0xd5,0xdd,0x72,0xec),
372 	LL(0x4c,0x4c,0x2d,0x4c,0x5a,0x61,0x98,0x16),
373 	LL(0x5e,0x5e,0x65,0x5e,0xca,0x3b,0xbc,0x94),
374 	LL(0x78,0x78,0xfd,0x78,0xe7,0x85,0xf0,0x9f),
375 	LL(0x38,0x38,0xe0,0x38,0xdd,0xd8,0x70,0xe5),
376 	LL(0x8c,0x8c,0x0a,0x8c,0x14,0x86,0x05,0x98),
377 	LL(0xd1,0xd1,0x63,0xd1,0xc6,0xb2,0xbf,0x17),
378 	LL(0xa5,0xa5,0xae,0xa5,0x41,0x0b,0x57,0xe4),
379 	LL(0xe2,0xe2,0xaf,0xe2,0x43,0x4d,0xd9,0xa1),
380 	LL(0x61,0x61,0x99,0x61,0x2f,0xf8,0xc2,0x4e),
381 	LL(0xb3,0xb3,0xf6,0xb3,0xf1,0x45,0x7b,0x42),
382 	LL(0x21,0x21,0x84,0x21,0x15,0xa5,0x42,0x34),
383 	LL(0x9c,0x9c,0x4a,0x9c,0x94,0xd6,0x25,0x08),
384 	LL(0x1e,0x1e,0x78,0x1e,0xf0,0x66,0x3c,0xee),
385 	LL(0x43,0x43,0x11,0x43,0x22,0x52,0x86,0x61),
386 	LL(0xc7,0xc7,0x3b,0xc7,0x76,0xfc,0x93,0xb1),
387 	LL(0xfc,0xfc,0xd7,0xfc,0xb3,0x2b,0xe5,0x4f),
388 	LL(0x04,0x04,0x10,0x04,0x20,0x14,0x08,0x24),
389 	LL(0x51,0x51,0x59,0x51,0xb2,0x08,0xa2,0xe3),
390 	LL(0x99,0x99,0x5e,0x99,0xbc,0xc7,0x2f,0x25),
391 	LL(0x6d,0x6d,0xa9,0x6d,0x4f,0xc4,0xda,0x22),
392 	LL(0x0d,0x0d,0x34,0x0d,0x68,0x39,0x1a,0x65),
393 	LL(0xfa,0xfa,0xcf,0xfa,0x83,0x35,0xe9,0x79),
394 	LL(0xdf,0xdf,0x5b,0xdf,0xb6,0x84,0xa3,0x69),
395 	LL(0x7e,0x7e,0xe5,0x7e,0xd7,0x9b,0xfc,0xa9),
396 	LL(0x24,0x24,0x90,0x24,0x3d,0xb4,0x48,0x19),
397 	LL(0x3b,0x3b,0xec,0x3b,0xc5,0xd7,0x76,0xfe),
398 	LL(0xab,0xab,0x96,0xab,0x31,0x3d,0x4b,0x9a),
399 	LL(0xce,0xce,0x1f,0xce,0x3e,0xd1,0x81,0xf0),
400 	LL(0x11,0x11,0x44,0x11,0x88,0x55,0x22,0x99),
401 	LL(0x8f,0x8f,0x06,0x8f,0x0c,0x89,0x03,0x83),
402 	LL(0x4e,0x4e,0x25,0x4e,0x4a,0x6b,0x9c,0x04),
403 	LL(0xb7,0xb7,0xe6,0xb7,0xd1,0x51,0x73,0x66),
404 	LL(0xeb,0xeb,0x8b,0xeb,0x0b,0x60,0xcb,0xe0),
405 	LL(0x3c,0x3c,0xf0,0x3c,0xfd,0xcc,0x78,0xc1),
406 	LL(0x81,0x81,0x3e,0x81,0x7c,0xbf,0x1f,0xfd),
407 	LL(0x94,0x94,0x6a,0x94,0xd4,0xfe,0x35,0x40),
408 	LL(0xf7,0xf7,0xfb,0xf7,0xeb,0x0c,0xf3,0x1c),
409 	LL(0xb9,0xb9,0xde,0xb9,0xa1,0x67,0x6f,0x18),
410 	LL(0x13,0x13,0x4c,0x13,0x98,0x5f,0x26,0x8b),
411 	LL(0x2c,0x2c,0xb0,0x2c,0x7d,0x9c,0x58,0x51),
412 	LL(0xd3,0xd3,0x6b,0xd3,0xd6,0xb8,0xbb,0x05),
413 	LL(0xe7,0xe7,0xbb,0xe7,0x6b,0x5c,0xd3,0x8c),
414 	LL(0x6e,0x6e,0xa5,0x6e,0x57,0xcb,0xdc,0x39),
415 	LL(0xc4,0xc4,0x37,0xc4,0x6e,0xf3,0x95,0xaa),
416 	LL(0x03,0x03,0x0c,0x03,0x18,0x0f,0x06,0x1b),
417 	LL(0x56,0x56,0x45,0x56,0x8a,0x13,0xac,0xdc),
418 	LL(0x44,0x44,0x0d,0x44,0x1a,0x49,0x88,0x5e),
419 	LL(0x7f,0x7f,0xe1,0x7f,0xdf,0x9e,0xfe,0xa0),
420 	LL(0xa9,0xa9,0x9e,0xa9,0x21,0x37,0x4f,0x88),
421 	LL(0x2a,0x2a,0xa8,0x2a,0x4d,0x82,0x54,0x67),
422 	LL(0xbb,0xbb,0xd6,0xbb,0xb1,0x6d,0x6b,0x0a),
423 	LL(0xc1,0xc1,0x23,0xc1,0x46,0xe2,0x9f,0x87),
424 	LL(0x53,0x53,0x51,0x53,0xa2,0x02,0xa6,0xf1),
425 	LL(0xdc,0xdc,0x57,0xdc,0xae,0x8b,0xa5,0x72),
426 	LL(0x0b,0x0b,0x2c,0x0b,0x58,0x27,0x16,0x53),
427 	LL(0x9d,0x9d,0x4e,0x9d,0x9c,0xd3,0x27,0x01),
428 	LL(0x6c,0x6c,0xad,0x6c,0x47,0xc1,0xd8,0x2b),
429 	LL(0x31,0x31,0xc4,0x31,0x95,0xf5,0x62,0xa4),
430 	LL(0x74,0x74,0xcd,0x74,0x87,0xb9,0xe8,0xf3),
431 	LL(0xf6,0xf6,0xff,0xf6,0xe3,0x09,0xf1,0x15),
432 	LL(0x46,0x46,0x05,0x46,0x0a,0x43,0x8c,0x4c),
433 	LL(0xac,0xac,0x8a,0xac,0x09,0x26,0x45,0xa5),
434 	LL(0x89,0x89,0x1e,0x89,0x3c,0x97,0x0f,0xb5),
435 	LL(0x14,0x14,0x50,0x14,0xa0,0x44,0x28,0xb4),
436 	LL(0xe1,0xe1,0xa3,0xe1,0x5b,0x42,0xdf,0xba),
437 	LL(0x16,0x16,0x58,0x16,0xb0,0x4e,0x2c,0xa6),
438 	LL(0x3a,0x3a,0xe8,0x3a,0xcd,0xd2,0x74,0xf7),
439 	LL(0x69,0x69,0xb9,0x69,0x6f,0xd0,0xd2,0x06),
440 	LL(0x09,0x09,0x24,0x09,0x48,0x2d,0x12,0x41),
441 	LL(0x70,0x70,0xdd,0x70,0xa7,0xad,0xe0,0xd7),
442 	LL(0xb6,0xb6,0xe2,0xb6,0xd9,0x54,0x71,0x6f),
443 	LL(0xd0,0xd0,0x67,0xd0,0xce,0xb7,0xbd,0x1e),
444 	LL(0xed,0xed,0x93,0xed,0x3b,0x7e,0xc7,0xd6),
445 	LL(0xcc,0xcc,0x17,0xcc,0x2e,0xdb,0x85,0xe2),
446 	LL(0x42,0x42,0x15,0x42,0x2a,0x57,0x84,0x68),
447 	LL(0x98,0x98,0x5a,0x98,0xb4,0xc2,0x2d,0x2c),
448 	LL(0xa4,0xa4,0xaa,0xa4,0x49,0x0e,0x55,0xed),
449 	LL(0x28,0x28,0xa0,0x28,0x5d,0x88,0x50,0x75),
450 	LL(0x5c,0x5c,0x6d,0x5c,0xda,0x31,0xb8,0x86),
451 	LL(0xf8,0xf8,0xc7,0xf8,0x93,0x3f,0xed,0x6b),
452 	LL(0x86,0x86,0x22,0x86,0x44,0xa4,0x11,0xc2),
453 #define RC	(&(Cx.q[256*N]))
454 	0x18,0x23,0xc6,0xe8,0x87,0xb8,0x01,0x4f,	/* rc[ROUNDS] */
455 	0x36,0xa6,0xd2,0xf5,0x79,0x6f,0x91,0x52,
456 	0x60,0xbc,0x9b,0x8e,0xa3,0x0c,0x7b,0x35,
457 	0x1d,0xe0,0xd7,0xc2,0x2e,0x4b,0xfe,0x57,
458 	0x15,0x77,0x37,0xe5,0x9f,0xf0,0x4a,0xda,
459 	0x58,0xc9,0x29,0x0a,0xb1,0xa0,0x6b,0x85,
460 	0xbd,0x5d,0x10,0xf4,0xcb,0x3e,0x05,0x67,
461 	0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8,
462 	0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e,
463 	0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33
464 	}
465 };
466 
467 void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n)
468 	{
469 	int	r;
470 	const u8 *p=inp;
471 	union	{ u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q;
472 
473 #ifdef GO_FOR_MMX
474 	GO_FOR_MMX(ctx,inp,n);
475 #endif
476 							do {
477 #ifdef OPENSSL_SMALL_FOOTPRINT
478 	u64	L[8];
479 	int	i;
480 
481 	for (i=0;i<64;i++)	S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
482 	for (r=0;r<ROUNDS;r++)
483 		{
484 		for (i=0;i<8;i++)
485 			{
486 			L[i]  = i ? 0 : RC[r];
487 			L[i] ^=	C0(K,i)       ^ C1(K,(i-1)&7) ^
488 				C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^
489 				C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^
490 				C6(K,(i-6)&7) ^ C7(K,(i-7)&7);
491 			}
492 		memcpy (K.q,L,64);
493 		for (i=0;i<8;i++)
494 			{
495 			L[i] ^= C0(S,i)       ^ C1(S,(i-1)&7) ^
496 				C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^
497 				C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^
498 				C6(S,(i-6)&7) ^ C7(S,(i-7)&7);
499 			}
500 		memcpy (S.q,L,64);
501 		}
502 	for (i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
503 #else
504 	u64	L0,L1,L2,L3,L4,L5,L6,L7;
505 
506 #ifdef STRICT_ALIGNMENT
507 	if ((size_t)p & 7)
508 		{
509 		memcpy (S.c,p,64);
510 		S.q[0] ^= (K.q[0] = H->q[0]);
511 		S.q[1] ^= (K.q[1] = H->q[1]);
512 		S.q[2] ^= (K.q[2] = H->q[2]);
513 		S.q[3] ^= (K.q[3] = H->q[3]);
514 		S.q[4] ^= (K.q[4] = H->q[4]);
515 		S.q[5] ^= (K.q[5] = H->q[5]);
516 		S.q[6] ^= (K.q[6] = H->q[6]);
517 		S.q[7] ^= (K.q[7] = H->q[7]);
518 		}
519 	else
520 #endif
521 		{
522 		const u64 *pa = (const u64*)p;
523 		S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
524 		S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
525 		S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
526 		S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
527 		S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
528 		S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
529 		S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
530 		S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
531 		}
532 
533 	for(r=0;r<ROUNDS;r++)
534 		{
535 #ifdef SMALL_REGISTER_BANK
536 		L0 =	C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^
537 			C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r];
538 		L1 =	C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^
539 			C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2);
540 		L2 =	C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^
541 			C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3);
542 		L3 =	C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^
543 			C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4);
544 		L4 =	C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^
545 			C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5);
546 		L5 =	C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^
547 			C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6);
548 		L6 =	C0(K,6) ^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^
549 			C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7);
550 		L7 =	C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^
551 			C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0);
552 
553 		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
554 		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;
555 
556 		L0 ^=	C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^
557 			C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1);
558 		L1 ^=	C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^
559 			C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2);
560 		L2 ^=	C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^
561 			C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3);
562 		L3 ^=	C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^
563 			C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4);
564 		L4 ^=	C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^
565 			C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5);
566 		L5 ^=	C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^
567 			C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6);
568 		L6 ^=	C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^
569 			C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7);
570 		L7 ^=	C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^
571 			C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0);
572 
573 		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
574 		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
575 #else
576 		L0  = C0(K,0); L1  = C1(K,0); L2  = C2(K,0); L3  = C3(K,0);
577 		L4  = C4(K,0); L5  = C5(K,0); L6  = C6(K,0); L7  = C7(K,0);
578 		L0 ^= RC[r];
579 
580 		L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1);
581 		L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1);
582 
583 		L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2);
584 		L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2);
585 
586 		L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3);
587 		L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3);
588 
589 		L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4);
590 		L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4);
591 
592 		L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5);
593 		L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5);
594 
595 		L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6);
596 		L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6);
597 
598 		L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7);
599 		L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); L6 ^= C7(K,7);
600 
601 		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
602 		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;
603 
604 		L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0);
605 		L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0);
606 
607 		L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1);
608 		L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1);
609 
610 		L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2);
611 		L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2);
612 
613 		L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3);
614 		L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3);
615 
616 		L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4);
617 		L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4);
618 
619 		L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5);
620 		L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5);
621 
622 		L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6);
623 		L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6);
624 
625 		L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7);
626 		L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7);
627 
628 		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
629 		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
630 #endif
631 		}
632 
633 #ifdef STRICT_ALIGNMENT
634 	if ((size_t)p & 7)
635 		{
636 		int i;
637 		for(i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
638 		}
639 	else
640 #endif
641 		{
642 		const u64 *pa=(const u64 *)p;
643 		H->q[0] ^= S.q[0] ^ pa[0];
644 		H->q[1] ^= S.q[1] ^ pa[1];
645 		H->q[2] ^= S.q[2] ^ pa[2];
646 		H->q[3] ^= S.q[3] ^ pa[3];
647 		H->q[4] ^= S.q[4] ^ pa[4];
648 		H->q[5] ^= S.q[5] ^ pa[5];
649 		H->q[6] ^= S.q[6] ^ pa[6];
650 		H->q[7] ^= S.q[7] ^ pa[7];
651 		}
652 #endif
653 							p += 64;
654 							} while(--n);
655 	}
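
/*
 * whirlpool_block() is the internal compression function; applications
 * normally reach it through the streaming interface declared in
 * <openssl/whrlpool.h>. A minimal, uncompiled usage sketch, hashing the
 * three-byte message "abc" and printing the 512-bit digest:
 */
#if 0
#include <stdio.h>
#include <openssl/whrlpool.h>

int main(void)
	{
	static const unsigned char msg[] = "abc";
	unsigned char md[WHIRLPOOL_DIGEST_LENGTH];
	WHIRLPOOL_CTX ctx;
	int i;

	WHIRLPOOL_Init(&ctx);
	WHIRLPOOL_Update(&ctx, msg, sizeof(msg) - 1);
	WHIRLPOOL_Final(md, &ctx);

	for (i = 0; i < WHIRLPOOL_DIGEST_LENGTH; i++)
		printf("%02x", md[i]);
	printf("\n");
	return 0;
	}
#endif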
656