/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "../arcfour.h"

/* Initialize the key stream 'key' using the key value */
void
arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
{
/* EXPORT DELETE START */

	uchar_t ext_keyval[256];
	uchar_t tmp;
	int i, j;

	for (i = j = 0; i < 256; i++, j++) {
		if (j == keyvallen)
			j = 0;

		ext_keyval[i] = keyval[j];
	}
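	/*
	 * Standard RC4 key schedule: start from the identity permutation,
	 * then shuffle it with swaps driven by the (repeated) key bytes in
	 * ext_keyval[].
	 */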
	for (i = 0; i < 256; i++)
		key->arr[i] = (uchar_t)i;

	j = 0;
	for (i = 0; i < 256; i++) {
		j = (j + key->arr[i] + ext_keyval[i]) % 256;
		tmp = key->arr[i];
		key->arr[i] = key->arr[j];
		key->arr[j] = tmp;
	}
	key->i = 0;
	key->j = 0;

/* EXPORT DELETE END */
}


/*
 * Encipher 'in' using 'key'.
 * 'in' and 'out' can point to the same location.
 */
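/*
 * Code layout: the prologue below first advances 'in' to an 8-byte boundary
 * one byte at a time.  After that, sun4v builds with an 8-byte-aligned 'out'
 * take the unrolled path that produces the key stream eight bytes per
 * iteration; everything else uses the plain byte-at-a-time loop.
 */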
void
arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
{
	size_t ii;
	unsigned long long in0, merge = 0, merge0 = 0, merge1, mask = 0;
	uchar_t i, j, *base, jj, *base1, tmp;
	unsigned int tmp0, tmp1, i_accum, shift = 0, i1;


/* EXPORT DELETE START */
	int index;

	base = key->arr;

	index = (((uintptr_t)in) & 0x7);

	/* Get the 'in' on an 8-byte alignment */
	if (index > 0) {
		i = key->i;
		j = key->j;

		for (index = 8 - index; (index-- > 0) && len > 0;
		    len--, in++, out++) {

			i = i + 1;
			j = j + key->arr[i];
			tmp = key->arr[i];
			key->arr[i] = key->arr[j];
			key->arr[j] = tmp;
			tmp = key->arr[i] + key->arr[j];
			*out = *in ^ key->arr[tmp];
		}
		key->i = i;
		key->j = j;

	}
	if (len == 0)
		return;

	/* See if we're fortunate and 'out' got aligned as well */

	/*
	 * Niagara-optimized version for the case where both the input and
	 * output buffers are aligned on an 8-byte boundary.
	 */
#ifdef sun4v
	if ((((uintptr_t)out) & 7) != 0) {
#endif /* sun4v */
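	/*
	 * Generic byte-at-a-time RC4 PRGA.  Taken when 'out' is not 8-byte
	 * aligned on sun4v, and always on other platforms.
	 */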
	i = key->i;
	j = key->j;
	for (ii = 0; ii < len; ii++) {
		i = i + 1;
		tmp0 = base[i];
		j = j + tmp0;
		tmp1 = base[j];
		base[i] = (uchar_t)tmp1;
		base[j] = (uchar_t)tmp0;
		tmp0 += tmp1;
		tmp0 = tmp0 & 0xff;
		out[ii] = in[ii] ^ base[tmp0];
	}
	key->i = i;
	key->j = j;
#ifdef sun4v
	} else {
		i = key->i;
		j = key->j;

		/*
		 * Want to align base[i] on a 2B boundary -- allows updates
		 * via [i] to be performed in 2B chunks (reducing # of stores).
		 * Requires appropriate alias detection.
		 */

		if (((i+1) % 2) != 0) {
			i = i + 1;
			tmp0 = base[i];
			j = j + tmp0;
			tmp1 = base[j];

			base[i] = (uchar_t)tmp1;
			base[j] = (uchar_t)tmp0;

			tmp0 += tmp1;
			tmp0 = tmp0 & 0xff;

			merge0 = (unsigned long long)(base[tmp0]) << 56;
			shift = 8; mask = 0xff;
		}
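		/*
		 * A single keystream byte is now pending in the top byte of
		 * merge0; shift/mask let the 8-byte stores below splice it in
		 * and carry the displaced low byte to the next iteration.
		 */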

		/*
		 * Note - in and out may now be misaligned, so the 8B updates
		 * of [out] below need to handle this possibility.  There can
		 * also be a 1B overrun, so we need to drop out of the loop
		 * early as a result.
		 */

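		/*
		 * Main unrolled loop: eight keystream bytes per iteration.
		 * The (len - 1) & ~7 bound ensures the trailing one-byte
		 * overrun (emitted when shift is set) stays within the
		 * buffer.
		 */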
		for (ii = 0, i1 = i; ii < ((len-1) & (~7));
		    ii += 8, i1 = i1 & 0xff) {

			/*
			 * If i1 is less than 248, we know it won't wrap
			 * around (mod 256) this iteration, so we don't need
			 * to bother masking i1 after each increment.
			 */
			if (i1 < 248) {

				/* BYTE 0 */
				i1 = (i1 + 1);

				/*
				 * Creating this base pointer reduces
				 * subsequent arithmetic ops required to
				 * load [i].
				 *
				 * N.B. don't need to check if [j] aliases.
				 * [i] and [j] end up with the same values
				 * anyway.
				 */
				base1 = &base[i1];

				tmp0 = base1[0];
				j = j + tmp0;

				tmp1 = base[j];
				/*
				 * Don't store [i] yet
				 */
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				/*
				 * Check [tmp0] doesn't alias with [i]
				 */

				/*
				 * Updating [out] in 8B chunks
				 */
				if (i1 == tmp0) {
					merge =
					    (unsigned long long)(i_accum) << 56;
				} else {
					merge =
					    (unsigned long long)(base[tmp0]) <<
					    56;
				}

				/* BYTE 1 */
				tmp0 = base1[1];

				j = j + tmp0;

				/*
				 * [j] can now alias with [i] and [i-1];
				 * if it aliases, abort the speculation.
				 */
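				/*
				 * (i1 ^ j) < 2 holds exactly when j == i1 or
				 * j == i1 + 1 (i1 is even here), i.e. when
				 * base[j] overlaps the pending 2-byte store
				 * at base1[0..1].
				 */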
				if ((i1 ^ j) < 2) {
					base1[0] = (uchar_t)i_accum;

					tmp1 = base[j];

					base1[1] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |= (unsigned long long)
					    (base[tmp0]) << 48;
				} else {

					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/*
					 * Speculation succeeded!  Update [i]
					 * in a 2B chunk.
					 */
					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[i1]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				}


				/*
				 * Too expensive to perform [i] speculation for
				 * every byte.  Just need to reduce the
				 * frequency of stores until store-buffer-full
				 * stalls are not the bottleneck.
				 */
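				/*
				 * BYTEs 2-5 below are plain swaps with direct
				 * single-byte stores; only the BYTE 0/1 and
				 * BYTE 6/7 pairs defer [i] into a combined
				 * 2-byte store.
				 */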

				/* BYTE 2 */
				tmp0 = base1[2];
				j = j + tmp0;
				tmp1 = base[j];
				base1[2] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp1 += tmp0;
				tmp1 = tmp1 & 0xff;
				merge |= (unsigned long long)(base[tmp1]) << 40;

				/* BYTE 3 */
				tmp0 = base1[3];
				j = j + tmp0;
				tmp1 = base[j];
				base1[3] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 32;

				/* BYTE 4 */
				tmp0 = base1[4];
				j = j + tmp0;
				tmp1 = base[j];
				base1[4] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 24;

				/* BYTE 5 */
				tmp0 = base1[5];
				j = j + tmp0;
				tmp1 = base[j];
				base1[5] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 16;

				/* BYTE 6 */
				i1 = (i1 + 6);
				tmp0 = base1[6];
				j = j + tmp0;
				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge |=
					    (unsigned long long)(i_accum) << 8;
				} else {
					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    8;
				}

				/* BYTE 7 */
				tmp0 = base1[7];

				/*
				 * Perform [i] speculation again.  Identical
				 * to that performed for BYTE 0 and BYTE 1.
				 */
				j = j + tmp0;
				if ((i1 ^ j) < 2) {
					base1[6] = (uchar_t)i_accum;
					tmp1 = base[j];

					base1[7] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]);

				} else {
					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[i1]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]);
				}
				i1++;
			} else {
				/*
				 * i1 is too close to wrapping around for the
				 * masking to be disregarded.
				 */

				/*
				 * Same old speculation for BYTE 0 and BYTE 1.
				 */

				/* BYTE 0 */
				i1 = (i1 + 1) & 0xff;
				jj = (uchar_t)i1;

				tmp0 = base[i1];
				j = j + tmp0;

				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge =
					    (unsigned long long)(i_accum) << 56;
				} else {
					merge =
					    (unsigned long long)(base[tmp0]) <<
					    56;
				}

				/* BYTE 1 */
				tmp0 = base[i1+1];

				j = j + tmp0;

				if ((jj ^ j) < 2) {
					base[jj] = (uchar_t)i_accum;

					tmp1 = base[j];

					base[i1+1] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				} else {

					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[jj]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				}

				/* BYTE 2 */
				/*
				 * i1 is even at this point (it has to be for
				 * the 2-byte-aligned [i] stores), so it can
				 * only wrap around on the even bytes; the
				 * mask only needs to be applied every 2nd
				 * byte.
				 */
				i1 = (i1 + 2) & 0xff;
				tmp0 = base[i1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 40;

				/* BYTE 3 */
				tmp0 = base[i1+1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1+1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 32;

				/* BYTE 4 */
				i1 = (i1 + 2) & 0xff;
				tmp0 = base[i1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 24;

				/* BYTE 5 */
				tmp0 = base[i1+1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1+1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 16;

				/* BYTE 6 */
				i1 = (i1 + 2) & 0xff;
				jj = (uchar_t)i1;
				tmp0 = base[i1];

				j = j + tmp0;

				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge |=
					    (unsigned long long)(i_accum) << 8;
				} else {
					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    8;
				}

				/* BYTE 7 */
				i1++;
				tmp0 = base[i1];

				j = j + tmp0;
				if ((jj ^ j) < 2) {
					base[jj] = (uchar_t)i_accum;
					tmp1 = base[j];

					base[i1] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]);

				} else {

					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[jj]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]);
				}
			}

			/*
			 * Perform the update to [out].
			 * Remember there could be alignment issues.
			 */
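			/*
			 * merge now holds 8 keystream bytes packed MSB-first.
			 * Splice in any byte left over from the previous
			 * iteration (merge0), keep the displaced low byte for
			 * the next one, and XOR the result into the output.
			 * The 8-byte store relies on sun4v being big-endian,
			 * so the most significant byte lands at out[ii].
			 */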
			/* LINTED E_BAD_PTR_CAST_ALIGN */
			in0 = *((unsigned long long *) (&in[ii]));

			merge1 = merge0 | (merge >> shift);

			merge0 = (merge & mask) << 56;

			in0 = in0 ^ merge1;

			/* LINTED E_BAD_PTR_CAST_ALIGN */
			*((unsigned long long *) (&out[ii])) = in0;
		}

		i = (uchar_t)i1;

		/*
		 * Handle any overrun
		 */
		if (shift) {
			out[ii] = in[ii] ^ (merge0 >> 56);
			ii++;
		}

		/*
		 * Handle final few bytes
		 */
		for (; ii < len; ii++) {
			i = i + 1;
			tmp0 = base[i];
			j = j + tmp0;
			tmp1 = base[j];

			base[i] = (uchar_t)tmp1;
			base[j] = (uchar_t)tmp0;

			tmp0 += tmp1;
			tmp0 = tmp0 & 0xff;
			out[ii] = in[ii] ^ base[tmp0];
		}
		key->i = i;
		key->j = j;
	}
#endif /* sun4v */

/* EXPORT DELETE END */
}