/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "../arcfour.h"

/* Initialize the key stream 'key' using the key value */
void
arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
{
/* EXPORT DELETE START */

	uchar_t ext_keyval[256];
	uchar_t tmp;
	int i, j;

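	/* Replicate the key value until the 256-byte working buffer is full */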
	for (i = j = 0; i < 256; i++, j++) {
		if (j == keyvallen)
			j = 0;

		ext_keyval[i] = keyval[j];
	}
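	/* Start S (key->arr) as the identity permutation */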
	for (i = 0; i < 256; i++)
		key->arr[i] = (uchar_t)i;

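	/* Standard RC4 key scheduling: mix the key into S with 256 swaps */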
	j = 0;
	for (i = 0; i < 256; i++) {
		j = (j + key->arr[i] + ext_keyval[i]) % 256;
		tmp = key->arr[i];
		key->arr[i] = key->arr[j];
		key->arr[j] = tmp;
	}
	key->i = 0;
	key->j = 0;

/* EXPORT DELETE END */
}


/*
 * Encipher 'in' using 'key'.
 * in and out can point to the same location.
 */
void
arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
{
	size_t ii;
	unsigned long long in0, merge = 0, merge0 = 0, merge1, mask = 0;
	uchar_t i, j, *base, jj, *base1, tmp;
	unsigned int tmp0, tmp1, i_accum, shift = 0, i1;


/* EXPORT DELETE START */
	int index;

	base = key->arr;

	index = (((uintptr_t)in) & 0x7);

	/* Get the 'in' on an 8-byte alignment */
	if (index > 0) {
		i = key->i;
		j = key->j;

		for (index = 8 - index; (index-- > 0) && len > 0;
		    len--, in++, out++) {

			i = i + 1;
			j = j + key->arr[i];
			tmp = key->arr[i];
			key->arr[i] = key->arr[j];
			key->arr[j] = tmp;
			tmp = key->arr[i] + key->arr[j];
			*out = *in ^ key->arr[tmp];
		}
		key->i = i;
		key->j = j;

	}
	if (len == 0)
		return;

	/* See if we're fortunate and 'out' got aligned as well */


	/*
	 * Niagara-optimized version for the cases where the input and
	 * output buffers are aligned on an 8-byte boundary.
	 */
#ifdef sun4v
	if ((((uintptr_t)out) & 7) != 0) {
#endif	/* sun4v */
		i = key->i;
		j = key->j;
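
		/*
		 * Generic byte-at-a-time RC4: swap S[i] and S[j], then XOR
		 * each input byte with S[(S[i] + S[j]) % 256].
		 */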
		for (ii = 0; ii < len; ii++) {
			i = i + 1;
			tmp0 = base[i];
			j = j + tmp0;
			tmp1 = base[j];
			base[i] = (uchar_t)tmp1;
			base[j] = (uchar_t)tmp0;
			tmp0 += tmp1;
			tmp0 = tmp0 & 0xff;
			out[ii] = in[ii] ^ base[tmp0];
		}
		key->i = i;
		key->j = j;
#ifdef sun4v
	} else {
		i = key->i;
		j = key->j;

		/*
		 * Want to align base[i] on a 2B boundary -- allows updates
		 * via [i] to be performed in 2B chunks (reducing # of stores).
		 * Requires appropriate alias detection.
		 */

		if (((i+1) % 2) != 0) {
			i = i + 1;
			tmp0 = base[i];
			j = j + tmp0;
			tmp1 = base[j];

			base[i] = (uchar_t)tmp1;
			base[j] = (uchar_t)tmp0;

			tmp0 += tmp1;
			tmp0 = tmp0 & 0xff;

			merge0 = (unsigned long long)(base[tmp0]) << 56;
			shift = 8; mask = 0xff;
		}
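
		/*
		 * If a byte was generated above, it is parked in the top
		 * byte of merge0; shift/mask then stagger the 8-byte
		 * keystream words so each 64-bit store to [out] below
		 * combines that carried byte with the first seven bytes
		 * produced in the current iteration.
		 */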

		/*
		 * Note - in and out may now be misaligned, so the updates
		 * to [out] in 8B chunks need to handle this possibility.
		 * There could also be a 1B overrun, so we need to drop out
		 * of the loop early as a result.
		 */

		for (ii = 0, i1 = i; ii < ((len-1) & (~7));
		    ii += 8, i1 = i1&0xff) {

			/*
			 * If i1 is less than 248, we know it won't wrap
			 * around (mod 256) within this iteration, so we
			 * don't need to bother masking i1 after each
			 * increment.
			 */
			if (i1 < 248) {

				/* BYTE 0 */
				i1 = (i1 + 1);

				/*
				 * Creating this base pointer reduces
				 * subsequent arithmetic ops required to
				 * load [i]
				 *
				 * N.B. don't need to check if [j] aliases.
				 * [i] and [j] end up with the same values
				 * anyway.
				 */
				base1 = &base[i1];

				tmp0 = base1[0];
				j = j + tmp0;

				tmp1 = base[j];
				/*
				 * Don't store [i] yet
				 */
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				/*
				 * Check [tmp0] doesn't alias with [i]
				 */

				/*
				 * Updating [out] in 8B chunks
				 */
				if (i1 == tmp0) {
					merge =
					    (unsigned long long)(i_accum) << 56;
				} else {
					merge =
					    (unsigned long long)(base[tmp0]) <<
					    56;
				}

				/* BYTE 1 */
				tmp0 = base1[1];

				j = j + tmp0;

				/*
				 * [j] can now alias with [i] and [i-1].
				 * If they alias, abort the speculation.
				 */
				if ((i1 ^ j) < 2) {
					base1[0] = (uchar_t)i_accum;

					tmp1 = base[j];

					base1[1] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |= (unsigned long long)
					    (base[tmp0]) << 48;
				} else {

					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/*
					 * Speculation succeeded! Update [i]
					 * in a 2B chunk
					 */
					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[i1]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				}


				/*
				 * Too expensive to perform [i] speculation for
				 * every byte. Just need to reduce the
				 * frequency of stores until store-buffer-full
				 * stalls are no longer the bottleneck.
				 */

				/* BYTE 2 */
				tmp0 = base1[2];
				j = j + tmp0;
				tmp1 = base[j];
				base1[2] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp1 += tmp0;
				tmp1 = tmp1 & 0xff;
				merge |= (unsigned long long)(base[tmp1]) << 40;

				/* BYTE 3 */
				tmp0 = base1[3];
				j = j + tmp0;
				tmp1 = base[j];
				base1[3] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 32;

				/* BYTE 4 */
				tmp0 = base1[4];
				j = j + tmp0;
				tmp1 = base[j];
				base1[4] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 24;

				/* BYTE 5 */
				tmp0 = base1[5];
				j = j + tmp0;
				tmp1 = base[j];
				base1[5] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 16;

				/* BYTE 6 */
				i1 = (i1+6);
				tmp0 = base1[6];
				j = j + tmp0;
				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge |=
					    (unsigned long long)(i_accum) << 8;
				} else {
					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    8;
				}

				/* BYTE 7 */
				tmp0 = base1[7];

				/*
				 * Perform [i] speculation again. Identical
				 * to that performed for BYTE 0 and BYTE 1.
				 */
				j = j + tmp0;
				if ((i1 ^ j) < 2) {
					base1[6] = (uchar_t)i_accum;
					tmp1 = base[j];

					base1[7] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]);

				} else {
					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[i1]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]);
				}
				i1++;
			} else {
				/*
				 * i1 is too close to wrapping around for the
				 * masking to be disregarded.
				 */

				/*
				 * Same old speculation for BYTE 0 and BYTE 1
				 */

				/* BYTE 0 */
				i1 = (i1 + 1) & 0xff;
				jj = (uchar_t)i1;

				tmp0 = base[i1];
				j = j + tmp0;

				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;

				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge =
					    (unsigned long long)(i_accum) << 56;
				} else {
					merge =
					    (unsigned long long)(base[tmp0]) <<
					    56;
				}

				/* BYTE 1 */
				tmp0 = base[i1+1];

				j = j + tmp0;

				if ((jj ^ j) < 2) {
					base[jj] = (uchar_t)i_accum;

					tmp1 = base[j];

					base[i1+1] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				} else {

					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[jj]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    48;
				}

				/* BYTE 2 */
				/*
				 * Since we know i must be even when entering
				 * the loop (to satisfy alignment), it can only
				 * wrap around on the even bytes, so we only
				 * need to apply the mask every 2nd byte.
				 */
				i1 = (i1 + 2) & 0xff;
				tmp0 = base[i1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 40;

				/* BYTE 3 */
				tmp0 = base[i1+1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1+1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 32;

				/* BYTE 4 */
				i1 = (i1 + 2) & 0xff;
				tmp0 = base[i1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 24;

				/* BYTE 5 */
				tmp0 = base[i1+1];
				j = j + tmp0;
				tmp1 = base[j];
				base[i1+1] = (uchar_t)tmp1;
				base[j] = (uchar_t)tmp0;
				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;
				merge |= (unsigned long long)(base[tmp0]) << 16;

				/* BYTE 6 */
				i1 = (i1 + 2) & 0xff;
				jj = (uchar_t)i1;
				tmp0 = base[i1];

				j = j + tmp0;

				tmp1 = base[j];
				i_accum = tmp1;
				base[j] = (uchar_t)tmp0;


				tmp0 += tmp1;
				tmp0 = tmp0 & 0xff;

				if (i1 == tmp0) {
					merge |=
					    (unsigned long long)(i_accum) << 8;
				} else {
					merge |=
					    (unsigned long long)(base[tmp0]) <<
					    8;
				}

				/* BYTE 7 */
				i1++;
				tmp0 = base[i1];

				j = j + tmp0;
				if ((jj ^ j) < 2) {
					base[jj] = (uchar_t)i_accum;
					tmp1 = base[j];

					base[i1] = (uchar_t)tmp1;
					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					merge |=
					    (unsigned long long)(base[tmp0]);

				} else {

					tmp1 = base[j];

					i_accum = i_accum << 8;
					i_accum |= tmp1;

					base[j] = (uchar_t)tmp0;

					tmp0 += tmp1;
					tmp0 = tmp0 & 0xff;

					/* LINTED E_BAD_PTR_CAST_ALIGN */
					*((unsigned short *) &base[jj]) =
					    i_accum;

					merge |=
					    (unsigned long long)(base[tmp0]);
				}
			}

			/*
			 * Write the 8 keystream bytes to [out].
			 * Remember there could be alignment issues.
			 */
			/* LINTED E_BAD_PTR_CAST_ALIGN */
			in0 = *((unsigned long long *) (&in[ii]));

			merge1 = merge0 | (merge >> shift);

			merge0 = (merge & mask) << 56;

			in0 = in0 ^ merge1;

			/* LINTED E_BAD_PTR_CAST_ALIGN */
			*((unsigned long long *) (&out[ii])) = in0;
		}

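		/* Fold the widened loop index back into the 8-bit state index */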
		i = (uchar_t)i1;

		/*
		 * Handle any overrun
		 */
		if (shift) {
			out[ii] = in[ii] ^ (merge0 >> 56);
			ii++;
		}

		/*
		 * Handle final few bytes
		 */
		for (; ii < len; ii++) {
			i = i + 1;
			tmp0 = base[i];
			j = j + tmp0;
			tmp1 = base[j];

			base[i] = (uchar_t)tmp1;
			base[j] = (uchar_t)tmp0;

			tmp0 += tmp1;
			tmp0 = tmp0 & 0xff;
			out[ii] = in[ii] ^ base[tmp0];
		}
		key->i = i;
		key->j = j;
	}
#endif	/* sun4v */

/* EXPORT DELETE END */
}
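
/*
 * Illustrative usage sketch (not part of this file's build): the caller
 * owns an ARCFour_key, initializes it once with the secret key via
 * arcfour_key_init(), then streams data through arcfour_crypt(). RC4 is
 * symmetric, so the same call decrypts. The key and buffer sizes below
 * are arbitrary placeholders.
 */
#if 0
static void
arcfour_example(void)
{
	ARCFour_key key;
	uchar_t secret[16] = { 0 };	/* placeholder key material */
	uchar_t buf[64] = { 0 };	/* placeholder data */

	arcfour_key_init(&key, secret, sizeof (secret));
	arcfour_crypt(&key, buf, buf, sizeof (buf));	/* in-place is allowed */
}
#endif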