1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #define ARCFOUR_LOOP_OPTIMIZED
27
28 #ifndef _KERNEL
29 #include <stdint.h>
30 #endif /* _KERNEL */
31
32 #include "arcfour.h"
33
34 #if defined(__amd64)
35 /* ARCFour_key.flag values */
36 #define ARCFOUR_ON_INTEL 1
37 #define ARCFOUR_ON_AMD64 0
38
39 #ifdef _KERNEL
40 #include <sys/x86_archext.h>
41 #include <sys/cpuvar.h>
42
43 #else
44 #include <sys/auxv.h>
45 #endif /* _KERNEL */
46 #endif /* __amd64 */
47
48 #ifndef __amd64
49 /*
50 * Initialize the key stream 'key' using the key value.
51 *
52 * Input:
53 * keyval User-provided key
54 * keyvallen Length, in bytes, of keyval
55 * Output:
56 * key Initialized ARCFOUR key schedule, based on keyval
57 */
58 void
arcfour_key_init(ARCFour_key * key,uchar_t * keyval,int keyvallen)59 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
60 {
61 /* EXPORT DELETE START */
62
63 uchar_t ext_keyval[256];
64 uchar_t tmp;
65 int i, j;
66
67 /* Normalize key length to 256 */
68 for (i = j = 0; i < 256; i++, j++) {
69 if (j == keyvallen)
70 j = 0;
71 ext_keyval[i] = keyval[j];
72 }
73
74 for (i = 0; i < 256; i++)
75 key->arr[i] = (uchar_t)i;
76
77 j = 0;
78 for (i = 0; i < 256; i++) {
79 j = (j + key->arr[i] + ext_keyval[i]) & 0xff;
80 tmp = key->arr[i];
81 key->arr[i] = key->arr[j];
82 key->arr[j] = tmp;
83 }
84 key->i = 0;
85 key->j = 0;
86
87 /* EXPORT DELETE END */
88 }
89 #endif /* !__amd64 */
90
91
92 /*
93 * Encipher 'in' using 'key'.
94 *
95 * Input:
96 * key ARCFOUR key, initialized by arcfour_key_init()
97 * in Input text
98 * out Buffer to contain output text
99 * len Length, in bytes, of the in and out buffers
100 *
101 * Output:
102 * out Buffer containing output text
103 *
104 * Note: in and out can point to the same location
105 */
106 void
arcfour_crypt(ARCFour_key * key,uchar_t * in,uchar_t * out,size_t len)107 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
108 {
109 /* EXPORT DELETE START */
110 #ifdef __amd64
111 if (key->flag == ARCFOUR_ON_AMD64) {
112 arcfour_crypt_asm(key, in, out, len);
113 } else { /* Intel EM64T */
114 #endif /* amd64 */
115
116 size_t ii;
117 uchar_t i, j, ti, tj;
118 #ifdef ARCFOUR_LOOP_OPTIMIZED
119 uchar_t arr_ij;
120 #endif
121 #ifdef __amd64
122 uint32_t *arr;
123 #else
124 uchar_t *arr;
125 #endif
126
127 #ifdef sun4u
128 /*
129 * The sun4u has a version of arcfour_crypt_aligned() hand-tuned for
130 * the cases where the input and output buffers are aligned on
131 * a multiple of 8-byte boundary.
132 */
133 int index;
134 uchar_t tmp;
135
136 index = (((uint64_t)(uintptr_t)in) & 0x7);
137
138 /* Get the 'in' on an 8-byte alignment */
139 if (index > 0) {
140 i = key->i;
141 j = key->j;
142 for (index = 8 - (uint64_t)(uintptr_t)in & 0x7;
143 (index-- > 0) && len > 0;
144 len--, in++, out++) {
145 ++i;
146 j = j + key->arr[i];
147 tmp = key->arr[i];
148 key->arr[i] = key->arr[j];
149 key->arr[j] = tmp;
150 tmp = key->arr[i] + key->arr[j];
151 *out = *in ^ key->arr[tmp];
152 }
153 key->i = i;
154 key->j = j;
155 }
156
157 if (len == 0)
158 return;
159
160 /* See if we're fortunate and 'out' got aligned as well */
161
162 if ((((uint64_t)(uintptr_t)out) & 7) != 0) {
163 #endif /* sun4u */
164
165 i = key->i;
166 j = key->j;
167 arr = key->arr;
168
169 #ifndef ARCFOUR_LOOP_OPTIMIZED
170 /*
171 * This loop is hasn't been reordered, but is kept for reference
172 * purposes as it's more readable
173 */
174 for (ii = 0; ii < len; ++ii) {
175 ++i;
176 ti = arr[i];
177 j = j + ti;
178 tj = arr[j];
179 arr[j] = ti;
180 arr[i] = tj;
181 out[ii] = in[ii] ^ arr[(ti + tj) & 0xff];
182 }
183
184 #else
185 /*
186 * This for loop is optimized by carefully spreading out
187 * memory access and storage to avoid conflicts,
188 * allowing the processor to process operations in parallel
189 */
190
191 /* for loop setup */
192 ++i;
193 ti = arr[i];
194 j = j + ti;
195 tj = arr[j];
196 arr[j] = ti;
197 arr[i] = tj;
198 arr_ij = arr[(ti + tj) & 0xff];
199 --len;
200
201 for (ii = 0; ii < len; ) {
202 ++i;
203 ti = arr[i];
204 j = j + ti;
205 tj = arr[j];
206 arr[j] = ti;
207 arr[i] = tj;
208
209 /* save result from previous loop: */
210 out[ii] = in[ii] ^ arr_ij;
211
212 ++ii;
213 arr_ij = arr[(ti + tj) & 0xff];
214 }
215 /* save result from last loop: */
216 out[ii] = in[ii] ^ arr_ij;
217 #endif
218
219 key->i = i;
220 key->j = j;
221
222 #ifdef sun4u
223 } else {
224 arcfour_crypt_aligned(key, len, in, out);
225 }
226 #endif /* sun4u */
227 #ifdef __amd64
228 }
229 #endif /* amd64 */
230
231 /* EXPORT DELETE END */
232 }
233
234
235 #ifdef __amd64
236 /*
237 * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
238 * Cache the result, as the CPU can't change.
239 *
240 * Note: the userland version uses getisax() and checks for an AMD-64-only
241 * feature. The kernel version uses cpuid_getvendor().
242 */
243 int
arcfour_crypt_on_intel(void)244 arcfour_crypt_on_intel(void)
245 {
246 static int cached_result = -1;
247
248 if (cached_result == -1) { /* first time */
249 #ifdef _KERNEL
250 cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
251 #else
252 uint_t ui;
253
254 (void) getisax(&ui, 1);
255 cached_result = ((ui & AV_386_AMD_MMX) == 0);
256 #endif /* _KERNEL */
257 }
258
259 return (cached_result);
260 }
261 #endif /* __amd64 */
262