1 /* $NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $");
31
32 #ifdef _KERNEL
33 #include <sys/systm.h>
34 #include <lib/libkern/libkern.h>
35 #else
36 #include <assert.h>
37 #include <inttypes.h>
38 #include <stdio.h>
39 #define KASSERT assert
40 #endif
41
42 #include "aes_ssse3_impl.h"
43
44 static inline __m128i
loadblock(const void * in)45 loadblock(const void *in)
46 {
47 return _mm_loadu_epi8(in);
48 }
49
50 static inline void
storeblock(void * out,__m128i block)51 storeblock(void *out, __m128i block)
52 {
53 _mm_storeu_epi8(out, block);
54 }
55
/*
 * aes_ssse3_enc(enc, in, out, nrounds)
 *
 *	Load the single block at in, pass it through aes_ssse3_enc1
 *	together with enc and nrounds, and store the result at out.
 */
void
aes_ssse3_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{

	storeblock(out, aes_ssse3_enc1(enc, loadblock(in), nrounds));
}
66
/*
 * aes_ssse3_dec(dec, in, out, nrounds)
 *
 *	Load the single block at in, pass it through aes_ssse3_dec1
 *	together with dec and nrounds, and store the result at out.
 */
void
aes_ssse3_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{

	storeblock(out, aes_ssse3_dec1(dec, loadblock(in), nrounds));
}
77
78 void
aes_ssse3_cbc_enc(const struct aesenc * enc,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t iv[static16],uint32_t nrounds)79 aes_ssse3_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
80 uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
81 uint32_t nrounds)
82 {
83 __m128i cv;
84
85 KASSERT(nbytes);
86
87 cv = loadblock(iv);
88 for (; nbytes; nbytes -= 16, in += 16, out += 16) {
89 cv ^= loadblock(in);
90 cv = aes_ssse3_enc1(enc, cv, nrounds);
91 storeblock(out, cv);
92 }
93 storeblock(iv, cv);
94 }
95
96 void
aes_ssse3_cbc_dec(const struct aesdec * dec,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t iv[static16],uint32_t nrounds)97 aes_ssse3_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
98 uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
99 uint32_t nrounds)
100 {
101 __m128i iv0, cv, b;
102
103 KASSERT(nbytes);
104 KASSERT(nbytes % 16 == 0);
105
106 iv0 = loadblock(iv);
107 cv = loadblock(in + nbytes - 16);
108 storeblock(iv, cv);
109
110 for (;;) {
111 b = aes_ssse3_dec1(dec, cv, nrounds);
112 if ((nbytes -= 16) == 0)
113 break;
114 cv = loadblock(in + nbytes - 16);
115 storeblock(out + nbytes, b ^ cv);
116 }
117 storeblock(out, b ^ iv0);
118 }
119
120 static inline __m128i
aes_ssse3_xts_update(__m128i t)121 aes_ssse3_xts_update(__m128i t)
122 {
123 const __m128i one = _mm_set_epi64x(1, 1);
124 __m128i s, m, c;
125
126 s = _mm_srli_epi64(t, 63); /* 1 if high bit set else 0 */
127 m = _mm_sub_epi64(s, one); /* 0 if high bit set else -1 */
128 m = _mm_shuffle_epi32(m, 0x4e); /* swap halves */
129 c = _mm_set_epi64x(1, 0x87); /* carry */
130
131 return _mm_slli_epi64(t, 1) ^ (c & ~m);
132 }
133
/*
 * aes_ssse3_xts_update_selftest()
 *
 *	Run aes_ssse3_xts_update over a fixed table of inputs, each
 *	given as four 32-bit words, and compare against the expected
 *	words.  Every mismatching case is reported with printf.
 *	Returns 0 if all cases match and -1 if any case failed.
 */
static int
aes_ssse3_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		[0] = { {1}, {2} },
		[1] = { {0x80000000U,0,0,0}, {0,1,0,0} },
		[2] = { {0,0x80000000U,0,0}, {0,0,1,0} },
		[3] = { {0,0,0x80000000U,0}, {0,0,0,1} },
		[4] = { {0,0,0,0x80000000U}, {0x87,0,0,0} },
		[5] = { {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	const unsigned ncases = sizeof(cases)/sizeof(cases[0]);
	uint32_t t[4];
	unsigned i, j;
	int mismatch, result = 0;

	for (i = 0; i < ncases; i++) {
		/* Copy the input words into a scratch block.  */
		for (j = 0; j < 4; j++)
			t[j] = cases[i].in[j];

		/* Update the scratch block in place.  */
		storeblock(t, aes_ssse3_xts_update(loadblock(t)));

		mismatch = 0;
		for (j = 0; j < 4; j++)
			mismatch |= (t[j] != cases[i].out[j]);
		if (!mismatch)
			continue;

		/* Report the words actually produced, and keep going.  */
		printf("%s %u:"
		    " %"PRIx32" %"PRIx32" %"PRIx32" %"PRIx32"\n",
		    __func__, i, t[0], t[1], t[2], t[3]);
		result = -1;
	}

	return result;
}
170
171 void
aes_ssse3_xts_enc(const struct aesenc * enc,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t tweak[static16],uint32_t nrounds)172 aes_ssse3_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
173 uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
174 uint32_t nrounds)
175 {
176 __m128i t, b;
177
178 KASSERT(nbytes);
179 KASSERT(nbytes % 16 == 0);
180
181 t = loadblock(tweak);
182 for (; nbytes; nbytes -= 16, in += 16, out += 16) {
183 b = t ^ loadblock(in);
184 b = aes_ssse3_enc1(enc, b, nrounds);
185 storeblock(out, t ^ b);
186 t = aes_ssse3_xts_update(t);
187 }
188 storeblock(tweak, t);
189 }
190
191 void
aes_ssse3_xts_dec(const struct aesdec * dec,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t tweak[static16],uint32_t nrounds)192 aes_ssse3_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
193 uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
194 uint32_t nrounds)
195 {
196 __m128i t, b;
197
198 KASSERT(nbytes);
199 KASSERT(nbytes % 16 == 0);
200
201 t = loadblock(tweak);
202 for (; nbytes; nbytes -= 16, in += 16, out += 16) {
203 b = t ^ loadblock(in);
204 b = aes_ssse3_dec1(dec, b, nrounds);
205 storeblock(out, t ^ b);
206 t = aes_ssse3_xts_update(t);
207 }
208 storeblock(tweak, t);
209 }
210
211 void
aes_ssse3_cbcmac_update1(const struct aesenc * enc,const uint8_t in[static16],size_t nbytes,uint8_t auth0[static16],uint32_t nrounds)212 aes_ssse3_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
213 size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
214 {
215 __m128i auth;
216
217 KASSERT(nbytes);
218 KASSERT(nbytes % 16 == 0);
219
220 auth = loadblock(auth0);
221 for (; nbytes; nbytes -= 16, in += 16)
222 auth = aes_ssse3_enc1(enc, auth ^ loadblock(in), nrounds);
223 storeblock(auth0, auth);
224 }
225
226 void
aes_ssse3_ccm_enc1(const struct aesenc * enc,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t authctr[static32],uint32_t nrounds)227 aes_ssse3_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
228 uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
229 uint32_t nrounds)
230 {
231 const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
232 const __m128i bs32 =
233 _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
234 __m128i auth, ctr_be, ctr, ptxt;
235
236 KASSERT(nbytes);
237 KASSERT(nbytes % 16 == 0);
238
239 auth = loadblock(authctr);
240 ctr_be = loadblock(authctr + 16);
241 ctr = _mm_shuffle_epi8(ctr_be, bs32);
242 for (; nbytes; nbytes -= 16, in += 16, out += 16) {
243 ptxt = loadblock(in);
244 auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
245 ctr = _mm_add_epi32(ctr, ctr32_inc);
246 ctr_be = _mm_shuffle_epi8(ctr, bs32);
247 storeblock(out, ptxt ^ aes_ssse3_enc1(enc, ctr_be, nrounds));
248 }
249 storeblock(authctr, auth);
250 storeblock(authctr + 16, ctr_be);
251 }
252
253 void
aes_ssse3_ccm_dec1(const struct aesenc * enc,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t authctr[static32],uint32_t nrounds)254 aes_ssse3_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
255 uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
256 uint32_t nrounds)
257 {
258 const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
259 const __m128i bs32 =
260 _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
261 __m128i auth, ctr_be, ctr, ptxt;
262
263 KASSERT(nbytes);
264 KASSERT(nbytes % 16 == 0);
265
266 auth = loadblock(authctr);
267 ctr_be = loadblock(authctr + 16);
268 ctr = _mm_shuffle_epi8(ctr_be, bs32);
269 for (; nbytes; nbytes -= 16, in += 16, out += 16) {
270 ctr = _mm_add_epi32(ctr, ctr32_inc);
271 ctr_be = _mm_shuffle_epi8(ctr, bs32);
272 ptxt = loadblock(in) ^ aes_ssse3_enc1(enc, ctr_be, nrounds);
273 storeblock(out, ptxt);
274 auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
275 }
276 storeblock(authctr, auth);
277 storeblock(authctr + 16, ctr_be);
278 }
279
/*
 * aes_ssse3_selftest()
 *
 *	Run the self-tests defined in this file, currently only
 *	aes_ssse3_xts_update_selftest.  Returns 0 if it reports
 *	success (zero) and -1 otherwise.
 */
int
aes_ssse3_selftest(void)
{

	return aes_ssse3_xts_update_selftest() ? -1 : 0;
}
289