xref: /netbsd-src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c (revision 2619efef58743213052f3155b5f9fd013be9ba47)
/*	$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $");
31 
32 #ifdef _KERNEL
33 #include <sys/systm.h>
34 #include <lib/libkern/libkern.h>
35 #else
36 #include <assert.h>
37 #include <inttypes.h>
38 #include <stdio.h>
39 #define	KASSERT			assert
40 #endif
41 
42 #include "aes_ssse3_impl.h"
43 
44 static inline __m128i
loadblock(const void * in)45 loadblock(const void *in)
46 {
47 	return _mm_loadu_epi8(in);
48 }
49 
50 static inline void
storeblock(void * out,__m128i block)51 storeblock(void *out, __m128i block)
52 {
53 	_mm_storeu_epi8(out, block);
54 }
55 
56 void
aes_ssse3_enc(const struct aesenc * enc,const uint8_t in[static16],uint8_t out[static16],uint32_t nrounds)57 aes_ssse3_enc(const struct aesenc *enc, const uint8_t in[static 16],
58     uint8_t out[static 16], uint32_t nrounds)
59 {
60 	__m128i block;
61 
62 	block = loadblock(in);
63 	block = aes_ssse3_enc1(enc, block, nrounds);
64 	storeblock(out, block);
65 }
66 
67 void
aes_ssse3_dec(const struct aesdec * dec,const uint8_t in[static16],uint8_t out[static16],uint32_t nrounds)68 aes_ssse3_dec(const struct aesdec *dec, const uint8_t in[static 16],
69     uint8_t out[static 16], uint32_t nrounds)
70 {
71 	__m128i block;
72 
73 	block = loadblock(in);
74 	block = aes_ssse3_dec1(dec, block, nrounds);
75 	storeblock(out, block);
76 }
77 
78 void
aes_ssse3_cbc_enc(const struct aesenc * enc,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t iv[static16],uint32_t nrounds)79 aes_ssse3_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
80     uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
81     uint32_t nrounds)
82 {
83 	__m128i cv;
84 
85 	KASSERT(nbytes);
86 
87 	cv = loadblock(iv);
88 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
89 		cv ^= loadblock(in);
90 		cv = aes_ssse3_enc1(enc, cv, nrounds);
91 		storeblock(out, cv);
92 	}
93 	storeblock(iv, cv);
94 }
95 
96 void
aes_ssse3_cbc_dec(const struct aesdec * dec,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t iv[static16],uint32_t nrounds)97 aes_ssse3_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
98     uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
99     uint32_t nrounds)
100 {
101 	__m128i iv0, cv, b;
102 
103 	KASSERT(nbytes);
104 	KASSERT(nbytes % 16 == 0);
105 
106 	iv0 = loadblock(iv);
107 	cv = loadblock(in + nbytes - 16);
108 	storeblock(iv, cv);
109 
110 	for (;;) {
111 		b = aes_ssse3_dec1(dec, cv, nrounds);
112 		if ((nbytes -= 16) == 0)
113 			break;
114 		cv = loadblock(in + nbytes - 16);
115 		storeblock(out + nbytes, b ^ cv);
116 	}
117 	storeblock(out, b ^ iv0);
118 }
119 
/*
 * aes_ssse3_xts_update(t)
 *
 *	Tweak-update step used by the XTS routines in this file: shift
 *	the 128-bit value t left by one bit.  The shift is done on the
 *	two 64-bit halves separately, so the bit shifted out of each
 *	half is patched back in by hand:
 *
 *	- top bit of the low half  -> bit 0 of the high half (carry 1)
 *	- top bit of the high half -> XOR 0x87 into the low half
 *
 *	The mask m is built per half from its own top bit and then the
 *	halves are swapped, so each half's carry lands in the other one.
 */
static inline __m128i
aes_ssse3_xts_update(__m128i t)
{
	const __m128i one = _mm_set_epi64x(1, 1);
	__m128i s, m, c;

	s = _mm_srli_epi64(t, 63);	/* 1 if high bit set else 0 */
	m = _mm_sub_epi64(s, one);	/* 0 if high bit set else -1 */
	m = _mm_shuffle_epi32(m, 0x4e);	/* swap halves */
	c = _mm_set_epi64x(1, 0x87);	/* carry */

	/* Keep a carry constant only where the swapped mask is 0. */
	return _mm_slli_epi64(t, 1) ^ (c & ~m);
}
133 
/*
 * aes_ssse3_xts_update_selftest()
 *
 *	Run aes_ssse3_xts_update over a fixed table of inputs and
 *	compare against the expected outputs.  Every mismatch is
 *	reported with printf (function name, case index, and the four
 *	words actually produced).  Returns 0 if all cases match, -1 if
 *	any case failed.
 */
static int
aes_ssse3_xts_update_selftest(void)
{
	/*
	 * Each vector is four 32-bit words in memory order.  Cases 1-3
	 * check that the top bit of a word carries into the next word,
	 * case 4 that the top bit of the last word comes back as 0x87
	 * in the first word, and case 5 both kinds of carry at once.
	 */
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		[0] = { {1}, {2} },
		[1] = { {0x80000000U,0,0,0}, {0,1,0,0} },
		[2] = { {0,0x80000000U,0,0}, {0,0,1,0} },
		[3] = { {0,0,0x80000000U,0}, {0,0,0,1} },
		[4] = { {0,0,0,0x80000000U}, {0x87,0,0,0} },
		[5] = { {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t[4];
	int result = 0;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t[0] = cases[i].in[0];
		t[1] = cases[i].in[1];
		t[2] = cases[i].in[2];
		t[3] = cases[i].in[3];
		/* Update the working copy in place. */
		storeblock(t, aes_ssse3_xts_update(loadblock(t)));
		if (t[0] != cases[i].out[0] ||
		    t[1] != cases[i].out[1] ||
		    t[2] != cases[i].out[2] ||
		    t[3] != cases[i].out[3]) {
			printf("%s %u:"
			    " %"PRIx32" %"PRIx32" %"PRIx32" %"PRIx32"\n",
			    __func__, i, t[0], t[1], t[2], t[3]);
			/* Keep going so every failing case is reported. */
			result = -1;
		}
	}

	return result;
}
170 
171 void
aes_ssse3_xts_enc(const struct aesenc * enc,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t tweak[static16],uint32_t nrounds)172 aes_ssse3_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
173     uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
174     uint32_t nrounds)
175 {
176 	__m128i t, b;
177 
178 	KASSERT(nbytes);
179 	KASSERT(nbytes % 16 == 0);
180 
181 	t = loadblock(tweak);
182 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
183 		b = t ^ loadblock(in);
184 		b = aes_ssse3_enc1(enc, b, nrounds);
185 		storeblock(out, t ^ b);
186 		t = aes_ssse3_xts_update(t);
187 	}
188 	storeblock(tweak, t);
189 }
190 
191 void
aes_ssse3_xts_dec(const struct aesdec * dec,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t tweak[static16],uint32_t nrounds)192 aes_ssse3_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
193     uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
194     uint32_t nrounds)
195 {
196 	__m128i t, b;
197 
198 	KASSERT(nbytes);
199 	KASSERT(nbytes % 16 == 0);
200 
201 	t = loadblock(tweak);
202 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
203 		b = t ^ loadblock(in);
204 		b = aes_ssse3_dec1(dec, b, nrounds);
205 		storeblock(out, t ^ b);
206 		t = aes_ssse3_xts_update(t);
207 	}
208 	storeblock(tweak, t);
209 }
210 
211 void
aes_ssse3_cbcmac_update1(const struct aesenc * enc,const uint8_t in[static16],size_t nbytes,uint8_t auth0[static16],uint32_t nrounds)212 aes_ssse3_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
213     size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
214 {
215 	__m128i auth;
216 
217 	KASSERT(nbytes);
218 	KASSERT(nbytes % 16 == 0);
219 
220 	auth = loadblock(auth0);
221 	for (; nbytes; nbytes -= 16, in += 16)
222 		auth = aes_ssse3_enc1(enc, auth ^ loadblock(in), nrounds);
223 	storeblock(auth0, auth);
224 }
225 
226 void
aes_ssse3_ccm_enc1(const struct aesenc * enc,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t authctr[static32],uint32_t nrounds)227 aes_ssse3_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
228     uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
229     uint32_t nrounds)
230 {
231 	const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
232 	const __m128i bs32 =
233 	    _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
234 	__m128i auth, ctr_be, ctr, ptxt;
235 
236 	KASSERT(nbytes);
237 	KASSERT(nbytes % 16 == 0);
238 
239 	auth = loadblock(authctr);
240 	ctr_be = loadblock(authctr + 16);
241 	ctr = _mm_shuffle_epi8(ctr_be, bs32);
242 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
243 		ptxt = loadblock(in);
244 		auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
245 		ctr = _mm_add_epi32(ctr, ctr32_inc);
246 		ctr_be = _mm_shuffle_epi8(ctr, bs32);
247 		storeblock(out, ptxt ^ aes_ssse3_enc1(enc, ctr_be, nrounds));
248 	}
249 	storeblock(authctr, auth);
250 	storeblock(authctr + 16, ctr_be);
251 }
252 
253 void
aes_ssse3_ccm_dec1(const struct aesenc * enc,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t authctr[static32],uint32_t nrounds)254 aes_ssse3_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
255     uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
256     uint32_t nrounds)
257 {
258 	const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
259 	const __m128i bs32 =
260 	    _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
261 	__m128i auth, ctr_be, ctr, ptxt;
262 
263 	KASSERT(nbytes);
264 	KASSERT(nbytes % 16 == 0);
265 
266 	auth = loadblock(authctr);
267 	ctr_be = loadblock(authctr + 16);
268 	ctr = _mm_shuffle_epi8(ctr_be, bs32);
269 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
270 		ctr = _mm_add_epi32(ctr, ctr32_inc);
271 		ctr_be = _mm_shuffle_epi8(ctr, bs32);
272 		ptxt = loadblock(in) ^ aes_ssse3_enc1(enc, ctr_be, nrounds);
273 		storeblock(out, ptxt);
274 		auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
275 	}
276 	storeblock(authctr, auth);
277 	storeblock(authctr + 16, ctr_be);
278 }
279 
/*
 * aes_ssse3_selftest()
 *
 *	Run the self-tests defined in this file (currently only the
 *	XTS tweak-update test).  Returns 0 on success, -1 if any test
 *	failed.
 */
int
aes_ssse3_selftest(void)
{

	if (aes_ssse3_xts_update_selftest())
		return -1;

	return 0;
}
289