xref: /freebsd-src/sys/contrib/openzfs/module/icp/algs/modes/gcm.c (revision bb2d13b686e3ccf6c3ccb36209dfb7dcc108b182)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <modes/modes.h>
#include <sys/crypto/common.h>
#include <sys/crypto/icp.h>
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>
#include <sys/simd.h>
#include <modes/gcm_impl.h>
#ifdef CAN_USE_GCM_ASM
#include <aes/aes_impl.h>
#endif

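/*
 * GHASH is the GCM universal hash: XOR the 16-byte block (d) into the
 * running digest (c)->gcm_ghash and multiply the result by the hash
 * subkey H in GF(2^128), storing the product in (t). The multiplication
 * is dispatched through the selected implementation's ops vector (o).
 */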
#define	GHASH(c, d, t, o) \
	xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
	(o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
	(uint64_t *)(void *)(t));

/* Select GCM implementation */
#define	IMPL_FASTEST	(UINT32_MAX)
#define	IMPL_CYCLE	(UINT32_MAX-1)
#ifdef CAN_USE_GCM_ASM
#define	IMPL_AVX	(UINT32_MAX-2)
#endif
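/*
 * Read the selected implementation through a volatile pointer so the
 * compiler performs a fresh load each time; the value may be changed
 * concurrently via the icp_gcm_impl module parameter.
 */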
#define	GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
static uint32_t icp_gcm_impl = IMPL_FASTEST;
static uint32_t user_sel_impl = IMPL_FASTEST;

#ifdef CAN_USE_GCM_ASM
/* Does the architecture we run on support the MOVBE instruction? */
boolean_t gcm_avx_can_use_movbe = B_FALSE;
/*
 * Whether to use the optimized openssl gcm and ghash implementations.
 * Set to true if module parameter icp_gcm_impl == "avx".
 */
static boolean_t gcm_use_avx = B_FALSE;
#define	GCM_IMPL_USE_AVX	(*(volatile boolean_t *)&gcm_use_avx)

extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);

static inline boolean_t gcm_avx_will_work(void);
static inline void gcm_set_avx(boolean_t);
static inline boolean_t gcm_toggle_avx(void);
static inline size_t gcm_simd_get_htab_size(boolean_t);

static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
    crypto_data_t *, size_t);

static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
    size_t, size_t);
#endif /* ifdef CAN_USE_GCM_ASM */

/*
 * Encrypt multiple blocks of data in GCM mode.  Decryption for GCM mode
 * is done in another function.
 */
int
gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_mode_encrypt_contiguous_blocks_avx(
		    ctx, data, length, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t remainder = length;
	size_t need = 0;
	uint8_t *datap = (uint8_t *)data;
	uint8_t *blockp;
	uint8_t *lastp;
	void *iov_or_mp;
	offset_t offset;
	uint8_t *out_data_1;
	uint8_t *out_data_2;
	size_t out_data_1_len;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);

	if (length + ctx->gcm_remainder_len < block_size) {
		/* accumulate bytes here and return */
		memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
		    datap, length);
		ctx->gcm_remainder_len += length;
		if (ctx->gcm_copy_to == NULL) {
			ctx->gcm_copy_to = datap;
		}
		return (CRYPTO_SUCCESS);
	}

	crypto_init_ptrs(out, &iov_or_mp, &offset);

	gops = gcm_impl_get_ops();
	do {
		/* Unprocessed data from last call. */
		if (ctx->gcm_remainder_len > 0) {
			need = block_size - ctx->gcm_remainder_len;

			if (need > remainder)
				return (CRYPTO_DATA_LEN_RANGE);

			memcpy(&((uint8_t *)ctx->gcm_remainder)
			    [ctx->gcm_remainder_len], datap, need);

			blockp = (uint8_t *)ctx->gcm_remainder;
		} else {
			blockp = datap;
		}

		/*
		 * Increment counter. Counter bits are confined
		 * to the bottom 32 bits of the counter block.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);
		xor_block(blockp, (uint8_t *)ctx->gcm_tmp);

		lastp = (uint8_t *)ctx->gcm_tmp;

		ctx->gcm_processed_data_len += block_size;

		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
		    &out_data_1_len, &out_data_2, block_size);

		/* copy block to where it belongs */
		if (out_data_1_len == block_size) {
			copy_block(lastp, out_data_1);
		} else {
			memcpy(out_data_1, lastp, out_data_1_len);
			if (out_data_2 != NULL) {
				memcpy(out_data_2,
				    lastp + out_data_1_len,
				    block_size - out_data_1_len);
			}
		}
		/* update offset */
		out->cd_offset += block_size;

		/* add ciphertext to the hash */
		GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);

		/* Update pointer to next block of data to be processed. */
		if (ctx->gcm_remainder_len != 0) {
			datap += need;
			ctx->gcm_remainder_len = 0;
		} else {
			datap += block_size;
		}

		remainder = (size_t)&data[length] - (size_t)datap;

		/* Incomplete last block. */
		if (remainder > 0 && remainder < block_size) {
			memcpy(ctx->gcm_remainder, datap, remainder);
			ctx->gcm_remainder_len = remainder;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		ctx->gcm_copy_to = NULL;

	} while (remainder > 0);
out:
	return (CRYPTO_SUCCESS);
}

int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	(void) copy_block;
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_encrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint8_t *ghash, *macp = NULL;
	int i, rv;

	if (out->cd_length <
	    (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;

	if (ctx->gcm_remainder_len > 0) {
		uint64_t counter;
		uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;

		/*
		 * Here is where we deal with data that is not a
		 * multiple of the block size.
		 */

		/*
		 * Increment counter.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);

		macp = (uint8_t *)ctx->gcm_remainder;
		memset(macp + ctx->gcm_remainder_len, 0,
		    block_size - ctx->gcm_remainder_len);

		/* XOR with counter block */
		for (i = 0; i < ctx->gcm_remainder_len; i++) {
			macp[i] ^= tmpp[i];
		}

		/* add ciphertext to the hash */
		GHASH(ctx, macp, ghash, gops);

		ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
	}

	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	if (ctx->gcm_remainder_len > 0) {
		rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += ctx->gcm_remainder_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);
	out->cd_offset += ctx->gcm_tag_len;

	return (CRYPTO_SUCCESS);
}

/*
 * This only deals with decrypting the last block of the input, which
 * might not be a multiple of the block length.
 */
static void
gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	uint8_t *datap, *outp, *counterp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int i;

	/*
	 * Increment counter.
	 * Counter bits are confined to the bottom 32 bits
	 */
	counter = ntohll(ctx->gcm_cb[1] & counter_mask);
	counter = htonll(counter + 1);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

	datap = (uint8_t *)ctx->gcm_remainder;
	outp = &((ctx->gcm_pt_buf)[index]);
	counterp = (uint8_t *)ctx->gcm_tmp;

	/* pad the incomplete last ciphertext block with zeros for the hash */
	memset((uint8_t *)ctx->gcm_tmp, 0, block_size);
	memcpy((uint8_t *)ctx->gcm_tmp, datap, ctx->gcm_remainder_len);

	/* add ciphertext to the hash */
	GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());

	/* decrypt remaining ciphertext */
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);

	/* XOR with counter block */
	for (i = 0; i < ctx->gcm_remainder_len; i++) {
		outp[i] = datap[i] ^ counterp[i];
	}
}

int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	(void) out, (void) block_size, (void) encrypt_block, (void) copy_block,
	    (void) xor_block;
	size_t new_len;
	uint8_t *new;

	/*
	 * Copy contiguous ciphertext input blocks to the plaintext buffer.
	 * The ciphertext is decrypted in the final call, once the total
	 * length is known and the trailing authentication tag can be
	 * located.
	 */
	if (length > 0) {
		new_len = ctx->gcm_pt_buf_len + length;
		new = vmem_alloc(new_len, KM_SLEEP);
		if (new == NULL) {
			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
			ctx->gcm_pt_buf = NULL;
			return (CRYPTO_HOST_MEMORY);
		}

		if (ctx->gcm_pt_buf != NULL) {
			memcpy(new, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
		} else {
			ASSERT0(ctx->gcm_pt_buf_len);
		}

		ctx->gcm_pt_buf = new;
		ctx->gcm_pt_buf_len = new_len;
		memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data,
		    length);
		ctx->gcm_processed_data_len += length;
	}

	ctx->gcm_remainder_len = 0;
	return (CRYPTO_SUCCESS);
}

int
gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_decrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t pt_len;
	size_t remainder;
	uint8_t *ghash;
	uint8_t *blockp;
	uint8_t *cbp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int processed = 0, rv;

	ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);

	gops = gcm_impl_get_ops();
	pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	ghash = (uint8_t *)ctx->gcm_ghash;
	blockp = ctx->gcm_pt_buf;
	remainder = pt_len;
	while (remainder > 0) {
		/* Incomplete last block */
		if (remainder < block_size) {
			memcpy(ctx->gcm_remainder, blockp, remainder);
			ctx->gcm_remainder_len = remainder;
			/*
			 * Not expecting any more ciphertext; just
			 * compute the plaintext for the remaining input.
			 */
			gcm_decrypt_incomplete_block(ctx, block_size,
			    processed, encrypt_block, xor_block);
			ctx->gcm_remainder_len = 0;
			goto out;
		}
		/* add ciphertext to the hash */
		GHASH(ctx, blockp, ghash, gops);

		/*
		 * Increment counter.
		 * Counter bits are confined to the bottom 32 bits
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		cbp = (uint8_t *)ctx->gcm_tmp;
		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);

		/* XOR with ciphertext */
		xor_block(cbp, blockp);

		processed += block_size;
		blockp += block_size;
		remainder -= block_size;
	}
out:
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	/* compare the input authentication tag with what we calculated */
	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match */
		return (CRYPTO_INVALID_MAC);
	} else {
		rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
		out->cd_offset += pt_len;
	}
	return (CRYPTO_SUCCESS);
}

static int
gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
{
	size_t tag_len;

	/*
	 * Check the length of the authentication tag (in bits).
	 */
	tag_len = gcm_param->ulTagBits;
	switch (tag_len) {
	case 32:
	case 64:
	case 96:
	case 104:
	case 112:
	case 120:
	case 128:
		break;
	default:
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

	if (gcm_param->ulIvLen == 0)
		return (CRYPTO_MECHANISM_PARAM_INVALID);

	return (CRYPTO_SUCCESS);
}

static void
gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
    gcm_ctx_t *ctx, size_t block_size,
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *cb;
	ulong_t remainder = iv_len;
	ulong_t processed = 0;
	uint8_t *datap, *ghash;
	uint64_t len_a_len_c[2];

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;
	cb = (uint8_t *)ctx->gcm_cb;
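	/*
	 * Per NIST SP 800-38D: a 96-bit IV is used directly as the
	 * pre-counter block J0, with the 32-bit block counter appended
	 * and initialized to one. Any other IV length is instead
	 * compressed with GHASH, including a final block that encodes
	 * the IV length in bits.
	 */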
	if (iv_len == 12) {
		memcpy(cb, iv, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* J0 will be used again in the final */
		copy_block(cb, (uint8_t *)ctx->gcm_J0);
	} else {
		/* GHASH the IV */
		do {
			if (remainder < block_size) {
				memset(cb, 0, block_size);
				memcpy(cb, &(iv[processed]), remainder);
				datap = (uint8_t *)cb;
				remainder = 0;
			} else {
				datap = (uint8_t *)(&(iv[processed]));
				processed += block_size;
				remainder -= block_size;
			}
			GHASH(ctx, datap, ghash, gops);
		} while (remainder > 0);

		len_a_len_c[0] = 0;
		len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
		GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);

		/* J0 will be used again in the final */
		copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
	}
}

static int
gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *ghash, *datap, *authp;
	size_t remainder, processed;

	/* encrypt zero block to get subkey H */
	memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
	    (uint8_t *)ctx->gcm_H);

	gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
	    copy_block, xor_block);

	gops = gcm_impl_get_ops();
	authp = (uint8_t *)ctx->gcm_tmp;
	ghash = (uint8_t *)ctx->gcm_ghash;
	memset(authp, 0, block_size);
	memset(ghash, 0, block_size);

	processed = 0;
	remainder = auth_data_len;
	do {
		if (remainder < block_size) {
			/*
			 * There's not a full block of data; pad the
			 * rest of the buffer with zeros.
			 */

			if (auth_data != NULL) {
				memset(authp, 0, block_size);
				memcpy(authp, &(auth_data[processed]),
				    remainder);
			} else {
				ASSERT0(remainder);
			}

			datap = (uint8_t *)authp;
			remainder = 0;
		} else {
			datap = (uint8_t *)(&(auth_data[processed]));
			processed += block_size;
			remainder -= block_size;
		}

		/* add auth data to the hash */
		GHASH(ctx, datap, ghash, gops);

	} while (remainder > 0);

	return (CRYPTO_SUCCESS);
}

/*
 * The following function is called at encrypt or decrypt init time
 * for AES GCM mode.
 *
 * Init the GCM context struct. Handle the cycle and avx implementations here.
 */
int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	int rv;
	CK_AES_GCM_PARAMS *gcm_param;

	if (param != NULL) {
		gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;

		if ((rv = gcm_validate_args(gcm_param)) != 0) {
			return (rv);
		}

		gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
		gcm_ctx->gcm_tag_len >>= 3;
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));

		rv = CRYPTO_SUCCESS;
		gcm_ctx->gcm_flags |= GCM_MODE;
	} else {
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

#ifdef CAN_USE_GCM_ASM
	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
		gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
	} else {
		/*
		 * Handle the "cycle" implementation by creating avx and
		 * non-avx contexts alternately.
		 */
		gcm_ctx->gcm_use_avx = gcm_toggle_avx();
		/*
		 * We don't handle byte swapped key schedules in the avx
		 * code path.
		 */
		aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
		if (ks->ops->needs_byteswap == B_TRUE) {
			gcm_ctx->gcm_use_avx = B_FALSE;
		}
		/* Use the MOVBE and the BSWAP variants alternately. */
		if (gcm_ctx->gcm_use_avx == B_TRUE &&
		    zfs_movbe_available() == B_TRUE) {
			(void) atomic_toggle_boolean_nv(
			    (volatile boolean_t *)&gcm_avx_can_use_movbe);
		}
	}
	/* Allocate Htab memory as needed. */
	if (gcm_ctx->gcm_use_avx == B_TRUE) {
		size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);

		if (htab_len == 0) {
			return (CRYPTO_MECHANISM_PARAM_INVALID);
		}
		gcm_ctx->gcm_htab_len = htab_len;
		gcm_ctx->gcm_Htable =
		    (uint64_t *)kmem_alloc(htab_len, KM_SLEEP);

		if (gcm_ctx->gcm_Htable == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}
	/* AVX and non-AVX context initialization differs from here on. */
	if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* ifdef CAN_USE_GCM_ASM */
		if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
		    gcm_param->pAAD, gcm_param->ulAADLen, block_size,
		    encrypt_block, copy_block, xor_block) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
#ifdef CAN_USE_GCM_ASM
	} else {
		if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
		    gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
	}
#endif /* ifdef CAN_USE_GCM_ASM */

	return (rv);
}

int
gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	int rv;
	CK_AES_GMAC_PARAMS *gmac_param;

	if (param != NULL) {
		gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;

		gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));

		rv = CRYPTO_SUCCESS;
		gcm_ctx->gcm_flags |= GMAC_MODE;
	} else {
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

#ifdef CAN_USE_GCM_ASM
	/*
	 * Handle the "cycle" implementation by creating avx and non-avx
	 * contexts alternately.
	 */
	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
		gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
	} else {
		gcm_ctx->gcm_use_avx = gcm_toggle_avx();
	}
	/* We don't handle byte swapped key schedules in the avx code path. */
	aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
	if (ks->ops->needs_byteswap == B_TRUE) {
		gcm_ctx->gcm_use_avx = B_FALSE;
	}
	/* Allocate Htab memory as needed. */
	if (gcm_ctx->gcm_use_avx == B_TRUE) {
		size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);

		if (htab_len == 0) {
			return (CRYPTO_MECHANISM_PARAM_INVALID);
		}
		gcm_ctx->gcm_htab_len = htab_len;
		gcm_ctx->gcm_Htable =
		    (uint64_t *)kmem_alloc(htab_len, KM_SLEEP);

		if (gcm_ctx->gcm_Htable == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}

	/* AVX and non-AVX context initialization differs from here on. */
	if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif	/* ifdef CAN_USE_GCM_ASM */
		if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
		    gmac_param->pAAD, gmac_param->ulAADLen, block_size,
		    encrypt_block, copy_block, xor_block) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
#ifdef CAN_USE_GCM_ASM
	} else {
		if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
		    gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
	}
#endif /* ifdef CAN_USE_GCM_ASM */

	return (rv);
}

void *
gcm_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
		return (NULL);

	gcm_ctx->gcm_flags = GCM_MODE;
	return (gcm_ctx);
}

void *
gmac_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
		return (NULL);

	gcm_ctx->gcm_flags = GMAC_MODE;
	return (gcm_ctx);
}

/* GCM implementation that contains the fastest methods */
static gcm_impl_ops_t gcm_fastest_impl = {
	.name = "fastest"
};

/* All compiled-in implementations */
static const gcm_impl_ops_t *gcm_all_impl[] = {
	&gcm_generic_impl,
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	&gcm_pclmulqdq_impl,
#endif
};

/* Indicates that implementation selection has been initialized */
static boolean_t gcm_impl_initialized = B_FALSE;

/* Hold all supported implementations */
static size_t gcm_supp_impl_cnt = 0;
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];

/*
 * Returns the GCM operations for encrypt/decrypt/key setup.  When a
 * SIMD implementation is not allowed in the current context, fall
 * back to the generic implementation.
 */
const gcm_impl_ops_t *
gcm_impl_get_ops(void)
{
	if (!kfpu_allowed())
		return (&gcm_generic_impl);

	const gcm_impl_ops_t *ops = NULL;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	switch (impl) {
	case IMPL_FASTEST:
		ASSERT(gcm_impl_initialized);
		ops = &gcm_fastest_impl;
		break;
	case IMPL_CYCLE:
		/* Cycle through supported implementations */
		ASSERT(gcm_impl_initialized);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		static size_t cycle_impl_idx = 0;
		size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
		ops = gcm_supp_impl[idx];
		break;
#ifdef CAN_USE_GCM_ASM
	case IMPL_AVX:
		/*
		 * Make sure that we return a valid implementation while
		 * switching to the avx implementation since there still
		 * may be unfinished non-avx contexts around.
		 */
		ops = &gcm_generic_impl;
		break;
#endif
	default:
		ASSERT3U(impl, <, gcm_supp_impl_cnt);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		if (impl < ARRAY_SIZE(gcm_all_impl))
			ops = gcm_supp_impl[impl];
		break;
	}

	ASSERT3P(ops, !=, NULL);

	return (ops);
}

/*
 * Initialize all supported implementations.
 */
void
gcm_impl_init(void)
{
	gcm_impl_ops_t *curr_impl;
	int i, c;

	/* Move supported implementations into gcm_supp_impl */
	for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
		curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];

		if (curr_impl->is_supported())
			gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
	}
	gcm_supp_impl_cnt = c;

	/*
	 * Set the fastest implementation given the assumption that the
	 * hardware accelerated version is the fastest.
	 */
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	if (gcm_pclmulqdq_impl.is_supported()) {
		memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
		    sizeof (gcm_fastest_impl));
	} else
#endif
	{
		memcpy(&gcm_fastest_impl, &gcm_generic_impl,
		    sizeof (gcm_fastest_impl));
	}

	strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);

#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if it's available and the implementation
	 * hasn't changed from its default value of fastest on module load.
	 */
	if (gcm_avx_will_work()) {
#ifdef HAVE_MOVBE
		if (zfs_movbe_available() == B_TRUE) {
			atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
		}
#endif
		if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
			gcm_set_avx(B_TRUE);
		}
	}
#endif
	/* Finish initialization */
	atomic_swap_32(&icp_gcm_impl, user_sel_impl);
	gcm_impl_initialized = B_TRUE;
}

static const struct {
	const char *name;
	uint32_t sel;
} gcm_impl_opts[] = {
		{ "cycle",	IMPL_CYCLE },
		{ "fastest",	IMPL_FASTEST },
#ifdef CAN_USE_GCM_ASM
		{ "avx",	IMPL_AVX },
#endif
};

/*
 * Function sets desired gcm implementation.
 *
 * If we are called before init(), user preference will be saved in
 * user_sel_impl, and applied in a later init() call. This occurs when the
 * module parameter is specified on module load. Otherwise, directly update
 * icp_gcm_impl.
 *
 * @val		Name of gcm implementation to use
 */
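/*
 * On Linux the selection can typically be changed at runtime through the
 * module parameter registered below, e.g. (sysfs path assuming the icp
 * module exposes its parameters under /sys/module/icp):
 *
 *	echo avx > /sys/module/icp/parameters/icp_gcm_impl
 *	cat /sys/module/icp/parameters/icp_gcm_impl
 */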
int
gcm_impl_set(const char *val)
{
	int err = -EINVAL;
	char req_name[GCM_IMPL_NAME_MAX];
	uint32_t impl = GCM_IMPL_READ(user_sel_impl);
	size_t i;

	/* sanitize input */
	i = strnlen(val, GCM_IMPL_NAME_MAX);
	if (i == 0 || i >= GCM_IMPL_NAME_MAX)
		return (err);

	strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
	while (i > 0 && isspace(req_name[i-1]))
		i--;
	req_name[i] = '\0';

	/* Check mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
		/* Ignore avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
			impl = gcm_impl_opts[i].sel;
			err = 0;
			break;
		}
	}

	/* check all supported impl if init() was already called */
	if (err != 0 && gcm_impl_initialized) {
		/* check all supported implementations */
		for (i = 0; i < gcm_supp_impl_cnt; i++) {
			if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
				impl = i;
				err = 0;
				break;
			}
		}
	}
#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if available and the requested one is
	 * avx or fastest.
	 */
	if (gcm_avx_will_work() == B_TRUE &&
	    (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
		gcm_set_avx(B_TRUE);
	} else {
		gcm_set_avx(B_FALSE);
	}
#endif

	if (err == 0) {
		if (gcm_impl_initialized)
			atomic_swap_32(&icp_gcm_impl, impl);
		else
			atomic_swap_32(&user_sel_impl, impl);
	}

	return (err);
}

#if defined(_KERNEL) && defined(__linux__)

static int
icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
{
	return (gcm_impl_set(val));
}

static int
icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
{
	int i, cnt = 0;
	char *fmt;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	ASSERT(gcm_impl_initialized);

	/* list mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
		/* Ignore avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
		cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
		    gcm_impl_opts[i].name);
	}

	/* list all supported implementations */
	for (i = 0; i < gcm_supp_impl_cnt; i++) {
		fmt = (i == impl) ? "[%s] " : "%s ";
		cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
		    gcm_supp_impl[i]->name);
	}

	return (cnt);
}

module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
    NULL, 0644);
MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
#endif /* defined(_KERNEL) && defined(__linux__) */

#ifdef CAN_USE_GCM_ASM
#define	GCM_BLOCK_LEN 16
/*
 * The openssl asm routines are 6x aggregated and need that many bytes
 * at minimum.
 */
#define	GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
#define	GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
/*
 * Ensure the chunk size is reasonable since we are allocating a
 * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts.
 */
#define	GCM_AVX_MAX_CHUNK_SIZE \
	(((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
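/* For the 96-byte minimum above this works out to 131040 bytes. */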

/* Clear the FPU registers since they hold sensitive internal state. */
#define	clear_fpu_regs() clear_fpu_regs_avx()
#define	GHASH_AVX(ctx, in, len) \
    gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \
    in, len)

#define	gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)

/* Get the chunk size module parameter. */
#define	GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size

/*
 * Module parameter: number of bytes to process at once while owning the FPU.
 * It is rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and
 * is ensured to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
 */
static uint32_t gcm_avx_chunk_size =
	((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;

extern void clear_fpu_regs_avx(void);
extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
extern void aes_encrypt_intel(const uint32_t rk[], int nr,
    const uint32_t pt[4], uint32_t ct[4]);

extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
    const uint8_t *in, size_t len);

extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

static inline boolean_t
gcm_avx_will_work(void)
{
	/* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
	return (kfpu_allowed() &&
	    zfs_avx_available() && zfs_aes_available() &&
	    zfs_pclmulqdq_available());
}

static inline void
gcm_set_avx(boolean_t val)
{
	if (gcm_avx_will_work() == B_TRUE) {
		atomic_swap_32(&gcm_use_avx, val);
	}
}

static inline boolean_t
gcm_toggle_avx(void)
{
	if (gcm_avx_will_work() == B_TRUE) {
		return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
	} else {
		return (B_FALSE);
	}
}

static inline size_t
gcm_simd_get_htab_size(boolean_t simd_mode)
{
	switch (simd_mode) {
	case B_TRUE:
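		/*
		 * Presumably sized for the 6x aggregated AVX routines:
		 * 2 * 6 Htable entries of 2 * 8 bytes each, 192 bytes total.
		 */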
		return (2 * 6 * 2 * sizeof (uint64_t));

	default:
		return (0);
	}
}

/*
 * Clear sensitive data in the context.
 *
 * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
 * ctx->gcm_Htable contain the hash sub key which protects authentication.
 *
 * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for
 * a known plaintext attack; they consist of the IV and the first and last
 * counter block, respectively. Whether they should be cleared is debatable.
 */
1140eda14cbcSMatt Macy static inline void
1141eda14cbcSMatt Macy gcm_clear_ctx(gcm_ctx_t *ctx)
1142eda14cbcSMatt Macy {
1143da5137abSMartin Matuska 	memset(ctx->gcm_remainder, 0, sizeof (ctx->gcm_remainder));
1144da5137abSMartin Matuska 	memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
1145da5137abSMartin Matuska 	memset(ctx->gcm_J0, 0, sizeof (ctx->gcm_J0));
1146da5137abSMartin Matuska 	memset(ctx->gcm_tmp, 0, sizeof (ctx->gcm_tmp));
1147eda14cbcSMatt Macy }
1148eda14cbcSMatt Macy 
/* Increment the GCM counter block by n. */
static inline void
gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
{
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);

	counter = htonll(counter + n);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
}
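
/*
 * For example, with n = 1 and the last four bytes of the counter block
 * holding 0x000000ff, they become 0x00000100; 0xffffffff wraps around to
 * 0x00000000. Only the low-order (big-endian) 32 bits change: the mask
 * keeps the carry from spilling into the IV part of the block.
 */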

/*
 * Encrypt multiple blocks of data in GCM mode.
 * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler
 * routines if possible. While processing a chunk, the FPU is "locked".
 */
static int
gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
    size_t length, crypto_data_t *out, size_t block_size)
{
	size_t bleft = length;
	size_t need = 0;
	size_t done = 0;
	uint8_t *datap = (uint8_t *)data;
	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
	uint64_t *ghash = ctx->gcm_ghash;
	uint64_t *cb = ctx->gcm_cb;
	uint8_t *ct_buf = NULL;
	uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
	int rv = CRYPTO_SUCCESS;

	ASSERT(block_size == GCM_BLOCK_LEN);
	/*
	 * If the last call left an incomplete block, try to fill
	 * it first.
	 */
	if (ctx->gcm_remainder_len > 0) {
		need = block_size - ctx->gcm_remainder_len;
		if (length < need) {
			/* Accumulate bytes here and return. */
			memcpy((uint8_t *)ctx->gcm_remainder +
			    ctx->gcm_remainder_len, datap, length);

			ctx->gcm_remainder_len += length;
			if (ctx->gcm_copy_to == NULL) {
				ctx->gcm_copy_to = datap;
			}
			return (CRYPTO_SUCCESS);
		} else {
			/* Complete incomplete block. */
			memcpy((uint8_t *)ctx->gcm_remainder +
			    ctx->gcm_remainder_len, datap, need);

			ctx->gcm_copy_to = NULL;
		}
	}

	/* Allocate a buffer to encrypt into if there is enough input. */
	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
		ct_buf = vmem_alloc(chunk_size, KM_SLEEP);
		if (ct_buf == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}

	/* If we completed an incomplete block, encrypt and write it out. */
	if (ctx->gcm_remainder_len > 0) {
		kfpu_begin();
		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
		    (const uint32_t *)cb, (uint32_t *)tmp);

		gcm_xor_avx((const uint8_t *)ctx->gcm_remainder, tmp);
		GHASH_AVX(ctx, tmp, block_size);
		clear_fpu_regs();
		kfpu_end();
		rv = crypto_put_output_data(tmp, out, block_size);
		out->cd_offset += block_size;
		gcm_incr_counter_block(ctx);
		ctx->gcm_processed_data_len += block_size;
		bleft -= need;
		datap += need;
		ctx->gcm_remainder_len = 0;
	}

	/* Do the bulk encryption in chunk_size blocks. */
	for (; bleft >= chunk_size; bleft -= chunk_size) {
		kfpu_begin();
		done = aesni_gcm_encrypt(
		    datap, ct_buf, chunk_size, key, cb, ghash);

		clear_fpu_regs();
		kfpu_end();
		if (done != chunk_size) {
			rv = CRYPTO_FAILED;
			goto out_nofpu;
		}
		rv = crypto_put_output_data(ct_buf, out, chunk_size);
		if (rv != CRYPTO_SUCCESS) {
			goto out_nofpu;
		}
		out->cd_offset += chunk_size;
		datap += chunk_size;
		ctx->gcm_processed_data_len += chunk_size;
	}
	/* Check if we are already done. */
	if (bleft == 0) {
		goto out_nofpu;
	}
	/* Bulk encrypt the remaining data. */
	kfpu_begin();
	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
		done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
		if (done == 0) {
			rv = CRYPTO_FAILED;
			goto out;
		}
		rv = crypto_put_output_data(ct_buf, out, done);
		if (rv != CRYPTO_SUCCESS) {
			goto out;
		}
		out->cd_offset += done;
		ctx->gcm_processed_data_len += done;
		datap += done;
		bleft -= done;

	}
	/* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
	while (bleft > 0) {
		if (bleft < block_size) {
			memcpy(ctx->gcm_remainder, datap, bleft);
			ctx->gcm_remainder_len = bleft;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		/* Encrypt, hash and write out. */
		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
		    (const uint32_t *)cb, (uint32_t *)tmp);

		gcm_xor_avx(datap, tmp);
		GHASH_AVX(ctx, tmp, block_size);
		rv = crypto_put_output_data(tmp, out, block_size);
		if (rv != CRYPTO_SUCCESS) {
			goto out;
		}
		out->cd_offset += block_size;
		gcm_incr_counter_block(ctx);
		ctx->gcm_processed_data_len += block_size;
		datap += block_size;
		bleft -= block_size;
	}
out:
	clear_fpu_regs();
	kfpu_end();
out_nofpu:
	if (ct_buf != NULL) {
		vmem_free(ct_buf, chunk_size);
	}
	return (rv);
}
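
/*
 * The block-by-block tail above is plain GCM; for each 16-byte block:
 *
 *	C_i = P_i ^ E_K(CB_i)	(aes_encrypt_intel + gcm_xor_avx)
 *	S   = GHASH(S, C_i)	(GHASH_AVX)
 *	CB  = incr32(CB)	(gcm_incr_counter_block)
 *
 * The bulk path computes the same thing, batched inside
 * aesni_gcm_encrypt() while the FPU is held.
 */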

/*
 * Finalize the encryption: Zero fill, encrypt, hash and write out any
 * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
 */
static int
gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
	uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
	uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
	size_t rem_len = ctx->gcm_remainder_len;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	int aes_rounds = ((aes_key_t *)keysched)->nr;
	int rv;

	ASSERT(block_size == GCM_BLOCK_LEN);

	if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	kfpu_begin();
	/* Pad last incomplete block with zeros, encrypt and hash. */
	if (rem_len > 0) {
		uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
		const uint32_t *cb = (uint32_t *)ctx->gcm_cb;

		aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
		memset(remainder + rem_len, 0, block_size - rem_len);
		for (int i = 0; i < rem_len; i++) {
			remainder[i] ^= tmp[i];
		}
		GHASH_AVX(ctx, remainder, block_size);
		ctx->gcm_processed_data_len += rem_len;
		/* No need to increment counter_block, it's the last block. */
	}
	/* Finish tag. */
	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(keysched, aes_rounds, J0, J0);

	gcm_xor_avx((uint8_t *)J0, ghash);
	clear_fpu_regs();
	kfpu_end();

	/* Output remainder. */
	if (rem_len > 0) {
		rv = crypto_put_output_data(remainder, out, rem_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += rem_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);

	out->cd_offset += ctx->gcm_tag_len;
	/* Clear sensitive data in the context before returning. */
	gcm_clear_ctx(ctx);
	return (CRYPTO_SUCCESS);
}
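
/*
 * The tag computed above is the one from NIST SP 800-38D:
 *
 *	S = GHASH_H(A || C || len(A) || len(C))
 *	T = MSB_tag_len(E_K(J0) ^ S)
 *
 * gcm_ghash holds S once the gcm_len_a_len_c block (the bit lengths of
 * the AAD and the ciphertext) is absorbed; encrypting J0 and XOR-ing it
 * in yields T, of which gcm_tag_len bytes are written out.
 */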

/*
 * Finalize decryption: We have just accumulated ciphertext, so now we
 * decrypt it here in place.
 */
static int
gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
	ASSERT3U(block_size, ==, 16);

	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	uint8_t *datap = ctx->gcm_pt_buf;
	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
	uint32_t *cb = (uint32_t *)ctx->gcm_cb;
	uint64_t *ghash = ctx->gcm_ghash;
	uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
	int rv = CRYPTO_SUCCESS;
	size_t bleft, done;

	/*
	 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
	 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple
	 * of GCM_AVX_MIN_DECRYPT_BYTES.
	 */
	for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
		kfpu_begin();
		done = aesni_gcm_decrypt(datap, datap, chunk_size,
		    (const void *)key, ctx->gcm_cb, ghash);
		clear_fpu_regs();
		kfpu_end();
		if (done != chunk_size) {
			return (CRYPTO_FAILED);
		}
		datap += done;
	}
	/* Decrypt remainder, which is less than chunk size, in one go. */
	kfpu_begin();
	if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
		done = aesni_gcm_decrypt(datap, datap, bleft,
		    (const void *)key, ctx->gcm_cb, ghash);
		if (done == 0) {
			clear_fpu_regs();
			kfpu_end();
			return (CRYPTO_FAILED);
		}
		datap += done;
		bleft -= done;
	}
	ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);

	/*
	 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
	 * decrypt them block by block.
	 */
	while (bleft > 0) {
		/* Incomplete last block. */
		if (bleft < block_size) {
			uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;

			memset(lastb, 0, block_size);
			memcpy(lastb, datap, bleft);
			/* The GCM processing. */
			GHASH_AVX(ctx, lastb, block_size);
			aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
			for (size_t i = 0; i < bleft; i++) {
				datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
			}
			break;
		}
		/* The GCM processing. */
		GHASH_AVX(ctx, datap, block_size);
		aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
		gcm_xor_avx((uint8_t *)tmp, datap);
		gcm_incr_counter_block(ctx);

		datap += block_size;
		bleft -= block_size;
	}
	if (rv != CRYPTO_SUCCESS) {
		clear_fpu_regs();
		kfpu_end();
		return (rv);
	}
	/* Decryption done, finish the tag. */
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
	    (uint32_t *)ctx->gcm_J0);

	gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);

	/* We are done with the FPU, restore its state. */
	clear_fpu_regs();
	kfpu_end();

	/* Compare the input authentication tag with what we calculated. */
	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match. */
		return (CRYPTO_INVALID_MAC);
	}
	rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
	if (rv != CRYPTO_SUCCESS) {
		return (rv);
	}
	out->cd_offset += pt_len;
	gcm_clear_ctx(ctx);
	return (CRYPTO_SUCCESS);
}
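
/*
 * Note the ordering above: the whole ciphertext was accumulated in
 * gcm_pt_buf, decrypted in place, and the plaintext is handed to the
 * consumer only after the tag comparison succeeds, so unauthenticated
 * plaintext is never released.
 */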

/*
 * Initialize the GCM params H, Htable and the counter block. Save the
 * initial counter block.
 */
static int
gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size)
{
	uint8_t *cb = (uint8_t *)ctx->gcm_cb;
	uint64_t *H = ctx->gcm_H;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
	uint8_t *datap = auth_data;
	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	size_t bleft;

	ASSERT(block_size == GCM_BLOCK_LEN);

	/* Init H (encrypt zero block) and create the initial counter block. */
	memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash));
	memset(H, 0, sizeof (ctx->gcm_H));
	kfpu_begin();
	aes_encrypt_intel(keysched, aes_rounds,
	    (const uint32_t *)H, (uint32_t *)H);

	gcm_init_htab_avx(ctx->gcm_Htable, H);

	if (iv_len == 12) {
		memcpy(cb, iv, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* We need the ICB later. */
		memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0));
	} else {
		/*
		 * Most consumers use 12-byte IVs, so it's OK to use the
		 * original routines for other IV sizes, just avoid nesting
		 * kfpu_begin calls.
		 */
		clear_fpu_regs();
		kfpu_end();
		gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
		    aes_copy_block, aes_xor_block);
		kfpu_begin();
	}

	/* OpenSSL post-increments the counter, adjust for that. */
	gcm_incr_counter_block(ctx);

	/* Ghash AAD in chunk_size blocks. */
	for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
		GHASH_AVX(ctx, datap, chunk_size);
		datap += chunk_size;
		clear_fpu_regs();
		kfpu_end();
		kfpu_begin();
	}
	/* Ghash the remainder and handle possible incomplete GCM block. */
	if (bleft > 0) {
		size_t incomp = bleft % block_size;

		bleft -= incomp;
		if (bleft > 0) {
			GHASH_AVX(ctx, datap, bleft);
			datap += bleft;
		}
		if (incomp > 0) {
			/* Zero pad and hash incomplete last block. */
			uint8_t *authp = (uint8_t *)ctx->gcm_tmp;

			memset(authp, 0, block_size);
			memcpy(authp, datap, incomp);
			GHASH_AVX(ctx, authp, block_size);
		}
	}
	clear_fpu_regs();
	kfpu_end();
	return (CRYPTO_SUCCESS);
}
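
/*
 * The J0 construction above follows NIST SP 800-38D: for the common
 * 96-bit IV, J0 = IV || 0^31 || 1 (the literal cb[12..15] assignments);
 * for any other IV length, gcm_format_initial_blocks() derives J0 by
 * GHASH-ing the zero-padded IV and its bit length instead.
 */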

#if defined(_KERNEL)
static int
icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	char val_rounded[16];
	int error = 0;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;

	if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
		return (-EINVAL);

	snprintf(val_rounded, 16, "%u", (uint32_t)val);
	error = param_set_uint(val_rounded, kp);
	return (error);
}
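
/*
 * Example: the setter rounds the requested value down to a multiple of
 * GCM_AVX_MIN_DECRYPT_BYTES before range checking. Assuming for
 * illustration that this constant is 512 (an assumption; the actual
 * value is defined earlier in this module), writing 1000 to
 * icp_gcm_avx_chunk_size stores 512, while values below
 * GCM_AVX_MIN_ENCRYPT_BYTES or above GCM_AVX_MAX_CHUNK_SIZE are
 * rejected with -EINVAL.
 */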

module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
    param_get_uint, &gcm_avx_chunk_size, 0644);

MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
	"How many bytes to process while owning the FPU");

#endif /* defined(_KERNEL) */
#endif /* ifdef CAN_USE_GCM_ASM */