/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <modes/modes.h>
#include <sys/crypto/common.h>
#include <sys/crypto/icp.h>
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>
#include <sys/simd.h>
#include <modes/gcm_impl.h>
#ifdef CAN_USE_GCM_ASM
#include <aes/aes_impl.h>
#endif

#define	GHASH(c, d, t, o) \
	xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
	(o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
	(uint64_t *)(void *)(t));
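/*
 * A note on the macro above: GHASH folds one 16-byte block (d) into the
 * running hash. It XORs the block into (c)->gcm_ghash and then multiplies
 * the result by the hash subkey H in GF(2^128), storing the product in (t).
 * In practice (t) is (c)->gcm_ghash itself, so the net effect is
 * ghash = (ghash ^ d) * H, the textbook GHASH recurrence.
 */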
/* Select GCM implementation */
#define	IMPL_FASTEST	(UINT32_MAX)
#define	IMPL_CYCLE	(UINT32_MAX-1)
#ifdef CAN_USE_GCM_ASM
#define	IMPL_AVX	(UINT32_MAX-2)
#endif
#define	GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
static uint32_t icp_gcm_impl = IMPL_FASTEST;
static uint32_t user_sel_impl = IMPL_FASTEST;

#ifdef CAN_USE_GCM_ASM
/* Does the architecture we run on support the MOVBE instruction? */
boolean_t gcm_avx_can_use_movbe = B_FALSE;
/*
 * Whether to use the optimized openssl gcm and ghash implementations.
 * Set to true if module parameter icp_gcm_impl == "avx".
 */
static boolean_t gcm_use_avx = B_FALSE;
#define	GCM_IMPL_USE_AVX	(*(volatile boolean_t *)&gcm_use_avx)

extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);

static inline boolean_t gcm_avx_will_work(void);
static inline void gcm_set_avx(boolean_t);
static inline boolean_t gcm_toggle_avx(void);
static inline size_t gcm_simd_get_htab_size(boolean_t);

static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
    crypto_data_t *, size_t);

static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
    size_t, size_t);
#endif /* ifdef CAN_USE_GCM_ASM */

/*
 * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
 * is done in another function.
 */
int
gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_mode_encrypt_contiguous_blocks_avx(
		    ctx, data, length, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t remainder = length;
	size_t need = 0;
	uint8_t *datap = (uint8_t *)data;
	uint8_t *blockp;
	uint8_t *lastp;
	void *iov_or_mp;
	offset_t offset;
	uint8_t *out_data_1;
	uint8_t *out_data_2;
	size_t out_data_1_len;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);

	if (length + ctx->gcm_remainder_len < block_size) {
		/* accumulate bytes here and return */
		memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
		    datap,
		    length);
		ctx->gcm_remainder_len += length;
		if (ctx->gcm_copy_to == NULL) {
			ctx->gcm_copy_to = datap;
		}
		return (CRYPTO_SUCCESS);
	}

	crypto_init_ptrs(out, &iov_or_mp, &offset);

	gops = gcm_impl_get_ops();
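	/*
	 * Main loop sketch: each iteration encrypts one block in CTR
	 * fashion (increment the counter block, encrypt it, XOR with the
	 * plaintext) and then GHASHes the resulting ciphertext block.
	 * GCM is encrypt-then-MAC: the hash is always computed over
	 * ciphertext, never plaintext.
	 */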
	do {
		/* Unprocessed data from last call. */
		if (ctx->gcm_remainder_len > 0) {
			need = block_size - ctx->gcm_remainder_len;

			if (need > remainder)
				return (CRYPTO_DATA_LEN_RANGE);

			memcpy(&((uint8_t *)ctx->gcm_remainder)
			    [ctx->gcm_remainder_len], datap, need);

			blockp = (uint8_t *)ctx->gcm_remainder;
		} else {
			blockp = datap;
		}

		/*
		 * Increment counter. Counter bits are confined
		 * to the bottom 32 bits of the counter block.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);
		xor_block(blockp, (uint8_t *)ctx->gcm_tmp);

		lastp = (uint8_t *)ctx->gcm_tmp;

		ctx->gcm_processed_data_len += block_size;

		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
		    &out_data_1_len, &out_data_2, block_size);

		/* copy block to where it belongs */
		if (out_data_1_len == block_size) {
			copy_block(lastp, out_data_1);
		} else {
			memcpy(out_data_1, lastp, out_data_1_len);
			if (out_data_2 != NULL) {
				memcpy(out_data_2,
				    lastp + out_data_1_len,
				    block_size - out_data_1_len);
			}
		}
		/* update offset */
		out->cd_offset += block_size;

		/* add ciphertext to the hash */
		GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);

		/* Update pointer to next block of data to be processed. */
		if (ctx->gcm_remainder_len != 0) {
			datap += need;
			ctx->gcm_remainder_len = 0;
		} else {
			datap += block_size;
		}

		remainder = (size_t)&data[length] - (size_t)datap;
		/* Incomplete last block. */
		if (remainder > 0 && remainder < block_size) {
			memcpy(ctx->gcm_remainder, datap, remainder);
			ctx->gcm_remainder_len = remainder;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		ctx->gcm_copy_to = NULL;

	} while (remainder > 0);
out:
	return (CRYPTO_SUCCESS);
}

int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	(void) copy_block;
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_encrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint8_t *ghash, *macp = NULL;
	int i, rv;

	if (out->cd_length <
	    (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;
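	/*
	 * Finalization, in outline: (1) CTR-encrypt and hash any buffered
	 * partial block, (2) fold len(A) || len(C) (in bits) into the hash,
	 * and (3) compute the tag as E(K, J0) XOR GHASH, per the GCM spec.
	 * The output is the remaining ciphertext followed by the tag.
	 */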
	if (ctx->gcm_remainder_len > 0) {
		uint64_t counter;
		uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;

		/*
		 * Here is where we deal with data that is not a
		 * multiple of the block size.
		 */

		/*
		 * Increment counter.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);

		macp = (uint8_t *)ctx->gcm_remainder;
		memset(macp + ctx->gcm_remainder_len, 0,
		    block_size - ctx->gcm_remainder_len);

		/* XOR with counter block */
		for (i = 0; i < ctx->gcm_remainder_len; i++) {
			macp[i] ^= tmpp[i];
		}

		/* add ciphertext to the hash */
		GHASH(ctx, macp, ghash, gops);

		ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
	}

	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	if (ctx->gcm_remainder_len > 0) {
		rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += ctx->gcm_remainder_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);
	out->cd_offset += ctx->gcm_tag_len;

	return (CRYPTO_SUCCESS);
}

/*
 * This will only deal with decrypting the last block of the input that
 * might not be a multiple of block length.
 */
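/*
 * Note that the block cipher is only ever used in the forward (encrypt)
 * direction here: in CTR mode, plaintext is recovered as
 * P = C XOR E(K, counter_block), so no inverse cipher is needed.
 */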
static void
gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	uint8_t *datap, *outp, *counterp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int i;

	/*
	 * Increment counter.
	 * Counter bits are confined to the bottom 32 bits.
	 */
	counter = ntohll(ctx->gcm_cb[1] & counter_mask);
	counter = htonll(counter + 1);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

	datap = (uint8_t *)ctx->gcm_remainder;
	outp = &((ctx->gcm_pt_buf)[index]);
	counterp = (uint8_t *)ctx->gcm_tmp;

	/* authentication tag */
	memset((uint8_t *)ctx->gcm_tmp, 0, block_size);
	memcpy((uint8_t *)ctx->gcm_tmp, datap, ctx->gcm_remainder_len);

	/* add ciphertext to the hash */
	GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());

	/* decrypt remaining ciphertext */
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);

	/* XOR with counter block */
	for (i = 0; i < ctx->gcm_remainder_len; i++) {
		outp[i] = datap[i] ^ counterp[i];
	}
}

int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	(void) out, (void) block_size, (void) encrypt_block, (void) copy_block,
	    (void) xor_block;
	size_t new_len;
	uint8_t *new;

	/*
	 * Copy contiguous ciphertext input blocks to plaintext buffer.
	 * Ciphertext will be decrypted in the final.
	 */
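	/*
	 * Why buffer instead of decrypting here: the authentication tag
	 * arrives at the tail of the ciphertext stream, so the split
	 * between ciphertext and tag is only known once all input has
	 * been seen. gcm_decrypt_final() separates the two, verifies the
	 * tag and releases the plaintext.
	 */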
	if (length > 0) {
		new_len = ctx->gcm_pt_buf_len + length;
		new = vmem_alloc(new_len, KM_SLEEP);
		if (new == NULL) {
			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
			ctx->gcm_pt_buf = NULL;
			return (CRYPTO_HOST_MEMORY);
		}

		if (ctx->gcm_pt_buf != NULL) {
			memcpy(new, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
		} else {
			ASSERT0(ctx->gcm_pt_buf_len);
		}

		ctx->gcm_pt_buf = new;
		ctx->gcm_pt_buf_len = new_len;
		memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data,
		    length);
		ctx->gcm_processed_data_len += length;
	}

	ctx->gcm_remainder_len = 0;
	return (CRYPTO_SUCCESS);
}

int
gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_decrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t pt_len;
	size_t remainder;
	uint8_t *ghash;
	uint8_t *blockp;
	uint8_t *cbp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int processed = 0, rv;

	ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);

	gops = gcm_impl_get_ops();
	pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	ghash = (uint8_t *)ctx->gcm_ghash;
	blockp = ctx->gcm_pt_buf;
	remainder = pt_len;
	while (remainder > 0) {
		/* Incomplete last block */
		if (remainder < block_size) {
			memcpy(ctx->gcm_remainder, blockp, remainder);
			ctx->gcm_remainder_len = remainder;
			/*
			 * not expecting any more ciphertext, just
			 * compute plaintext for the remaining input
			 */
			gcm_decrypt_incomplete_block(ctx, block_size,
			    processed, encrypt_block, xor_block);
			ctx->gcm_remainder_len = 0;
			goto out;
		}
		/* add ciphertext to the hash */
		GHASH(ctx, blockp, ghash, gops);
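		/*
		 * The ciphertext block must be hashed before the XOR
		 * below overwrites it: blockp is decrypted in place
		 * inside gcm_pt_buf.
		 */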
		/*
		 * Increment counter.
		 * Counter bits are confined to the bottom 32 bits.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		cbp = (uint8_t *)ctx->gcm_tmp;
		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);

		/* XOR with ciphertext */
		xor_block(cbp, blockp);

		processed += block_size;
		blockp += block_size;
		remainder -= block_size;
	}
out:
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	/* compare the input authentication tag with what we calculated */
	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match */
		return (CRYPTO_INVALID_MAC);
	} else {
		rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
		out->cd_offset += pt_len;
	}
	return (CRYPTO_SUCCESS);
}
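/*
 * The tag lengths accepted below mirror NIST SP 800-38D: a full 128-bit
 * tag may be truncated to 120, 112, 104 or 96 bits, while 64- and 32-bit
 * tags are permitted only for certain constrained applications.
 */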
static int
gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
{
	size_t tag_len;

	/*
	 * Check the length of the authentication tag (in bits).
	 */
	tag_len = gcm_param->ulTagBits;
	switch (tag_len) {
	case 32:
	case 64:
	case 96:
	case 104:
	case 112:
	case 120:
	case 128:
		break;
	default:
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

	if (gcm_param->ulIvLen == 0)
		return (CRYPTO_MECHANISM_PARAM_INVALID);

	return (CRYPTO_SUCCESS);
}

static void
gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
    gcm_ctx_t *ctx, size_t block_size,
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *cb;
	ulong_t remainder = iv_len;
	ulong_t processed = 0;
	uint8_t *datap, *ghash;
	uint64_t len_a_len_c[2];

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;
	cb = (uint8_t *)ctx->gcm_cb;
	if (iv_len == 12) {
		memcpy(cb, iv, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* J0 will be used again in the final */
		copy_block(cb, (uint8_t *)ctx->gcm_J0);
	} else {
		/* GHASH the IV */
		do {
			if (remainder < block_size) {
				memset(cb, 0, block_size);
				memcpy(cb, &(iv[processed]), remainder);
				datap = (uint8_t *)cb;
				remainder = 0;
			} else {
				datap = (uint8_t *)(&(iv[processed]));
				processed += block_size;
				remainder -= block_size;
			}
			GHASH(ctx, datap, ghash, gops);
		} while (remainder > 0);

		len_a_len_c[0] = 0;
		len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
		GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);

		/* J0 will be used again in the final */
		copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
	}
}
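/*
 * To summarize the two cases above, per the GCM spec: for the common
 * 96-bit IV, J0 = IV || 0^31 || 1; for any other IV length,
 * J0 = GHASH(IV zero-padded || 0^64 || [len(IV)]_64), which is why
 * len_a_len_c[0] is zero there rather than the AAD length.
 */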
static int
gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *ghash, *datap, *authp;
	size_t remainder, processed;

	/* encrypt zero block to get subkey H */
	memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
	    (uint8_t *)ctx->gcm_H);

	gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
	    copy_block, xor_block);

	gops = gcm_impl_get_ops();
	authp = (uint8_t *)ctx->gcm_tmp;
	ghash = (uint8_t *)ctx->gcm_ghash;
	memset(authp, 0, block_size);
	memset(ghash, 0, block_size);

	processed = 0;
	remainder = auth_data_len;
	do {
		if (remainder < block_size) {
			/*
			 * There's not a block full of data, pad rest of
			 * buffer with zero
			 */

			if (auth_data != NULL) {
				memset(authp, 0, block_size);
				memcpy(authp, &(auth_data[processed]),
				    remainder);
			} else {
				ASSERT0(remainder);
			}

			datap = (uint8_t *)authp;
			remainder = 0;
		} else {
			datap = (uint8_t *)(&(auth_data[processed]));
			processed += block_size;
			remainder -= block_size;
		}

		/* add auth data to the hash */
		GHASH(ctx, datap, ghash, gops);

	} while (remainder > 0);

	return (CRYPTO_SUCCESS);
}
/*
 * The following function is called at encrypt or decrypt init time
 * for AES GCM mode.
 *
 * Init the GCM context struct. Handle the cycle and avx implementations here.
 */
int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	int rv;
	CK_AES_GCM_PARAMS *gcm_param;

	if (param != NULL) {
		gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;

		if ((rv = gcm_validate_args(gcm_param)) != 0) {
			return (rv);
		}

		gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
		gcm_ctx->gcm_tag_len >>= 3;
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));

		rv = CRYPTO_SUCCESS;
		gcm_ctx->gcm_flags |= GCM_MODE;
	} else {
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

#ifdef CAN_USE_GCM_ASM
	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
		gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
	} else {
		/*
		 * Handle the "cycle" implementation by creating avx and
		 * non-avx contexts alternately.
		 */
		gcm_ctx->gcm_use_avx = gcm_toggle_avx();
		/*
		 * We don't handle byte swapped key schedules in the avx
		 * code path.
		 */
		aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
		if (ks->ops->needs_byteswap == B_TRUE) {
			gcm_ctx->gcm_use_avx = B_FALSE;
		}
		/* Use the MOVBE and the BSWAP variants alternately. */
		if (gcm_ctx->gcm_use_avx == B_TRUE &&
		    zfs_movbe_available() == B_TRUE) {
			(void) atomic_toggle_boolean_nv(
			    (volatile boolean_t *)&gcm_avx_can_use_movbe);
		}
	}
	/* Allocate Htab memory as needed. */
	if (gcm_ctx->gcm_use_avx == B_TRUE) {
		size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);

		if (htab_len == 0) {
			return (CRYPTO_MECHANISM_PARAM_INVALID);
		}
		gcm_ctx->gcm_htab_len = htab_len;
		gcm_ctx->gcm_Htable =
		    (uint64_t *)kmem_alloc(htab_len, KM_SLEEP);

		if (gcm_ctx->gcm_Htable == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}
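	/*
	 * A sizing note: gcm_simd_get_htab_size() returns
	 * 2 * 6 * 2 * sizeof (uint64_t) = 192 bytes, presumably room for
	 * the six powers of H the 6x aggregated OpenSSL GHASH routines
	 * consume, each 16-byte GF(2^128) element paired with a
	 * precomputed companion value in the layout gcm_init_htab_avx()
	 * produces.
	 */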
	/* Avx and non-avx context initialization differs from here on. */
	if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* ifdef CAN_USE_GCM_ASM */
	if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
	    gcm_param->pAAD, gcm_param->ulAADLen, block_size,
	    encrypt_block, copy_block, xor_block) != 0) {
		rv = CRYPTO_MECHANISM_PARAM_INVALID;
	}
#ifdef CAN_USE_GCM_ASM
	} else {
		if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
		    gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
	}
#endif /* ifdef CAN_USE_GCM_ASM */

	return (rv);
}

int
gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	int rv;
	CK_AES_GMAC_PARAMS *gmac_param;

	if (param != NULL) {
		gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;

		gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));

		rv = CRYPTO_SUCCESS;
		gcm_ctx->gcm_flags |= GMAC_MODE;
	} else {
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

#ifdef CAN_USE_GCM_ASM
	/*
	 * Handle the "cycle" implementation by creating avx and non-avx
	 * contexts alternately.
	 */
	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
		gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
	} else {
		gcm_ctx->gcm_use_avx = gcm_toggle_avx();
	}
	/* We don't handle byte swapped key schedules in the avx code path. */
	aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
	if (ks->ops->needs_byteswap == B_TRUE) {
		gcm_ctx->gcm_use_avx = B_FALSE;
	}
	/* Allocate Htab memory as needed. */
	if (gcm_ctx->gcm_use_avx == B_TRUE) {
		size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);

		if (htab_len == 0) {
			return (CRYPTO_MECHANISM_PARAM_INVALID);
		}
		gcm_ctx->gcm_htab_len = htab_len;
		gcm_ctx->gcm_Htable =
		    (uint64_t *)kmem_alloc(htab_len, KM_SLEEP);

		if (gcm_ctx->gcm_Htable == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}

	/* Avx and non-avx context initialization differs from here on. */
	if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* ifdef CAN_USE_GCM_ASM */
	if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
	    gmac_param->pAAD, gmac_param->ulAADLen, block_size,
	    encrypt_block, copy_block, xor_block) != 0) {
		rv = CRYPTO_MECHANISM_PARAM_INVALID;
	}
#ifdef CAN_USE_GCM_ASM
	} else {
		if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
		    gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
	}
#endif /* ifdef CAN_USE_GCM_ASM */

	return (rv);
}

void *
gcm_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
		return (NULL);

	gcm_ctx->gcm_flags = GCM_MODE;
	return (gcm_ctx);
}

void *
gmac_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
		return (NULL);

	gcm_ctx->gcm_flags = GMAC_MODE;
	return (gcm_ctx);
}

/* GCM implementation that contains the fastest methods */
static gcm_impl_ops_t gcm_fastest_impl = {
	.name = "fastest"
};

/* All compiled in implementations */
static const gcm_impl_ops_t *gcm_all_impl[] = {
	&gcm_generic_impl,
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	&gcm_pclmulqdq_impl,
#endif
};

/* Indicate that benchmark has been completed */
static boolean_t gcm_impl_initialized = B_FALSE;

/* Hold all supported implementations */
static size_t gcm_supp_impl_cnt = 0;
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
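/*
 * How selection is encoded: icp_gcm_impl either holds one of the
 * sentinels defined above (IMPL_FASTEST, IMPL_CYCLE and, where
 * available, IMPL_AVX) or a plain index into gcm_supp_impl[], the
 * subset of gcm_all_impl[] whose is_supported() hook succeeded.
 */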
/*
 * Returns the GCM operations for encrypt/decrypt/key setup. When a
 * SIMD implementation is not allowed in the current context, then
 * fall back to the fastest generic implementation.
 */
const gcm_impl_ops_t *
gcm_impl_get_ops(void)
{
	if (!kfpu_allowed())
		return (&gcm_generic_impl);

	const gcm_impl_ops_t *ops = NULL;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	switch (impl) {
	case IMPL_FASTEST:
		ASSERT(gcm_impl_initialized);
		ops = &gcm_fastest_impl;
		break;
	case IMPL_CYCLE:
		/* Cycle through supported implementations */
		ASSERT(gcm_impl_initialized);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		static size_t cycle_impl_idx = 0;
		size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
		ops = gcm_supp_impl[idx];
		break;
#ifdef CAN_USE_GCM_ASM
	case IMPL_AVX:
		/*
		 * Make sure that we return a valid implementation while
		 * switching to the avx implementation since there still
		 * may be unfinished non-avx contexts around.
		 */
		ops = &gcm_generic_impl;
		break;
#endif
	default:
		ASSERT3U(impl, <, gcm_supp_impl_cnt);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		if (impl < ARRAY_SIZE(gcm_all_impl))
			ops = gcm_supp_impl[impl];
		break;
	}

	ASSERT3P(ops, !=, NULL);

	return (ops);
}
/*
 * Initialize all supported implementations.
 */
void
gcm_impl_init(void)
{
	gcm_impl_ops_t *curr_impl;
	int i, c;

	/* Move supported implementations into gcm_supp_impls */
	for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
		curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];

		if (curr_impl->is_supported())
			gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
	}
	gcm_supp_impl_cnt = c;

	/*
	 * Set the fastest implementation given the assumption that the
	 * hardware accelerated version is the fastest.
	 */
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	if (gcm_pclmulqdq_impl.is_supported()) {
		memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
		    sizeof (gcm_fastest_impl));
	} else
#endif
	{
		memcpy(&gcm_fastest_impl, &gcm_generic_impl,
		    sizeof (gcm_fastest_impl));
	}

	strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);

#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if it's available and the implementation
	 * hasn't changed from its default value of fastest on module load.
	 */
	if (gcm_avx_will_work()) {
#ifdef HAVE_MOVBE
		if (zfs_movbe_available() == B_TRUE) {
			atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
		}
#endif
		if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
			gcm_set_avx(B_TRUE);
		}
	}
#endif
	/* Finish initialization */
	atomic_swap_32(&icp_gcm_impl, user_sel_impl);
	gcm_impl_initialized = B_TRUE;
}

static const struct {
	const char *name;
	uint32_t sel;
} gcm_impl_opts[] = {
	{ "cycle", IMPL_CYCLE },
	{ "fastest", IMPL_FASTEST },
#ifdef CAN_USE_GCM_ASM
	{ "avx", IMPL_AVX },
#endif
};
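/*
 * For illustration, some values a caller might pass to the setter below
 * (hypothetical calls, but matching the tables above): gcm_impl_set("avx")
 * selects IMPL_AVX where the hardware supports it; gcm_impl_set("fastest")
 * restores the benchmarked default; after gcm_impl_init() has run, a bare
 * implementation name such as "generic" resolves to its index in
 * gcm_supp_impl[].
 */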
/*
 * Function sets desired gcm implementation.
 *
 * If we are called before init(), user preference will be saved in
 * user_sel_impl, and applied in later init() call. This occurs when module
 * parameter is specified on module load. Otherwise, directly update
 * icp_gcm_impl.
 *
 * @val		Name of gcm implementation to use
 * @param	Unused.
 */
int
gcm_impl_set(const char *val)
{
	int err = -EINVAL;
	char req_name[GCM_IMPL_NAME_MAX];
	uint32_t impl = GCM_IMPL_READ(user_sel_impl);
	size_t i;

	/* sanitize input */
	i = strnlen(val, GCM_IMPL_NAME_MAX);
	if (i == 0 || i >= GCM_IMPL_NAME_MAX)
		return (err);

	strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
	while (i > 0 && isspace(req_name[i-1]))
		i--;
	req_name[i] = '\0';

	/* Check mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
		/* Ignore avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
			impl = gcm_impl_opts[i].sel;
			err = 0;
			break;
		}
	}

	/* check all supported impl if init() was already called */
	if (err != 0 && gcm_impl_initialized) {
		/* check all supported implementations */
		for (i = 0; i < gcm_supp_impl_cnt; i++) {
			if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
				impl = i;
				err = 0;
				break;
			}
		}
	}
#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if available and the requested one is
	 * avx or fastest.
	 */
	if (gcm_avx_will_work() == B_TRUE &&
	    (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
		gcm_set_avx(B_TRUE);
	} else {
		gcm_set_avx(B_FALSE);
	}
#endif

	if (err == 0) {
		if (gcm_impl_initialized)
			atomic_swap_32(&icp_gcm_impl, impl);
		else
			atomic_swap_32(&user_sel_impl, impl);
	}

	return (err);
}

#if defined(_KERNEL) && defined(__linux__)

static int
icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
{
	return (gcm_impl_set(val));
}

static int
icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
{
	int i, cnt = 0;
	char *fmt;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	ASSERT(gcm_impl_initialized);

	/* list mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
		/* Ignore avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
		cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
		    gcm_impl_opts[i].name);
	}

	/* list all supported implementations */
	for (i = 0; i < gcm_supp_impl_cnt; i++) {
		fmt = (i == impl) ? "[%s] " : "%s ";
		cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
		    gcm_supp_impl[i]->name);
	}

	return (cnt);
}

module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
    NULL, 0644);
MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
#endif /* defined(__KERNEL) */
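/*
 * On Linux builds this typically surfaces as a writable module parameter,
 * e.g. (exact path may vary by distribution and module packaging):
 *
 *	cat /sys/module/icp/parameters/icp_gcm_impl
 *	echo avx > /sys/module/icp/parameters/icp_gcm_impl
 *
 * The getter above prints all selectable names with the active one in
 * brackets.
 */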
#ifdef CAN_USE_GCM_ASM
#define	GCM_BLOCK_LEN 16
/*
 * The openssl asm routines are 6x aggregated and need that many bytes
 * at minimum.
 */
#define	GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
#define	GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
/*
 * Ensure the chunk size is reasonable since we are allocating a
 * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts.
 */
#define	GCM_AVX_MAX_CHUNK_SIZE \
	(((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)

/* Clear the FPU registers since they hold sensitive internal state. */
#define	clear_fpu_regs() clear_fpu_regs_avx()
#define	GHASH_AVX(ctx, in, len) \
	gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \
	in, len)

#define	gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)

/* Get the chunk size module parameter. */
#define	GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size

/*
 * Module parameter: number of bytes to process at once while owning the FPU.
 * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and
 * ensured to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
 */
static uint32_t gcm_avx_chunk_size =
	((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
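/*
 * With GCM_AVX_MIN_DECRYPT_BYTES == 96, the default above works out to
 * (32768 / 96) * 96 = 32736 bytes per FPU-owning stretch.
 */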
extern void clear_fpu_regs_avx(void);
extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
extern void aes_encrypt_intel(const uint32_t rk[], int nr,
    const uint32_t pt[4], uint32_t ct[4]);

extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
    const uint8_t *in, size_t len);

extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

static inline boolean_t
gcm_avx_will_work(void)
{
	/* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
	return (kfpu_allowed() &&
	    zfs_avx_available() && zfs_aes_available() &&
	    zfs_pclmulqdq_available());
}

static inline void
gcm_set_avx(boolean_t val)
{
	if (gcm_avx_will_work() == B_TRUE) {
		atomic_swap_32(&gcm_use_avx, val);
	}
}

static inline boolean_t
gcm_toggle_avx(void)
{
	if (gcm_avx_will_work() == B_TRUE) {
		return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
	} else {
		return (B_FALSE);
	}
}

static inline size_t
gcm_simd_get_htab_size(boolean_t simd_mode)
{
	switch (simd_mode) {
	case B_TRUE:
		return (2 * 6 * 2 * sizeof (uint64_t));

	default:
		return (0);
	}
}

/*
 * Clear sensitive data in the context.
 *
 * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
 * ctx->gcm_Htable contain the hash sub key which protects authentication.
 *
 * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for
 * a known plaintext attack; they consist of the IV and the first and last
 * counter respectively. Whether they should be cleared is debatable.
 */
static inline void
gcm_clear_ctx(gcm_ctx_t *ctx)
{
	memset(ctx->gcm_remainder, 0, sizeof (ctx->gcm_remainder));
	memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
	memset(ctx->gcm_J0, 0, sizeof (ctx->gcm_J0));
	memset(ctx->gcm_tmp, 0, sizeof (ctx->gcm_tmp));
}

/* Increment the GCM counter block by n. */
static inline void
gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
{
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);

	counter = htonll(counter + n);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
}
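/*
 * A concrete trace of the helper above, ignoring byte order: if the
 * 32-bit counter field of the counter block holds 0xfffffffe and n is 3,
 * the addition overflows the field and the mask wraps it to 0x00000001,
 * leaving the upper 96 bits (the IV-derived invocation field) untouched.
 */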
/*
 * Encrypt multiple blocks of data in GCM mode.
 * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
 * if possible. While processing a chunk the FPU is "locked".
 */
static int
gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
    size_t length, crypto_data_t *out, size_t block_size)
{
	size_t bleft = length;
	size_t need = 0;
	size_t done = 0;
	uint8_t *datap = (uint8_t *)data;
	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
	uint64_t *ghash = ctx->gcm_ghash;
	uint64_t *cb = ctx->gcm_cb;
	uint8_t *ct_buf = NULL;
	uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
	int rv = CRYPTO_SUCCESS;

	ASSERT(block_size == GCM_BLOCK_LEN);
	/*
	 * If the last call left an incomplete block, try to fill
	 * it first.
	 */
	if (ctx->gcm_remainder_len > 0) {
		need = block_size - ctx->gcm_remainder_len;
		if (length < need) {
			/* Accumulate bytes here and return. */
			memcpy((uint8_t *)ctx->gcm_remainder +
			    ctx->gcm_remainder_len, datap, length);

			ctx->gcm_remainder_len += length;
			if (ctx->gcm_copy_to == NULL) {
				ctx->gcm_copy_to = datap;
			}
			return (CRYPTO_SUCCESS);
		} else {
			/* Complete incomplete block. */
			memcpy((uint8_t *)ctx->gcm_remainder +
			    ctx->gcm_remainder_len, datap, need);

			ctx->gcm_copy_to = NULL;
		}
	}

	/* Allocate a buffer to encrypt to if there is enough input. */
	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
		ct_buf = vmem_alloc(chunk_size, KM_SLEEP);
		if (ct_buf == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}

	/* If we completed an incomplete block, encrypt and write it out. */
	if (ctx->gcm_remainder_len > 0) {
		kfpu_begin();
		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
		    (const uint32_t *)cb, (uint32_t *)tmp);

		gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
		GHASH_AVX(ctx, tmp, block_size);
		clear_fpu_regs();
		kfpu_end();
		rv = crypto_put_output_data(tmp, out, block_size);
		out->cd_offset += block_size;
		gcm_incr_counter_block(ctx);
		ctx->gcm_processed_data_len += block_size;
		bleft -= need;
		datap += need;
		ctx->gcm_remainder_len = 0;
	}
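	/*
	 * From here on, work proceeds in chunk_size pieces with
	 * kfpu_begin()/kfpu_end() bracketing each one: the chunking
	 * bounds how long preemption stays disabled while the FPU is
	 * held, and chunk_size is tunable via gcm_avx_chunk_size.
	 */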
	 */
	for (; bleft >= chunk_size; bleft -= chunk_size) {
		kfpu_begin();
		done = aesni_gcm_encrypt(
		    datap, ct_buf, chunk_size, key, cb, ghash);

		clear_fpu_regs();
		kfpu_end();
		if (done != chunk_size) {
			rv = CRYPTO_FAILED;
			goto out_nofpu;
		}
		rv = crypto_put_output_data(ct_buf, out, chunk_size);
		if (rv != CRYPTO_SUCCESS) {
			goto out_nofpu;
		}
		out->cd_offset += chunk_size;
		datap += chunk_size;
		ctx->gcm_processed_data_len += chunk_size;
	}
	/* Check if we are already done. */
	if (bleft == 0) {
		goto out_nofpu;
	}
	/* Bulk encrypt the remaining data. */
	kfpu_begin();
	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
		done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
		if (done == 0) {
			rv = CRYPTO_FAILED;
			goto out;
		}
		rv = crypto_put_output_data(ct_buf, out, done);
		if (rv != CRYPTO_SUCCESS) {
			goto out;
		}
		out->cd_offset += done;
		ctx->gcm_processed_data_len += done;
		datap += done;
		bleft -= done;
	}
	/* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
	while (bleft > 0) {
		if (bleft < block_size) {
			memcpy(ctx->gcm_remainder, datap, bleft);
			ctx->gcm_remainder_len = bleft;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		/* Encrypt, hash and write out. */
		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
		    (const uint32_t *)cb, (uint32_t *)tmp);

		gcm_xor_avx(datap, tmp);
		GHASH_AVX(ctx, tmp, block_size);
		rv = crypto_put_output_data(tmp, out, block_size);
		if (rv != CRYPTO_SUCCESS) {
			goto out;
		}
		out->cd_offset += block_size;
		gcm_incr_counter_block(ctx);
		ctx->gcm_processed_data_len += block_size;
		datap += block_size;
		bleft -= block_size;
	}
out:
	clear_fpu_regs();
	kfpu_end();
out_nofpu:
	if (ct_buf != NULL) {
		vmem_free(ct_buf, chunk_size);
	}
	return (rv);
}

/*
 * Finalize the encryption: Zero-fill, encrypt, hash and write out any
 * incomplete last block.
 * Encrypt the ICB. Calculate the tag and write it out.
 */
static int
gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
	uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
	uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
	size_t rem_len = ctx->gcm_remainder_len;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	int aes_rounds = ((aes_key_t *)keysched)->nr;
	int rv;

	ASSERT(block_size == GCM_BLOCK_LEN);

	if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	kfpu_begin();
	/* Pad last incomplete block with zeros, encrypt and hash. */
	if (rem_len > 0) {
		uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
		const uint32_t *cb = (uint32_t *)ctx->gcm_cb;

		aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
		memset(remainder + rem_len, 0, block_size - rem_len);
		for (int i = 0; i < rem_len; i++) {
			remainder[i] ^= tmp[i];
		}
		GHASH_AVX(ctx, remainder, block_size);
		ctx->gcm_processed_data_len += rem_len;
		/* No need to increment counter_block, it's the last block. */
	}
	/* Finish tag. */
	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(keysched, aes_rounds, J0, J0);

	gcm_xor_avx((uint8_t *)J0, ghash);
	clear_fpu_regs();
	kfpu_end();

	/* Output remainder. */
	if (rem_len > 0) {
		rv = crypto_put_output_data(remainder, out, rem_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += rem_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);

	out->cd_offset += ctx->gcm_tag_len;
	/* Clear sensitive data in the context before returning. */
	gcm_clear_ctx(ctx);
	return (CRYPTO_SUCCESS);
}

/*
 * Finalize decryption: up to now we have only accumulated ciphertext, so
 * now decrypt it in place.
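 *
 * The buffer layout assumed here, as implied by the length math below, is
 *
 *	gcm_pt_buf: [ ciphertext (pt_len bytes) | tag (gcm_tag_len bytes) ]
 *	pt_len = gcm_processed_data_len - gcm_tag_len
 *
 * The ciphertext bytes are overwritten with plaintext in place, and the
 * trailing tag is compared against the computed GHASH before any plaintext
 * is handed to the caller.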
 */
static int
gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
	ASSERT3U(block_size, ==, 16);

	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	uint8_t *datap = ctx->gcm_pt_buf;
	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
	uint32_t *cb = (uint32_t *)ctx->gcm_cb;
	uint64_t *ghash = ctx->gcm_ghash;
	uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
	int rv = CRYPTO_SUCCESS;
	size_t bleft, done;

	/*
	 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
	 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES and a multiple
	 * of GCM_AVX_MIN_DECRYPT_BYTES.
	 */
	for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
		kfpu_begin();
		done = aesni_gcm_decrypt(datap, datap, chunk_size,
		    (const void *)key, ctx->gcm_cb, ghash);
		clear_fpu_regs();
		kfpu_end();
		if (done != chunk_size) {
			return (CRYPTO_FAILED);
		}
		datap += done;
	}
	/* Decrypt the remainder, which is less than chunk_size, in one go. */
	kfpu_begin();
	if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
		done = aesni_gcm_decrypt(datap, datap, bleft,
		    (const void *)key, ctx->gcm_cb, ghash);
		if (done == 0) {
			clear_fpu_regs();
			kfpu_end();
			return (CRYPTO_FAILED);
		}
		datap += done;
		bleft -= done;
	}
	ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);

	/*
	 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
	 * decrypt them block by block.
	 */
	while (bleft > 0) {
		/* Incomplete last block. */
		if (bleft < block_size) {
			uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;

			memset(lastb, 0, block_size);
			memcpy(lastb, datap, bleft);
			/* The GCM processing. */
			GHASH_AVX(ctx, lastb, block_size);
			aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
			for (size_t i = 0; i < bleft; i++) {
				datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
			}
			break;
		}
		/* The GCM processing.
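		 * Note the hash-then-decrypt order for full blocks: the
		 * ciphertext block is fed to GHASH first and only then
		 * XORed with the encrypted counter block. Roughly
		 * (illustrative pseudo code, the real statements follow):
		 *
		 *	GHASH_AVX(ctx, ct_block, 16);        authenticate
		 *	aes_encrypt_intel(ks, nr, cb, tmp);  E_k(counter)
		 *	ct_block ^= tmp;                     recover plaintext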
		 */
		GHASH_AVX(ctx, datap, block_size);
		aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
		gcm_xor_avx((uint8_t *)tmp, datap);
		gcm_incr_counter_block(ctx);

		datap += block_size;
		bleft -= block_size;
	}
	if (rv != CRYPTO_SUCCESS) {
		clear_fpu_regs();
		kfpu_end();
		return (rv);
	}
	/* Decryption done, finish the tag. */
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
	    (uint32_t *)ctx->gcm_J0);

	gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);

	/* We are done with the FPU, restore its state. */
	clear_fpu_regs();
	kfpu_end();

	/* Compare the input authentication tag with what we calculated. */
	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match. */
		return (CRYPTO_INVALID_MAC);
	}
	rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
	if (rv != CRYPTO_SUCCESS) {
		return (rv);
	}
	out->cd_offset += pt_len;
	gcm_clear_ctx(ctx);
	return (CRYPTO_SUCCESS);
}

/*
 * Initialize the GCM parameters H, Htable and the counter block. Save the
 * initial counter block.
 */
static int
gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size)
{
	uint8_t *cb = (uint8_t *)ctx->gcm_cb;
	uint64_t *H = ctx->gcm_H;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
	uint8_t *datap = auth_data;
	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	size_t bleft;

	ASSERT(block_size == GCM_BLOCK_LEN);

	/* Init H (encrypt zero block) and create the initial counter block.
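	 * In the terms of the GCM specification (NIST SP 800-38D):
	 * H = AES_K(0^128) and, for the common 96 bit IV, the pre-counter
	 * block is J0 = IV || 0^31 || 1. Both are visible below: H starts
	 * out zeroed and is encrypted in place, and for iv_len == 12 the
	 * last four counter block bytes are set to the big-endian 32 bit
	 * value 1.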
	 */
	memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash));
	memset(H, 0, sizeof (ctx->gcm_H));
	kfpu_begin();
	aes_encrypt_intel(keysched, aes_rounds,
	    (const uint32_t *)H, (uint32_t *)H);

	gcm_init_htab_avx(ctx->gcm_Htable, H);

	if (iv_len == 12) {
		memcpy(cb, iv, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* We need the ICB later. */
		memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0));
	} else {
		/*
		 * Most consumers use 12 byte IVs, so it's OK to use the
		 * original routines for other IV sizes, just avoid nesting
		 * kfpu_begin calls.
		 */
		clear_fpu_regs();
		kfpu_end();
		gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
		    aes_copy_block, aes_xor_block);
		kfpu_begin();
	}

	/* OpenSSL post-increments the counter; adjust for that. */
	gcm_incr_counter_block(ctx);

	/* GHASH the AAD in chunk_size blocks. */
	for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
		GHASH_AVX(ctx, datap, chunk_size);
		datap += chunk_size;
		clear_fpu_regs();
		kfpu_end();
		kfpu_begin();
	}
	/* GHASH the remainder and handle a possibly incomplete GCM block. */
	if (bleft > 0) {
		size_t incomp = bleft % block_size;

		bleft -= incomp;
		if (bleft > 0) {
			GHASH_AVX(ctx, datap, bleft);
			datap += bleft;
		}
		if (incomp > 0) {
			/* Zero pad and hash incomplete last block.
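			 * For example (hypothetical sizes): with 20 bytes of
			 * AAD left and a 16 byte block size, the code above
			 * hashes the first 16 bytes directly; the remaining
			 * 4 bytes are copied into a zeroed 16 byte buffer
			 * here before being hashed.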
			 */
			uint8_t *authp = (uint8_t *)ctx->gcm_tmp;

			memset(authp, 0, block_size);
			memcpy(authp, datap, incomp);
			GHASH_AVX(ctx, authp, block_size);
		}
	}
	clear_fpu_regs();
	kfpu_end();
	return (CRYPTO_SUCCESS);
}

#if defined(_KERNEL)
static int
icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	char val_rounded[16];
	int error = 0;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	/* Round down to a multiple of the minimum decrypt chunk. */
	val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;

	if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
		return (-EINVAL);

	snprintf(val_rounded, 16, "%u", (uint32_t)val);
	error = param_set_uint(val_rounded, kp);
	return (error);
}

module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
    param_get_uint, &gcm_avx_chunk_size, 0644);

MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
    "How many bytes to process while owning the FPU");

#endif /* defined(_KERNEL) */
#endif /* ifdef CAN_USE_GCM_ASM */
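
/*
 * Usage sketch (an assumption about the runtime environment, not something
 * this file guarantees): on Linux, module parameters are typically exposed
 * under /sys/module/<module>/parameters, so with this code built into a
 * module named "icp" the chunk size could be tuned at runtime, e.g.:
 *
 *	echo 65536 > /sys/module/icp/parameters/icp_gcm_avx_chunk_size
 *
 * icp_gcm_avx_set_chunk_size() above rounds the submitted value down to a
 * multiple of GCM_AVX_MIN_DECRYPT_BYTES and rejects it with -EINVAL if the
 * rounded value lies outside [GCM_AVX_MIN_ENCRYPT_BYTES,
 * GCM_AVX_MAX_CHUNK_SIZE].
 */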