/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
211f1e2261SMartin Matuska
/*
 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
 * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
 */
271f1e2261SMartin Matuska
#include <sys/simd.h>
#include <sys/zfs_context.h>
#include "blake3_impl.h"
311f1e2261SMartin Matuska
/*
 * Rotate the 32-bit value x right by n bits.
 *
 * x is expected to be a uint32_t expression and n must be in the range
 * 1..31 (n == 0 would shift left by 32).  Both arguments are evaluated
 * more than once, so they must be free of side effects.
 */
#define	rotr32(x, n)	(((x) >> (n)) | ((x) << (32 - (n))))

/*
 * Mixing function: updates the four state words selected by the indices
 * a, b, c and d in place, folding in the two message words x and y.
 *
 * Each half adds into state[a] and state[c] and xor-rotates state[d] and
 * state[b]; the rotation amounts are 16 and 12 in the first half, 8 and 7
 * in the second.  All additions wrap modulo 2^32 (uint32_t arithmetic).
 *
 * state	array of 32-bit words; a, b, c, d must be valid indices into it
 * x		message word added in the first half
 * y		message word added in the second half
 */
static inline void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
    uint32_t x, uint32_t y)
{
	/* First half: inject x. */
	state[a] = state[a] + state[b] + x;
	state[d] = rotr32(state[d] ^ state[a], 16);
	state[c] = state[c] + state[d];
	state[b] = rotr32(state[b] ^ state[c], 12);

	/* Second half: inject y. */
	state[a] = state[a] + state[b] + y;
	state[d] = rotr32(state[d] ^ state[a], 8);
	state[c] = state[c] + state[d];
	state[b] = rotr32(state[b] ^ state[c], 7);
}
451f1e2261SMartin Matuska
round_fn(uint32_t state[16],const uint32_t * msg,size_t round)461f1e2261SMartin Matuska static inline void round_fn(uint32_t state[16], const uint32_t *msg,
471f1e2261SMartin Matuska size_t round)
481f1e2261SMartin Matuska {
491f1e2261SMartin Matuska /* Select the message schedule based on the round. */
501f1e2261SMartin Matuska const uint8_t *schedule = BLAKE3_MSG_SCHEDULE[round];
511f1e2261SMartin Matuska
521f1e2261SMartin Matuska /* Mix the columns. */
531f1e2261SMartin Matuska g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
541f1e2261SMartin Matuska g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
551f1e2261SMartin Matuska g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
561f1e2261SMartin Matuska g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
571f1e2261SMartin Matuska
581f1e2261SMartin Matuska /* Mix the rows. */
591f1e2261SMartin Matuska g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
601f1e2261SMartin Matuska g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
611f1e2261SMartin Matuska g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
621f1e2261SMartin Matuska g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
631f1e2261SMartin Matuska }
641f1e2261SMartin Matuska
compress_pre(uint32_t state[16],const uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len,uint64_t counter,uint8_t flags)651f1e2261SMartin Matuska static inline void compress_pre(uint32_t state[16], const uint32_t cv[8],
661f1e2261SMartin Matuska const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
671f1e2261SMartin Matuska uint64_t counter, uint8_t flags)
681f1e2261SMartin Matuska {
691f1e2261SMartin Matuska uint32_t block_words[16];
701f1e2261SMartin Matuska block_words[0] = load32(block + 4 * 0);
711f1e2261SMartin Matuska block_words[1] = load32(block + 4 * 1);
721f1e2261SMartin Matuska block_words[2] = load32(block + 4 * 2);
731f1e2261SMartin Matuska block_words[3] = load32(block + 4 * 3);
741f1e2261SMartin Matuska block_words[4] = load32(block + 4 * 4);
751f1e2261SMartin Matuska block_words[5] = load32(block + 4 * 5);
761f1e2261SMartin Matuska block_words[6] = load32(block + 4 * 6);
771f1e2261SMartin Matuska block_words[7] = load32(block + 4 * 7);
781f1e2261SMartin Matuska block_words[8] = load32(block + 4 * 8);
791f1e2261SMartin Matuska block_words[9] = load32(block + 4 * 9);
801f1e2261SMartin Matuska block_words[10] = load32(block + 4 * 10);
811f1e2261SMartin Matuska block_words[11] = load32(block + 4 * 11);
821f1e2261SMartin Matuska block_words[12] = load32(block + 4 * 12);
831f1e2261SMartin Matuska block_words[13] = load32(block + 4 * 13);
841f1e2261SMartin Matuska block_words[14] = load32(block + 4 * 14);
851f1e2261SMartin Matuska block_words[15] = load32(block + 4 * 15);
861f1e2261SMartin Matuska
871f1e2261SMartin Matuska state[0] = cv[0];
881f1e2261SMartin Matuska state[1] = cv[1];
891f1e2261SMartin Matuska state[2] = cv[2];
901f1e2261SMartin Matuska state[3] = cv[3];
911f1e2261SMartin Matuska state[4] = cv[4];
921f1e2261SMartin Matuska state[5] = cv[5];
931f1e2261SMartin Matuska state[6] = cv[6];
941f1e2261SMartin Matuska state[7] = cv[7];
951f1e2261SMartin Matuska state[8] = BLAKE3_IV[0];
961f1e2261SMartin Matuska state[9] = BLAKE3_IV[1];
971f1e2261SMartin Matuska state[10] = BLAKE3_IV[2];
981f1e2261SMartin Matuska state[11] = BLAKE3_IV[3];
991f1e2261SMartin Matuska state[12] = counter_low(counter);
1001f1e2261SMartin Matuska state[13] = counter_high(counter);
1011f1e2261SMartin Matuska state[14] = (uint32_t)block_len;
1021f1e2261SMartin Matuska state[15] = (uint32_t)flags;
1031f1e2261SMartin Matuska
1041f1e2261SMartin Matuska round_fn(state, &block_words[0], 0);
1051f1e2261SMartin Matuska round_fn(state, &block_words[0], 1);
1061f1e2261SMartin Matuska round_fn(state, &block_words[0], 2);
1071f1e2261SMartin Matuska round_fn(state, &block_words[0], 3);
1081f1e2261SMartin Matuska round_fn(state, &block_words[0], 4);
1091f1e2261SMartin Matuska round_fn(state, &block_words[0], 5);
1101f1e2261SMartin Matuska round_fn(state, &block_words[0], 6);
1111f1e2261SMartin Matuska }
1121f1e2261SMartin Matuska
/*
 * Compress one block and overwrite cv with the resulting chaining value.
 *
 * After compress_pre() has run the rounds, word i of the new chaining
 * value is the xor of the upper-half and lower-half state words,
 * state[i] ^ state[i + 8].
 *
 * cv		8-word chaining value, read as input and replaced on return
 * block	input block
 * block_len, counter, flags	passed through to compress_pre()
 */
static inline void blake3_compress_in_place_generic(uint32_t cv[8],
    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
    uint64_t counter, uint8_t flags)
{
	uint32_t state[16];
	compress_pre(state, cv, block, block_len, counter, flags);
	cv[0] = state[0] ^ state[8];
	cv[1] = state[1] ^ state[9];
	cv[2] = state[2] ^ state[10];
	cv[3] = state[3] ^ state[11];
	cv[4] = state[4] ^ state[12];
	cv[5] = state[5] ^ state[13];
	cv[6] = state[6] ^ state[14];
	cv[7] = state[7] ^ state[15];
}
1281f1e2261SMartin Matuska
hash_one_generic(const uint8_t * input,size_t blocks,const uint32_t key[8],uint64_t counter,uint8_t flags,uint8_t flags_start,uint8_t flags_end,uint8_t out[BLAKE3_OUT_LEN])1291f1e2261SMartin Matuska static inline void hash_one_generic(const uint8_t *input, size_t blocks,
1301f1e2261SMartin Matuska const uint32_t key[8], uint64_t counter, uint8_t flags,
1311f1e2261SMartin Matuska uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN])
1321f1e2261SMartin Matuska {
1331f1e2261SMartin Matuska uint32_t cv[8];
1341f1e2261SMartin Matuska memcpy(cv, key, BLAKE3_KEY_LEN);
1351f1e2261SMartin Matuska uint8_t block_flags = flags | flags_start;
1361f1e2261SMartin Matuska while (blocks > 0) {
1371f1e2261SMartin Matuska if (blocks == 1) {
1381f1e2261SMartin Matuska block_flags |= flags_end;
1391f1e2261SMartin Matuska }
1401f1e2261SMartin Matuska blake3_compress_in_place_generic(cv, input, BLAKE3_BLOCK_LEN,
1411f1e2261SMartin Matuska counter, block_flags);
1421f1e2261SMartin Matuska input = &input[BLAKE3_BLOCK_LEN];
1431f1e2261SMartin Matuska blocks -= 1;
1441f1e2261SMartin Matuska block_flags = flags;
1451f1e2261SMartin Matuska }
1461f1e2261SMartin Matuska store_cv_words(out, cv);
1471f1e2261SMartin Matuska }
1481f1e2261SMartin Matuska
/*
 * Compress one block and emit the full 16-word result (64 bytes) to out,
 * leaving cv untouched.
 *
 * Output words 0..7 are state[i] ^ state[i + 8], the same values
 * blake3_compress_in_place_generic() stores into cv.  Output words 8..15
 * are state[i + 8] ^ cv[i].  Each word is written with store32().
 *
 * cv		8-word input chaining value (not modified)
 * block	input block
 * block_len, counter, flags	passed through to compress_pre()
 * out		64-byte destination buffer
 */
static inline void blake3_compress_xof_generic(const uint32_t cv[8],
    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
    uint64_t counter, uint8_t flags, uint8_t out[64])
{
	uint32_t state[16];
	compress_pre(state, cv, block, block_len, counter, flags);

	/* First half: lower state words xor upper state words. */
	store32(&out[0 * 4], state[0] ^ state[8]);
	store32(&out[1 * 4], state[1] ^ state[9]);
	store32(&out[2 * 4], state[2] ^ state[10]);
	store32(&out[3 * 4], state[3] ^ state[11]);
	store32(&out[4 * 4], state[4] ^ state[12]);
	store32(&out[5 * 4], state[5] ^ state[13]);
	store32(&out[6 * 4], state[6] ^ state[14]);
	store32(&out[7 * 4], state[7] ^ state[15]);
	/* Second half: upper state words xor the input chaining value. */
	store32(&out[8 * 4], state[8] ^ cv[0]);
	store32(&out[9 * 4], state[9] ^ cv[1]);
	store32(&out[10 * 4], state[10] ^ cv[2]);
	store32(&out[11 * 4], state[11] ^ cv[3]);
	store32(&out[12 * 4], state[12] ^ cv[4]);
	store32(&out[13 * 4], state[13] ^ cv[5]);
	store32(&out[14 * 4], state[14] ^ cv[6]);
	store32(&out[15 * 4], state[15] ^ cv[7]);
}
1731f1e2261SMartin Matuska
/*
 * Hash num_inputs independent inputs one after another with
 * hash_one_generic().
 *
 * Every input consists of `blocks` blocks and uses the same key and
 * flags.  The result for input i is written at out + i * BLAKE3_OUT_LEN.
 * When increment_counter is set, the counter passed to each successive
 * input is one higher than for the previous one; otherwise every input
 * sees the same counter.
 *
 * inputs	array of num_inputs input pointers
 * out		destination for num_inputs results of BLAKE3_OUT_LEN bytes
 */
static inline void blake3_hash_many_generic(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter,
    boolean_t increment_counter, uint8_t flags, uint8_t flags_start,
    uint8_t flags_end, uint8_t *out)
{
	while (num_inputs > 0) {
		hash_one_generic(inputs[0], blocks, key, counter, flags,
		    flags_start, flags_end, out);
		if (increment_counter) {
			counter += 1;
		}
		inputs += 1;
		num_inputs -= 1;
		out = &out[BLAKE3_OUT_LEN];
	}
}
1901f1e2261SMartin Matuska
191*2a58b312SMartin Matuska /* the generic implementation is always okay */
blake3_is_supported(void)192*2a58b312SMartin Matuska static boolean_t blake3_is_supported(void)
1931f1e2261SMartin Matuska {
1941f1e2261SMartin Matuska return (B_TRUE);
1951f1e2261SMartin Matuska }
1961f1e2261SMartin Matuska
197c7046f76SMartin Matuska const blake3_ops_t blake3_generic_impl = {
1981f1e2261SMartin Matuska .compress_in_place = blake3_compress_in_place_generic,
1991f1e2261SMartin Matuska .compress_xof = blake3_compress_xof_generic,
2001f1e2261SMartin Matuska .hash_many = blake3_hash_many_generic,
201*2a58b312SMartin Matuska .is_supported = blake3_is_supported,
2021f1e2261SMartin Matuska .degree = 4,
2031f1e2261SMartin Matuska .name = "generic"
2041f1e2261SMartin Matuska };
205