Lines Matching +full:ouput +full:- +full:only
9 * or https://opensource.org/licenses/CDDL-1.0.
23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24 * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
25 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
36 * - we define this pragma to silence gcc's -Wframe-larger-than= warning
39 #pragma GCC diagnostic ignored "-Wframe-larger-than="
66 memcpy(ctx->cv, key, BLAKE3_KEY_LEN); in chunk_state_init()
67 ctx->chunk_counter = 0; in chunk_state_init()
68 memset(ctx->buf, 0, BLAKE3_BLOCK_LEN); in chunk_state_init()
69 ctx->buf_len = 0; in chunk_state_init()
70 ctx->blocks_compressed = 0; in chunk_state_init()
71 ctx->flags = flags; in chunk_state_init()
77 memcpy(ctx->cv, key, BLAKE3_KEY_LEN); in chunk_state_reset()
78 ctx->chunk_counter = chunk_counter; in chunk_state_reset()
79 ctx->blocks_compressed = 0; in chunk_state_reset()
80 memset(ctx->buf, 0, BLAKE3_BLOCK_LEN); in chunk_state_reset()
81 ctx->buf_len = 0; in chunk_state_reset()
86 return (BLAKE3_BLOCK_LEN * (size_t)ctx->blocks_compressed) + in chunk_state_len()
87 ((size_t)ctx->buf_len); in chunk_state_len()
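For scale: with two 64-byte blocks already compressed and 10 bytes buffered, chunk_state_len() reports 2 * 64 + 10 = 138 bytes of chunk input consumed so far.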
93 size_t take = BLAKE3_BLOCK_LEN - ((size_t)ctx->buf_len); in chunk_state_fill_buf()
97 uint8_t *dest = ctx->buf + ((size_t)ctx->buf_len); in chunk_state_fill_buf()
99 ctx->buf_len += (uint8_t)take; in chunk_state_fill_buf()
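The clamp of take against the caller's input_len sits on lines the query did not match; read in full, the helper presumably amounts to this sketch (struct tag assumed to be blake3_chunk_state_t as in the OpenZFS headers):

	static size_t
	chunk_state_fill_buf_sketch(blake3_chunk_state_t *ctx,
	    const uint8_t *input, size_t input_len)
	{
		size_t take = BLAKE3_BLOCK_LEN - ((size_t)ctx->buf_len);

		/* never copy more bytes than the caller supplied */
		if (take > input_len)
			take = input_len;
		memcpy(ctx->buf + ((size_t)ctx->buf_len), input, take);
		ctx->buf_len += (uint8_t)take;
		return (take);
	}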
105 if (ctx->blocks_compressed == 0) { in chunk_state_maybe_start_flag()
127 * interface) are represented as words. This avoids unnecessary bytes<->words
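The output_t under discussion pairs an input chaining value with the final block it will compress. As a reference point, the struct in the upstream BLAKE3 C code that this file is based on looks like the following; treat the exact field list as an assumption rather than a quote of this file:

	typedef struct {
		uint32_t input_cv[8];	/* chaining value, kept as words */
		uint64_t counter;	/* chunk counter; 0 for parent nodes */
		uint8_t block[BLAKE3_BLOCK_LEN]; /* final 64-byte block */
		uint8_t block_len;
		uint8_t flags;		/* CHUNK_START/END, PARENT, ROOT */
	} output_t;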
137 memcpy(cv_words, ctx->input_cv, 32); in output_chaining_value()
138 ops->compress_in_place(cv_words, ctx->block, ctx->block_len, in output_chaining_value()
139 ctx->counter, ctx->flags); in output_chaining_value()
150 ops->compress_xof(ctx->input_cv, ctx->block, ctx->block_len, in output_root_bytes()
151 output_block_counter, ctx->flags | ROOT, wide_buf); in output_root_bytes()
152 size_t available_bytes = 64 - offset_within_block; in output_root_bytes()
161 out_len -= memcpy_len; in output_root_bytes()
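output_block_counter and offset_within_block are presumably derived from the byte-level seek position just above these lines, since XOF output is generated one 64-byte compression at a time; a minimal sketch of that mapping:

	/* sketch: translate a byte offset in the XOF stream to a block */
	static void
	seek_to_block(uint64_t seek, uint64_t *output_block_counter,
	    size_t *offset_within_block)
	{
		*output_block_counter = seek / 64;
		*offset_within_block = (size_t)(seek % 64);
	}

After the first partial block, the loop advances the counter, resets the offset to zero, and keeps copying up to 64 bytes per compression until out_len is exhausted.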
170 if (ctx->buf_len > 0) { in chunk_state_update()
173 input_len -= take; in chunk_state_update()
175 ops->compress_in_place(ctx->cv, ctx->buf, in chunk_state_update()
176 BLAKE3_BLOCK_LEN, ctx->chunk_counter, in chunk_state_update()
177 ctx->flags | chunk_state_maybe_start_flag(ctx)); in chunk_state_update()
178 ctx->blocks_compressed += 1; in chunk_state_update()
179 ctx->buf_len = 0; in chunk_state_update()
180 memset(ctx->buf, 0, BLAKE3_BLOCK_LEN); in chunk_state_update()
185 ops->compress_in_place(ctx->cv, input, BLAKE3_BLOCK_LEN, in chunk_state_update()
186 ctx->chunk_counter, in chunk_state_update()
187 ctx->flags | chunk_state_maybe_start_flag(ctx)); in chunk_state_update()
188 ctx->blocks_compressed += 1; in chunk_state_update()
190 input_len -= BLAKE3_BLOCK_LEN; in chunk_state_update()
199 ctx->flags | chunk_state_maybe_start_flag(ctx) | CHUNK_END; in chunk_state_output()
200 return (make_output(ctx->cv, ctx->buf, ctx->buf_len, ctx->chunk_counter, in chunk_state_output()
212 * should go in the left subtree. This is the largest power-of-2 number of
222 size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN; in left_len()
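full_chunks is then rounded down to a power of two and scaled back to bytes; the upstream code does this via a helper named round_down_to_power_of_2. One straightforward way to implement that rounding, offered here as a sketch rather than the file's actual code:

	/* largest power of 2 less than or equal to x, for x >= 1 */
	static size_t
	round_down_to_power_of_2(size_t x)
	{
		size_t p = 1;

		while ((p << 1) != 0 && (p << 1) <= x)
			p <<= 1;
		return (p);
	}

For an input of 5 chunks plus 1 byte, full_chunks is 5, so the left subtree takes 4 chunks (4096 bytes) and the remaining chunk plus the odd byte go to the right.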
239 while (input_len - input_position >= BLAKE3_CHUNK_LEN) { in compress_chunks_parallel()
245 ops->hash_many(chunks_array, chunks_array_len, BLAKE3_CHUNK_LEN / in compress_chunks_parallel()
259 input_len - input_position); in compress_chunks_parallel()
283 while (num_chaining_values - (2 * parents_array_len) >= 2) { in compress_parents_parallel()
289 ops->hash_many(parents_array, parents_array_len, 1, key, 0, B_FALSE, in compress_parents_parallel()
314 * wouldn't be able to implement extendable output.) Note that this function is
315 * not used when the whole input is only 1 chunk long; that's a different
320 * multi-threading parallelism for that update().
328 * to 2 when it is 1. If this implementation adds multi-threading in in blake3_compress_subtree_wide()
329 * the future, this gives us the option of multi-threading even the in blake3_compress_subtree_wide()
330 * 2-chunk case, which can help performance on smaller platforms. in blake3_compress_subtree_wide()
332 if (input_len <= (size_t)(ops->degree * BLAKE3_CHUNK_LEN)) { in blake3_compress_subtree_wide()
341 * only optimal as long as the SIMD degree is a power of 2. If we ever in blake3_compress_subtree_wide()
346 size_t right_input_len = input_len - left_input_len; in blake3_compress_subtree_wide()
357 size_t degree = ops->degree; in blake3_compress_subtree_wide()
364 * 1-chunk-input case is a different codepath.) in blake3_compress_subtree_wide()
371 * Recurse! If this implementation adds multi-threading support in the in blake3_compress_subtree_wide()
433 memcpy(ctx->key, key, BLAKE3_KEY_LEN); in hasher_init_base()
434 chunk_state_init(&ctx->chunk, key, flags); in hasher_init_base()
435 ctx->cv_stack_len = 0; in hasher_init_base()
436 ctx->ops = blake3_get_ops(); in hasher_init_base()
444 * any power-of-two number of chunks, as long as the smaller-above-larger
445 * stack order is maintained. Instead of the "count the trailing 0-bits"
447 * 1-bits" variant that doesn't require us to retain the subtree size of the
449 * remain in the stack is represented by a 1-bit in the total number of chunks
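Concretely, after some total number of chunks has been hashed, the stack should retain exactly one CV per 1-bit of that count, so the merge target is a population count; a minimal sketch, with a hypothetical helper name and a GCC builtin standing in for whatever popcount primitive the kernel build uses:

	/* hypothetical: target stack depth = popcount(total chunks) */
	static size_t
	cv_stack_target_len(uint64_t total_chunks)
	{
		return ((size_t)__builtin_popcountll(total_chunks));
	}

After 11 chunks (binary 1011), for instance, the stack holds CVs for completed subtrees of 8, 2, and 1 chunks: three entries, matching popcount(11).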
455 while (ctx->cv_stack_len > post_merge_stack_len) { in hasher_merge_cv_stack()
457 &ctx->cv_stack[(ctx->cv_stack_len - 2) * BLAKE3_OUT_LEN]; in hasher_merge_cv_stack()
459 parent_output(parent_node, ctx->key, ctx->chunk.flags); in hasher_merge_cv_stack()
460 output_chaining_value(ctx->ops, &output, parent_node); in hasher_merge_cv_stack()
461 ctx->cv_stack_len -= 1; in hasher_merge_cv_stack()
476 * 1) This 64 KiB input might be the only call that ever gets made to update.
497 * hashing an input all-at-once.)
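Concretely: if one complete chunk turns out to be the entire input, its output must be finalized with the ROOT flag, so its CV cannot be eagerly compressed into a parent node; the merge has to wait until further input proves the node is non-root.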
503 memcpy(&ctx->cv_stack[ctx->cv_stack_len * BLAKE3_OUT_LEN], new_cv, in hasher_push_cv()
505 ctx->cv_stack_len += 1; in hasher_push_cv()
541 if (chunk_state_len(&ctx->chunk) > 0) { in Blake3_Update2()
542 size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&ctx->chunk); in Blake3_Update2()
546 chunk_state_update(ctx->ops, &ctx->chunk, input_bytes, take); in Blake3_Update2()
548 input_len -= take; in Blake3_Update2()
555 output_t output = chunk_state_output(&ctx->chunk); in Blake3_Update2()
557 output_chaining_value(ctx->ops, &output, chunk_cv); in Blake3_Update2()
558 hasher_push_cv(ctx, chunk_cv, ctx->chunk.chunk_counter); in Blake3_Update2()
559 chunk_state_reset(&ctx->chunk, ctx->key, in Blake3_Update2()
560 ctx->chunk.chunk_counter + 1); in Blake3_Update2()
570 * (and maybe in the future, multi-threading) parallelism. Two in Blake3_Update2()
572 * - The subtree has to be a power-of-2 number of chunks. Only in Blake3_Update2()
575 * - The subtree must evenly divide the total number of chunks up in Blake3_Update2()
577 * subtree is only waiting for 1 more chunk, we can't hash a subtree in Blake3_Update2()
585 ctx->chunk.chunk_counter * BLAKE3_CHUNK_LEN; in Blake3_Update2()
591 * power-of-2 inputs of the same size, as is hopefully in Blake3_Update2()
599 * and we might get to use 2-way SIMD parallelism. The problem in Blake3_Update2()
604 * https://github.com/BLAKE3-team/BLAKE3/issues/69. in Blake3_Update2()
606 while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) { in Blake3_Update2()
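Worked example: with count_so_far at 3 chunks (3072 bytes) and a 4-chunk (4096-byte) subtree proposed, (4096 - 1) & 3072 is nonzero, so the subtree halves to 2048; (2048 - 1) & 3072 is still nonzero, halving to 1024; (1024 - 1) & 3072 is zero, so a single chunk is hashed, which completes a 4-chunk boundary for the next iteration.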
617 chunk_state_init(&chunk_state, ctx->key, in Blake3_Update2()
618 ctx->chunk.flags); in Blake3_Update2()
619 chunk_state.chunk_counter = ctx->chunk.chunk_counter; in Blake3_Update2()
620 chunk_state_update(ctx->ops, &chunk_state, input_bytes, in Blake3_Update2()
624 output_chaining_value(ctx->ops, &output, cv); in Blake3_Update2()
628 * This is the high-performance happy path, though in Blake3_Update2()
633 compress_subtree_to_parent_node(ctx->ops, input_bytes, in Blake3_Update2()
634 subtree_len, ctx->key, ctx->chunk.chunk_counter, in Blake3_Update2()
635 ctx->chunk.flags, cv_pair); in Blake3_Update2()
636 hasher_push_cv(ctx, cv_pair, ctx->chunk.chunk_counter); in Blake3_Update2()
638 ctx->chunk.chunk_counter + (subtree_chunks / 2)); in Blake3_Update2()
640 ctx->chunk.chunk_counter += subtree_chunks; in Blake3_Update2()
642 input_len -= subtree_len; in Blake3_Update2()
649 * remaining input means we know these merges are non-root. This merge in Blake3_Update2()
655 chunk_state_update(ctx->ops, &ctx->chunk, input_bytes, in Blake3_Update2()
657 hasher_merge_cv_stack(ctx, ctx->chunk.chunk_counter); in Blake3_Update2()
673 todo -= block; in Blake3_Update()
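The public Blake3_Update() is a thin wrapper that feeds Blake3_Update2() in bounded slices, keeping the stack usage of the SIMD paths in check; a sketch of that loop, assuming the 64 KiB slice size suggested by the comment above (exact signature of Blake3_Update2() assumed):

	void
	Blake3_Update(BLAKE3_CTX *ctx, const void *input, size_t todo)
	{
		const size_t block_max = 64 * 1024;	/* assumed slice size */
		const uint8_t *data = input;
		size_t done = 0;

		while (todo != 0) {
			size_t block = (todo > block_max) ? block_max : todo;

			Blake3_Update2(ctx, data + done, block);
			done += block;
			todo -= block;
		}
	}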
697 if (ctx->cv_stack_len == 0) { in Blake3_FinalSeek()
698 output_t output = chunk_state_output(&ctx->chunk); in Blake3_FinalSeek()
699 output_root_bytes(ctx->ops, &output, seek, out, out_len); in Blake3_FinalSeek()
704 * do a roll-up merge between that chunk hash and every subtree in the in Blake3_FinalSeek()
713 if (chunk_state_len(&ctx->chunk) > 0) { in Blake3_FinalSeek()
714 cvs_remaining = ctx->cv_stack_len; in Blake3_FinalSeek()
715 output = chunk_state_output(&ctx->chunk); in Blake3_FinalSeek()
718 cvs_remaining = ctx->cv_stack_len - 2; in Blake3_FinalSeek()
719 output = parent_output(&ctx->cv_stack[cvs_remaining * 32], in Blake3_FinalSeek()
720 ctx->key, ctx->chunk.flags); in Blake3_FinalSeek()
723 cvs_remaining -= 1; in Blake3_FinalSeek()
725 memcpy(parent_block, &ctx->cv_stack[cvs_remaining * 32], 32); in Blake3_FinalSeek()
726 output_chaining_value(ctx->ops, &output, &parent_block[32]); in Blake3_FinalSeek()
727 output = parent_output(parent_block, ctx->key, in Blake3_FinalSeek()
728 ctx->chunk.flags); in Blake3_FinalSeek()
730 output_root_bytes(ctx->ops, &output, seek, out, out_len); in Blake3_FinalSeek()
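For orientation, a caller-side sketch of this API as consumers would use it via sys/blake3.h; the prototypes are inferred from the function names above and should be treated as assumptions:

	#include <sys/blake3.h>

	static void
	example_digest(const void *data, size_t len,
	    uint8_t out[BLAKE3_OUT_LEN])
	{
		BLAKE3_CTX ctx;

		Blake3_Init(&ctx);
		Blake3_Update(&ctx, data, len);
		/* seek == 0 starts the output stream at the beginning */
		Blake3_FinalSeek(&ctx, 0, out, BLAKE3_OUT_LEN);
	}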