Lines Matching full:inputs
452 INLINE void transpose_msg_vecs4(const uint8_t *const *inputs, in transpose_msg_vecs4() argument
454 out[0] = loadu_128(&inputs[0][block_offset + 0 * sizeof(__m128i)]); in transpose_msg_vecs4()
455 out[1] = loadu_128(&inputs[1][block_offset + 0 * sizeof(__m128i)]); in transpose_msg_vecs4()
456 out[2] = loadu_128(&inputs[2][block_offset + 0 * sizeof(__m128i)]); in transpose_msg_vecs4()
457 out[3] = loadu_128(&inputs[3][block_offset + 0 * sizeof(__m128i)]); in transpose_msg_vecs4()
458 out[4] = loadu_128(&inputs[0][block_offset + 1 * sizeof(__m128i)]); in transpose_msg_vecs4()
459 out[5] = loadu_128(&inputs[1][block_offset + 1 * sizeof(__m128i)]); in transpose_msg_vecs4()
460 out[6] = loadu_128(&inputs[2][block_offset + 1 * sizeof(__m128i)]); in transpose_msg_vecs4()
461 out[7] = loadu_128(&inputs[3][block_offset + 1 * sizeof(__m128i)]); in transpose_msg_vecs4()
462 out[8] = loadu_128(&inputs[0][block_offset + 2 * sizeof(__m128i)]); in transpose_msg_vecs4()
463 out[9] = loadu_128(&inputs[1][block_offset + 2 * sizeof(__m128i)]); in transpose_msg_vecs4()
464 out[10] = loadu_128(&inputs[2][block_offset + 2 * sizeof(__m128i)]); in transpose_msg_vecs4()
465 out[11] = loadu_128(&inputs[3][block_offset + 2 * sizeof(__m128i)]); in transpose_msg_vecs4()
466 out[12] = loadu_128(&inputs[0][block_offset + 3 * sizeof(__m128i)]); in transpose_msg_vecs4()
467 out[13] = loadu_128(&inputs[1][block_offset + 3 * sizeof(__m128i)]); in transpose_msg_vecs4()
468 out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(__m128i)]); in transpose_msg_vecs4()
469 out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(__m128i)]); in transpose_msg_vecs4()
471 _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); in transpose_msg_vecs4()
492 void blake3_hash4_avx512(const uint8_t *const *inputs, size_t blocks, in blake3_hash4_avx512() argument
512 transpose_msg_vecs4(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); in blake3_hash4_avx512()
709 INLINE void transpose_msg_vecs8(const uint8_t *const *inputs, in transpose_msg_vecs8() argument
711 out[0] = loadu_256(&inputs[0][block_offset + 0 * sizeof(__m256i)]); in transpose_msg_vecs8()
712 out[1] = loadu_256(&inputs[1][block_offset + 0 * sizeof(__m256i)]); in transpose_msg_vecs8()
713 out[2] = loadu_256(&inputs[2][block_offset + 0 * sizeof(__m256i)]); in transpose_msg_vecs8()
714 out[3] = loadu_256(&inputs[3][block_offset + 0 * sizeof(__m256i)]); in transpose_msg_vecs8()
715 out[4] = loadu_256(&inputs[4][block_offset + 0 * sizeof(__m256i)]); in transpose_msg_vecs8()
716 out[5] = loadu_256(&inputs[5][block_offset + 0 * sizeof(__m256i)]); in transpose_msg_vecs8()
717 out[6] = loadu_256(&inputs[6][block_offset + 0 * sizeof(__m256i)]); in transpose_msg_vecs8()
718 out[7] = loadu_256(&inputs[7][block_offset + 0 * sizeof(__m256i)]); in transpose_msg_vecs8()
719 out[8] = loadu_256(&inputs[0][block_offset + 1 * sizeof(__m256i)]); in transpose_msg_vecs8()
720 out[9] = loadu_256(&inputs[1][block_offset + 1 * sizeof(__m256i)]); in transpose_msg_vecs8()
721 out[10] = loadu_256(&inputs[2][block_offset + 1 * sizeof(__m256i)]); in transpose_msg_vecs8()
722 out[11] = loadu_256(&inputs[3][block_offset + 1 * sizeof(__m256i)]); in transpose_msg_vecs8()
723 out[12] = loadu_256(&inputs[4][block_offset + 1 * sizeof(__m256i)]); in transpose_msg_vecs8()
724 out[13] = loadu_256(&inputs[5][block_offset + 1 * sizeof(__m256i)]); in transpose_msg_vecs8()
725 out[14] = loadu_256(&inputs[6][block_offset + 1 * sizeof(__m256i)]); in transpose_msg_vecs8()
726 out[15] = loadu_256(&inputs[7][block_offset + 1 * sizeof(__m256i)]); in transpose_msg_vecs8()
728 _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); in transpose_msg_vecs8()
747 void blake3_hash8_avx512(const uint8_t *const *inputs, size_t blocks, in blake3_hash8_avx512() argument
767 transpose_msg_vecs8(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); in blake3_hash8_avx512()
1023 INLINE void transpose_msg_vecs16(const uint8_t *const *inputs, in transpose_msg_vecs16() argument
1025 out[0] = loadu_512(&inputs[0][block_offset]); in transpose_msg_vecs16()
1026 out[1] = loadu_512(&inputs[1][block_offset]); in transpose_msg_vecs16()
1027 out[2] = loadu_512(&inputs[2][block_offset]); in transpose_msg_vecs16()
1028 out[3] = loadu_512(&inputs[3][block_offset]); in transpose_msg_vecs16()
1029 out[4] = loadu_512(&inputs[4][block_offset]); in transpose_msg_vecs16()
1030 out[5] = loadu_512(&inputs[5][block_offset]); in transpose_msg_vecs16()
1031 out[6] = loadu_512(&inputs[6][block_offset]); in transpose_msg_vecs16()
1032 out[7] = loadu_512(&inputs[7][block_offset]); in transpose_msg_vecs16()
1033 out[8] = loadu_512(&inputs[8][block_offset]); in transpose_msg_vecs16()
1034 out[9] = loadu_512(&inputs[9][block_offset]); in transpose_msg_vecs16()
1035 out[10] = loadu_512(&inputs[10][block_offset]); in transpose_msg_vecs16()
1036 out[11] = loadu_512(&inputs[11][block_offset]); in transpose_msg_vecs16()
1037 out[12] = loadu_512(&inputs[12][block_offset]); in transpose_msg_vecs16()
1038 out[13] = loadu_512(&inputs[13][block_offset]); in transpose_msg_vecs16()
1039 out[14] = loadu_512(&inputs[14][block_offset]); in transpose_msg_vecs16()
1040 out[15] = loadu_512(&inputs[15][block_offset]); in transpose_msg_vecs16()
1042 _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); in transpose_msg_vecs16()
1060 void blake3_hash16_avx512(const uint8_t *const *inputs, size_t blocks, in blake3_hash16_avx512() argument
1081 transpose_msg_vecs16(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); in blake3_hash16_avx512()
1162 void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs, in blake3_hash_many_avx512() argument
1168 blake3_hash16_avx512(inputs, blocks, key, counter, increment_counter, flags, in blake3_hash_many_avx512()
1173 inputs += 16; in blake3_hash_many_avx512()
1178 blake3_hash8_avx512(inputs, blocks, key, counter, increment_counter, flags, in blake3_hash_many_avx512()
1183 inputs += 8; in blake3_hash_many_avx512()
1188 blake3_hash4_avx512(inputs, blocks, key, counter, increment_counter, flags, in blake3_hash_many_avx512()
1193 inputs += 4; in blake3_hash_many_avx512()
1198 hash_one_avx512(inputs[0], blocks, key, counter, flags, flags_start, in blake3_hash_many_avx512()
1203 inputs += 1; in blake3_hash_many_avx512()