xref: /freebsd-src/contrib/llvm-project/llvm/lib/Support/BLAKE3/blake3_portable.c (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
1*81ad6265SDimitry Andric #include "blake3_impl.h"
2*81ad6265SDimitry Andric #include <string.h>
3*81ad6265SDimitry Andric 
// Rotate the 32-bit word `w` right by `c` bit positions and return the result.
//
// Both shift counts are reduced modulo 32 so that neither shift is ever
// performed by the full width of the type. Shifting a 32-bit value by 32 is
// undefined behavior in C, which the unmasked form `w << (32 - c)` hits when
// c == 0. For c in 1..31 the result is identical to the unmasked form; c == 0
// (or any multiple of 32) now returns `w` unchanged.
INLINE uint32_t rotr32(uint32_t w, uint32_t c) {
  const uint32_t right = c & 31u;
  const uint32_t left = (32u - c) & 31u;
  return (w >> right) | (w << left);
}
7*81ad6265SDimitry Andric 
g(uint32_t * state,size_t a,size_t b,size_t c,size_t d,uint32_t x,uint32_t y)8*81ad6265SDimitry Andric INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
9*81ad6265SDimitry Andric               uint32_t x, uint32_t y) {
10*81ad6265SDimitry Andric   state[a] = state[a] + state[b] + x;
11*81ad6265SDimitry Andric   state[d] = rotr32(state[d] ^ state[a], 16);
12*81ad6265SDimitry Andric   state[c] = state[c] + state[d];
13*81ad6265SDimitry Andric   state[b] = rotr32(state[b] ^ state[c], 12);
14*81ad6265SDimitry Andric   state[a] = state[a] + state[b] + y;
15*81ad6265SDimitry Andric   state[d] = rotr32(state[d] ^ state[a], 8);
16*81ad6265SDimitry Andric   state[c] = state[c] + state[d];
17*81ad6265SDimitry Andric   state[b] = rotr32(state[b] ^ state[c], 7);
18*81ad6265SDimitry Andric }
19*81ad6265SDimitry Andric 
round_fn(uint32_t state[16],const uint32_t * msg,size_t round)20*81ad6265SDimitry Andric INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {
21*81ad6265SDimitry Andric   // Select the message schedule based on the round.
22*81ad6265SDimitry Andric   const uint8_t *schedule = MSG_SCHEDULE[round];
23*81ad6265SDimitry Andric 
24*81ad6265SDimitry Andric   // Mix the columns.
25*81ad6265SDimitry Andric   g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
26*81ad6265SDimitry Andric   g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
27*81ad6265SDimitry Andric   g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
28*81ad6265SDimitry Andric   g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
29*81ad6265SDimitry Andric 
30*81ad6265SDimitry Andric   // Mix the rows.
31*81ad6265SDimitry Andric   g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
32*81ad6265SDimitry Andric   g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
33*81ad6265SDimitry Andric   g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
34*81ad6265SDimitry Andric   g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
35*81ad6265SDimitry Andric }
36*81ad6265SDimitry Andric 
compress_pre(uint32_t state[16],const uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len,uint64_t counter,uint8_t flags)37*81ad6265SDimitry Andric INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8],
38*81ad6265SDimitry Andric                          const uint8_t block[BLAKE3_BLOCK_LEN],
39*81ad6265SDimitry Andric                          uint8_t block_len, uint64_t counter, uint8_t flags) {
40*81ad6265SDimitry Andric   uint32_t block_words[16];
41*81ad6265SDimitry Andric   block_words[0] = load32(block + 4 * 0);
42*81ad6265SDimitry Andric   block_words[1] = load32(block + 4 * 1);
43*81ad6265SDimitry Andric   block_words[2] = load32(block + 4 * 2);
44*81ad6265SDimitry Andric   block_words[3] = load32(block + 4 * 3);
45*81ad6265SDimitry Andric   block_words[4] = load32(block + 4 * 4);
46*81ad6265SDimitry Andric   block_words[5] = load32(block + 4 * 5);
47*81ad6265SDimitry Andric   block_words[6] = load32(block + 4 * 6);
48*81ad6265SDimitry Andric   block_words[7] = load32(block + 4 * 7);
49*81ad6265SDimitry Andric   block_words[8] = load32(block + 4 * 8);
50*81ad6265SDimitry Andric   block_words[9] = load32(block + 4 * 9);
51*81ad6265SDimitry Andric   block_words[10] = load32(block + 4 * 10);
52*81ad6265SDimitry Andric   block_words[11] = load32(block + 4 * 11);
53*81ad6265SDimitry Andric   block_words[12] = load32(block + 4 * 12);
54*81ad6265SDimitry Andric   block_words[13] = load32(block + 4 * 13);
55*81ad6265SDimitry Andric   block_words[14] = load32(block + 4 * 14);
56*81ad6265SDimitry Andric   block_words[15] = load32(block + 4 * 15);
57*81ad6265SDimitry Andric 
58*81ad6265SDimitry Andric   state[0] = cv[0];
59*81ad6265SDimitry Andric   state[1] = cv[1];
60*81ad6265SDimitry Andric   state[2] = cv[2];
61*81ad6265SDimitry Andric   state[3] = cv[3];
62*81ad6265SDimitry Andric   state[4] = cv[4];
63*81ad6265SDimitry Andric   state[5] = cv[5];
64*81ad6265SDimitry Andric   state[6] = cv[6];
65*81ad6265SDimitry Andric   state[7] = cv[7];
66*81ad6265SDimitry Andric   state[8] = IV[0];
67*81ad6265SDimitry Andric   state[9] = IV[1];
68*81ad6265SDimitry Andric   state[10] = IV[2];
69*81ad6265SDimitry Andric   state[11] = IV[3];
70*81ad6265SDimitry Andric   state[12] = counter_low(counter);
71*81ad6265SDimitry Andric   state[13] = counter_high(counter);
72*81ad6265SDimitry Andric   state[14] = (uint32_t)block_len;
73*81ad6265SDimitry Andric   state[15] = (uint32_t)flags;
74*81ad6265SDimitry Andric 
75*81ad6265SDimitry Andric   round_fn(state, &block_words[0], 0);
76*81ad6265SDimitry Andric   round_fn(state, &block_words[0], 1);
77*81ad6265SDimitry Andric   round_fn(state, &block_words[0], 2);
78*81ad6265SDimitry Andric   round_fn(state, &block_words[0], 3);
79*81ad6265SDimitry Andric   round_fn(state, &block_words[0], 4);
80*81ad6265SDimitry Andric   round_fn(state, &block_words[0], 5);
81*81ad6265SDimitry Andric   round_fn(state, &block_words[0], 6);
82*81ad6265SDimitry Andric }
83*81ad6265SDimitry Andric 
// Compress one block and overwrite `cv` with the result.
//
// After compress_pre() fills the 16-word state, each output word is the XOR
// of a word from the lower half of the state with the matching word from the
// upper half: cv[i] = state[i] ^ state[i + 8].
void blake3_compress_in_place_portable(uint32_t cv[8],
                                       const uint8_t block[BLAKE3_BLOCK_LEN],
                                       uint8_t block_len, uint64_t counter,
                                       uint8_t flags) {
  uint32_t state[16];
  compress_pre(state, cv, block, block_len, counter, flags);

  for (size_t i = 0; i < 8; i++) {
    cv[i] = state[i] ^ state[i + 8];
  }
}
99*81ad6265SDimitry Andric 
// Compress one block and write 64 bytes of output to `out` via store32().
//
// The first 32 bytes are state[i] ^ state[i + 8] for i in 0..7; the last 32
// bytes are state[i + 8] ^ cv[i] for i in 0..7. `cv` itself is not modified
// by this function.
void blake3_compress_xof_portable(const uint32_t cv[8],
                                  const uint8_t block[BLAKE3_BLOCK_LEN],
                                  uint8_t block_len, uint64_t counter,
                                  uint8_t flags, uint8_t out[64]) {
  uint32_t state[16];
  compress_pre(state, cv, block, block_len, counter, flags);

  // Lower half of the output: lower state words XOR upper state words.
  for (size_t i = 0; i < 8; i++) {
    store32(&out[i * 4], state[i] ^ state[i + 8]);
  }

  // Upper half of the output: upper state words XOR the input cv.
  for (size_t i = 0; i < 8; i++) {
    store32(&out[(i + 8) * 4], state[i + 8] ^ cv[i]);
  }
}
124*81ad6265SDimitry Andric 
hash_one_portable(const uint8_t * input,size_t blocks,const uint32_t key[8],uint64_t counter,uint8_t flags,uint8_t flags_start,uint8_t flags_end,uint8_t out[BLAKE3_OUT_LEN])125*81ad6265SDimitry Andric INLINE void hash_one_portable(const uint8_t *input, size_t blocks,
126*81ad6265SDimitry Andric                               const uint32_t key[8], uint64_t counter,
127*81ad6265SDimitry Andric                               uint8_t flags, uint8_t flags_start,
128*81ad6265SDimitry Andric                               uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
129*81ad6265SDimitry Andric   uint32_t cv[8];
130*81ad6265SDimitry Andric   memcpy(cv, key, BLAKE3_KEY_LEN);
131*81ad6265SDimitry Andric   uint8_t block_flags = flags | flags_start;
132*81ad6265SDimitry Andric   while (blocks > 0) {
133*81ad6265SDimitry Andric     if (blocks == 1) {
134*81ad6265SDimitry Andric       block_flags |= flags_end;
135*81ad6265SDimitry Andric     }
136*81ad6265SDimitry Andric     blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
137*81ad6265SDimitry Andric                                       block_flags);
138*81ad6265SDimitry Andric     input = &input[BLAKE3_BLOCK_LEN];
139*81ad6265SDimitry Andric     blocks -= 1;
140*81ad6265SDimitry Andric     block_flags = flags;
141*81ad6265SDimitry Andric   }
142*81ad6265SDimitry Andric   store_cv_words(out, cv);
143*81ad6265SDimitry Andric }
144*81ad6265SDimitry Andric 
blake3_hash_many_portable(const uint8_t * const * inputs,size_t num_inputs,size_t blocks,const uint32_t key[8],uint64_t counter,bool increment_counter,uint8_t flags,uint8_t flags_start,uint8_t flags_end,uint8_t * out)145*81ad6265SDimitry Andric void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
146*81ad6265SDimitry Andric                                size_t blocks, const uint32_t key[8],
147*81ad6265SDimitry Andric                                uint64_t counter, bool increment_counter,
148*81ad6265SDimitry Andric                                uint8_t flags, uint8_t flags_start,
149*81ad6265SDimitry Andric                                uint8_t flags_end, uint8_t *out) {
150*81ad6265SDimitry Andric   while (num_inputs > 0) {
151*81ad6265SDimitry Andric     hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start,
152*81ad6265SDimitry Andric                       flags_end, out);
153*81ad6265SDimitry Andric     if (increment_counter) {
154*81ad6265SDimitry Andric       counter += 1;
155*81ad6265SDimitry Andric     }
156*81ad6265SDimitry Andric     inputs += 1;
157*81ad6265SDimitry Andric     num_inputs -= 1;
158*81ad6265SDimitry Andric     out = &out[BLAKE3_OUT_LEN];
159*81ad6265SDimitry Andric   }
160*81ad6265SDimitry Andric }
161