xref: /freebsd-src/contrib/llvm-project/llvm/lib/Support/BLAKE3/README.md (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
1*81ad6265SDimitry AndricImplementation of BLAKE3, originating from https://github.com/BLAKE3-team/BLAKE3/tree/1.3.1/c
2*81ad6265SDimitry Andric
3*81ad6265SDimitry Andric# Example
4*81ad6265SDimitry Andric
5*81ad6265SDimitry AndricAn example program that hashes bytes from standard input and prints the
6*81ad6265SDimitry Andricresult:
7*81ad6265SDimitry Andric
8*81ad6265SDimitry AndricUsing the C++ API:
9*81ad6265SDimitry Andric
10*81ad6265SDimitry Andric```c++
11*81ad6265SDimitry Andric#include "llvm/Support/BLAKE3.h"
12*81ad6265SDimitry Andric#include <errno.h>
13*81ad6265SDimitry Andric#include <stdio.h>
14*81ad6265SDimitry Andric#include <stdlib.h>
15*81ad6265SDimitry Andric#include <string.h>
16*81ad6265SDimitry Andric#include <unistd.h>
17*81ad6265SDimitry Andric
18*81ad6265SDimitry Andricint main() {
19*81ad6265SDimitry Andric  // Initialize the hasher.
20*81ad6265SDimitry Andric  llvm::BLAKE3 hasher;
21*81ad6265SDimitry Andric
22*81ad6265SDimitry Andric  // Read input bytes from stdin.
23*81ad6265SDimitry Andric  char buf[65536];
24*81ad6265SDimitry Andric  while (1) {
25*81ad6265SDimitry Andric    ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
26*81ad6265SDimitry Andric    if (n > 0) {
27*81ad6265SDimitry Andric      hasher.update(llvm::StringRef(buf, n));
28*81ad6265SDimitry Andric    } else if (n == 0) {
29*81ad6265SDimitry Andric      break; // end of file
30*81ad6265SDimitry Andric    } else {
31*81ad6265SDimitry Andric      fprintf(stderr, "read failed: %s\n", strerror(errno));
32*81ad6265SDimitry Andric      exit(1);
33*81ad6265SDimitry Andric    }
34*81ad6265SDimitry Andric  }
35*81ad6265SDimitry Andric
36*81ad6265SDimitry Andric  // Finalize the hash. Default output length is 32 bytes.
37*81ad6265SDimitry Andric  auto output = hasher.final();
38*81ad6265SDimitry Andric
39*81ad6265SDimitry Andric  // Print the hash as hexadecimal.
40*81ad6265SDimitry Andric  for (uint8_t byte : output) {
41*81ad6265SDimitry Andric    printf("%02x", byte);
42*81ad6265SDimitry Andric  }
43*81ad6265SDimitry Andric  printf("\n");
44*81ad6265SDimitry Andric  return 0;
45*81ad6265SDimitry Andric}
46*81ad6265SDimitry Andric```
47*81ad6265SDimitry Andric
48*81ad6265SDimitry AndricUsing the C API:
49*81ad6265SDimitry Andric
50*81ad6265SDimitry Andric```c
51*81ad6265SDimitry Andric#include "llvm-c/blake3.h"
52*81ad6265SDimitry Andric#include <errno.h>
53*81ad6265SDimitry Andric#include <stdio.h>
54*81ad6265SDimitry Andric#include <stdlib.h>
55*81ad6265SDimitry Andric#include <string.h>
56*81ad6265SDimitry Andric#include <unistd.h>
57*81ad6265SDimitry Andric
58*81ad6265SDimitry Andricint main() {
59*81ad6265SDimitry Andric  // Initialize the hasher.
60*81ad6265SDimitry Andric  llvm_blake3_hasher hasher;
61*81ad6265SDimitry Andric  llvm_blake3_hasher_init(&hasher);
62*81ad6265SDimitry Andric
63*81ad6265SDimitry Andric  // Read input bytes from stdin.
64*81ad6265SDimitry Andric  unsigned char buf[65536];
65*81ad6265SDimitry Andric  while (1) {
66*81ad6265SDimitry Andric    ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
67*81ad6265SDimitry Andric    if (n > 0) {
68*81ad6265SDimitry Andric      llvm_blake3_hasher_update(&hasher, buf, n);
69*81ad6265SDimitry Andric    } else if (n == 0) {
70*81ad6265SDimitry Andric      break; // end of file
71*81ad6265SDimitry Andric    } else {
72*81ad6265SDimitry Andric      fprintf(stderr, "read failed: %s\n", strerror(errno));
73*81ad6265SDimitry Andric      exit(1);
74*81ad6265SDimitry Andric    }
75*81ad6265SDimitry Andric  }
76*81ad6265SDimitry Andric
77*81ad6265SDimitry Andric  // Finalize the hash. LLVM_BLAKE3_OUT_LEN is the default output length, 32 bytes.
78*81ad6265SDimitry Andric  uint8_t output[LLVM_BLAKE3_OUT_LEN];
79*81ad6265SDimitry Andric  llvm_blake3_hasher_finalize(&hasher, output, LLVM_BLAKE3_OUT_LEN);
80*81ad6265SDimitry Andric
81*81ad6265SDimitry Andric  // Print the hash as hexadecimal.
82*81ad6265SDimitry Andric  for (size_t i = 0; i < LLVM_BLAKE3_OUT_LEN; i++) {
83*81ad6265SDimitry Andric    printf("%02x", output[i]);
84*81ad6265SDimitry Andric  }
85*81ad6265SDimitry Andric  printf("\n");
86*81ad6265SDimitry Andric  return 0;
87*81ad6265SDimitry Andric}
88*81ad6265SDimitry Andric```
89*81ad6265SDimitry Andric
90*81ad6265SDimitry Andric# API
91*81ad6265SDimitry Andric
92*81ad6265SDimitry Andric## The Class/Struct
93*81ad6265SDimitry Andric
94*81ad6265SDimitry Andric```c++
95*81ad6265SDimitry Andricclass BLAKE3 {
96*81ad6265SDimitry Andric  // API
97*81ad6265SDimitry Andricprivate:
98*81ad6265SDimitry Andric  llvm_blake3_hasher Hasher;
99*81ad6265SDimitry Andric};
100*81ad6265SDimitry Andric```
101*81ad6265SDimitry Andric```c
102*81ad6265SDimitry Andrictypedef struct {
103*81ad6265SDimitry Andric  // private fields
104*81ad6265SDimitry Andric} llvm_blake3_hasher;
105*81ad6265SDimitry Andric```
106*81ad6265SDimitry Andric
107*81ad6265SDimitry AndricAn incremental BLAKE3 hashing state, which can accept any number of
108*81ad6265SDimitry Andricupdates. This implementation doesn't allocate any heap memory, but
109*81ad6265SDimitry Andric`sizeof(llvm_blake3_hasher)` itself is relatively large, currently 1912 bytes
110*81ad6265SDimitry Andricon x86-64. This size can be reduced by restricting the maximum input
111*81ad6265SDimitry Andriclength, as described in Section 5.4 of [the BLAKE3
112*81ad6265SDimitry Andricspec](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf),
113*81ad6265SDimitry Andricbut this implementation doesn't currently support that strategy.
114*81ad6265SDimitry Andric
115*81ad6265SDimitry Andric## Common API Functions
116*81ad6265SDimitry Andric
117*81ad6265SDimitry Andric```c++
118*81ad6265SDimitry AndricBLAKE3::BLAKE3();
119*81ad6265SDimitry Andric
120*81ad6265SDimitry Andricvoid BLAKE3::init();
121*81ad6265SDimitry Andric```
122*81ad6265SDimitry Andric```c
123*81ad6265SDimitry Andricvoid llvm_blake3_hasher_init(
124*81ad6265SDimitry Andric  llvm_blake3_hasher *self);
125*81ad6265SDimitry Andric```
126*81ad6265SDimitry Andric
127*81ad6265SDimitry AndricInitialize a `llvm_blake3_hasher` in the default hashing mode.
128*81ad6265SDimitry Andric
129*81ad6265SDimitry Andric---
130*81ad6265SDimitry Andric
131*81ad6265SDimitry Andric```c++
132*81ad6265SDimitry Andricvoid BLAKE3::update(ArrayRef<uint8_t> Data);
133*81ad6265SDimitry Andric
134*81ad6265SDimitry Andricvoid BLAKE3::update(StringRef Str);
135*81ad6265SDimitry Andric```
136*81ad6265SDimitry Andric```c
137*81ad6265SDimitry Andricvoid llvm_blake3_hasher_update(
138*81ad6265SDimitry Andric  llvm_blake3_hasher *self,
139*81ad6265SDimitry Andric  const void *input,
140*81ad6265SDimitry Andric  size_t input_len);
141*81ad6265SDimitry Andric```
142*81ad6265SDimitry Andric
143*81ad6265SDimitry AndricAdd input to the hasher. This can be called any number of times.
144*81ad6265SDimitry Andric
145*81ad6265SDimitry Andric---
146*81ad6265SDimitry Andric
147*81ad6265SDimitry Andric```c++
148*81ad6265SDimitry Andrictemplate <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
149*81ad6265SDimitry Andricusing BLAKE3Result = std::array<uint8_t, NumBytes>;
150*81ad6265SDimitry Andric
151*81ad6265SDimitry Andrictemplate <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
152*81ad6265SDimitry Andricvoid BLAKE3::final(BLAKE3Result<NumBytes> &Result);
153*81ad6265SDimitry Andric
154*81ad6265SDimitry Andrictemplate <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
155*81ad6265SDimitry AndricBLAKE3Result<NumBytes> BLAKE3::final();
156*81ad6265SDimitry Andric```
157*81ad6265SDimitry Andric```c
158*81ad6265SDimitry Andricvoid llvm_blake3_hasher_finalize(
159*81ad6265SDimitry Andric  const llvm_blake3_hasher *self,
160*81ad6265SDimitry Andric  uint8_t *out,
161*81ad6265SDimitry Andric  size_t out_len);
162*81ad6265SDimitry Andric```
163*81ad6265SDimitry Andric
164*81ad6265SDimitry AndricFinalize the hasher and return an output of any length, given in bytes.
165*81ad6265SDimitry AndricThis doesn't modify the hasher itself, and it's possible to finalize
166*81ad6265SDimitry Andricagain after adding more input. The constant `LLVM_BLAKE3_OUT_LEN` provides
167*81ad6265SDimitry Andricthe default output length, 32 bytes, which is recommended for most
168*81ad6265SDimitry Andriccallers.
169*81ad6265SDimitry Andric
170*81ad6265SDimitry AndricOutputs shorter than the default length of 32 bytes (256 bits) provide
171*81ad6265SDimitry Andricless security. An N-bit BLAKE3 output is intended to provide N bits of
172*81ad6265SDimitry Andricfirst and second preimage resistance and N/2 bits of collision
173*81ad6265SDimitry Andricresistance, for any N up to 256. Longer outputs don't provide any
174*81ad6265SDimitry Andricadditional security.
175*81ad6265SDimitry Andric
176*81ad6265SDimitry AndricShorter BLAKE3 outputs are prefixes of longer ones. Explicitly
177*81ad6265SDimitry Andricrequesting a short output is equivalent to truncating the default-length
178*81ad6265SDimitry Andricoutput. (Note that this differs from BLAKE2, where the output length is a hash parameter, so shorter outputs are not prefixes of longer ones.)
179*81ad6265SDimitry Andric
180*81ad6265SDimitry Andric## Less Common API Functions
181*81ad6265SDimitry Andric
182*81ad6265SDimitry Andric```c
183*81ad6265SDimitry Andricvoid llvm_blake3_hasher_init_keyed(
184*81ad6265SDimitry Andric  llvm_blake3_hasher *self,
185*81ad6265SDimitry Andric  const uint8_t key[LLVM_BLAKE3_KEY_LEN]);
186*81ad6265SDimitry Andric```
187*81ad6265SDimitry Andric
188*81ad6265SDimitry AndricInitialize a `llvm_blake3_hasher` in the keyed hashing mode. The key must be
189*81ad6265SDimitry Andricexactly 32 bytes.
190*81ad6265SDimitry Andric
191*81ad6265SDimitry Andric---
192*81ad6265SDimitry Andric
193*81ad6265SDimitry Andric```c
194*81ad6265SDimitry Andricvoid llvm_blake3_hasher_init_derive_key(
195*81ad6265SDimitry Andric  llvm_blake3_hasher *self,
196*81ad6265SDimitry Andric  const char *context);
197*81ad6265SDimitry Andric```
198*81ad6265SDimitry Andric
199*81ad6265SDimitry AndricInitialize a `llvm_blake3_hasher` in the key derivation mode. The context
200*81ad6265SDimitry Andricstring is given as an initialization parameter, and afterwards input key
201*81ad6265SDimitry Andricmaterial should be given with `llvm_blake3_hasher_update`. The context string
202*81ad6265SDimitry Andricis a null-terminated C string which should be **hardcoded, globally
203*81ad6265SDimitry Andricunique, and application-specific**. The context string should not
204*81ad6265SDimitry Andricinclude any dynamic input like salts, nonces, or identifiers read from a
205*81ad6265SDimitry Andricdatabase at runtime. A good default format for the context string is
206*81ad6265SDimitry Andric`"[application] [commit timestamp] [purpose]"`, e.g., `"example.com
207*81ad6265SDimitry Andric2019-12-25 16:18:03 session tokens v1"`.
208*81ad6265SDimitry Andric
209*81ad6265SDimitry AndricThis function is intended for application code written in C. For
210*81ad6265SDimitry Andriclanguage bindings, see `llvm_blake3_hasher_init_derive_key_raw` below.
211*81ad6265SDimitry Andric
212*81ad6265SDimitry Andric---
213*81ad6265SDimitry Andric
214*81ad6265SDimitry Andric```c
215*81ad6265SDimitry Andricvoid llvm_blake3_hasher_init_derive_key_raw(
216*81ad6265SDimitry Andric  llvm_blake3_hasher *self,
217*81ad6265SDimitry Andric  const void *context,
218*81ad6265SDimitry Andric  size_t context_len);
219*81ad6265SDimitry Andric```
220*81ad6265SDimitry Andric
221*81ad6265SDimitry AndricAs `llvm_blake3_hasher_init_derive_key` above, except that the context string
222*81ad6265SDimitry Andricis given as a pointer to an array of arbitrary bytes with a provided
223*81ad6265SDimitry Andriclength. This is intended for writing language bindings, where C string
224*81ad6265SDimitry Andricconversion would add unnecessary overhead and new error cases. Unicode
225*81ad6265SDimitry Andricstrings should be encoded as UTF-8.
226*81ad6265SDimitry Andric
227*81ad6265SDimitry AndricApplication code in C should prefer `llvm_blake3_hasher_init_derive_key`,
228*81ad6265SDimitry Andricwhich takes the context as a C string. If you need to use arbitrary
229*81ad6265SDimitry Andricbytes as a context string in application code, consider whether you're
230*81ad6265SDimitry Andricviolating the requirement that context strings should be hardcoded.
231*81ad6265SDimitry Andric
232*81ad6265SDimitry Andric---
233*81ad6265SDimitry Andric
234*81ad6265SDimitry Andric```c
235*81ad6265SDimitry Andricvoid llvm_blake3_hasher_finalize_seek(
236*81ad6265SDimitry Andric  const llvm_blake3_hasher *self,
237*81ad6265SDimitry Andric  uint64_t seek,
238*81ad6265SDimitry Andric  uint8_t *out,
239*81ad6265SDimitry Andric  size_t out_len);
240*81ad6265SDimitry Andric```
241*81ad6265SDimitry Andric
242*81ad6265SDimitry AndricThe same as `llvm_blake3_hasher_finalize`, but with an additional `seek`
243*81ad6265SDimitry Andricparameter for the starting byte position in the output stream. To
244*81ad6265SDimitry Andricefficiently stream a large output without allocating memory, call this
245*81ad6265SDimitry Andricfunction in a loop, incrementing `seek` by the output length each time.
246*81ad6265SDimitry Andric
247*81ad6265SDimitry Andric---
248*81ad6265SDimitry Andric
249*81ad6265SDimitry Andric```c
250*81ad6265SDimitry Andricvoid llvm_blake3_hasher_reset(
251*81ad6265SDimitry Andric  llvm_blake3_hasher *self);
252*81ad6265SDimitry Andric```
253*81ad6265SDimitry Andric
254*81ad6265SDimitry AndricReset the hasher to its initial state, prior to any calls to
255*81ad6265SDimitry Andric`llvm_blake3_hasher_update`. Currently this is no different from calling
256*81ad6265SDimitry Andric`llvm_blake3_hasher_init` or similar again. However, if this implementation gains
257*81ad6265SDimitry Andricmultithreading support in the future, and if `llvm_blake3_hasher` holds (optional)
258*81ad6265SDimitry Andricthreading resources, this function will reuse those resources.
259*81ad6265SDimitry Andric
260*81ad6265SDimitry Andric
261*81ad6265SDimitry Andric# Building
262*81ad6265SDimitry Andric
263*81ad6265SDimitry AndricThis implementation is just C and assembly files.
264*81ad6265SDimitry Andric
265*81ad6265SDimitry Andric## x86
266*81ad6265SDimitry Andric
267*81ad6265SDimitry AndricDynamic dispatch is enabled by default on x86. The implementation will
268*81ad6265SDimitry Andricquery the CPU at runtime to detect SIMD support, and it will use the
269*81ad6265SDimitry Andricwidest instruction set available. By default, `blake3_dispatch.c`
270*81ad6265SDimitry Andricexpects to be linked with code for five different instruction sets:
271*81ad6265SDimitry Andricportable C, SSE2, SSE4.1, AVX2, and AVX-512.
272*81ad6265SDimitry Andric
273*81ad6265SDimitry AndricFor each of the x86 SIMD instruction sets, four versions are available:
274*81ad6265SDimitry Andricthree flavors of assembly (Unix, Windows MSVC, and Windows GNU) and one
275*81ad6265SDimitry Andricversion using C intrinsics. The assembly versions are generally
276*81ad6265SDimitry Andricpreferred. They perform better, they perform more consistently across
277*81ad6265SDimitry Andricdifferent compilers, and they build more quickly. On the other hand, the
278*81ad6265SDimitry Andricassembly versions are x86\_64-only, and you need to select the right
279*81ad6265SDimitry Andricflavor for your target platform.
280*81ad6265SDimitry Andric
281*81ad6265SDimitry Andric## ARM NEON
282*81ad6265SDimitry Andric
283*81ad6265SDimitry AndricThe NEON implementation is enabled by default on AArch64, but not on
284*81ad6265SDimitry Andricother ARM targets, since not all of them support it. To enable it, set
285*81ad6265SDimitry Andric`BLAKE3_USE_NEON=1`.
286*81ad6265SDimitry Andric
287*81ad6265SDimitry AndricTo explicitly disable using NEON instructions on AArch64, set
288*81ad6265SDimitry Andric`BLAKE3_USE_NEON=0`.
289*81ad6265SDimitry Andric
290*81ad6265SDimitry Andric## Other Platforms
291*81ad6265SDimitry Andric
292*81ad6265SDimitry AndricThe portable implementation should work on most other architectures.
293*81ad6265SDimitry Andric
294*81ad6265SDimitry Andric# Multithreading
295*81ad6265SDimitry Andric
296*81ad6265SDimitry AndricThe implementation doesn't currently support multithreading.
297