1*06c3fb27SDimitry Andric /*===--------------- sha512intrin.h - SHA512 intrinsics -----------------===
2*06c3fb27SDimitry Andric *
3*06c3fb27SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*06c3fb27SDimitry Andric * See https://llvm.org/LICENSE.txt for license information.
5*06c3fb27SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*06c3fb27SDimitry Andric *
7*06c3fb27SDimitry Andric *===-----------------------------------------------------------------------===
8*06c3fb27SDimitry Andric */
9*06c3fb27SDimitry Andric
10*06c3fb27SDimitry Andric #ifndef __IMMINTRIN_H
11*06c3fb27SDimitry Andric #error "Never use <sha512intrin.h> directly; include <immintrin.h> instead."
12*06c3fb27SDimitry Andric #endif // __IMMINTRIN_H
13*06c3fb27SDimitry Andric
14*06c3fb27SDimitry Andric #ifndef __SHA512INTRIN_H
15*06c3fb27SDimitry Andric #define __SHA512INTRIN_H
16*06c3fb27SDimitry Andric
// Attribute set applied to every intrinsic defined in this header: the
// function is always inlined, carries no debug info, is compiled with the
// "sha512" target feature, and declares a minimum vector width of 256.
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__))                              \
  __attribute__((__target__("sha512"), __min_vector_width__(256)))
20*06c3fb27SDimitry Andric
21*06c3fb27SDimitry Andric /// This intrinisc is one of the two SHA512 message scheduling instructions.
22*06c3fb27SDimitry Andric /// The intrinsic performs an intermediate calculation for the next four
23*06c3fb27SDimitry Andric /// SHA512 message qwords. The calculated results are stored in \a dst.
24*06c3fb27SDimitry Andric ///
25*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
26*06c3fb27SDimitry Andric ///
27*06c3fb27SDimitry Andric /// \code
28*06c3fb27SDimitry Andric /// __m256i _mm256_sha512msg1_epi64(__m256i __A, __m128i __B)
29*06c3fb27SDimitry Andric /// \endcode
30*06c3fb27SDimitry Andric ///
31*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VSHA512MSG1 instruction.
32*06c3fb27SDimitry Andric ///
33*06c3fb27SDimitry Andric /// \param __A
34*06c3fb27SDimitry Andric /// A 256-bit vector of [4 x long long].
35*06c3fb27SDimitry Andric /// \param __B
36*06c3fb27SDimitry Andric /// A 128-bit vector of [2 x long long].
37*06c3fb27SDimitry Andric /// \returns
38*06c3fb27SDimitry Andric /// A 256-bit vector of [4 x long long].
39*06c3fb27SDimitry Andric ///
40*06c3fb27SDimitry Andric /// \code{.operation}
41*06c3fb27SDimitry Andric /// DEFINE ROR64(qword, n) {
42*06c3fb27SDimitry Andric /// count := n % 64
43*06c3fb27SDimitry Andric /// dest := (qword >> count) | (qword << (64 - count))
44*06c3fb27SDimitry Andric /// RETURN dest
45*06c3fb27SDimitry Andric /// }
46*06c3fb27SDimitry Andric /// DEFINE SHR64(qword, n) {
47*06c3fb27SDimitry Andric /// RETURN qword >> n
48*06c3fb27SDimitry Andric /// }
49*06c3fb27SDimitry Andric /// DEFINE s0(qword):
50*06c3fb27SDimitry Andric /// RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7)
51*06c3fb27SDimitry Andric /// }
52*06c3fb27SDimitry Andric /// W[4] := __B.qword[0]
53*06c3fb27SDimitry Andric /// W[3] := __A.qword[3]
54*06c3fb27SDimitry Andric /// W[2] := __A.qword[2]
55*06c3fb27SDimitry Andric /// W[1] := __A.qword[1]
56*06c3fb27SDimitry Andric /// W[0] := __A.qword[0]
57*06c3fb27SDimitry Andric /// dst.qword[3] := W[3] + s0(W[4])
58*06c3fb27SDimitry Andric /// dst.qword[2] := W[2] + s0(W[3])
59*06c3fb27SDimitry Andric /// dst.qword[1] := W[1] + s0(W[2])
60*06c3fb27SDimitry Andric /// dst.qword[0] := W[0] + s0(W[1])
61*06c3fb27SDimitry Andric /// dst[MAX:256] := 0
62*06c3fb27SDimitry Andric /// \endcode
63*06c3fb27SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sha512msg1_epi64(__m256i __A,__m128i __B)64*06c3fb27SDimitry Andric _mm256_sha512msg1_epi64(__m256i __A, __m128i __B) {
65*06c3fb27SDimitry Andric return (__m256i)__builtin_ia32_vsha512msg1((__v4du)__A, (__v2du)__B);
66*06c3fb27SDimitry Andric }
67*06c3fb27SDimitry Andric
68*06c3fb27SDimitry Andric /// This intrinisc is one of the two SHA512 message scheduling instructions.
69*06c3fb27SDimitry Andric /// The intrinsic performs the final calculation for the next four SHA512
70*06c3fb27SDimitry Andric /// message qwords. The calculated results are stored in \a dst.
71*06c3fb27SDimitry Andric ///
72*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
73*06c3fb27SDimitry Andric ///
74*06c3fb27SDimitry Andric /// \code
75*06c3fb27SDimitry Andric /// __m256i _mm256_sha512msg2_epi64(__m256i __A, __m256i __B)
76*06c3fb27SDimitry Andric /// \endcode
77*06c3fb27SDimitry Andric ///
78*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VSHA512MSG2 instruction.
79*06c3fb27SDimitry Andric ///
80*06c3fb27SDimitry Andric /// \param __A
81*06c3fb27SDimitry Andric /// A 256-bit vector of [4 x long long].
82*06c3fb27SDimitry Andric /// \param __B
83*06c3fb27SDimitry Andric /// A 256-bit vector of [4 x long long].
84*06c3fb27SDimitry Andric /// \returns
85*06c3fb27SDimitry Andric /// A 256-bit vector of [4 x long long].
86*06c3fb27SDimitry Andric ///
87*06c3fb27SDimitry Andric /// \code{.operation}
88*06c3fb27SDimitry Andric /// DEFINE ROR64(qword, n) {
89*06c3fb27SDimitry Andric /// count := n % 64
90*06c3fb27SDimitry Andric /// dest := (qword >> count) | (qword << (64 - count))
91*06c3fb27SDimitry Andric /// RETURN dest
92*06c3fb27SDimitry Andric /// }
93*06c3fb27SDimitry Andric /// DEFINE SHR64(qword, n) {
94*06c3fb27SDimitry Andric /// RETURN qword >> n
95*06c3fb27SDimitry Andric /// }
96*06c3fb27SDimitry Andric /// DEFINE s1(qword) {
97*06c3fb27SDimitry Andric /// RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6)
98*06c3fb27SDimitry Andric /// }
99*06c3fb27SDimitry Andric /// W[14] := __B.qword[2]
100*06c3fb27SDimitry Andric /// W[15] := __B.qword[3]
101*06c3fb27SDimitry Andric /// W[16] := __A.qword[0] + s1(W[14])
102*06c3fb27SDimitry Andric /// W[17] := __A.qword[1] + s1(W[15])
103*06c3fb27SDimitry Andric /// W[18] := __A.qword[2] + s1(W[16])
104*06c3fb27SDimitry Andric /// W[19] := __A.qword[3] + s1(W[17])
105*06c3fb27SDimitry Andric /// dst.qword[3] := W[19]
106*06c3fb27SDimitry Andric /// dst.qword[2] := W[18]
107*06c3fb27SDimitry Andric /// dst.qword[1] := W[17]
108*06c3fb27SDimitry Andric /// dst.qword[0] := W[16]
109*06c3fb27SDimitry Andric /// dst[MAX:256] := 0
110*06c3fb27SDimitry Andric /// \endcode
111*06c3fb27SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sha512msg2_epi64(__m256i __A,__m256i __B)112*06c3fb27SDimitry Andric _mm256_sha512msg2_epi64(__m256i __A, __m256i __B) {
113*06c3fb27SDimitry Andric return (__m256i)__builtin_ia32_vsha512msg2((__v4du)__A, (__v4du)__B);
114*06c3fb27SDimitry Andric }
115*06c3fb27SDimitry Andric
116*06c3fb27SDimitry Andric /// This intrinisc performs two rounds of SHA512 operation using initial SHA512
117*06c3fb27SDimitry Andric /// state (C,D,G,H) from \a __A, an initial SHA512 state (A,B,E,F) from
118*06c3fb27SDimitry Andric /// \a __A, and a pre-computed sum of the next two round message qwords and
119*06c3fb27SDimitry Andric /// the corresponding round constants from \a __C (only the two lower qwords
120*06c3fb27SDimitry Andric /// of the third operand). The updated SHA512 state (A,B,E,F) is written to
121*06c3fb27SDimitry Andric /// \a __A, and \a __A can be used as the updated state (C,D,G,H) in later
122*06c3fb27SDimitry Andric /// rounds.
123*06c3fb27SDimitry Andric ///
124*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
125*06c3fb27SDimitry Andric ///
126*06c3fb27SDimitry Andric /// \code
127*06c3fb27SDimitry Andric /// __m256i _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C)
128*06c3fb27SDimitry Andric /// \endcode
129*06c3fb27SDimitry Andric ///
130*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VSHA512RNDS2 instruction.
131*06c3fb27SDimitry Andric ///
132*06c3fb27SDimitry Andric /// \param __A
133*06c3fb27SDimitry Andric /// A 256-bit vector of [4 x long long].
134*06c3fb27SDimitry Andric /// \param __B
135*06c3fb27SDimitry Andric /// A 256-bit vector of [4 x long long].
136*06c3fb27SDimitry Andric /// \param __C
137*06c3fb27SDimitry Andric /// A 128-bit vector of [2 x long long].
138*06c3fb27SDimitry Andric /// \returns
139*06c3fb27SDimitry Andric /// A 256-bit vector of [4 x long long].
140*06c3fb27SDimitry Andric ///
141*06c3fb27SDimitry Andric /// \code{.operation}
142*06c3fb27SDimitry Andric /// DEFINE ROR64(qword, n) {
143*06c3fb27SDimitry Andric /// count := n % 64
144*06c3fb27SDimitry Andric /// dest := (qword >> count) | (qword << (64 - count))
145*06c3fb27SDimitry Andric /// RETURN dest
146*06c3fb27SDimitry Andric /// }
147*06c3fb27SDimitry Andric /// DEFINE SHR64(qword, n) {
148*06c3fb27SDimitry Andric /// RETURN qword >> n
149*06c3fb27SDimitry Andric /// }
150*06c3fb27SDimitry Andric /// DEFINE cap_sigma0(qword) {
151*06c3fb27SDimitry Andric /// RETURN ROR64(qword,28) ^ ROR64(qword, 34) ^ ROR64(qword, 39)
152*06c3fb27SDimitry Andric /// }
153*06c3fb27SDimitry Andric /// DEFINE cap_sigma1(qword) {
154*06c3fb27SDimitry Andric /// RETURN ROR64(qword,14) ^ ROR64(qword, 18) ^ ROR64(qword, 41)
155*06c3fb27SDimitry Andric /// }
156*06c3fb27SDimitry Andric /// DEFINE MAJ(a,b,c) {
157*06c3fb27SDimitry Andric /// RETURN (a & b) ^ (a & c) ^ (b & c)
158*06c3fb27SDimitry Andric /// }
159*06c3fb27SDimitry Andric /// DEFINE CH(e,f,g) {
160*06c3fb27SDimitry Andric /// RETURN (e & f) ^ (g & ~e)
161*06c3fb27SDimitry Andric /// }
162*06c3fb27SDimitry Andric /// A[0] := __B.qword[3]
163*06c3fb27SDimitry Andric /// B[0] := __B.qword[2]
164*06c3fb27SDimitry Andric /// C[0] := __C.qword[3]
165*06c3fb27SDimitry Andric /// D[0] := __C.qword[2]
166*06c3fb27SDimitry Andric /// E[0] := __B.qword[1]
167*06c3fb27SDimitry Andric /// F[0] := __B.qword[0]
168*06c3fb27SDimitry Andric /// G[0] := __C.qword[1]
169*06c3fb27SDimitry Andric /// H[0] := __C.qword[0]
170*06c3fb27SDimitry Andric /// WK[0]:= __A.qword[0]
171*06c3fb27SDimitry Andric /// WK[1]:= __A.qword[1]
172*06c3fb27SDimitry Andric /// FOR i := 0 to 1:
173*06c3fb27SDimitry Andric /// A[i+1] := CH(E[i], F[i], G[i]) +
174*06c3fb27SDimitry Andric /// cap_sigma1(E[i]) + WK[i] + H[i] +
175*06c3fb27SDimitry Andric /// MAJ(A[i], B[i], C[i]) +
176*06c3fb27SDimitry Andric /// cap_sigma0(A[i])
177*06c3fb27SDimitry Andric /// B[i+1] := A[i]
178*06c3fb27SDimitry Andric /// C[i+1] := B[i]
179*06c3fb27SDimitry Andric /// D[i+1] := C[i]
180*06c3fb27SDimitry Andric /// E[i+1] := CH(E[i], F[i], G[i]) +
181*06c3fb27SDimitry Andric /// cap_sigma1(E[i]) + WK[i] + H[i] + D[i]
182*06c3fb27SDimitry Andric /// F[i+1] := E[i]
183*06c3fb27SDimitry Andric /// G[i+1] := F[i]
184*06c3fb27SDimitry Andric /// H[i+1] := G[i]
185*06c3fb27SDimitry Andric /// ENDFOR
186*06c3fb27SDimitry Andric /// dst.qword[3] := A[2]
187*06c3fb27SDimitry Andric /// dst.qword[2] := B[2]
188*06c3fb27SDimitry Andric /// dst.qword[1] := E[2]
189*06c3fb27SDimitry Andric /// dst.qword[0] := F[2]
190*06c3fb27SDimitry Andric /// dst[MAX:256] := 0
191*06c3fb27SDimitry Andric /// \endcode
192*06c3fb27SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sha512rnds2_epi64(__m256i __A,__m256i __B,__m128i __C)193*06c3fb27SDimitry Andric _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) {
194*06c3fb27SDimitry Andric return (__m256i)__builtin_ia32_vsha512rnds2((__v4du)__A, (__v4du)__B,
195*06c3fb27SDimitry Andric (__v2du)__C);
196*06c3fb27SDimitry Andric }
197*06c3fb27SDimitry Andric
198*06c3fb27SDimitry Andric #undef __DEFAULT_FN_ATTRS256
199*06c3fb27SDimitry Andric
200*06c3fb27SDimitry Andric #endif // __SHA512INTRIN_H
201