xref: /llvm-project/llvm/test/CodeGen/X86/sha.ll (revision de3e4a9dfe89dfc0a4d6d5e0891c542f6c82ca57)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
4
5declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone
6
; Register-register form of the sha1rnds4 intrinsic: both vector operands are
; function arguments and the round-selection immediate is the constant 3.
; Both runs expect a single `sha1rnds4 $3, %xmm1, %xmm0` followed by `retq`;
; the run with --show-mc-encoding additionally pins the encoding bytes.
7define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
8; SSE-LABEL: test_sha1rnds4rr:
9; SSE:       # %bb.0: # %entry
10; SSE-NEXT:    sha1rnds4 $3, %xmm1, %xmm0
11; SSE-NEXT:    retq
12;
13; AVX-LABEL: test_sha1rnds4rr:
14; AVX:       # %bb.0: # %entry
15; AVX-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x0f,0x3a,0xcc,0xc1,0x03]
16; AVX-NEXT:    retq # encoding: [0xc3]
17entry:
18  %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
19  ret <4 x i32> %0
20}
21
; Register-memory form of the sha1rnds4 intrinsic: the second vector operand
; is loaded from the pointer argument %b. The expected output has no separate
; load instruction; the load appears as the `(%rdi)` memory operand of
; sha1rnds4 in both runs.
22define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
23; SSE-LABEL: test_sha1rnds4rm:
24; SSE:       # %bb.0: # %entry
25; SSE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
26; SSE-NEXT:    retq
27;
28; AVX-LABEL: test_sha1rnds4rm:
29; AVX:       # %bb.0: # %entry
30; AVX-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03]
31; AVX-NEXT:    retq # encoding: [0xc3]
32entry:
33  %0 = load <4 x i32>, ptr %b
34  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
35  ret <4 x i32> %1
36}
37
38declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone
39
; Register-register form of the sha1nexte intrinsic. Both runs expect a single
; `sha1nexte %xmm1, %xmm0` followed by `retq`; the run with --show-mc-encoding
; also pins the encoding bytes.
40define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
41; SSE-LABEL: test_sha1nexterr:
42; SSE:       # %bb.0: # %entry
43; SSE-NEXT:    sha1nexte %xmm1, %xmm0
44; SSE-NEXT:    retq
45;
46; AVX-LABEL: test_sha1nexterr:
47; AVX:       # %bb.0: # %entry
48; AVX-NEXT:    sha1nexte %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc8,0xc1]
49; AVX-NEXT:    retq # encoding: [0xc3]
50entry:
51  %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
52  ret <4 x i32> %0
53}
54
; Register-memory form of the sha1nexte intrinsic: the second operand is
; loaded from %b. The expected output uses `(%rdi)` directly as the source
; operand of sha1nexte, with no separate load instruction, in both runs.
55define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
56; SSE-LABEL: test_sha1nexterm:
57; SSE:       # %bb.0: # %entry
58; SSE-NEXT:    sha1nexte (%rdi), %xmm0
59; SSE-NEXT:    retq
60;
61; AVX-LABEL: test_sha1nexterm:
62; AVX:       # %bb.0: # %entry
63; AVX-NEXT:    sha1nexte (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc8,0x07]
64; AVX-NEXT:    retq # encoding: [0xc3]
65entry:
66  %0 = load <4 x i32>, ptr %b
67  %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
68  ret <4 x i32> %1
69}
70
71declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone
72
; Register-register form of the sha1msg1 intrinsic. Both runs expect a single
; `sha1msg1 %xmm1, %xmm0` followed by `retq`; the run with --show-mc-encoding
; also pins the encoding bytes.
73define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
74; SSE-LABEL: test_sha1msg1rr:
75; SSE:       # %bb.0: # %entry
76; SSE-NEXT:    sha1msg1 %xmm1, %xmm0
77; SSE-NEXT:    retq
78;
79; AVX-LABEL: test_sha1msg1rr:
80; AVX:       # %bb.0: # %entry
81; AVX-NEXT:    sha1msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc9,0xc1]
82; AVX-NEXT:    retq # encoding: [0xc3]
83entry:
84  %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
85  ret <4 x i32> %0
86}
87
; Register-memory form of the sha1msg1 intrinsic: the second operand is
; loaded from %b. The expected output uses `(%rdi)` directly as the source
; operand of sha1msg1, with no separate load instruction, in both runs.
88define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
89; SSE-LABEL: test_sha1msg1rm:
90; SSE:       # %bb.0: # %entry
91; SSE-NEXT:    sha1msg1 (%rdi), %xmm0
92; SSE-NEXT:    retq
93;
94; AVX-LABEL: test_sha1msg1rm:
95; AVX:       # %bb.0: # %entry
96; AVX-NEXT:    sha1msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc9,0x07]
97; AVX-NEXT:    retq # encoding: [0xc3]
98entry:
99  %0 = load <4 x i32>, ptr %b
100  %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
101  ret <4 x i32> %1
102}
103
104declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone
105
; Register-register form of the sha1msg2 intrinsic. Both runs expect a single
; `sha1msg2 %xmm1, %xmm0` followed by `retq`; the run with --show-mc-encoding
; also pins the encoding bytes.
106define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
107; SSE-LABEL: test_sha1msg2rr:
108; SSE:       # %bb.0: # %entry
109; SSE-NEXT:    sha1msg2 %xmm1, %xmm0
110; SSE-NEXT:    retq
111;
112; AVX-LABEL: test_sha1msg2rr:
113; AVX:       # %bb.0: # %entry
114; AVX-NEXT:    sha1msg2 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xca,0xc1]
115; AVX-NEXT:    retq # encoding: [0xc3]
116entry:
117  %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
118  ret <4 x i32> %0
119}
120
; Register-memory form of the sha1msg2 intrinsic: the second operand is
; loaded from %b. The expected output uses `(%rdi)` directly as the source
; operand of sha1msg2, with no separate load instruction, in both runs.
121define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
122; SSE-LABEL: test_sha1msg2rm:
123; SSE:       # %bb.0: # %entry
124; SSE-NEXT:    sha1msg2 (%rdi), %xmm0
125; SSE-NEXT:    retq
126;
127; AVX-LABEL: test_sha1msg2rm:
128; AVX:       # %bb.0: # %entry
129; AVX-NEXT:    sha1msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xca,0x07]
130; AVX-NEXT:    retq # encoding: [0xc3]
131entry:
132  %0 = load <4 x i32>, ptr %b
133  %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
134  ret <4 x i32> %1
135}
136
137declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
138
; Register-register form of the three-operand sha256rnds2 intrinsic.
; Unlike the two-operand SHA tests in this file, the expected output contains
; register copies around the instruction: %a is moved out of %xmm0 into
; %xmm3, %c is moved into %xmm0, and the result is copied back to %xmm0 for
; the return. The printed instruction lists %xmm0 as its first operand.
; NOTE(review): the copies presumably exist because the instruction requires
; its third source in %xmm0 -- confirm against the ISA reference.
; The AVX-enabled run expects VEX-encoded vmovaps for the copies but the same
; non-VEX sha256rnds2 encoding.
139define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
140; SSE-LABEL: test_sha256rnds2rr:
141; SSE:       # %bb.0: # %entry
142; SSE-NEXT:    movaps %xmm0, %xmm3
143; SSE-NEXT:    movaps %xmm2, %xmm0
144; SSE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
145; SSE-NEXT:    movaps %xmm3, %xmm0
146; SSE-NEXT:    retq
147;
148; AVX-LABEL: test_sha256rnds2rr:
149; AVX:       # %bb.0: # %entry
150; AVX-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
151; AVX-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
152; AVX-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x0f,0x38,0xcb,0xd9]
153; AVX-NEXT:    vmovaps %xmm3, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc3]
154; AVX-NEXT:    retq # encoding: [0xc3]
155entry:
156  %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
157  ret <4 x i32> %0
158}
159
; Register-memory form of the three-operand sha256rnds2 intrinsic: the second
; operand is loaded from %b and is expected to appear as the `(%rdi)` memory
; operand of the instruction. As in the register-register variant, the
; expected output moves %a out of %xmm0 (here into %xmm2), moves %c (arriving
; in %xmm1) into %xmm0, and copies the result back to %xmm0 before returning.
160define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwind uwtable {
161; SSE-LABEL: test_sha256rnds2rm:
162; SSE:       # %bb.0: # %entry
163; SSE-NEXT:    movaps %xmm0, %xmm2
164; SSE-NEXT:    movaps %xmm1, %xmm0
165; SSE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2
166; SSE-NEXT:    movaps %xmm2, %xmm0
167; SSE-NEXT:    retq
168;
169; AVX-LABEL: test_sha256rnds2rm:
170; AVX:       # %bb.0: # %entry
171; AVX-NEXT:    vmovaps %xmm0, %xmm2 # encoding: [0xc5,0xf8,0x28,0xd0]
172; AVX-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
173; AVX-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x0f,0x38,0xcb,0x17]
174; AVX-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
175; AVX-NEXT:    retq # encoding: [0xc3]
176entry:
177  %0 = load <4 x i32>, ptr %b
178  %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
179  ret <4 x i32> %1
180}
181
182declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone
183
; Register-register form of the sha256msg1 intrinsic. Both runs expect a
; single `sha256msg1 %xmm1, %xmm0` followed by `retq`; the run with
; --show-mc-encoding also pins the encoding bytes.
184define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
185; SSE-LABEL: test_sha256msg1rr:
186; SSE:       # %bb.0: # %entry
187; SSE-NEXT:    sha256msg1 %xmm1, %xmm0
188; SSE-NEXT:    retq
189;
190; AVX-LABEL: test_sha256msg1rr:
191; AVX:       # %bb.0: # %entry
192; AVX-NEXT:    sha256msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcc,0xc1]
193; AVX-NEXT:    retq # encoding: [0xc3]
194entry:
195  %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
196  ret <4 x i32> %0
197}
198
; Register-memory form of the sha256msg1 intrinsic: the second operand is
; loaded from %b. The expected output uses `(%rdi)` directly as the source
; operand of sha256msg1, with no separate load instruction, in both runs.
199define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
200; SSE-LABEL: test_sha256msg1rm:
201; SSE:       # %bb.0: # %entry
202; SSE-NEXT:    sha256msg1 (%rdi), %xmm0
203; SSE-NEXT:    retq
204;
205; AVX-LABEL: test_sha256msg1rm:
206; AVX:       # %bb.0: # %entry
207; AVX-NEXT:    sha256msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcc,0x07]
208; AVX-NEXT:    retq # encoding: [0xc3]
209entry:
210  %0 = load <4 x i32>, ptr %b
211  %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
212  ret <4 x i32> %1
213}
214
215declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone
216
; Register-register form of the sha256msg2 intrinsic. Both runs expect a
; single `sha256msg2 %xmm1, %xmm0` followed by `retq`; the run with
; --show-mc-encoding also pins the encoding bytes.
217define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
218; SSE-LABEL: test_sha256msg2rr:
219; SSE:       # %bb.0: # %entry
220; SSE-NEXT:    sha256msg2 %xmm1, %xmm0
221; SSE-NEXT:    retq
222;
223; AVX-LABEL: test_sha256msg2rr:
224; AVX:       # %bb.0: # %entry
225; AVX-NEXT:    sha256msg2 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcd,0xc1]
226; AVX-NEXT:    retq # encoding: [0xc3]
227entry:
228  %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
229  ret <4 x i32> %0
230}
231
; Register-memory form of the sha256msg2 intrinsic: the second operand is
; loaded from %b. The expected output uses `(%rdi)` directly as the source
; operand of sha256msg2, with no separate load instruction, in both runs.
232define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
233; SSE-LABEL: test_sha256msg2rm:
234; SSE:       # %bb.0: # %entry
235; SSE-NEXT:    sha256msg2 (%rdi), %xmm0
236; SSE-NEXT:    retq
237;
238; AVX-LABEL: test_sha256msg2rm:
239; AVX:       # %bb.0: # %entry
240; AVX-NEXT:    sha256msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcd,0x07]
241; AVX-NEXT:    retq # encoding: [0xc3]
242entry:
243  %0 = load <4 x i32>, ptr %b
244  %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
245  ret <4 x i32> %1
246}
247
248; Make sure we don't forget that sha instructions have no VEX equivalents and thus don't zero YMM/ZMM.
; Widens the <4 x i32> sha1rnds4 result to <8 x i32> by shuffling it with a
; zero vector, so elements 4-7 of the return value are zero.
; Without AVX the expected output materialises the zero upper half with
; `xorps %xmm1, %xmm1`. With AVX enabled the expected output keeps an explicit
; `vmovaps %xmm0, %xmm0` after the sha1rnds4, i.e. the zeroing of the upper
; elements is not assumed to come from the SHA instruction itself.
249define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwtable {
250; SSE-LABEL: test_sha1rnds4_zero_extend:
251; SSE:       # %bb.0: # %entry
252; SSE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
253; SSE-NEXT:    xorps %xmm1, %xmm1
254; SSE-NEXT:    retq
255;
256; AVX-LABEL: test_sha1rnds4_zero_extend:
257; AVX:       # %bb.0: # %entry
258; AVX-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03]
259; AVX-NEXT:    vmovaps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc0]
260; AVX-NEXT:    retq # encoding: [0xc3]
261entry:
262  %0 = load <4 x i32>, ptr %b
263  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
264  %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
265  ret <8 x i32> %2
266}
267;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
268; CHECK: {{.*}}
269