; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s

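; Hand-written note (not part of the autogenerated assertions): these tests
; verify that speculative load hardening covers the gather intrinsics. The
; predicate state is computed from a copy of %rsp (movq %rsp, %rax;
; sarq $63, %rax) and OR'ed into both the scalar base pointer and the
; broadcast index vector before each gather executes, then merged back into
; the high bits of %rsp (shlq $47, %rax; orq %rax, %rsp) before returning.
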
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, ptr, <4 x i32>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_d_ps(ptr %b, <4 x i32> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, ptr, <2 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps(ptr %b, <2 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, ptr %b, <2 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, ptr, <4 x i32>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_d_pd(ptr %b, <4 x i32> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, ptr %b, <4 x i32> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, ptr, <2 x i64>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_q_pd(ptr %b, <2 x i64> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, ptr %b, <2 x i64> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, ptr, <8 x i32>, <8 x float>, i8)

define <8 x float> @test_llvm_x86_avx2_gather_d_ps_256(ptr %b, <8 x i32> %iv, <8 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, ptr %b, <8 x i32> %iv, <8 x float> %mask, i8 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, ptr, <4 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps_256(ptr %b, <4 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, ptr, <4 x i32>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_d_pd_256(ptr %b, <4 x i32> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, ptr, <4 x i64>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_q_pd_256(ptr %b, <4 x i64> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqpd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, ptr, <4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_d_d(ptr %b, <4 x i32> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, ptr, <2 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d(ptr %b, <2 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> zeroinitializer, ptr %b, <2 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, ptr, <4 x i32>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_d_q(ptr %b, <4 x i32> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, ptr, <2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_q_q(ptr %b, <2 x i64> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, ptr %b, <2 x i64> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, ptr, <8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_llvm_x86_avx2_gather_d_d_256(ptr %b, <8 x i32> %iv, <8 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherdd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> zeroinitializer, ptr %b, <8 x i32> %iv, <8 x i32> %mask, i8 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, ptr, <4 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d_256(ptr %b, <4 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, ptr, <4 x i32>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_d_q_256(ptr %b, <4 x i32> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, ptr, <4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_q_q_256(ptr %b, <4 x i64> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqq %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

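; The AVX-512F gathers below (attribute #1) take a scalar mask rather than a
; vector mask, so the checks also cover the all-ones k-mask materialized with
; kxnorw. The predicate state is still OR'ed into both the base pointer and
; the index vector; for 512-bit index vectors it is broadcast straight from
; the GPR (vpbroadcastq %rax, %zmm2).
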
declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, ptr, <16 x i32>, i16, i32)

define <16 x float> @test_llvm_x86_avx512_gather_dps_512(ptr %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> zeroinitializer, ptr %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, ptr, <8 x i32>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_dpd_512(ptr %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpd_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdpd (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, ptr, <8 x i64>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather_qps_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, ptr, <8 x i64>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_qpd_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpd_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, ptr, <16 x i32>, i16, i32)

define <16 x i32> @test_llvm_x86_avx512_gather_dpi_512(ptr %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpi_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> zeroinitializer, ptr %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, ptr, <8 x i32>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_dpq_512(ptr %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpq_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdq (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}

declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, ptr, <8 x i64>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather_qpi_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpi_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, ptr, <8 x i64>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_qpq_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpq_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}

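; The 128-bit and 256-bit gather3* forms below (attribute #2, +avx512vl)
; follow the same pattern; with VL available the predicate state is broadcast
; directly from %rax into the xmm/ymm index register.
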
declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, ptr, <4 x i32>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3siv4_sf(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, ptr, <2 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div4_sf(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, ptr, <4 x i32>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3siv2_df(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, ptr, <2 x i64>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3div2_df(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, ptr, <8 x i32>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather3siv8_sf(ptr %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdps (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, ptr, <4 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div8_sf(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqps (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, ptr, <4 x i32>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3siv4_df(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, ptr, <4 x i64>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3div4_df(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, ptr, <4 x i32>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3siv4_si(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, ptr, <2 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div4_si(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, ptr, <4 x i32>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3siv2_di(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64>, ptr, <2 x i64>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3div2_di(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, ptr, <8 x i32>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather3siv8_si(ptr %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, ptr, <4 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div8_si(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, ptr, <4 x i32>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3siv4_di(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64>, ptr, <4 x i64>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3div4_di(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}

attributes #0 = { nounwind speculative_load_hardening "target-features"="+avx2" }
attributes #1 = { nounwind speculative_load_hardening "target-features"="+avx512f" }
attributes #2 = { nounwind speculative_load_hardening "target-features"="+avx512vl" }