; xref: /llvm-project/llvm/test/CodeGen/X86/avx2-masked-gather.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=skylake -mtriple=i386-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X86 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-avx512f | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=-avx2 | FileCheck --check-prefix=NOGATHER %s

declare <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr> %ptrs, i32 %align, <2 x i1> %masks, <2 x i32> %passthro)

; Gather two i32s through a <2 x ptr> loaded from %ptr, blending with %passthro.
define <2 x i32> @masked_gather_v2i32(ptr %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB0_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB0_3
; NOGATHER-NEXT:  .LBB0_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB0_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB0_4
; NOGATHER-NEXT:  .LBB0_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x ptr>, ptr %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  ret <2 x i32> %res
}

; Same v2i32 gather, but the result is widened to <4 x i32> by a shufflevector.
define <4 x i32> @masked_gather_v2i32_concat(ptr %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB1_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB1_3
; NOGATHER-NEXT:  .LBB1_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB1_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB1_4
; NOGATHER-NEXT:  .LBB1_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x ptr>, ptr %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  %res2 = shufflevector <2 x i32> %res, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res2
}

declare <2 x float> @llvm.masked.gather.v2float(<2 x ptr> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthro)

; Gather two floats through a <2 x ptr> loaded from %ptr.
define <2 x float> @masked_gather_v2float(ptr %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB2_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB2_3
; NOGATHER-NEXT:  .LBB2_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB2_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB2_4
; NOGATHER-NEXT:  .LBB2_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x ptr>, ptr %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  ret <2 x float> %res
}

; Same v2float gather, widened to <4 x float> by a shufflevector.
define <4 x float> @masked_gather_v2float_concat(ptr %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB3_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB3_3
; NOGATHER-NEXT:  .LBB3_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB3_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB3_4
; NOGATHER-NEXT:  .LBB3_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x ptr>, ptr %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  %res2 = shufflevector <2 x float> %res, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %res2
}


declare <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthro)

; Gather four i32s; pointers arrive directly as a <4 x ptr> argument.
define <4 x i32> @masked_gather_v4i32(<4 x ptr> %ptrs, <4 x i1> %masks, <4 x i32> %passthro) {
; X86-LABEL: masked_gather_v4i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vpgatherdd %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovdqa %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vpgatherqd %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovdqa %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm1
; NOGATHER-NEXT:    vmovmskps %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB4_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $1, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB4_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB4_7
; NOGATHER-NEXT:  .LBB4_8: # %else8
; NOGATHER-NEXT:    vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB4_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $2, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB4_8
; NOGATHER-NEXT:  .LBB4_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT:    vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ptrs, i32 0, <4 x i1> %masks, <4 x i32> %passthro)
  ret <4 x i32> %res
}

declare <4 x float> @llvm.masked.gather.v4float(<4 x ptr> %ptrs, i32 %align, <4 x i1> %masks, <4 x float> %passthro)

; Gather four floats; pointers arrive directly as a <4 x ptr> argument.
define <4 x float> @masked_gather_v4float(<4 x ptr> %ptrs, <4 x i1> %masks, <4 x float> %passthro) {
; X86-LABEL: masked_gather_v4float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vgatherdps %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vgatherqps %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm1
; NOGATHER-NEXT:    vmovmskps %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; NOGATHER-NEXT:  .LBB5_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB5_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:  .LBB5_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB5_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB5_7
; NOGATHER-NEXT:  .LBB5_8: # %else8
; NOGATHER-NEXT:    vmovaps %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB5_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB5_8
; NOGATHER-NEXT:  .LBB5_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT:    vmovaps %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x float> @llvm.masked.gather.v4float(<4 x ptr> %ptrs, i32 0, <4 x i1> %masks, <4 x float> %passthro)
  ret <4 x float> %res
}

declare <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr> %ptrs, i32 %align, <8 x i1> %masks, <8 x i32> %passthro)

; Gather eight i32s through a <8 x ptr> loaded from %ptr (64 bytes of pointers).
define <8 x i32> @masked_gather_v8i32(ptr %ptr, <8 x i1> %masks, <8 x i32> %passthro) {
; X86-LABEL: masked_gather_v8i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %ymm2
; X86-NEXT:    vpgatherdd %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vmovdqa 32(%rdi), %ymm3
; X64-NEXT:    vextracti128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vpgatherqd %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vpgatherqd %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpsllw $15, %xmm0, %xmm0
; NOGATHER-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovmskb %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB6_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    je .LBB6_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $2, (%rcx), %xmm1, %xmm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB6_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $3, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_8: # %else8
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm0
; NOGATHER-NEXT:    testb $16, %al
; NOGATHER-NEXT:    je .LBB6_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4],ymm1[5,6,7]
; NOGATHER-NEXT:  .LBB6_10: # %else11
; NOGATHER-NEXT:    testb $32, %al
; NOGATHER-NEXT:    je .LBB6_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6,7]
; NOGATHER-NEXT:  .LBB6_12: # %else14
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $64, %al
; NOGATHER-NEXT:    jne .LBB6_13
; NOGATHER-NEXT:  # %bb.14: # %else17
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    jne .LBB6_15
; NOGATHER-NEXT:  .LBB6_16: # %else20
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB6_13: # %cond.load16
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6],ymm1[7]
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    je .LBB6_16
; NOGATHER-NEXT:  .LBB6_15: # %cond.load19
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vbroadcastss (%rax), %ymm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm0[7]
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <8 x ptr>, ptr %ptr
  %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr> %ld, i32 0, <8 x i1> %masks, <8 x i32> %passthro)
  ret <8 x i32> %res
}

declare <8 x float> @llvm.masked.gather.v8float(<8 x ptr> %ptrs, i32 %align, <8 x i1> %masks, <8 x float> %passthro)

; Gather eight floats through a <8 x ptr> loaded from %ptr.
define <8 x float> @masked_gather_v8float(ptr %ptr, <8 x i1> %masks, <8 x float> %passthro) {
; X86-LABEL: masked_gather_v8float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovaps (%eax), %ymm2
; X86-NEXT:    vgatherdps %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovaps %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vmovaps (%rdi), %ymm2
; X64-NEXT:    vmovaps 32(%rdi), %ymm3
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vgatherqps %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vgatherqps %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpsllw $15, %xmm0, %xmm0
; NOGATHER-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovmskb %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; NOGATHER-NEXT:  .LBB7_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB7_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    je .LBB7_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm1[0,1],mem[0],xmm1[3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB7_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],mem[0]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_8: # %else8
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm0
; NOGATHER-NEXT:    testb $16, %al
; NOGATHER-NEXT:    je .LBB7_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4],ymm1[5,6,7]
; NOGATHER-NEXT:  .LBB7_10: # %else11
; NOGATHER-NEXT:    testb $32, %al
; NOGATHER-NEXT:    je .LBB7_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6,7]
; NOGATHER-NEXT:  .LBB7_12: # %else14
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $64, %al
; NOGATHER-NEXT:    jne .LBB7_13
; NOGATHER-NEXT:  # %bb.14: # %else17
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    jne .LBB7_15
; NOGATHER-NEXT:  .LBB7_16: # %else20
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB7_13: # %cond.load16
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6],ymm1[7]
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    je .LBB7_16
; NOGATHER-NEXT:  .LBB7_15: # %cond.load19
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vbroadcastss (%rax), %ymm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm0[7]
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <8 x ptr>, ptr %ptr
  %res = call <8 x float> @llvm.masked.gather.v8float(<8 x ptr> %ld, i32 0, <8 x i1> %masks, <8 x float> %passthro)
  ret <8 x float> %res
}

declare <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr> %ptrs, i32 %align, <4 x i1> %masks, <4 x i64> %passthro)

; Gather four i64s through a <4 x ptr> loaded from %ptr.
define <4 x i64> @masked_gather_v4i64(ptr %ptr, <4 x i1> %masks, <4 x i64> %passthro) {
; X86-LABEL: masked_gather_v4i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %xmm2
; X86-NEXT:    vpgatherdq %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vpgatherqq %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskps %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $0, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB8_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $1, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB8_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB8_7
; NOGATHER-NEXT:  .LBB8_8: # %else8
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB8_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastsd (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB8_8
; NOGATHER-NEXT:  .LBB8_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vbroadcastsd (%rax), %ymm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <4 x ptr>, ptr %ptr
  %res = call <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i64> %passthro)
  ret <4 x i64> %res
}

declare <4 x double> @llvm.masked.gather.v4double(<4 x ptr> %ptrs, i32 %align, <4 x i1> %masks, <4 x double> %passthro)

; Gather four doubles through a <4 x ptr> loaded from %ptr.
define <4 x double> @masked_gather_v4double(ptr %ptr, <4 x i1> %masks, <4 x double> %passthro) {
; X86-LABEL: masked_gather_v4double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovapd (%eax), %xmm2
; X86-NEXT:    vgatherdpd %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovapd %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovapd (%rdi), %ymm2
; X64-NEXT:    vgatherqpd %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovapd %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskps %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; NOGATHER-NEXT:  .LBB9_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB9_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm0 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB9_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB9_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB9_7
; NOGATHER-NEXT:  .LBB9_8: # %else8
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB9_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastsd (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB9_8
; NOGATHER-NEXT:  .LBB9_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vbroadcastsd (%rax), %ymm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <4 x ptr>, ptr %ptr
  %res = call <4 x double> @llvm.masked.gather.v4double(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x double> %passthro)
  ret <4 x double> %res
}

declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 %align, <2 x i1> %masks, <2 x i64> %passthro)

; Gather two i64s through a <2 x ptr> loaded from %ptr.
define <2 x i64> @masked_gather_v2i64(ptr %ptr, <2 x i1> %masks, <2 x i64> %passthro) {
; X86-LABEL: masked_gather_v2i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdq %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB10_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB10_3
; NOGATHER-NEXT:  .LBB10_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB10_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB10_4
; NOGATHER-NEXT:  .LBB10_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x ptr>, ptr %ptr
  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i64> %passthro)
  ret <2 x i64> %res
}

declare <2 x double> @llvm.masked.gather.v2double(<2 x ptr> %ptrs, i32 %align, <2 x i1> %masks, <2 x double> %passthro)

; Gather two doubles through a <2 x ptr> loaded from %ptr.
define <2 x double> @masked_gather_v2double(ptr %ptr, <2 x i1> %masks, <2 x double> %passthro) {
; X86-LABEL: masked_gather_v2double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdpd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovapd %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovapd (%rdi), %xmm2
; X64-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovapd %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB11_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB11_3
; NOGATHER-NEXT:  .LBB11_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB11_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB11_4
; NOGATHER-NEXT:  .LBB11_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x ptr>, ptr %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x double> %passthro)
  ret <2 x double> %res
}


; An all-zero mask must fold the gather away and return the passthru value.
define <2 x double> @masked_gather_zeromask(ptr %ptr, <2 x double> %dummy, <2 x double> %passthru) {
; X86-LABEL: masked_gather_zeromask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_zeromask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_zeromask:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld  = load <2 x ptr>, ptr %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x ptr> %ld, i32 0, <2 x i1> zeroinitializer, <2 x double> %passthru)
  ret <2 x double> %res
}
