; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O2 -mattr=avx512f -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64
; RUN: llc < %s -O2 -mattr=avx512f -mtriple=i386-unknown | FileCheck %s --check-prefix=CHECK32
; RUN: llc < %s -O2 -mattr=avx512vl -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64
; RUN: llc < %s -O2 -mattr=avx512vl -mtriple=i386-unknown | FileCheck %s --check-prefix=CHECK32

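; Bit 0 of %__U selects between element 0 of %__B and element 0 of %__W; the result
; replaces element 0 of %__A. This should select a single merge-masked vmovss.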
define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_mask_move_ss:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_move_ss:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    kmovw %eax, %k1
; CHECK32-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %__B.elt.i = extractelement <4 x float> %__B, i32 0
  %__W.elt.i = extractelement <4 x float> %__W, i32 0
  %vecext1.i = select i1 %tobool.i, float %__B.elt.i, float %__W.elt.i
  %vecins.i = insertelement <4 x float> %__A, float %vecext1.i, i32 0
  ret <4 x float> %vecins.i
}

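; Same pattern with a zero fallback instead of %__W; this should select a
; zero-masked vmovss.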
define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_maskz_move_ss:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_move_ss:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    kmovw %eax, %k1
; CHECK32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %vecext.i = extractelement <4 x float> %__B, i32 0
  %cond.i = select i1 %tobool.i, float %vecext.i, float 0.000000e+00
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

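; Double-precision version of the merge-masked move above; should select vmovsd.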
define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_mask_move_sd:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_move_sd:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    kmovw %eax, %k1
; CHECK32-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %__B.elt.i = extractelement <2 x double> %__B, i32 0
  %__W.elt.i = extractelement <2 x double> %__W, i32 0
  %vecext1.i = select i1 %tobool.i, double %__B.elt.i, double %__W.elt.i
  %vecins.i = insertelement <2 x double> %__A, double %vecext1.i, i32 0
  ret <2 x double> %vecins.i
}

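; Double-precision version of the zero-masked move above; should select vmovsd {z}.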
define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_maskz_move_sd:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_move_sd:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    kmovw %eax, %k1
; CHECK32-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %vecext.i = extractelement <2 x double> %__B, i32 0
  %cond.i = select i1 %tobool.i, double %vecext.i, double 0.000000e+00
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

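; The store intrinsic widens %__A to 512 bits and uses a v16f32 masked store in which
; only bit 0 of the mask can be set; this should fold to a masked vmovss store.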
define void @test_mm_mask_store_ss(ptr %__W, i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #1 {
; CHECK64-LABEL: test_mm_mask_store_ss:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %esi, %k1
; CHECK64-NEXT:    vmovss %xmm0, (%rdi) {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_store_ss:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss %xmm0, (%eax) {%k1}
; CHECK32-NEXT:    retl
entry:
  %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = and i8 %__U, 1
  %conv2.i = zext i8 %0 to i16
  %1 = bitcast i16 %conv2.i to <16 x i1>
  tail call void @llvm.masked.store.v16f32.p0(<16 x float> %shuffle.i.i, ptr %__W, i32 16, <16 x i1> %1) #5
  ret void
}

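; Same widening pattern with v8f64; should fold to a masked vmovsd store.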
define void @test_mm_mask_store_sd(ptr %__W, i8 zeroext %__U, <2 x double> %__A) local_unnamed_addr #1 {
; CHECK64-LABEL: test_mm_mask_store_sd:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %esi, %k1
; CHECK64-NEXT:    vmovsd %xmm0, (%rdi) {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_store_sd:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd %xmm0, (%eax) {%k1}
; CHECK32-NEXT:    retl
entry:
  %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  tail call void @llvm.masked.store.v8f64.p0(<8 x double> %shuffle.i.i, ptr %__W, i32 16, <8 x i1> %1) #5
  ret void
}

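; The passthru is element 0 of %__A with the remaining elements zeroed, widened to
; v16f32, and only bit 0 of the mask can be set; should fold to a masked vmovss load.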
define <4 x float> @test_mm_mask_load_ss(<4 x float> %__A, i8 zeroext %__U, ptr %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_mask_load_ss:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss (%rsi), %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_load_ss:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss (%eax), %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %shuffle.i = shufflevector <4 x float> %__A, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %shuffle.i.i = shufflevector <4 x float> %shuffle.i, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = and i8 %__U, 1
  %conv2.i = zext i8 %0 to i16
  %1 = bitcast i16 %conv2.i to <16 x i1>
  %2 = tail call <16 x float> @llvm.masked.load.v16f32.p0(ptr %__W, i32 16, <16 x i1> %1, <16 x float> %shuffle.i.i) #5
  %shuffle4.i = shufflevector <16 x float> %2, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle4.i
}

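; Double-precision version of the widened masked load; should fold to a masked vmovsd load.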
define <2 x double> @test_mm_mask_load_sd(<2 x double> %__A, i8 zeroext %__U, ptr %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_mask_load_sd:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd (%rsi), %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_load_sd:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %shuffle5.i = insertelement <2 x double> %__A, double 0.000000e+00, i32 1
  %shuffle.i.i = shufflevector <2 x double> %shuffle5.i, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %2 = tail call <8 x double> @llvm.masked.load.v8f64.p0(ptr %__W, i32 16, <8 x i1> %1, <8 x double> %shuffle.i.i) #5
  %shuffle3.i = shufflevector <8 x double> %2, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuffle3.i
}

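; Zero-passthru version of the widened v16f32 masked load; should fold to vmovss {z}.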
define <4 x float> @test_mm_maskz_load_ss(i8 zeroext %__U, ptr %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_maskz_load_ss:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_load_ss:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %conv2.i = zext i8 %0 to i16
  %1 = bitcast i16 %conv2.i to <16 x i1>
  %2 = tail call <16 x float> @llvm.masked.load.v16f32.p0(ptr %__W, i32 16, <16 x i1> %1, <16 x float> zeroinitializer) #5
  %shuffle.i = shufflevector <16 x float> %2, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle.i
}

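; Zero-passthru version of the widened v8f64 masked load; should fold to vmovsd {z}.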
define <2 x double> @test_mm_maskz_load_sd(i8 zeroext %__U, ptr %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_maskz_load_sd:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_load_sd:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %2 = tail call <8 x double> @llvm.masked.load.v8f64.p0(ptr %__W, i32 16, <8 x i1> %1, <8 x double> zeroinitializer) #5
  %shuffle.i = shufflevector <8 x double> %2, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuffle.i
}

; The tests below match clang's newer codegen that uses 128-bit masked load/stores.

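; Here the i8 mask is bitcast to <8 x i1>, its low four bits are extracted, and a
; v4f32 masked store is used directly; this should still select a masked vmovss store.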
define void @test_mm_mask_store_ss_2(ptr %__P, i8 zeroext %__U, <4 x float> %__A) {
; CHECK64-LABEL: test_mm_mask_store_ss_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %esi, %k1
; CHECK64-NEXT:    vmovss %xmm0, (%rdi) {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_store_ss_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss %xmm0, (%eax) {%k1}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  tail call void @llvm.masked.store.v4f32.p0(<4 x float> %__A, ptr %__P, i32 1, <4 x i1> %extract.i)
  ret void
}

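; v2f64 masked store using the low two mask bits; should still select a masked vmovsd store.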
define void @test_mm_mask_store_sd_2(ptr %__P, i8 zeroext %__U, <2 x double> %__A) {
; CHECK64-LABEL: test_mm_mask_store_sd_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %esi, %k1
; CHECK64-NEXT:    vmovsd %xmm0, (%rdi) {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_store_sd_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd %xmm0, (%eax) {%k1}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  tail call void @llvm.masked.store.v2f64.p0(<2 x double> %__A, ptr %__P, i32 1, <2 x i1> %extract.i)
  ret void
}

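; v4f32 masked load whose passthru is element 0 of %__A with the rest zeroed;
; should select a merge-masked vmovss load.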
define <4 x float> @test_mm_mask_load_ss_2(<4 x float> %__A, i8 zeroext %__U, ptr readonly %__W) {
; CHECK64-LABEL: test_mm_mask_load_ss_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss (%rsi), %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_load_ss_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss (%eax), %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %shuffle.i = shufflevector <4 x float> %__A, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %__W, i32 1, <4 x i1> %extract.i, <4 x float> %shuffle.i)
  ret <4 x float> %2
}

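; Zero-passthru version of the v4f32 masked load; should select vmovss {z}.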
define <4 x float> @test_mm_maskz_load_ss_2(i8 zeroext %__U, ptr readonly %__W) {
; CHECK64-LABEL: test_mm_maskz_load_ss_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_load_ss_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %__W, i32 1, <4 x i1> %extract.i, <4 x float> zeroinitializer)
  ret <4 x float> %2
}

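; v2f64 masked load whose passthru is element 0 of %__A with element 1 zeroed;
; should select a merge-masked vmovsd load.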
define <2 x double> @test_mm_mask_load_sd_2(<2 x double> %__A, i8 zeroext %__U, ptr readonly %__W) {
; CHECK64-LABEL: test_mm_mask_load_sd_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd (%rsi), %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_load_sd_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %shuffle3.i = insertelement <2 x double> %__A, double 0.000000e+00, i32 1
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = tail call <2 x double> @llvm.masked.load.v2f64.p0(ptr %__W, i32 1, <2 x i1> %extract.i, <2 x double> %shuffle3.i)
  ret <2 x double> %2
}

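; Zero-passthru version of the v2f64 masked load; should select vmovsd {z}.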
define <2 x double> @test_mm_maskz_load_sd_2(i8 zeroext %__U, ptr readonly %__W) {
; CHECK64-LABEL: test_mm_maskz_load_sd_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_load_sd_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = tail call <2 x double> @llvm.masked.load.v2f64.p0(ptr %__W, i32 1, <2 x i1> %extract.i, <2 x double> zeroinitializer)
  ret <2 x double> %2
}


declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32, <16 x i1>) #3

declare void @llvm.masked.store.v8f64.p0(<8 x double>, ptr, i32, <8 x i1>) #3

declare <16 x float> @llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>) #4

declare <8 x double> @llvm.masked.load.v8f64.p0(ptr, i32, <8 x i1>, <8 x double>) #4

declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)

declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>)

declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)

declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>)