; xref: llvm/test/CodeGen/X86/gpr-to-mask.ll (revision 77589e945f0d167bd46ed3218b81c16af1e917ae)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-64
; RUN: llc -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-32

; fcmp results on both branch paths feed an i1 phi used by a scalar select;
; the checks show the compares going straight into a mask register (%k1).
define void @test_fcmp_storefloat(i1 %cond, ptr %fptr, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
; X86-64-LABEL: test_fcmp_storefloat:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB0_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    vcmpeqss %xmm3, %xmm2, %k1
; X86-64-NEXT:    jmp .LBB0_3
; X86-64-NEXT:  .LBB0_2: # %else
; X86-64-NEXT:    vcmpeqss %xmm5, %xmm4, %k1
; X86-64-NEXT:  .LBB0_3: # %exit
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_fcmp_storefloat:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB0_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
; X86-32-NEXT:    jmp .LBB0_3
; X86-32-NEXT:  .LBB0_2: # %else
; X86-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
; X86-32-NEXT:  .LBB0_3: # %exit
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %cmp1 = fcmp oeq float %f3, %f4
  br label %exit

else:
  %cmp2 = fcmp oeq float %f5, %f6
  br label %exit

exit:
  %val = phi i1 [%cmp1, %if], [%cmp2, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, ptr %fptr
  ret void
}

; An fcmp result (i1 phi) stored directly to memory; the checks show the
; compare mask written with kmovb rather than round-tripping through a GPR.
define void @test_fcmp_storei1(i1 %cond, ptr %fptr, ptr %iptr, float %f1, float %f2, float %f3, float %f4) {
; X86-64-LABEL: test_fcmp_storei1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB1_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
; X86-64-NEXT:    kmovb %k0, (%rdx)
; X86-64-NEXT:    retq
; X86-64-NEXT:  .LBB1_2: # %else
; X86-64-NEXT:    vcmpeqss %xmm3, %xmm2, %k0
; X86-64-NEXT:    kmovb %k0, (%rdx)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_fcmp_storei1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB1_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
; X86-32-NEXT:    kmovb %k0, (%eax)
; X86-32-NEXT:    retl
; X86-32-NEXT:  .LBB1_2: # %else
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
; X86-32-NEXT:    kmovb %k0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %cmp1 = fcmp oeq float %f1, %f2
  br label %exit

else:
  %cmp2 = fcmp oeq float %f3, %f4
  br label %exit

exit:
  %val = phi i1 [%cmp1, %if], [%cmp2, %else]
  store i1 %val, ptr %iptr
  ret void
}

; i1 loads plus an i1 add feed the select mask; the checks show the add done
; in GPRs (movzbl/addb) with a single kmovd into the mask register at the join.
define void @test_load_add(i1 %cond, ptr %fptr, ptr %iptr1, ptr %iptr2, float %f1, float %f2)  {
; X86-64-LABEL: test_load_add:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB2_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    movzbl (%rdx), %eax
; X86-64-NEXT:    addb (%rcx), %al
; X86-64-NEXT:    jmp .LBB2_3
; X86-64-NEXT:  .LBB2_2: # %else
; X86-64-NEXT:    movzbl (%rcx), %eax
; X86-64-NEXT:  .LBB2_3: # %exit
; X86-64-NEXT:    kmovd %eax, %k1
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_load_add:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB2_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-32-NEXT:    movzbl (%edx), %edx
; X86-32-NEXT:    addb (%ecx), %dl
; X86-32-NEXT:    jmp .LBB2_3
; X86-32-NEXT:  .LBB2_2: # %else
; X86-32-NEXT:    movzbl (%ecx), %edx
; X86-32-NEXT:  .LBB2_3: # %exit
; X86-32-NEXT:    kmovd %edx, %k1
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, ptr %iptr1
  %loaded2if = load i1, ptr %iptr2
  %added = add i1 %loaded1, %loaded2if
  br label %exit

else:
  %loaded2else = load i1, ptr %iptr2
  br label %exit

exit:
  %val = phi i1 [%added, %if], [%loaded2else, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, ptr %fptr
  ret void
}

; i1 loads on both paths feed the select mask; on x86-64 each load is a
; kmovb straight into %k1 with no GPR round trip.
define void @test_load_i1(i1 %cond, ptr %fptr, ptr %iptr1, ptr %iptr2, float %f1, float %f2)  {
; X86-64-LABEL: test_load_i1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB3_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:    jmp .LBB3_3
; X86-64-NEXT:  .LBB3_2: # %else
; X86-64-NEXT:    kmovb (%rcx), %k1
; X86-64-NEXT:  .LBB3_3: # %exit
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_load_i1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB3_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    jmp .LBB3_3
; X86-32-NEXT:  .LBB3_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:  .LBB3_3: # %exit
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, ptr %iptr1
  br label %exit

else:
  %loaded2 = load i1, ptr %iptr2
  br label %exit

exit:
  %val = phi i1 [%loaded1, %if], [%loaded2, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, ptr %fptr
  ret void
}

; i1 load/store round trip with no mask consumer: the value stays in GPRs,
; and the stored bit is normalized with `andb $1`.
define void @test_loadi1_storei1(i1 %cond, ptr %iptr1, ptr %iptr2, ptr %iptr3)  {
; X86-64-LABEL: test_loadi1_storei1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB4_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    movzbl (%rsi), %eax
; X86-64-NEXT:    jmp .LBB4_3
; X86-64-NEXT:  .LBB4_2: # %else
; X86-64-NEXT:    movzbl (%rdx), %eax
; X86-64-NEXT:  .LBB4_3: # %exit
; X86-64-NEXT:    andb $1, %al
; X86-64-NEXT:    movb %al, (%rcx)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_loadi1_storei1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB4_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    jmp .LBB4_3
; X86-32-NEXT:  .LBB4_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:  .LBB4_3: # %exit
; X86-32-NEXT:    movzbl (%ecx), %ecx
; X86-32-NEXT:    andb $1, %cl
; X86-32-NEXT:    movb %cl, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, ptr %iptr1
  br label %exit

else:
  %loaded2 = load i1, ptr %iptr2
  br label %exit

exit:
  %val = phi i1 [%loaded1, %if], [%loaded2, %else]
  store i1 %val, ptr %iptr3
  ret void
}

; shl-by-1 of an i8 bitcast to <8 x i1>: the checks show the shift performed
; in mask registers as kaddb %k0, %k0 (mask doubling) instead of a GPR shift.
define void @test_shl1(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, ptr %fptrvec) {
; X86-64-LABEL: test_shl1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB5_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kaddb %k0, %k0, %k1
; X86-64-NEXT:    jmp .LBB5_3
; X86-64-NEXT:  .LBB5_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB5_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shl1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB5_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kaddb %k0, %k0, %k1
; X86-32-NEXT:    jmp .LBB5_3
; X86-32-NEXT:  .LBB5_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB5_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, ptr %ptr1
  %shifted = shl i8 %loaded1, 1
  br label %exit

else:
  %loaded2 = load i8, ptr %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, ptr %fptrvec
  ret void
}

; lshr-by-1 of an i8 bitcast to <8 x i1>: lowered entirely in mask registers
; as kshiftrb $1.
define void @test_shr1(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, ptr %fptrvec) {
; X86-64-LABEL: test_shr1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB6_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kshiftrb $1, %k0, %k1
; X86-64-NEXT:    jmp .LBB6_3
; X86-64-NEXT:  .LBB6_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB6_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shr1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB6_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kshiftrb $1, %k0, %k1
; X86-32-NEXT:    jmp .LBB6_3
; X86-32-NEXT:  .LBB6_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB6_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, ptr %ptr1
  %shifted = lshr i8 %loaded1, 1
  br label %exit

else:
  %loaded2 = load i8, ptr %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, ptr %fptrvec
  ret void
}

; lshr-by-2 variant of test_shr1: lowered in mask registers as kshiftrb $2.
define void @test_shr2(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, ptr %fptrvec) {
; X86-64-LABEL: test_shr2:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB7_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kshiftrb $2, %k0, %k1
; X86-64-NEXT:    jmp .LBB7_3
; X86-64-NEXT:  .LBB7_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB7_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shr2:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB7_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kshiftrb $2, %k0, %k1
; X86-32-NEXT:    jmp .LBB7_3
; X86-32-NEXT:  .LBB7_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB7_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, ptr %ptr1
  %shifted = lshr i8 %loaded1, 2
  br label %exit

else:
  %loaded2 = load i8, ptr %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, ptr %fptrvec
  ret void
}

; shl-by-6 of an i8 bitcast to <8 x i1>: lowered in mask registers as
; kshiftlb $6.
define void @test_shl(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, ptr %fptrvec) {
; X86-64-LABEL: test_shl:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB8_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kshiftlb $6, %k0, %k1
; X86-64-NEXT:    jmp .LBB8_3
; X86-64-NEXT:  .LBB8_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB8_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shl:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB8_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kshiftlb $6, %k0, %k1
; X86-32-NEXT:    jmp .LBB8_3
; X86-32-NEXT:  .LBB8_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB8_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, ptr %ptr1
  %shifted = shl i8 %loaded1, 6
  br label %exit

else:
  %loaded2 = load i8, ptr %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, ptr %fptrvec
  ret void
}

; and/add of two loaded i8 mask values: both loads hoisted to entry as kmovb,
; with the arithmetic done in mask registers (kandb on one path, kaddb on the
; other).
define void @test_add(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, ptr %fptrvec) {
; X86-64-LABEL: test_add:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB9_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kandb %k1, %k0, %k1
; X86-64-NEXT:    jmp .LBB9_3
; X86-64-NEXT:  .LBB9_2: # %else
; X86-64-NEXT:    kaddb %k1, %k0, %k1
; X86-64-NEXT:  .LBB9_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_add:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-32-NEXT:    kmovb (%edx), %k0
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB9_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    kandb %k1, %k0, %k1
; X86-32-NEXT:    jmp .LBB9_3
; X86-32-NEXT:  .LBB9_2: # %else
; X86-32-NEXT:    kaddb %k1, %k0, %k1
; X86-32-NEXT:  .LBB9_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  %loaded1 = load i8, ptr %ptr1
  %loaded2 = load i8, ptr %ptr2
  br i1 %cond, label %if, label %else

if:
  %and = and i8 %loaded1, %loaded2
  br label %exit

else:
  %add = add i8 %loaded1, %loaded2
  br label %exit

exit:
  %val = phi i8 [%and, %if], [%add, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, ptr %fptrvec
  ret void
}
