xref: /llvm-project/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll (revision 4318b033bddc64d5654f3e368fddde859ff4d02e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mcpu=corei7   | FileCheck %s --check-prefix=X86
3; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s --check-prefix=X64
4
5; Make sure that we don't crash when legalizing vselect and vsetcc and that
6; we are able to generate vector blend instructions.
7
; simple_widen: select between two <2 x float> arguments using an undef
; <2 x i1> condition and store the result through an undef pointer.
; The checks for both triples expect nothing but a 64-bit movlps store of
; %xmm1 followed by the return: no compare or blend instruction is emitted
; for the undef condition.
; NOTE(review): <2 x float> is presumably widened to a 4-lane vector during
; type legalization (the test file is named "widen_vselect") - confirm.
8define void @simple_widen(<2 x float> %a, <2 x float> %b) {
9; X86-LABEL: simple_widen:
10; X86:       # %bb.0: # %entry
11; X86-NEXT:    movlps %xmm1, (%eax)
12; X86-NEXT:    retl
13;
14; X64-LABEL: simple_widen:
15; X64:       # %bb.0: # %entry
16; X64-NEXT:    movlps %xmm1, (%rax)
17; X64-NEXT:    retq
18entry:
; The condition is undef, so either operand is an acceptable result.
19  %0 = select <2 x i1> undef, <2 x float> %a, <2 x float> %b
; The store address is undef; the checks show it as whatever happens to be
; in %eax / %rax.
20  store <2 x float> %0, ptr undef
21  ret void
22}
23
; complex_inreg_work: vector compare feeding a vector select on <2 x float>.
; The select mask comes from "fcmp oeq %c, %c", which is true exactly for the
; lanes of %c that are not NaN; the checks accordingly expect an ordered
; self-compare (cmpordps %xmm2, %xmm2).
; Expected sequence on both triples:
;   1. %xmm0 is copied to %xmm3 so that %xmm0 is free to hold the mask.
;   2. cmpordps builds the mask in %xmm2, which is then moved into %xmm0
;      (the SSE4.1 blendvps instruction takes its mask in %xmm0).
;   3. blendvps merges %xmm3 into %xmm1 under that mask.
;   4. movlps stores the low 64 bits of %xmm1 through the undef pointer.
24define void @complex_inreg_work(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
25; X86-LABEL: complex_inreg_work:
26; X86:       # %bb.0: # %entry
27; X86-NEXT:    movaps %xmm0, %xmm3
28; X86-NEXT:    cmpordps %xmm2, %xmm2
29; X86-NEXT:    movaps %xmm2, %xmm0
30; X86-NEXT:    blendvps %xmm0, %xmm3, %xmm1
31; X86-NEXT:    movlps %xmm1, (%eax)
32; X86-NEXT:    retl
33;
34; X64-LABEL: complex_inreg_work:
35; X64:       # %bb.0: # %entry
36; X64-NEXT:    movaps %xmm0, %xmm3
37; X64-NEXT:    cmpordps %xmm2, %xmm2
38; X64-NEXT:    movaps %xmm2, %xmm0
39; X64-NEXT:    blendvps %xmm0, %xmm3, %xmm1
40; X64-NEXT:    movlps %xmm1, (%rax)
41; X64-NEXT:    retq
42entry:
; Per-lane mask: true where %c compares ordered-equal to itself.
43  %0 = fcmp oeq <2 x float> %c, %c
; Lanes with a true mask take %a, the others take %b.
44  %1 = select <2 x i1> %0, <2 x float> %a, <2 x float> %b
45  store <2 x float> %1, ptr undef
46  ret void
47}
48
; zero_test: select with an undef condition between an undef vector and
; zeroinitializer, stored through an undef pointer.
; The checks expect the whole select to collapse into a plain store of zero:
;   - i686:   xorps zeroes %xmm0, then movsd stores its low 64 bits.
;   - x86-64: a single "movq $0" immediate store.
; No compare or blend instruction is expected on either triple.
49define void @zero_test() {
50; X86-LABEL: zero_test:
51; X86:       # %bb.0: # %entry
52; X86-NEXT:    xorps %xmm0, %xmm0
53; X86-NEXT:    movsd %xmm0, (%eax)
54; X86-NEXT:    retl
55;
56; X64-LABEL: zero_test:
57; X64:       # %bb.0: # %entry
58; X64-NEXT:    movq $0, (%rax)
59; X64-NEXT:    retq
60entry:
; Both the condition and the "true" operand are undef; only the "false"
; operand (all zeros) is a defined value.
61  %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer
62  store <2 x float> %0, ptr undef
63  ret void
64}
65
; full_test: a longer <2 x float> sequence that chains two compares and two
; selects, working on stack slots instead of arguments.
; Data flow in block B1:
;   %0 = value loaded from the %Cy119 slot (never stored to in this function)
;   %2 = %0 converted to <2 x i32> and back to float
;   %5 = (%0 > 0.0) ? %2 + 1.0 : %2
;   %7 = (%2 == %0) ? %0 : %5
;   %7 is stored to %Cy118, reloaded, and stored again to %Cy11a.
; NOTE(review): this looks like a reduced ceil-style rounding pattern; the
; file does not say so - confirm before relying on that reading.
; The checks expect, on both triples, one blendvps per select (mask moved
; into %xmm0 each time) and two movlps stores of the final value to the
; stack. i686 adjusts %esp by 60 bytes around the body; x86-64 addresses the
; slots at negative offsets from %rsp without adjusting it.
66define void @full_test() {
67; X86-LABEL: full_test:
68; X86:       # %bb.0: # %entry
69; X86-NEXT:    subl $60, %esp
70; X86-NEXT:    .cfi_def_cfa_offset 64
71; X86-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
72; X86-NEXT:    cvttps2dq %xmm2, %xmm0
73; X86-NEXT:    cvtdq2ps %xmm0, %xmm1
74; X86-NEXT:    xorps %xmm0, %xmm0
75; X86-NEXT:    cmpltps %xmm2, %xmm0
76; X86-NEXT:    movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,u,u]
77; X86-NEXT:    addps %xmm1, %xmm3
78; X86-NEXT:    movaps %xmm1, %xmm4
79; X86-NEXT:    blendvps %xmm0, %xmm3, %xmm4
80; X86-NEXT:    cmpeqps %xmm2, %xmm1
81; X86-NEXT:    movaps %xmm1, %xmm0
82; X86-NEXT:    blendvps %xmm0, %xmm2, %xmm4
83; X86-NEXT:    movlps %xmm4, {{[0-9]+}}(%esp)
84; X86-NEXT:    movlps %xmm4, {{[0-9]+}}(%esp)
85; X86-NEXT:    addl $60, %esp
86; X86-NEXT:    .cfi_def_cfa_offset 4
87; X86-NEXT:    retl
88;
89; X64-LABEL: full_test:
90; X64:       # %bb.0: # %entry
91; X64-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
92; X64-NEXT:    cvttps2dq %xmm2, %xmm0
93; X64-NEXT:    cvtdq2ps %xmm0, %xmm1
94; X64-NEXT:    xorps %xmm0, %xmm0
95; X64-NEXT:    cmpltps %xmm2, %xmm0
96; X64-NEXT:    movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,u,u]
97; X64-NEXT:    addps %xmm1, %xmm3
98; X64-NEXT:    movaps %xmm1, %xmm4
99; X64-NEXT:    blendvps %xmm0, %xmm3, %xmm4
100; X64-NEXT:    cmpeqps %xmm2, %xmm1
101; X64-NEXT:    movaps %xmm1, %xmm0
102; X64-NEXT:    blendvps %xmm0, %xmm2, %xmm4
103; X64-NEXT:    movlps %xmm4, -{{[0-9]+}}(%rsp)
104; X64-NEXT:    movlps %xmm4, -{{[0-9]+}}(%rsp)
105; X64-NEXT:    retq
106 entry:
; Stack slots. %Cy300 is allocated but never referenced again in this
; function.
107   %Cy300 = alloca <4 x float>
108   %Cy11a = alloca <2 x float>
109   %Cy118 = alloca <2 x float>
110   %Cy119 = alloca <2 x float>
111   br label %B1
112
113 B1:                                               ; preds = %entry
; Input value: read from a slot that has no preceding store.
114   %0 = load <2 x float>, ptr %Cy119
; Round-trip through a signed integer conversion (float -> i32 -> float).
115   %1 = fptosi <2 x float> %0 to <2 x i32>
116   %2 = sitofp <2 x i32> %1 to <2 x float>
; First compare/select pair: add 1.0 to the round-tripped value in lanes
; where the input is greater than zero.
117   %3 = fcmp ogt <2 x float> %0, zeroinitializer
118   %4 = fadd <2 x float> %2, <float 1.000000e+00, float 1.000000e+00>
119   %5 = select <2 x i1> %3, <2 x float> %4, <2 x float> %2
; Second compare/select pair: keep the original input in lanes where the
; round trip did not change it.
120   %6 = fcmp oeq <2 x float> %2, %0
121   %7 = select <2 x i1> %6, <2 x float> %0, <2 x float> %5
; Store, reload, and store again; the checks show both stores fed directly
; from %xmm4 with no intervening load.
122   store <2 x float> %7, ptr %Cy118
123   %8 = load <2 x float>, ptr %Cy118
124   store <2 x float> %8, ptr %Cy11a
125   ret void
126}
127