; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O3 -disable-peephole -mtriple=i686-apple-macosx10.9.0 -mcpu=corei7-avx -mattr=+avx | FileCheck %s --check-prefix=X86
; RUN: llc < %s -O3 -disable-peephole -mtriple=x86_64-apple-macosx10.9.0 -mcpu=corei7-avx -mattr=+avx | FileCheck %s --check-prefix=X64
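
; Each test applies a logical op to a 256-bit load of <8 x float>, but only
; element 0 of the result is used: the wide load is narrowed (to a scalar
; vmovss in test1, to a 128-bit vmovaps in tests 2-4) and the splat constant
; is folded into the AVX logical instruction as a memory operand.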
; Function Attrs: nounwind ssp uwtable
define void @test1(ptr %A, ptr %C) #0 {
; X86-LABEL: test1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       ## %bb.0:
; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vmovss %xmm0, (%rsi)
; X64-NEXT:    retq
  %tmp2 = load <8 x float>, ptr %A, align 32
  %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
  %tmp4 = and <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
  %tmp6 = extractelement <8 x float> %tmp5, i32 0
  store float %tmp6, ptr %C
  ret void
}
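
; test2: the OR variant; the 256-bit load is narrowed to a 128-bit vmovaps
; and the splat constant is folded into vorps.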
; Function Attrs: nounwind ssp uwtable
define void @test2(ptr %A, ptr %C) #0 {
; X86-LABEL: test2:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovaps (%ecx), %xmm0
; X86-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       ## %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm0
; X64-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vmovss %xmm0, (%rsi)
; X64-NEXT:    retq
  %tmp2 = load <8 x float>, ptr %A, align 32
  %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
  %tmp4 = or <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
  %tmp6 = extractelement <8 x float> %tmp5, i32 0
  store float %tmp6, ptr %C
  ret void
}
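
; test3: the XOR variant; expects the splat constant folded into vxorps.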
; Function Attrs: nounwind ssp uwtable
define void @test3(ptr %A, ptr %C) #0 {
; X86-LABEL: test3:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovaps (%ecx), %xmm0
; X86-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       ## %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm0
; X64-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vmovss %xmm0, (%rsi)
; X64-NEXT:    retq
  %tmp2 = load <8 x float>, ptr %A, align 32
  %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
  %tmp4 = xor <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
  %tmp6 = extractelement <8 x float> %tmp5, i32 0
  store float %tmp6, ptr %C
  ret void
}
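
; test4: NOT followed by AND, which should select vandnps with the constant
; folded.
; Function Attrs: nounwind ssp uwtable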
define void @test4(ptr %A, ptr %C) #0 {
; X86-LABEL: test4:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovaps (%ecx), %xmm0
; X86-NEXT:    vandnps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       ## %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm0
; X64-NEXT:    vandnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vmovss %xmm0, (%rsi)
; X64-NEXT:    retq
  %tmp2 = load <8 x float>, ptr %A, align 32
  %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
  %tmp4 = xor <8 x i32> %tmp3, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %tmp5 = and <8 x i32> %tmp4, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %tmp6 = bitcast <8 x i32> %tmp5 to <8 x float>
  %tmp7 = extractelement <8 x float> %tmp6, i32 0
  store float %tmp7, ptr %C
  ret void
}
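
; Attribute group referenced by the functions above; a minimal definition
; (assumed from the "Function Attrs" comments) so that #0 resolves.
attributes #0 = { nounwind ssp uwtable }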