xref: /llvm-project/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
3
; Loads a <4 x i16> vector, converts every lane to float with uitofp, and
; returns the sum of lanes 0, 1 and 2 (lane 3 is never extracted).
; The expected output widens the whole vector with an unsigned vmovl.u16 and
; converts it with one vector vcvt.f32.u32 before the two scalar adds.
define float @f(ptr nocapture %in) {
; CHECK-LABEL: f:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.16 {d16}, [r0:64]
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vcvt.f32.u32 q0, q8
; CHECK-NEXT:    vadd.f32 s4, s0, s1
; CHECK-NEXT:    vadd.f32 s0, s4, s2
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <4 x i16>, ptr %in
  %2 = uitofp <4 x i16> %1 to <4 x float>
  %3 = extractelement <4 x float> %2, i32 0
  %4 = extractelement <4 x float> %2, i32 1
  %5 = extractelement <4 x float> %2, i32 2

  %6 = fadd float %3, %4
  %7 = fadd float %6, %5

  ret float %7
}
25
; Loads a <4 x i16> vector, extracts lane 0 and converts that single i16 to
; float with uitofp.
; The expected output reads the lane with the unsigned form vmov.u16 and then
; converts with the scalar vcvt.f32.u32.
define float @g(ptr nocapture %in) {
; CHECK-LABEL: g:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.u16 r0, d16[0]
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.u32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <4 x i16>, ptr %in
  %2 = extractelement <4 x i16> %1, i32 0
  %3 = uitofp i16 %2 to float
  ret float %3
}
40
; Make sure we generate zext from <4 x i8> to <4 x i32>.
; The IR extracts each of the four i8 lanes, zero-extends it to i32 and
; inserts it into the same lane of the result vector. The expected output
; widens the loaded bytes once with the unsigned vmovl.u8, then moves each
; lane to r0-r3 with vmov.u16 followed by uxtb.
define <4 x i32> @h(ptr %in) {
; CHECK-LABEL: h:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    vmovl.u8 q8, d16
; CHECK-NEXT:    vmov.u16 r0, d16[0]
; CHECK-NEXT:    vmov.u16 r1, d16[1]
; CHECK-NEXT:    vmov.u16 r2, d16[2]
; CHECK-NEXT:    vmov.u16 r3, d16[3]
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    uxtb r1, r1
; CHECK-NEXT:    uxtb r2, r2
; CHECK-NEXT:    uxtb r3, r3
; CHECK-NEXT:    bx lr
  %1 = load <4 x i8>, ptr %in, align 4
  %2 = extractelement <4 x i8> %1, i32 0
  %3 = zext i8 %2 to i32
  %4 = insertelement <4 x i32> undef, i32 %3, i32 0
  %5 = extractelement <4 x i8> %1, i32 1
  %6 = zext i8 %5 to i32
  %7 = insertelement <4 x i32> %4, i32 %6, i32 1
  %8 = extractelement <4 x i8> %1, i32 2
  %9 = zext i8 %8 to i32
  %10 = insertelement <4 x i32> %7, i32 %9, i32 2
  %11 = extractelement <4 x i8> %1, i32 3
  %12 = zext i8 %11 to i32
  %13 = insertelement <4 x i32> %10, i32 %12, i32 3
  ret <4 x i32> %13
}
71
; Loads a <4 x i16> vector, extracts lane 0 and converts that single i16 to
; float with sitofp.
; Because the conversion is signed, the expected output reads the lane with
; the sign-extending vmov.s16 and converts with vcvt.f32.s32.
define float @i(ptr nocapture %in) {
; CHECK-LABEL: i:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.s16 r0, d16[0]
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.s32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <4 x i16>, ptr %in
  %2 = extractelement <4 x i16> %1, i32 0
  %3 = sitofp i16 %2 to float
  ret float %3
}
86
; Loads an <8 x i8> vector, extracts the last lane (index 7) and converts that
; single i8 to float with uitofp.
; The expected output reads the lane with the unsigned form vmov.u8 and then
; converts with the scalar vcvt.f32.u32.
define float @j(ptr nocapture %in) {
; CHECK-LABEL: j:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.u8 r0, d16[7]
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.u32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <8 x i8>, ptr %in
  %2 = extractelement <8 x i8> %1, i32 7
  %3 = uitofp i8 %2 to float
  ret float %3
}
101
; Loads an <8 x i8> vector, extracts the last lane (index 7) and converts that
; single i8 to float with sitofp.
; Because the conversion is signed, the expected output reads the lane with
; the sign-extending vmov.s8 and converts with vcvt.f32.s32.
define float @k(ptr nocapture %in) {
; CHECK-LABEL: k:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.s8 r0, d16[7]
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.s32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <8 x i8>, ptr %in
  %2 = extractelement <8 x i8> %1, i32 7
  %3 = sitofp i8 %2 to float
  ret float %3
}
116
; Masks every lane of the <4 x i16> argument with 3, extracts lanes 3 and 0,
; converts both to float with sitofp and returns their sum.
; The mask leaves only the low two bits of each lane, so the sign bit of each
; extracted i16 is zero. The expected output accordingly reads the lanes with
; the unsigned vmov.u16, applies the mask as a scalar "and ... #3", and still
; converts with vcvt.f32.s32.
define float @KnownUpperZero(<4 x i16> %v) {
; CHECK-LABEL: KnownUpperZero:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vmov.u16 r0, d16[0]
; CHECK-NEXT:    vmov.u16 r1, d16[3]
; CHECK-NEXT:    and r0, r0, #3
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    and r0, r1, #3
; CHECK-NEXT:    vmov s2, r0
; CHECK-NEXT:    vcvt.f32.s32 s0, s0
; CHECK-NEXT:    vcvt.f32.s32 s2, s2
; CHECK-NEXT:    vadd.f32 s0, s2, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = and <4 x i16> %v, <i16 3,i16 3,i16 3,i16 3>
  %2 = extractelement <4 x i16> %1, i32 3
  %3 = extractelement <4 x i16> %1, i32 0
  %sinf1 = sitofp i16 %2 to float
  %sinf2 = sitofp i16 %3 to float
  %sum =   fadd float %sinf1, %sinf2
  ret float %sum
}
140