xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-extractstore.ll (revision b5b663aac17415625340eb29c8010832bfc4c21c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
3
; Default calling convention: adds two <8 x half> vectors, extracts lane 1,
; stores that half through %p and also returns it.
; The expected assembly builds part of %a from r0/r1, loads %b from the stack,
; and uses vmovx.f16 on s0 before both the vstr.16 store and the return move.
; NOTE(review): the "_sf" suffix presumably denotes the soft-float ABI - confirm.
4define half @extret1_f16_sf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
5; CHECK-LABEL: extret1_f16_sf:
6; CHECK:       @ %bb.0:
7; CHECK-NEXT:    vmov d0, r0, r1
8; CHECK-NEXT:    mov r0, sp
9; CHECK-NEXT:    vldrw.u32 q1, [r0]
10; CHECK-NEXT:    ldr r0, [sp, #16]
11; CHECK-NEXT:    vadd.f16 q0, q0, q1
12; CHECK-NEXT:    vmovx.f16 s0, s0
13; CHECK-NEXT:    vstr.16 s0, [r0]
14; CHECK-NEXT:    vmov r0, s0
15; CHECK-NEXT:    bx lr
16  %c = fadd <8 x half> %a, %b
17  %e = extractelement <8 x half> %c, i32 1
18  store half %e, ptr %p, align 2
19  ret half %e
20}
21
; Default calling convention: adds two <8 x half> vectors, extracts lane 4,
; stores that half through %p and also returns it.
; The expected assembly only materialises d1 (from r2/r3) for %a and reads the
; lane directly from s2 for both the vstr.16 store and the return move, with no
; vmovx step.
22define half @extret4_f16_sf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
23; CHECK-LABEL: extret4_f16_sf:
24; CHECK:       @ %bb.0:
25; CHECK-NEXT:    mov r0, sp
26; CHECK-NEXT:    vmov d1, r2, r3
27; CHECK-NEXT:    vldrw.u32 q1, [r0]
28; CHECK-NEXT:    ldr r0, [sp, #16]
29; CHECK-NEXT:    vadd.f16 q0, q0, q1
30; CHECK-NEXT:    vstr.16 s2, [r0]
31; CHECK-NEXT:    vmov r0, s2
32; CHECK-NEXT:    bx lr
33  %c = fadd <8 x half> %a, %b
34  %e = extractelement <8 x half> %c, i32 4
35  store half %e, ptr %p, align 2
36  ret half %e
37}
38
; arm_aapcs_vfpcc variant of the lane-1 half extract: adds two <8 x half>
; vectors, stores lane 1 through %p and returns it.
; The expected assembly adds q0 and q1 in place, applies vmovx.f16 to s0, and
; stores from s0 via the pointer in r0; no separate return move is expected.
39define arm_aapcs_vfpcc half @extret1_f16_hf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
40; CHECK-LABEL: extret1_f16_hf:
41; CHECK:       @ %bb.0:
42; CHECK-NEXT:    vadd.f16 q0, q0, q1
43; CHECK-NEXT:    vmovx.f16 s0, s0
44; CHECK-NEXT:    vstr.16 s0, [r0]
45; CHECK-NEXT:    bx lr
46  %c = fadd <8 x half> %a, %b
47  %e = extractelement <8 x half> %c, i32 1
48  store half %e, ptr %p, align 2
49  ret half %e
50}
51
; arm_aapcs_vfpcc variant of the lane-4 half extract: adds two <8 x half>
; vectors, stores lane 4 through %p and returns it.
; The expected assembly copies s2 into s0 with vmov.f32 and stores the half
; from s2 with vstr.16.
52define arm_aapcs_vfpcc half @extret4_f16_hf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
53; CHECK-LABEL: extret4_f16_hf:
54; CHECK:       @ %bb.0:
55; CHECK-NEXT:    vadd.f16 q0, q0, q1
56; CHECK-NEXT:    vmov.f32 s0, s2
57; CHECK-NEXT:    vstr.16 s2, [r0]
58; CHECK-NEXT:    bx lr
59  %c = fadd <8 x half> %a, %b
60  %e = extractelement <8 x half> %c, i32 4
61  store half %e, ptr %p, align 2
62  ret half %e
63}
64
; arm_aapcs_vfpcc: adds two <8 x half> vectors, stores lane 1 through %p, and
; returns a vector with that lane splatted to all eight elements
; (insertelement into lane 0 followed by a zeroinitializer shuffle mask).
; The expected assembly moves the lane to r1 with vmov.u16, then uses r1 for
; both the vdup.16 splat and the strh store.
65define arm_aapcs_vfpcc <8 x half> @extret1_v8f16_hf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
66; CHECK-LABEL: extret1_v8f16_hf:
67; CHECK:       @ %bb.0:
68; CHECK-NEXT:    vadd.f16 q0, q0, q1
69; CHECK-NEXT:    vmov.u16 r1, q0[1]
70; CHECK-NEXT:    vdup.16 q0, r1
71; CHECK-NEXT:    strh r1, [r0]
72; CHECK-NEXT:    bx lr
73  %c = fadd <8 x half> %a, %b
74  %e = extractelement <8 x half> %c, i32 1
75  store half %e, ptr %p, align 2
76  %i = insertelement <8 x half> undef, half %e, i32 0
77  %s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
78  ret <8 x half> %s
79}
80
; arm_aapcs_vfpcc: adds two <8 x half> vectors, stores lane 4 through %p, and
; returns a vector with that lane splatted to all eight elements
; (insertelement into lane 0 followed by a zeroinitializer shuffle mask).
; The expected assembly moves the lane to r1 with vmov.u16, then uses r1 for
; both the vdup.16 splat and the strh store.
81define arm_aapcs_vfpcc <8 x half> @extret4_v8f16_hf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
82; CHECK-LABEL: extret4_v8f16_hf:
83; CHECK:       @ %bb.0:
84; CHECK-NEXT:    vadd.f16 q0, q0, q1
85; CHECK-NEXT:    vmov.u16 r1, q0[4]
86; CHECK-NEXT:    vdup.16 q0, r1
87; CHECK-NEXT:    strh r1, [r0]
88; CHECK-NEXT:    bx lr
89  %c = fadd <8 x half> %a, %b
90  %e = extractelement <8 x half> %c, i32 4
91  store half %e, ptr %p, align 2
92  %i = insertelement <8 x half> undef, half %e, i32 0
93  %s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
94  ret <8 x half> %s
95}
96
97
; Default calling convention: adds two <4 x float> vectors, extracts lane 1,
; stores that float through %p and also returns it.
; The expected assembly builds d0 from r0/r1, loads %b from the stack, and
; reads the lane from s1 for both the return move into r0 and the vstr store.
; NOTE(review): the "_sf" suffix presumably denotes the soft-float ABI - confirm.
98define float @extret1_f32_sf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
99; CHECK-LABEL: extret1_f32_sf:
100; CHECK:       @ %bb.0:
101; CHECK-NEXT:    vmov d0, r0, r1
102; CHECK-NEXT:    mov r0, sp
103; CHECK-NEXT:    vldrw.u32 q1, [r0]
104; CHECK-NEXT:    ldr r1, [sp, #16]
105; CHECK-NEXT:    vadd.f32 q0, q0, q1
106; CHECK-NEXT:    vmov r0, s1
107; CHECK-NEXT:    vstr s1, [r1]
108; CHECK-NEXT:    bx lr
109  %c = fadd <4 x float> %a, %b
110  %e = extractelement <4 x float> %c, i32 1
111  store float %e, ptr %p, align 4
112  ret float %e
113}
114
; Default calling convention: adds two <4 x float> vectors, extracts lane 2,
; stores that float through %p and also returns it.
; The expected assembly only materialises d1 (from r2/r3) for %a and reads the
; lane from s2 for both the return move into r0 and the vstr store.
115define float @extret2_f32_sf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
116; CHECK-LABEL: extret2_f32_sf:
117; CHECK:       @ %bb.0:
118; CHECK-NEXT:    mov r0, sp
119; CHECK-NEXT:    vmov d1, r2, r3
120; CHECK-NEXT:    vldrw.u32 q1, [r0]
121; CHECK-NEXT:    ldr r1, [sp, #16]
122; CHECK-NEXT:    vadd.f32 q0, q0, q1
123; CHECK-NEXT:    vmov r0, s2
124; CHECK-NEXT:    vstr s2, [r1]
125; CHECK-NEXT:    bx lr
126  %c = fadd <4 x float> %a, %b
127  %e = extractelement <4 x float> %c, i32 2
128  store float %e, ptr %p, align 4
129  ret float %e
130}
131
; arm_aapcs_vfpcc variant of the lane-1 float extract: adds two <4 x float>
; vectors, stores lane 1 through %p and returns it.
; The expected assembly copies s1 into s0 with vmov.f32 and stores from s1
; via the pointer in r0.
132define arm_aapcs_vfpcc float @extret1_f32_hf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
133; CHECK-LABEL: extret1_f32_hf:
134; CHECK:       @ %bb.0:
135; CHECK-NEXT:    vadd.f32 q0, q0, q1
136; CHECK-NEXT:    vmov.f32 s0, s1
137; CHECK-NEXT:    vstr s1, [r0]
138; CHECK-NEXT:    bx lr
139  %c = fadd <4 x float> %a, %b
140  %e = extractelement <4 x float> %c, i32 1
141  store float %e, ptr %p, align 4
142  ret float %e
143}
144
145
; arm_aapcs_vfpcc variant of the lane-2 float extract: adds two <4 x float>
; vectors, stores lane 2 through %p and returns it.
; The expected assembly copies s2 into s0 with vmov.f32 and stores from s2
; via the pointer in r0.
146define arm_aapcs_vfpcc float @extret2_f32_hf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
147; CHECK-LABEL: extret2_f32_hf:
148; CHECK:       @ %bb.0:
149; CHECK-NEXT:    vadd.f32 q0, q0, q1
150; CHECK-NEXT:    vmov.f32 s0, s2
151; CHECK-NEXT:    vstr s2, [r0]
152; CHECK-NEXT:    bx lr
153  %c = fadd <4 x float> %a, %b
154  %e = extractelement <4 x float> %c, i32 2
155  store float %e, ptr %p, align 4
156  ret float %e
157}
158
; arm_aapcs_vfpcc: adds two <4 x float> vectors, stores lane 1 through %p, and
; returns a vector with that lane splatted to all four elements
; (insertelement into lane 0 followed by a zeroinitializer shuffle mask).
; The expected assembly writes the sum to q1, stores the lane from s5 with
; vstr, and moves s5 to r1 to feed the vdup.32 that produces the result in q0.
159define arm_aapcs_vfpcc <4 x float> @extret1_v4f32_hf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
160; CHECK-LABEL: extret1_v4f32_hf:
161; CHECK:       @ %bb.0:
162; CHECK-NEXT:    vadd.f32 q1, q0, q1
163; CHECK-NEXT:    vmov r1, s5
164; CHECK-NEXT:    vstr s5, [r0]
165; CHECK-NEXT:    vdup.32 q0, r1
166; CHECK-NEXT:    bx lr
167  %c = fadd <4 x float> %a, %b
168  %e = extractelement <4 x float> %c, i32 1
169  store float %e, ptr %p, align 4
170  %i = insertelement <4 x float> undef, float %e, i32 0
171  %s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
172  ret <4 x float> %s
173}
174
; arm_aapcs_vfpcc: adds two <4 x float> vectors, stores lane 2 through %p, and
; returns a vector with that lane splatted to all four elements
; (insertelement into lane 0 followed by a zeroinitializer shuffle mask).
; The expected assembly writes the sum to q1, stores the lane from s6 with
; vstr, and moves s6 to r1 to feed the vdup.32 that produces the result in q0.
175define arm_aapcs_vfpcc <4 x float> @extret2_v4f32_hf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
176; CHECK-LABEL: extret2_v4f32_hf:
177; CHECK:       @ %bb.0:
178; CHECK-NEXT:    vadd.f32 q1, q0, q1
179; CHECK-NEXT:    vmov r1, s6
180; CHECK-NEXT:    vstr s6, [r0]
181; CHECK-NEXT:    vdup.32 q0, r1
182; CHECK-NEXT:    bx lr
183  %c = fadd <4 x float> %a, %b
184  %e = extractelement <4 x float> %c, i32 2
185  store float %e, ptr %p, align 4
186  %i = insertelement <4 x float> undef, float %e, i32 0
187  %s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
188  ret <4 x float> %s
189}
190