xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-intrinsics/dup.ll (revision fa15255d8af53126bbcb017f2fb6f9961e8574df)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
3
; Unpredicated f16 splat. The half scalar is taken from the low 16 bits of the
; float argument (bitcast -> trunc -> bitcast) and broadcast to all eight lanes
; with insertelement + shufflevector. The checks expect one GPR move followed
; by a single vdup.16.
define arm_aapcs_vfpcc <8 x half> @test_vdupq_n_f16(float %a.coerce) {
; CHECK-LABEL: test_vdupq_n_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %bits = bitcast float %a.coerce to i32
  %lo16 = trunc i32 %bits to i16
  %scalar = bitcast i16 %lo16 to half
  %lane0 = insertelement <8 x half> undef, half %scalar, i32 0
  %bcast = shufflevector <8 x half> %lane0, <8 x half> undef, <8 x i32> zeroinitializer
  ret <8 x half> %bcast
}
18
; Unpredicated f32 splat: the float argument is placed in lane 0 and shuffled
; across all four lanes. The checks expect a move to a GPR and one vdup.32.
define arm_aapcs_vfpcc <4 x float> @test_vdupq_n_f32(float %a) {
; CHECK-LABEL: test_vdupq_n_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vdup.32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %lane0 = insertelement <4 x float> undef, float %a, i32 0
  %bcast = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %bcast
}
30
; Unpredicated splat of a sign-extended i8 argument across sixteen lanes.
; The checks expect a single vdup.8 straight from r0.
define arm_aapcs_vfpcc <16 x i8> @test_vdupq_n_s8(i8 signext %a) {
; CHECK-LABEL: test_vdupq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %lane0 = insertelement <16 x i8> undef, i8 %a, i32 0
  %bcast = shufflevector <16 x i8> %lane0, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %bcast
}
41
; Unpredicated splat of a sign-extended i16 argument across eight lanes.
; The checks expect a single vdup.16 straight from r0.
define arm_aapcs_vfpcc <8 x i16> @test_vdupq_n_s16(i16 signext %a) {
; CHECK-LABEL: test_vdupq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %lane0 = insertelement <8 x i16> undef, i16 %a, i32 0
  %bcast = shufflevector <8 x i16> %lane0, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %bcast
}
52
; Unpredicated splat of an i32 argument across four lanes.
; The checks expect a single vdup.32 straight from r0.
define arm_aapcs_vfpcc <4 x i32> @test_vdupq_n_s32(i32 %a) {
; CHECK-LABEL: test_vdupq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %lane0 = insertelement <4 x i32> undef, i32 %a, i32 0
  %bcast = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %bcast
}
63
; Unpredicated splat of a zero-extended i8 argument across sixteen lanes.
; The checks expect a single vdup.8 straight from r0.
define arm_aapcs_vfpcc <16 x i8> @test_vdupq_n_u8(i8 zeroext %a) {
; CHECK-LABEL: test_vdupq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %lane0 = insertelement <16 x i8> undef, i8 %a, i32 0
  %bcast = shufflevector <16 x i8> %lane0, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %bcast
}
74
; Unpredicated splat of a zero-extended i16 argument across eight lanes.
; The checks expect a single vdup.16 straight from r0.
define arm_aapcs_vfpcc <8 x i16> @test_vdupq_n_u16(i16 zeroext %a) {
; CHECK-LABEL: test_vdupq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %lane0 = insertelement <8 x i16> undef, i16 %a, i32 0
  %bcast = shufflevector <8 x i16> %lane0, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %bcast
}
85
; Unpredicated splat of an i32 argument across four lanes (same IR shape as
; the s32 case). The checks expect a single vdup.32 straight from r0.
define arm_aapcs_vfpcc <4 x i32> @test_vdupq_n_u32(i32 %a) {
; CHECK-LABEL: test_vdupq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %lane0 = insertelement <4 x i32> undef, i32 %a, i32 0
  %bcast = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %bcast
}
96
; Predicated f16 splat. The half scalar comes from the low 16 bits of the float
; argument; %p is zero-extended and turned into an <8 x i1> mask through
; llvm.arm.mve.pred.i2v. A select then picks the splat or %inactive per lane.
; The checks expect the select to become a VPST-predicated vdupt.16 that
; writes over the register holding %inactive.
define arm_aapcs_vfpcc <8 x half> @test_vdupq_m_n_f16(<8 x half> %inactive, float %a.coerce, i16 zeroext %p) {
; CHECK-LABEL: test_vdupq_m_n_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vdupt.16 q0, r1
; CHECK-NEXT:    bx lr
entry:
  %bits = bitcast float %a.coerce to i32
  %lo16 = trunc i32 %bits to i16
  %scalar = bitcast i16 %lo16 to half
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %lane0 = insertelement <8 x half> undef, half %scalar, i32 0
  %bcast = shufflevector <8 x half> %lane0, <8 x half> undef, <8 x i32> zeroinitializer
  %merged = select <8 x i1> %mask, <8 x half> %bcast, <8 x half> %inactive
  ret <8 x half> %merged
}
116
; Predicated f32 splat: %p is zero-extended and converted to a <4 x i1> mask,
; then a select chooses between the broadcast of %a and %inactive per lane.
; The checks expect a GPR move, a vmsr into p0, and a VPST-predicated vdupt.32.
define arm_aapcs_vfpcc <4 x float> @test_vdupq_m_n_f32(<4 x float> %inactive, float %a, i16 zeroext %p) {
; CHECK-LABEL: test_vdupq_m_n_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vdupt.32 q0, r1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %lane0 = insertelement <4 x float> undef, float %a, i32 0
  %bcast = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  %merged = select <4 x i1> %mask, <4 x float> %bcast, <4 x float> %inactive
  ret <4 x float> %merged
}
133
; Predicated splat of a sign-extended i8: %p becomes a <16 x i1> mask and a
; select merges the broadcast of %a with %inactive.
; The checks expect vmsr p0 / vpst / vdupt.8.
define arm_aapcs_vfpcc <16 x i8> @test_vdupq_m_n_s8(<16 x i8> %inactive, i8 signext %a, i16 zeroext %p) {
; CHECK-LABEL: test_vdupq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vdupt.8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %lane0 = insertelement <16 x i8> undef, i8 %a, i32 0
  %bcast = shufflevector <16 x i8> %lane0, <16 x i8> undef, <16 x i32> zeroinitializer
  %merged = select <16 x i1> %mask, <16 x i8> %bcast, <16 x i8> %inactive
  ret <16 x i8> %merged
}
149
; Predicated splat of a sign-extended i16: %p becomes an <8 x i1> mask and a
; select merges the broadcast of %a with %inactive.
; The checks expect vmsr p0 / vpst / vdupt.16.
define arm_aapcs_vfpcc <8 x i16> @test_vdupq_m_n_s16(<8 x i16> %inactive, i16 signext %a, i16 zeroext %p) {
; CHECK-LABEL: test_vdupq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vdupt.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %lane0 = insertelement <8 x i16> undef, i16 %a, i32 0
  %bcast = shufflevector <8 x i16> %lane0, <8 x i16> undef, <8 x i32> zeroinitializer
  %merged = select <8 x i1> %mask, <8 x i16> %bcast, <8 x i16> %inactive
  ret <8 x i16> %merged
}
165
; Predicated splat of an i32: %p becomes a <4 x i1> mask and a select merges
; the broadcast of %a with %inactive.
; The checks expect vmsr p0 / vpst / vdupt.32.
define arm_aapcs_vfpcc <4 x i32> @test_vdupq_m_n_s32(<4 x i32> %inactive, i32 %a, i16 zeroext %p) {
; CHECK-LABEL: test_vdupq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vdupt.32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %lane0 = insertelement <4 x i32> undef, i32 %a, i32 0
  %bcast = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  %merged = select <4 x i1> %mask, <4 x i32> %bcast, <4 x i32> %inactive
  ret <4 x i32> %merged
}
181
; Predicated splat of a zero-extended i8: %p becomes a <16 x i1> mask and a
; select merges the broadcast of %a with %inactive.
; The checks expect vmsr p0 / vpst / vdupt.8.
define arm_aapcs_vfpcc <16 x i8> @test_vdupq_m_n_u8(<16 x i8> %inactive, i8 zeroext %a, i16 zeroext %p) {
; CHECK-LABEL: test_vdupq_m_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vdupt.8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %lane0 = insertelement <16 x i8> undef, i8 %a, i32 0
  %bcast = shufflevector <16 x i8> %lane0, <16 x i8> undef, <16 x i32> zeroinitializer
  %merged = select <16 x i1> %mask, <16 x i8> %bcast, <16 x i8> %inactive
  ret <16 x i8> %merged
}
197
; Predicated splat of a zero-extended i16: %p becomes an <8 x i1> mask and a
; select merges the broadcast of %a with %inactive.
; The checks expect vmsr p0 / vpst / vdupt.16.
define arm_aapcs_vfpcc <8 x i16> @test_vdupq_m_n_u16(<8 x i16> %inactive, i16 zeroext %a, i16 zeroext %p) {
; CHECK-LABEL: test_vdupq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vdupt.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %lane0 = insertelement <8 x i16> undef, i16 %a, i32 0
  %bcast = shufflevector <8 x i16> %lane0, <8 x i16> undef, <8 x i32> zeroinitializer
  %merged = select <8 x i1> %mask, <8 x i16> %bcast, <8 x i16> %inactive
  ret <8 x i16> %merged
}
213
; Predicated splat of an i32 (same IR shape as the s32 case): %p becomes a
; <4 x i1> mask and a select merges the broadcast of %a with %inactive.
; The checks expect vmsr p0 / vpst / vdupt.32.
define arm_aapcs_vfpcc <4 x i32> @test_vdupq_m_n_u32(<4 x i32> %inactive, i32 %a, i16 zeroext %p) {
; CHECK-LABEL: test_vdupq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vdupt.32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %lane0 = insertelement <4 x i32> undef, i32 %a, i32 0
  %bcast = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  %merged = select <4 x i1> %mask, <4 x i32> %bcast, <4 x i32> %inactive
  ret <4 x i32> %merged
}
229
; MVE predicate-conversion intrinsics used by the predicated tests above:
; each takes an i32 and returns an <N x i1> mask. Listed by lane count.
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
233