; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

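; This file tests formation of the MVE halving vector instructions (VHADD,
; VHSUB, VRHADD), which compute (x + y) >> 1, (x - y) >> 1 and
; (x + y + 1) >> 1 without the intermediate result wrapping. The plain
; patterns below carry no no-wrap flags, so the intermediate value may wrap,
; the halving instructions cannot be used, and the add and shift remain
; separate.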
define arm_aapcs_vfpcc <16 x i8> @vhadds_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhadds_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vshr.s8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %half = ashr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vhaddu_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhaddu_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %half = lshr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhadds_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhadds_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vshr.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %half = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhaddu_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhaddu_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vshr.u16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %half = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhadds_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhadds_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vshr.s32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %half = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhaddu_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhaddu_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vshr.u32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %half = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
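; The same holds for subtraction: without no-wrap flags the sub and shift
; stay separate.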
define arm_aapcs_vfpcc <16 x i8> @vhsubs_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhsubs_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i8 q0, q0, q1
; CHECK-NEXT:    vshr.s8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <16 x i8> %x, %y
  %half = ashr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vhsubu_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhsubu_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i8 q0, q0, q1
; CHECK-NEXT:    vshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <16 x i8> %x, %y
  %half = lshr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhsubs_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhsubs_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i16 q0, q0, q1
; CHECK-NEXT:    vshr.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <8 x i16> %x, %y
  %half = ashr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhsubu_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhsubu_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i16 q0, q0, q1
; CHECK-NEXT:    vshr.u16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <8 x i16> %x, %y
  %half = lshr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhsubs_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhsubs_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i32 q0, q0, q1
; CHECK-NEXT:    vshr.s32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <4 x i32> %x, %y
  %half = ashr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhsubu_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhsubu_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i32 q0, q0, q1
; CHECK-NEXT:    vshr.u32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <4 x i32> %x, %y
  %half = lshr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}

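; With nsw on the signed patterns (or nuw on the unsigned ones) the add/sub
; cannot wrap, so add/sub + shift folds to a single VHADD/VHSUB.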
define arm_aapcs_vfpcc <16 x i8> @vhadds_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhadds_v16i8_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <16 x i8> %x, %y
  %half = ashr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vhaddu_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhaddu_v16i8_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <16 x i8> %x, %y
  %half = lshr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhadds_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhadds_v8i16_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <8 x i16> %x, %y
  %half = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhaddu_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhaddu_v8i16_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <8 x i16> %x, %y
  %half = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhadds_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhadds_v4i32_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <4 x i32> %x, %y
  %half = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhaddu_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhaddu_v4i32_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <4 x i32> %x, %y
  %half = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <16 x i8> @vhsubs_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhsubs_v16i8_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <16 x i8> %x, %y
  %half = ashr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vhsubu_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhsubu_v16i8_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nuw <16 x i8> %x, %y
  %half = lshr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhsubs_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhsubs_v8i16_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <8 x i16> %x, %y
  %half = ashr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhsubu_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhsubu_v8i16_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nuw <8 x i16> %x, %y
  %half = lshr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhsubs_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhsubs_v4i32_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <4 x i32> %x, %y
  %half = ashr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhsubu_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhsubu_v4i32_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nuw <4 x i32> %x, %y
  %half = lshr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
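; Rounding halving adds: add, add 1, shift right by one. With no no-wrap
; flags nothing can be folded; the rounding constant is materialised in a
; GPR and added with a separate VADD.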
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i8 q0, q0, r0
; CHECK-NEXT:    vshr.s8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i8 q0, q0, r0
; CHECK-NEXT:    vshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i16 q0, q0, r0
; CHECK-NEXT:    vshr.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i16 q0, q0, r0
; CHECK-NEXT:    vshr.u16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vshr.s32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vshr.u32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
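; A no-wrap flag on only the x + y add is not enough: the rounding + 1 may
; still wrap, so no VRHADD (or VHADD) is formed.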
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_nwop(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i8 q0, q0, r0
; CHECK-NEXT:    vshr.s8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nsw <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_nwop(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i8 q0, q0, r0
; CHECK-NEXT:    vshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nuw <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_nwop(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i16 q0, q0, r0
; CHECK-NEXT:    vshr.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nsw <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_nwop(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i16 q0, q0, r0
; CHECK-NEXT:    vshr.u16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nuw <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_nwop(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vshr.s32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nsw <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_nwop(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vshr.u32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nuw <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
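; A no-wrap flag on only the rounding add lets the + 1 and the shift fold
; into a VHADD with the constant 1 as a scalar operand, but the initial add
; stays separate.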
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_nwrnd(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.s8 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add nsw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_nwrnd(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.u8 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add nuw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_nwrnd(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.s16 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_nwrnd(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.u16 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add nuw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_nwrnd(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.s32 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add nsw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_nwrnd(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.u32 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add nuw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
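; With no-wrap flags on both adds the whole sequence folds to a single
; VRHADD.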
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_both_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <16 x i8> %x, %y
  %round = add nsw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_both_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <16 x i8> %x, %y
  %round = add nuw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_both_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <8 x i16> %x, %y
  %round = add nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_both_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <8 x i16> %x, %y
  %round = add nuw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_both_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <4 x i32> %x, %y
  %round = add nsw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_both_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <4 x i32> %x, %y
  %round = add nuw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}