; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

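; The "floor" tests check that (zext(x) + zext(y)) >> 1 is selected as a
; vaaddu with vxrm set to 2 (rdn, round-down), i.e. a floor averaging add.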
define <8 x i8> @vaaddu_vv_v8i8_floor(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i8> @vaaddu_vx_v8i8_floor(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: vaaddu_vx_v8i8_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yhead = insertelement <8 x i8> poison, i8 %y, i32 0
  %ysplat = shufflevector <8 x i8> %yhead, <8 x i8> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i8> %ysplat to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, splat (i16 1)
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

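; Test where the operands are sign extended instead: this selects the signed
; vaadd.vv rather than vaaddu.vv.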
define <8 x i8> @vaaddu_vv_v8i8_floor_sexti16(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_floor_sexti16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = sext <8 x i8> %x to <8 x i16>
  %yzv = sext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

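; Test where the operands are zero extended through i32 rather than i16; the
; pattern still matches.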
define <8 x i8> @vaaddu_vv_v8i8_floor_zexti32(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_floor_zexti32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i32>
  %yzv = zext <8 x i8> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i8>
  ret <8 x i8> %ret
}

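; Test where the shift amount is 2. This is not an averaging add, so it is
; lowered as a widening add followed by a narrowing shift.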
define <8 x i8> @vaaddu_vv_v8i8_floor_lshr2(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_floor_lshr2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    vnsrl.wi v8, v10, 2
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

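; The same floor pattern at wider element types.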
define <8 x i16> @vaaddu_vv_v8i16_floor(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vaaddu_vv_v8i16_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yzv = zext <8 x i16> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i16> @vaaddu_vx_v8i16_floor(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: vaaddu_vx_v8i16_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yhead = insertelement <8 x i16> poison, i16 %y, i16 0
  %ysplat = shufflevector <8 x i16> %yhead, <8 x i16> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i16> %ysplat to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %div = lshr <8 x i32> %add, splat (i32 1)
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i32> @vaaddu_vv_v8i32_floor(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: vaaddu_vv_v8i32_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yzv = zext <8 x i32> %y to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %div = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

define <8 x i32> @vaaddu_vx_v8i32_floor(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: vaaddu_vx_v8i32_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yhead = insertelement <8 x i32> poison, i32 %y, i32 0
  %ysplat = shufflevector <8 x i32> %yhead, <8 x i32> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i32> %ysplat to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %div = lshr <8 x i64> %add, splat (i64 1)
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

define <8 x i64> @vaaddu_vv_v8i64_floor(<8 x i64> %x, <8 x i64> %y) {
; CHECK-LABEL: vaaddu_vv_v8i64_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yzv = zext <8 x i64> %y to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %div = lshr <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}

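; Test with i1 elements: the masks are expanded to i8 vectors of 0/1 before
; the averaging add, and the result is truncated back to a mask.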
define <8 x i1> @vaaddu_vv_v8i1_floor(<8 x i1> %x, <8 x i1> %y) {
; CHECK-LABEL: vaaddu_vv_v8i1_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v10, v9, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v9, 1, v0
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v10, v8
; CHECK-NEXT:    vand.vi v8, v8, 1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i1> %x to <8 x i8>
  %yzv = zext <8 x i1> %y to <8 x i8>
  %add = add nuw nsw <8 x i8> %xzv, %yzv
  %div = lshr <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %ret = trunc <8 x i8> %div to <8 x i1>
  ret <8 x i1> %ret
}

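; On RV32 the i64 scalar does not fit in a single GPR, so the splat is
; materialized through the stack with a zero-strided load and vaaddu.vv is
; used; RV64 uses vaaddu.vx directly.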
define <8 x i64> @vaaddu_vx_v8i64_floor(<8 x i64> %x, i64 %y) {
; RV32-LABEL: vaaddu_vx_v8i64_floor:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    csrwi vxrm, 2
; RV32-NEXT:    vaaddu.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vaaddu_vx_v8i64_floor:
; RV64:       # %bb.0:
; RV64-NEXT:    csrwi vxrm, 2
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vaaddu.vx v8, v8, a0
; RV64-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yhead = insertelement <8 x i64> poison, i64 %y, i64 0
  %ysplat = shufflevector <8 x i64> %yhead, <8 x i64> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i64> %ysplat to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %div = lshr <8 x i128> %add, splat (i128 1)
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}

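; The "ceil" tests check that (zext(x) + zext(y) + 1) >> 1 is selected as a
; vaaddu with vxrm set to 0 (rnu, round-to-nearest-up), i.e. a ceiling
; averaging add.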
define <8 x i8> @vaaddu_vv_v8i8_ceil(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i8> @vaaddu_vx_v8i8_ceil(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: vaaddu_vx_v8i8_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yhead = insertelement <8 x i8> poison, i8 %y, i32 0
  %ysplat = shufflevector <8 x i8> %yhead, <8 x i8> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i8> %ysplat to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %div = lshr <8 x i16> %add1, splat (i16 1)
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

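; Test where the operands are sign extended instead: this selects the signed
; vaadd.vv rather than vaaddu.vv.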
define <8 x i8> @vaaddu_vv_v8i8_ceil_sexti16(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_ceil_sexti16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = sext <8 x i8> %x to <8 x i16>
  %yzv = sext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

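; Test where the operands are zero extended through i32 rather than i16; the
; pattern still matches.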
define <8 x i8> @vaaddu_vv_v8i8_ceil_zexti32(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_ceil_zexti32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i32>
  %yzv = zext <8 x i8> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %div = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i8>
  ret <8 x i8> %ret
}

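; Test where the shift amount is 2 and the round constant is 2. This does not
; match the ceiling average, so it is lowered as a widening add, an add of 2,
; and a narrowing shift.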
define <8 x i8> @vaaddu_vv_v8i8_ceil_lshr2(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_ceil_lshr2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vadd.vi v8, v10, 2
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 2
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %add1 = add nuw nsw <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %div = lshr <8 x i16> %add1, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

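; Test where the round constant is 2 but the shift amount is 1. This is not a
; ceiling average; it is instead selected as a floor averaging add (vxrm=2) of
; the widened sum with the constant 2.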
define <8 x i8> @vaaddu_vv_v8i8_ceil_add2(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_ceil_add2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, 2
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %add1 = add nuw nsw <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

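; The same ceil pattern at wider element types.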
define <8 x i16> @vaaddu_vv_v8i16_ceil(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vaaddu_vv_v8i16_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yzv = zext <8 x i16> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %div = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i16> @vaaddu_vx_v8i16_ceil(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: vaaddu_vx_v8i16_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yhead = insertelement <8 x i16> poison, i16 %y, i16 0
  %ysplat = shufflevector <8 x i16> %yhead, <8 x i16> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i16> %ysplat to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %div = lshr <8 x i32> %add1, splat (i32 1)
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i32> @vaaddu_vv_v8i32_ceil(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: vaaddu_vv_v8i32_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yzv = zext <8 x i32> %y to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %add1 = add nuw nsw <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %div = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

define <8 x i32> @vaaddu_vx_v8i32_ceil(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: vaaddu_vx_v8i32_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yhead = insertelement <8 x i32> poison, i32 %y, i32 0
  %ysplat = shufflevector <8 x i32> %yhead, <8 x i32> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i32> %ysplat to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %add1 = add nuw nsw <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %div = lshr <8 x i64> %add1, splat (i64 1)
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

define <8 x i64> @vaaddu_vv_v8i64_ceil(<8 x i64> %x, <8 x i64> %y) {
; CHECK-LABEL: vaaddu_vv_v8i64_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yzv = zext <8 x i64> %y to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %add1 = add nuw nsw <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
  %div = lshr <8 x i128> %add1, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}

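; Test with i1 elements: the masks are expanded to i8 vectors of 0/1 before
; the averaging add, and the result is truncated back to a mask.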
define <8 x i1> @vaaddu_vv_v8i1_ceil(<8 x i1> %x, <8 x i1> %y) {
; CHECK-LABEL: vaaddu_vv_v8i1_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v10, v9, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v9, 1, v0
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vaaddu.vv v8, v10, v8
; CHECK-NEXT:    vand.vi v8, v8, 1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i1> %x to <8 x i8>
  %yzv = zext <8 x i1> %y to <8 x i8>
  %add = add nuw nsw <8 x i8> %xzv, %yzv
  %add1 = add nuw nsw <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %div = lshr <8 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %ret = trunc <8 x i8> %div to <8 x i1>
  ret <8 x i1> %ret
}

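; On RV32 the i64 scalar does not fit in a single GPR, so the splat is
; materialized through the stack with a zero-strided load and vaaddu.vv is
; used; RV64 uses vaaddu.vx directly.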
define <8 x i64> @vaaddu_vx_v8i64_ceil(<8 x i64> %x, i64 %y) {
; RV32-LABEL: vaaddu_vx_v8i64_ceil:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    csrwi vxrm, 0
; RV32-NEXT:    vaaddu.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vaaddu_vx_v8i64_ceil:
; RV64:       # %bb.0:
; RV64-NEXT:    csrwi vxrm, 0
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vaaddu.vx v8, v8, a0
; RV64-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yhead = insertelement <8 x i64> poison, i64 %y, i64 0
  %ysplat = shufflevector <8 x i64> %yhead, <8 x i64> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i64> %ysplat to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %add1 = add nuw nsw <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
  %div = lshr <8 x i128> %add1, splat (i128 1)
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}