; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Tests lowering of fixed-length vector averaging-add idioms to the RVV
; averaging-add instructions (vaaddu.vv/.vx for zero-extended inputs,
; vaadd.vv for sign-extended inputs).  Two idioms are covered for each
; element type:
;   floor average: trunc (lshr (add (ext x), (ext y)), 1)
;     -> expects "csrwi vxrm, 2" (round-down) before the vaaddu/vaadd
;   ceil average:  trunc (lshr (add (add (ext x), (ext y)), 1), 1)
;     -> expects "csrwi vxrm, 0" (round-to-nearest-up)
; The "_lshr2" variants shift by 2, which is not an average, so they must
; NOT use vaaddu and instead lower to a widening add plus narrowing shift.

; Floor average of two zero-extended i8 vectors -> vaaddu.vv, vxrm=2.
define <8 x i8> @vaaddu_vv_v8i8_floor(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

; Same floor idiom with a splatted scalar operand -> .vx form.
define <8 x i8> @vaaddu_vx_v8i8_floor(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: vaaddu_vx_v8i8_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yhead = insertelement <8 x i8> poison, i8 %y, i32 0
  %ysplat = shufflevector <8 x i8> %yhead, <8 x i8> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i8> %ysplat to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, splat (i16 1)
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}


; Sign-extended inputs select the signed averaging add (vaadd.vv) instead.
define <8 x i8> @vaaddu_vv_v8i8_floor_sexti16(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_floor_sexti16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = sext <8 x i8> %x to <8 x i16>
  %yzv = sext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

; Widening i8 all the way to i32 still matches the vaaddu pattern.
define <8 x i8> @vaaddu_vv_v8i8_floor_zexti32(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_floor_zexti32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i32>
  %yzv = zext <8 x i8> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i8>
  ret <8 x i8> %ret
}

; Negative test: shift amount 2 is not an average -> widening add + narrowing
; shift (vwaddu.vv + vnsrl.wi), no vaaddu.
define <8 x i8> @vaaddu_vv_v8i8_floor_lshr2(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_floor_lshr2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    vnsrl.wi v8, v10, 2
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i16> @vaaddu_vv_v8i16_floor(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vaaddu_vv_v8i16_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yzv = zext <8 x i16> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i16> @vaaddu_vx_v8i16_floor(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: vaaddu_vx_v8i16_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yhead = insertelement <8 x i16> poison, i16 %y, i16 0
  %ysplat = shufflevector <8 x i16> %yhead, <8 x i16> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i16> %ysplat to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %div = lshr <8 x i32> %add, splat (i32 1)
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i32> @vaaddu_vv_v8i32_floor(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: vaaddu_vv_v8i32_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yzv = zext <8 x i32> %y to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %div = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

define <8 x i32> @vaaddu_vx_v8i32_floor(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: vaaddu_vx_v8i32_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yhead = insertelement <8 x i32> poison, i32 %y, i32 0
  %ysplat = shufflevector <8 x i32> %yhead, <8 x i32> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i32> %ysplat to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %div = lshr <8 x i64> %add, splat (i64 1)
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

; i64 elements widen through (illegal) i128 and still match.
define <8 x i64> @vaaddu_vv_v8i64_floor(<8 x i64> %x, <8 x i64> %y) {
; CHECK-LABEL: vaaddu_vv_v8i64_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yzv = zext <8 x i64> %y to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %div = lshr <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}

; i1 mask vectors are first expanded to i8 data vectors (vmerge.vim) before
; the averaging add, then compared back down to a mask.
define <8 x i1> @vaaddu_vv_v8i1_floor(<8 x i1> %x, <8 x i1> %y) {
; CHECK-LABEL: vaaddu_vv_v8i1_floor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v10, v9, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v9, 1, v0
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v10, v8
; CHECK-NEXT:    vand.vi v8, v8, 1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i1> %x to <8 x i8>
  %yzv = zext <8 x i1> %y to <8 x i8>
  %add = add nuw nsw <8 x i8> %xzv, %yzv
  %div = lshr <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %ret = trunc <8 x i8> %div to <8 x i1>
  ret <8 x i1> %ret
}

; On RV32 the i64 scalar arrives as a register pair (a0/a1), so the splat is
; materialized through the stack with a zero-stride vlse64 and the .vv form is
; used; RV64 can use vaaddu.vx directly.
define <8 x i64> @vaaddu_vx_v8i64_floor(<8 x i64> %x, i64 %y) {
; RV32-LABEL: vaaddu_vx_v8i64_floor:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    csrwi vxrm, 2
; RV32-NEXT:    vaaddu.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vaaddu_vx_v8i64_floor:
; RV64:       # %bb.0:
; RV64-NEXT:    csrwi vxrm, 2
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vaaddu.vx v8, v8, a0
; RV64-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yhead = insertelement <8 x i64> poison, i64 %y, i64 0
  %ysplat = shufflevector <8 x i64> %yhead, <8 x i64> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i64> %ysplat to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %div = lshr <8 x i128> %add, splat (i128 1)
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}

; Ceil average ((x + y + 1) >> 1) -> vaaddu.vv with vxrm=0.
define <8 x i8> @vaaddu_vv_v8i8_ceil(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i8> @vaaddu_vx_v8i8_ceil(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: vaaddu_vx_v8i8_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yhead = insertelement <8 x i8> poison, i8 %y, i32 0
  %ysplat = shufflevector <8 x i8> %yhead, <8 x i8> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i8> %ysplat to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %div = lshr <8 x i16> %add1, splat (i16 1)
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

; Signed ceil average -> vaadd.vv with vxrm=0.
define <8 x i8> @vaaddu_vv_v8i8_ceil_sexti16(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_ceil_sexti16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = sext <8 x i8> %x to <8 x i16>
  %yzv = sext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i8> @vaaddu_vv_v8i8_ceil_zexti32(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_ceil_zexti32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i32>
  %yzv = zext <8 x i8> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %div = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i8>
  ret <8 x i8> %ret
}

; Negative test: (x + y + 2) >> 2 is not an averaging add -> plain widening
; add, add-immediate, and narrowing shift.
define <8 x i8> @vaaddu_vv_v8i8_ceil_lshr2(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_ceil_lshr2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vadd.vi v8, v10, 2
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 2
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %add1 = add nuw nsw <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %div = lshr <8 x i16> %add1, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %ret = trunc <8 x i8> %ret
  ret <8 x i8> %ret
}

; (x + y + 2) >> 1 is recognized as the floor average of (x + y) and the
; splatted constant 2, so it still lowers to vaaddu.vx (vxrm=2) in the
; widened type followed by a truncating vnsrl.
define <8 x i8> @vaaddu_vv_v8i8_ceil_add2(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_ceil_add2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, 2
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %add1 = add nuw nsw <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i16> @vaaddu_vv_v8i16_ceil(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vaaddu_vv_v8i16_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yzv = zext <8 x i16> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %div = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i16> @vaaddu_vx_v8i16_ceil(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: vaaddu_vx_v8i16_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yhead = insertelement <8 x i16> poison, i16 %y, i16 0
  %ysplat = shufflevector <8 x i16> %yhead, <8 x i16> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i16> %ysplat to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %div = lshr <8 x i32> %add1, splat (i32 1)
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i32> @vaaddu_vv_v8i32_ceil(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: vaaddu_vv_v8i32_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yzv = zext <8 x i32> %y to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %add1 = add nuw nsw <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %div = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

define <8 x i32> @vaaddu_vx_v8i32_ceil(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: vaaddu_vx_v8i32_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yhead = insertelement <8 x i32> poison, i32 %y, i32 0
  %ysplat = shufflevector <8 x i32> %yhead, <8 x i32> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i32> %ysplat to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %add1 = add nuw nsw <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %div = lshr <8 x i64> %add1, splat (i64 1)
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

define <8 x i64> @vaaddu_vv_v8i64_ceil(<8 x i64> %x, <8 x i64> %y) {
; CHECK-LABEL: vaaddu_vv_v8i64_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vaaddu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yzv = zext <8 x i64> %y to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %add1 = add nuw nsw <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
  %div = lshr <8 x i128> %add1, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}

; i1 ceil average: masks expanded to i8 first, same as the floor variant but
; with vxrm=0.
define <8 x i1> @vaaddu_vv_v8i1_ceil(<8 x i1> %x, <8 x i1> %y) {
; CHECK-LABEL: vaaddu_vv_v8i1_ceil:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v10, v9, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v9, 1, v0
; CHECK-NEXT:    csrwi vxrm, 0
; CHECK-NEXT:    vaaddu.vv v8, v10, v8
; CHECK-NEXT:    vand.vi v8, v8, 1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i1> %x to <8 x i8>
  %yzv = zext <8 x i1> %y to <8 x i8>
  %add = add nuw nsw <8 x i8> %xzv, %yzv
  %add1 = add nuw nsw <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %div = lshr <8 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %ret = trunc <8 x i8> %div to <8 x i1>
  ret <8 x i1> %ret
}

; RV32 again splats the i64 scalar through the stack; RV64 uses .vx directly.
define <8 x i64> @vaaddu_vx_v8i64_ceil(<8 x i64> %x, i64 %y) {
; RV32-LABEL: vaaddu_vx_v8i64_ceil:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    csrwi vxrm, 0
; RV32-NEXT:    vaaddu.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vaaddu_vx_v8i64_ceil:
; RV64:       # %bb.0:
; RV64-NEXT:    csrwi vxrm, 0
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vaaddu.vx v8, v8, a0
; RV64-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yhead = insertelement <8 x i64> poison, i64 %y, i64 0
  %ysplat = shufflevector <8 x i64> %yhead, <8 x i64> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i64> %ysplat to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %add1 = add nuw nsw <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
  %div = lshr <8 x i128> %add1, splat (i128 1)
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}