; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN

; Codegen tests for the llvm.vp.ceil intrinsic on scalable bfloat and half
; vectors. Every function is compiled for riscv32 and riscv64, once with
; +zvfh and once with +zvfhmin. Output shared by both configurations is
; matched under the CHECK prefix; output that differs is matched under the
; ZVFH or ZVFHMIN prefix.
;
; Recurring shape of the expected assembly, as a reading aid:
;   * bfloat inputs (and half inputs in the +zvfhmin configuration) are
;     widened to e32 with vfwcvtbf16.f.f.v / vfwcvt.f.f.v and narrowed back
;     at the end with vfncvtbf16.f.f.w / vfncvt.f.f.w.
;   * "lui <reg>, 307200" builds 0x4B000000, the f32 bit pattern of 2^23;
;     vfabs.v followed by vmflt.vf selects the lanes whose magnitude is
;     below that threshold.
;   * "fsrmi <reg>, 3" ... "fsrm <reg>" brackets vfcvt.x.f.v, so the
;     float-to-integer conversion runs with frm = 3 (round-up in the RISC-V
;     F extension) and the previous rounding mode is written back afterwards.
;   * vfcvt.f.x.v converts the result back to floating point and vfsgnj.vv
;     takes the sign from the pre-conversion value.
;
; The assertion lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; ---- <vscale x 1 x bfloat> ----
declare <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)

; vp.ceil on <vscale x 1 x bfloat> with the caller-supplied mask %m.
define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v11, v10, v0.t
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

; Same operation with an all-true mask (splat (i1 true)).
define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv1bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

; ---- <vscale x 2 x bfloat> ----
declare <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

; vp.ceil on <vscale x 2 x bfloat> with the caller-supplied mask %m.
define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v11, v10, v0.t
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv.v.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

; Same operation with an all-true mask (splat (i1 true)).
define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

; ---- <vscale x 4 x bfloat> ----
declare <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)

; vp.ceil on <vscale x 4 x bfloat> with the caller-supplied mask %m.
define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v12, v10, v0.t
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v12, v10, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

; Same operation with an all-true mask (splat (i1 true)).
define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v10
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

; ---- <vscale x 8 x bfloat> ----
declare <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)

; vp.ceil on <vscale x 8 x bfloat> with the caller-supplied mask %m.
define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v16, v12, v0.t
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v12, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

; Same operation with an all-true mask (splat (i1 true)).
define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v8, v12
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

; ---- <vscale x 16 x bfloat> ----
declare <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)

; vp.ceil on <vscale x 16 x bfloat> with the caller-supplied mask %m.
define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16, v0.t
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v24, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

; Same operation with an all-true mask (splat (i1 true)).
define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv16bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v16
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

; ---- <vscale x 32 x bfloat> ----
declare <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)

; vp.ceil on <vscale x 32 x bfloat> with the caller-supplied mask %m.
; The expected code spills the source register group (vs8r.v / vl8r.v) and
; runs the rounding sequence twice, once per e16/m4 half. The half handled
; first uses a mask produced by vslidedown.vx, and each half gets its own
; vector length computed from %evl and vlenb.
define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    lui a3, 307200
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    fmv.w.x fa5, a3
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vslidedown.vx v17, v0, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    vmv1r.v v18, v17
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v18, v8, fa5, v0.t
; CHECK-NEXT:    fsrmi a2, 3
; CHECK-NEXT:    vmv1r.v v0, v18
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
; CHECK-NEXT:    fsrm a2
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB10_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB10_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16, v0.t
; CHECK-NEXT:    vmv1r.v v8, v7
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v8, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}

; Same operation with an all-true mask (splat (i1 true)). The expected code
; still splits the work into two e16/m4 halves with a spill and reload; the
; all-true mask is materialised with vmset.m before being slid down for the
; half that is handled first.
define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv32bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmset.m v16
; CHECK-NEXT:    lui a3, 307200
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    fmv.w.x fa5, a3
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v16, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    vmv1r.v v17, v16
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v17, v8, fa5, v0.t
; CHECK-NEXT:    fsrmi a2, 3
; CHECK-NEXT:    vmv1r.v v0, v17
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v8, v24, v0.t
; CHECK-NEXT:    fsrm a2
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v24, v8, v24, v0.t
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB11_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB11_2:
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x bfloat> %v
}
; ---- <vscale x 1 x half> ----
; From here on the element type is half, so the two configurations diverge:
; the +zvfh assertions work directly at e16 and load the compare threshold
; with flh from a constant-pool symbol (.LCPIn_0), while the +zvfhmin
; assertions widen to e32 in the same way as the bfloat tests above.
declare <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

; vp.ceil on <vscale x 1 x half> with the caller-supplied mask %m.
define <vscale x 1 x half> @vp_ceil_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI12_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI12_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

; Same operation with an all-true mask (splat (i1 true)).
define <vscale x 1 x half> @vp_ceil_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv1f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI13_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI13_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv1f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x half> %v
}

; ---- <vscale x 2 x half> ----
declare <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

; vp.ceil on <vscale x 2 x half> with the caller-supplied mask %m.
define <vscale x 2 x half> @vp_ceil_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI14_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI14_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vmv.v.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %v
}

; Same operation with an all-true mask (splat (i1 true)).
define <vscale x 2 x half> @vp_ceil_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI15_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI15_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x half> %v
}

; ---- <vscale x 4 x half> ----
declare <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

; vp.ceil on <vscale x 4 x half> with the caller-supplied mask %m.
define <vscale x 4 x half> @vp_ceil_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI16_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI16_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %v
}

; Same operation with an all-true mask (splat (i1 true)).
define <vscale x 4 x half> @vp_ceil_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI17_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI17_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v10
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x half> %v
}

; ---- <vscale x 8 x half> ----
declare <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

; vp.ceil on <vscale x 8 x half> with the caller-supplied mask %m. At m2 the
; +zvfh assertions also save the incoming mask (vmv1r.v v10, v0) and write
; the compare result to v10 before copying it into v0.
define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vmv1r.v v10, v0
; ZVFH-NEXT:    lui a0, %hi(.LCPI18_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI18_0)(a0)
; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 3
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_ceil_vv_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v10, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 3
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}
744 745define <vscale x 8 x half> @vp_ceil_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) { 746; ZVFH-LABEL: vp_ceil_vv_nxv8f16_unmasked: 747; ZVFH: # %bb.0: 748; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) 749; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) 750; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 751; ZVFH-NEXT: vfabs.v v10, v8 752; ZVFH-NEXT: vmflt.vf v0, v10, fa5 753; ZVFH-NEXT: fsrmi a0, 3 754; ZVFH-NEXT: vfcvt.x.f.v 
v10, v8, v0.t 755; ZVFH-NEXT: fsrm a0 756; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t 757; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu 758; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t 759; ZVFH-NEXT: ret 760; 761; ZVFHMIN-LABEL: vp_ceil_vv_nxv8f16_unmasked: 762; ZVFHMIN: # %bb.0: 763; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 764; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 765; ZVFHMIN-NEXT: lui a0, 307200 766; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 767; ZVFHMIN-NEXT: vfabs.v v8, v12 768; ZVFHMIN-NEXT: fmv.w.x fa5, a0 769; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 770; ZVFHMIN-NEXT: fsrmi a0, 3 771; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t 772; ZVFHMIN-NEXT: fsrm a0 773; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 774; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu 775; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t 776; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 777; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 778; ZVFHMIN-NEXT: ret 779 %v = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 780 ret <vscale x 8 x half> %v 781} 782 783declare <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32) 784 785define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 786; ZVFH-LABEL: vp_ceil_vv_nxv16f16: 787; ZVFH: # %bb.0: 788; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 789; ZVFH-NEXT: vmv1r.v v12, v0 790; ZVFH-NEXT: lui a0, %hi(.LCPI20_0) 791; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) 792; ZVFH-NEXT: vfabs.v v16, v8, v0.t 793; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu 794; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t 795; ZVFH-NEXT: fsrmi a0, 3 796; ZVFH-NEXT: vmv1r.v v0, v12 797; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma 798; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t 799; ZVFH-NEXT: fsrm a0 800; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t 801; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu 802; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, 
v0.t 803; ZVFH-NEXT: ret 804; 805; ZVFHMIN-LABEL: vp_ceil_vv_nxv16f16: 806; ZVFHMIN: # %bb.0: 807; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 808; ZVFHMIN-NEXT: vmv1r.v v12, v0 809; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t 810; ZVFHMIN-NEXT: lui a0, 307200 811; ZVFHMIN-NEXT: vmv1r.v v8, v0 812; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 813; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t 814; ZVFHMIN-NEXT: fmv.w.x fa5, a0 815; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 816; ZVFHMIN-NEXT: vmflt.vf v8, v24, fa5, v0.t 817; ZVFHMIN-NEXT: fsrmi a0, 3 818; ZVFHMIN-NEXT: vmv1r.v v0, v8 819; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 820; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t 821; ZVFHMIN-NEXT: fsrm a0 822; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t 823; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 824; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t 825; ZVFHMIN-NEXT: vmv1r.v v0, v12 826; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 827; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t 828; ZVFHMIN-NEXT: ret 829 %v = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl) 830 ret <vscale x 16 x half> %v 831} 832 833define <vscale x 16 x half> @vp_ceil_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) { 834; ZVFH-LABEL: vp_ceil_vv_nxv16f16_unmasked: 835; ZVFH: # %bb.0: 836; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) 837; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) 838; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 839; ZVFH-NEXT: vfabs.v v12, v8 840; ZVFH-NEXT: vmflt.vf v0, v12, fa5 841; ZVFH-NEXT: fsrmi a0, 3 842; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t 843; ZVFH-NEXT: fsrm a0 844; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t 845; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu 846; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t 847; ZVFH-NEXT: ret 848; 849; ZVFHMIN-LABEL: vp_ceil_vv_nxv16f16_unmasked: 850; ZVFHMIN: # %bb.0: 851; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 852; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 853; 
ZVFHMIN-NEXT: lui a0, 307200 854; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 855; ZVFHMIN-NEXT: vfabs.v v8, v16 856; ZVFHMIN-NEXT: fmv.w.x fa5, a0 857; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 858; ZVFHMIN-NEXT: fsrmi a0, 3 859; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t 860; ZVFHMIN-NEXT: fsrm a0 861; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 862; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 863; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t 864; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 865; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 866; ZVFHMIN-NEXT: ret 867 %v = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 868 ret <vscale x 16 x half> %v 869} 870 871declare <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32) 872 873define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 874; ZVFH-LABEL: vp_ceil_vv_nxv32f16: 875; ZVFH: # %bb.0: 876; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma 877; ZVFH-NEXT: vmv1r.v v16, v0 878; ZVFH-NEXT: lui a0, %hi(.LCPI22_0) 879; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0) 880; ZVFH-NEXT: vfabs.v v24, v8, v0.t 881; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu 882; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t 883; ZVFH-NEXT: fsrmi a0, 3 884; ZVFH-NEXT: vmv1r.v v0, v16 885; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma 886; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t 887; ZVFH-NEXT: fsrm a0 888; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t 889; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu 890; ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t 891; ZVFH-NEXT: ret 892; 893; ZVFHMIN-LABEL: vp_ceil_vv_nxv32f16: 894; ZVFHMIN: # %bb.0: 895; ZVFHMIN-NEXT: addi sp, sp, -16 896; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 897; ZVFHMIN-NEXT: csrr a1, vlenb 898; ZVFHMIN-NEXT: slli a1, a1, 3 899; ZVFHMIN-NEXT: sub sp, sp, a1 900; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 
0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 901; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma 902; ZVFHMIN-NEXT: vmv1r.v v7, v0 903; ZVFHMIN-NEXT: csrr a2, vlenb 904; ZVFHMIN-NEXT: lui a3, 307200 905; ZVFHMIN-NEXT: slli a1, a2, 1 906; ZVFHMIN-NEXT: srli a2, a2, 2 907; ZVFHMIN-NEXT: fmv.w.x fa5, a3 908; ZVFHMIN-NEXT: sub a3, a0, a1 909; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2 910; ZVFHMIN-NEXT: sltu a2, a0, a3 911; ZVFHMIN-NEXT: vmv1r.v v18, v17 912; ZVFHMIN-NEXT: addi a2, a2, -1 913; ZVFHMIN-NEXT: and a2, a2, a3 914; ZVFHMIN-NEXT: vmv1r.v v0, v17 915; ZVFHMIN-NEXT: addi a3, sp, 16 916; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 917; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 918; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t 919; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 920; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t 921; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 922; ZVFHMIN-NEXT: vmflt.vf v18, v8, fa5, v0.t 923; ZVFHMIN-NEXT: fsrmi a2, 3 924; ZVFHMIN-NEXT: vmv1r.v v0, v18 925; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 926; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t 927; ZVFHMIN-NEXT: fsrm a2 928; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 929; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 930; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t 931; ZVFHMIN-NEXT: vmv1r.v v0, v17 932; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 933; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t 934; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 935; ZVFHMIN-NEXT: # %bb.1: 936; ZVFHMIN-NEXT: mv a0, a1 937; ZVFHMIN-NEXT: .LBB22_2: 938; ZVFHMIN-NEXT: vmv1r.v v0, v7 939; ZVFHMIN-NEXT: addi a1, sp, 16 940; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 941; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 942; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t 943; ZVFHMIN-NEXT: vmv1r.v v8, v7 944; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 945; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t 946; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 947; 
ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t 948; ZVFHMIN-NEXT: fsrmi a0, 3 949; ZVFHMIN-NEXT: vmv1r.v v0, v8 950; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 951; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t 952; ZVFHMIN-NEXT: fsrm a0 953; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t 954; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 955; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t 956; ZVFHMIN-NEXT: vmv1r.v v0, v7 957; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 958; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t 959; ZVFHMIN-NEXT: csrr a0, vlenb 960; ZVFHMIN-NEXT: slli a0, a0, 3 961; ZVFHMIN-NEXT: add sp, sp, a0 962; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 963; ZVFHMIN-NEXT: addi sp, sp, 16 964; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 965; ZVFHMIN-NEXT: ret 966 %v = call <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl) 967 ret <vscale x 32 x half> %v 968} 969 970define <vscale x 32 x half> @vp_ceil_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) { 971; ZVFH-LABEL: vp_ceil_vv_nxv32f16_unmasked: 972; ZVFH: # %bb.0: 973; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) 974; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) 975; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma 976; ZVFH-NEXT: vfabs.v v16, v8 977; ZVFH-NEXT: vmflt.vf v0, v16, fa5 978; ZVFH-NEXT: fsrmi a0, 3 979; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t 980; ZVFH-NEXT: fsrm a0 981; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t 982; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu 983; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t 984; ZVFH-NEXT: ret 985; 986; ZVFHMIN-LABEL: vp_ceil_vv_nxv32f16_unmasked: 987; ZVFHMIN: # %bb.0: 988; ZVFHMIN-NEXT: addi sp, sp, -16 989; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 990; ZVFHMIN-NEXT: csrr a1, vlenb 991; ZVFHMIN-NEXT: slli a1, a1, 3 992; ZVFHMIN-NEXT: sub sp, sp, a1 993; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 994; ZVFHMIN-NEXT: csrr a2, vlenb 995; 
ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma 996; ZVFHMIN-NEXT: vmset.m v16 997; ZVFHMIN-NEXT: lui a3, 307200 998; ZVFHMIN-NEXT: slli a1, a2, 1 999; ZVFHMIN-NEXT: srli a2, a2, 2 1000; ZVFHMIN-NEXT: fmv.w.x fa5, a3 1001; ZVFHMIN-NEXT: sub a3, a0, a1 1002; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma 1003; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 1004; ZVFHMIN-NEXT: sltu a2, a0, a3 1005; ZVFHMIN-NEXT: vmv1r.v v17, v16 1006; ZVFHMIN-NEXT: addi a2, a2, -1 1007; ZVFHMIN-NEXT: and a2, a2, a3 1008; ZVFHMIN-NEXT: vmv1r.v v0, v16 1009; ZVFHMIN-NEXT: addi a3, sp, 16 1010; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 1011; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 1012; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t 1013; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1014; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t 1015; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1016; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t 1017; ZVFHMIN-NEXT: fsrmi a2, 3 1018; ZVFHMIN-NEXT: vmv1r.v v0, v17 1019; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1020; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t 1021; ZVFHMIN-NEXT: fsrm a2 1022; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 1023; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1024; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t 1025; ZVFHMIN-NEXT: vmv1r.v v0, v16 1026; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1027; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t 1028; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 1029; ZVFHMIN-NEXT: # %bb.1: 1030; ZVFHMIN-NEXT: mv a0, a1 1031; ZVFHMIN-NEXT: .LBB23_2: 1032; ZVFHMIN-NEXT: addi a1, sp, 16 1033; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 1034; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1035; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 1036; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1037; ZVFHMIN-NEXT: vfabs.v v24, v16 1038; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 1039; ZVFHMIN-NEXT: fsrmi a0, 3 1040; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t 1041; ZVFHMIN-NEXT: fsrm a0 
1042; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t 1043; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1044; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t 1045; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1046; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 1047; ZVFHMIN-NEXT: csrr a0, vlenb 1048; ZVFHMIN-NEXT: slli a0, a0, 3 1049; ZVFHMIN-NEXT: add sp, sp, a0 1050; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 1051; ZVFHMIN-NEXT: addi sp, sp, 16 1052; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 1053; ZVFHMIN-NEXT: ret 1054 %v = call <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) 1055 ret <vscale x 32 x half> %v 1056} 1057 1058declare <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32) 1059 1060define <vscale x 1 x float> @vp_ceil_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1061; CHECK-LABEL: vp_ceil_vv_nxv1f32: 1062; CHECK: # %bb.0: 1063; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1064; CHECK-NEXT: vfabs.v v9, v8, v0.t 1065; CHECK-NEXT: lui a0, 307200 1066; CHECK-NEXT: fmv.w.x fa5, a0 1067; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 1068; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t 1069; CHECK-NEXT: fsrmi a0, 3 1070; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 1071; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1072; CHECK-NEXT: fsrm a0 1073; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1074; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 1075; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1076; CHECK-NEXT: ret 1077 %v = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl) 1078 ret <vscale x 1 x float> %v 1079} 1080 1081define <vscale x 1 x float> @vp_ceil_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) { 1082; CHECK-LABEL: vp_ceil_vv_nxv1f32_unmasked: 1083; CHECK: # %bb.0: 1084; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1085; CHECK-NEXT: vfabs.v v9, v8 1086; CHECK-NEXT: lui a0, 307200 1087; 
CHECK-NEXT: fmv.w.x fa5, a0 1088; CHECK-NEXT: vmflt.vf v0, v9, fa5 1089; CHECK-NEXT: fsrmi a0, 3 1090; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1091; CHECK-NEXT: fsrm a0 1092; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1093; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 1094; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1095; CHECK-NEXT: ret 1096 %v = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1097 ret <vscale x 1 x float> %v 1098} 1099 1100declare <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) 1101 1102define <vscale x 2 x float> @vp_ceil_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1103; CHECK-LABEL: vp_ceil_vv_nxv2f32: 1104; CHECK: # %bb.0: 1105; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1106; CHECK-NEXT: vfabs.v v9, v8, v0.t 1107; CHECK-NEXT: lui a0, 307200 1108; CHECK-NEXT: fmv.w.x fa5, a0 1109; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 1110; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t 1111; CHECK-NEXT: fsrmi a0, 3 1112; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma 1113; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1114; CHECK-NEXT: fsrm a0 1115; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1116; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 1117; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1118; CHECK-NEXT: ret 1119 %v = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl) 1120 ret <vscale x 2 x float> %v 1121} 1122 1123define <vscale x 2 x float> @vp_ceil_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) { 1124; CHECK-LABEL: vp_ceil_vv_nxv2f32_unmasked: 1125; CHECK: # %bb.0: 1126; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1127; CHECK-NEXT: vfabs.v v9, v8 1128; CHECK-NEXT: lui a0, 307200 1129; CHECK-NEXT: fmv.w.x fa5, a0 1130; CHECK-NEXT: vmflt.vf v0, v9, fa5 1131; CHECK-NEXT: fsrmi a0, 3 1132; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1133; CHECK-NEXT: fsrm a0 1134; 
CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1135; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 1136; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1137; CHECK-NEXT: ret 1138 %v = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1139 ret <vscale x 2 x float> %v 1140} 1141 1142declare <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32) 1143 1144define <vscale x 4 x float> @vp_ceil_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1145; CHECK-LABEL: vp_ceil_vv_nxv4f32: 1146; CHECK: # %bb.0: 1147; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1148; CHECK-NEXT: vmv1r.v v10, v0 1149; CHECK-NEXT: vfabs.v v12, v8, v0.t 1150; CHECK-NEXT: lui a0, 307200 1151; CHECK-NEXT: fmv.w.x fa5, a0 1152; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 1153; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t 1154; CHECK-NEXT: fsrmi a0, 3 1155; CHECK-NEXT: vmv1r.v v0, v10 1156; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1157; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 1158; CHECK-NEXT: fsrm a0 1159; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 1160; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 1161; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t 1162; CHECK-NEXT: ret 1163 %v = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl) 1164 ret <vscale x 4 x float> %v 1165} 1166 1167define <vscale x 4 x float> @vp_ceil_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) { 1168; CHECK-LABEL: vp_ceil_vv_nxv4f32_unmasked: 1169; CHECK: # %bb.0: 1170; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1171; CHECK-NEXT: vfabs.v v10, v8 1172; CHECK-NEXT: lui a0, 307200 1173; CHECK-NEXT: fmv.w.x fa5, a0 1174; CHECK-NEXT: vmflt.vf v0, v10, fa5 1175; CHECK-NEXT: fsrmi a0, 3 1176; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t 1177; CHECK-NEXT: fsrm a0 1178; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t 1179; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 
1180; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t 1181; CHECK-NEXT: ret 1182 %v = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1183 ret <vscale x 4 x float> %v 1184} 1185 1186declare <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32) 1187 1188define <vscale x 8 x float> @vp_ceil_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1189; CHECK-LABEL: vp_ceil_vv_nxv8f32: 1190; CHECK: # %bb.0: 1191; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1192; CHECK-NEXT: vmv1r.v v12, v0 1193; CHECK-NEXT: vfabs.v v16, v8, v0.t 1194; CHECK-NEXT: lui a0, 307200 1195; CHECK-NEXT: fmv.w.x fa5, a0 1196; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1197; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t 1198; CHECK-NEXT: fsrmi a0, 3 1199; CHECK-NEXT: vmv1r.v v0, v12 1200; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1201; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1202; CHECK-NEXT: fsrm a0 1203; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1204; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1205; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1206; CHECK-NEXT: ret 1207 %v = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl) 1208 ret <vscale x 8 x float> %v 1209} 1210 1211define <vscale x 8 x float> @vp_ceil_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) { 1212; CHECK-LABEL: vp_ceil_vv_nxv8f32_unmasked: 1213; CHECK: # %bb.0: 1214; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1215; CHECK-NEXT: vfabs.v v12, v8 1216; CHECK-NEXT: lui a0, 307200 1217; CHECK-NEXT: fmv.w.x fa5, a0 1218; CHECK-NEXT: vmflt.vf v0, v12, fa5 1219; CHECK-NEXT: fsrmi a0, 3 1220; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 1221; CHECK-NEXT: fsrm a0 1222; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 1223; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1224; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t 1225; CHECK-NEXT: ret 1226 %v = call <vscale x 
8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1227 ret <vscale x 8 x float> %v 1228} 1229 1230declare <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32) 1231 1232define <vscale x 16 x float> @vp_ceil_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1233; CHECK-LABEL: vp_ceil_vv_nxv16f32: 1234; CHECK: # %bb.0: 1235; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1236; CHECK-NEXT: vmv1r.v v16, v0 1237; CHECK-NEXT: vfabs.v v24, v8, v0.t 1238; CHECK-NEXT: lui a0, 307200 1239; CHECK-NEXT: fmv.w.x fa5, a0 1240; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1241; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t 1242; CHECK-NEXT: fsrmi a0, 3 1243; CHECK-NEXT: vmv1r.v v0, v16 1244; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1245; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t 1246; CHECK-NEXT: fsrm a0 1247; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 1248; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1249; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t 1250; CHECK-NEXT: ret 1251 %v = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl) 1252 ret <vscale x 16 x float> %v 1253} 1254 1255define <vscale x 16 x float> @vp_ceil_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) { 1256; CHECK-LABEL: vp_ceil_vv_nxv16f32_unmasked: 1257; CHECK: # %bb.0: 1258; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1259; CHECK-NEXT: vfabs.v v16, v8 1260; CHECK-NEXT: lui a0, 307200 1261; CHECK-NEXT: fmv.w.x fa5, a0 1262; CHECK-NEXT: vmflt.vf v0, v16, fa5 1263; CHECK-NEXT: fsrmi a0, 3 1264; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1265; CHECK-NEXT: fsrm a0 1266; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1267; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1268; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1269; CHECK-NEXT: ret 1270 %v = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 
x i1> splat (i1 true), i32 %evl) 1271 ret <vscale x 16 x float> %v 1272} 1273 1274declare <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32) 1275 1276define <vscale x 1 x double> @vp_ceil_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1277; CHECK-LABEL: vp_ceil_vv_nxv1f64: 1278; CHECK: # %bb.0: 1279; CHECK-NEXT: lui a1, %hi(.LCPI34_0) 1280; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) 1281; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1282; CHECK-NEXT: vfabs.v v9, v8, v0.t 1283; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu 1284; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t 1285; CHECK-NEXT: fsrmi a0, 3 1286; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma 1287; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1288; CHECK-NEXT: fsrm a0 1289; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1290; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu 1291; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1292; CHECK-NEXT: ret 1293 %v = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl) 1294 ret <vscale x 1 x double> %v 1295} 1296 1297define <vscale x 1 x double> @vp_ceil_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) { 1298; CHECK-LABEL: vp_ceil_vv_nxv1f64_unmasked: 1299; CHECK: # %bb.0: 1300; CHECK-NEXT: lui a1, %hi(.LCPI35_0) 1301; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) 1302; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1303; CHECK-NEXT: vfabs.v v9, v8 1304; CHECK-NEXT: vmflt.vf v0, v9, fa5 1305; CHECK-NEXT: fsrmi a0, 3 1306; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1307; CHECK-NEXT: fsrm a0 1308; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1309; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu 1310; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1311; CHECK-NEXT: ret 1312 %v = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1313 ret <vscale x 1 x double> %v 1314} 1315 1316declare <vscale x 2 x double> 
@llvm.vp.ceil.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32) 1317 1318define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1319; CHECK-LABEL: vp_ceil_vv_nxv2f64: 1320; CHECK: # %bb.0: 1321; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1322; CHECK-NEXT: vmv1r.v v10, v0 1323; CHECK-NEXT: lui a0, %hi(.LCPI36_0) 1324; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) 1325; CHECK-NEXT: vfabs.v v12, v8, v0.t 1326; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu 1327; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t 1328; CHECK-NEXT: fsrmi a0, 3 1329; CHECK-NEXT: vmv1r.v v0, v10 1330; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma 1331; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 1332; CHECK-NEXT: fsrm a0 1333; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 1334; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu 1335; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t 1336; CHECK-NEXT: ret 1337 %v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl) 1338 ret <vscale x 2 x double> %v 1339} 1340 1341define <vscale x 2 x double> @vp_ceil_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) { 1342; CHECK-LABEL: vp_ceil_vv_nxv2f64_unmasked: 1343; CHECK: # %bb.0: 1344; CHECK-NEXT: lui a1, %hi(.LCPI37_0) 1345; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) 1346; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1347; CHECK-NEXT: vfabs.v v10, v8 1348; CHECK-NEXT: vmflt.vf v0, v10, fa5 1349; CHECK-NEXT: fsrmi a0, 3 1350; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t 1351; CHECK-NEXT: fsrm a0 1352; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t 1353; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu 1354; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t 1355; CHECK-NEXT: ret 1356 %v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1357 ret <vscale x 2 x double> %v 1358} 1359 1360declare <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x 
double>, <vscale x 4 x i1>, i32) 1361 1362define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1363; CHECK-LABEL: vp_ceil_vv_nxv4f64: 1364; CHECK: # %bb.0: 1365; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1366; CHECK-NEXT: vmv1r.v v12, v0 1367; CHECK-NEXT: lui a0, %hi(.LCPI38_0) 1368; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) 1369; CHECK-NEXT: vfabs.v v16, v8, v0.t 1370; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu 1371; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t 1372; CHECK-NEXT: fsrmi a0, 3 1373; CHECK-NEXT: vmv1r.v v0, v12 1374; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma 1375; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1376; CHECK-NEXT: fsrm a0 1377; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1378; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu 1379; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1380; CHECK-NEXT: ret 1381 %v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl) 1382 ret <vscale x 4 x double> %v 1383} 1384 1385define <vscale x 4 x double> @vp_ceil_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) { 1386; CHECK-LABEL: vp_ceil_vv_nxv4f64_unmasked: 1387; CHECK: # %bb.0: 1388; CHECK-NEXT: lui a1, %hi(.LCPI39_0) 1389; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) 1390; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1391; CHECK-NEXT: vfabs.v v12, v8 1392; CHECK-NEXT: vmflt.vf v0, v12, fa5 1393; CHECK-NEXT: fsrmi a0, 3 1394; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 1395; CHECK-NEXT: fsrm a0 1396; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 1397; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu 1398; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t 1399; CHECK-NEXT: ret 1400 %v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1401 ret <vscale x 4 x double> %v 1402} 1403 1404declare <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32) 1405 
; vp.ceil on f64 vectors that are lowered with e64, m8: nxv7f64 (a
; non-power-of-two element count) and nxv8f64. Each type has a masked form
; (mask operand %m) and an "_unmasked" form (mask operand splat (i1 true)).
; All four functions are asserted with the prefix shared by all four RUN lines
; at the top of the file, so every configuration must emit the same code.
; nxv7f64 is expected to produce the same sequence as nxv8f64; only the .LCPI
; constant-pool label numbers differ.
;
; Expected lowering, as pinned by the assertions that follow:
;   1. vfabs.v and vmflt.vf against a constant loaded with fld build a lane
;      mask of the inputs whose magnitude is below that constant (presumably
;      2^52, above which a double is already integral; the constant pool is
;      not visible in this file, so confirm in the generated asm).
;   2. fsrmi a0, 3 selects rounding mode 3 (RUP, round up, in the RISC-V frm
;      encoding) and saves the previous mode in a0; fsrm a0 restores it right
;      after vfcvt.x.f.v.
;   3. vfcvt.f.x.v converts back to floating point. vfsgnj.vv then writes the
;      input register v8 under the mask-undisturbed (mu) policy, taking the
;      sign from the input, so lanes excluded in step 1 keep their original
;      value.
; In the masked variants the compare result is built in a copy of the incoming
; mask (vmv1r.v v16, v0 ... vmflt.vf v16, ..., v0.t) and then moved into v0 to
; predicate the conversions.
;
; NOTE: these assertions are autogenerated (see the header of this file).
; Regenerate them with utils/update_llc_test_checks.py; do not edit by hand.
1406define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { 1407; CHECK-LABEL: vp_ceil_vv_nxv7f64: 1408; CHECK: # %bb.0: 1409; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1410; CHECK-NEXT: vmv1r.v v16, v0 1411; CHECK-NEXT: lui a0, %hi(.LCPI40_0) 1412; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) 1413; CHECK-NEXT: vfabs.v v24, v8, v0.t 1414; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1415; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t 1416; CHECK-NEXT: fsrmi a0, 3 1417; CHECK-NEXT: vmv1r.v v0, v16 1418; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1419; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t 1420; CHECK-NEXT: fsrm a0 1421; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 1422; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1423; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t 1424; CHECK-NEXT: ret 1425 %v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl) 1426 ret <vscale x 7 x double> %v 1427} 1428 1429define <vscale x 7 x double> @vp_ceil_vv_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) { 1430; CHECK-LABEL: vp_ceil_vv_nxv7f64_unmasked: 1431; CHECK: # %bb.0: 1432; CHECK-NEXT: lui a1, %hi(.LCPI41_0) 1433; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) 1434; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1435; CHECK-NEXT: vfabs.v v16, v8 1436; CHECK-NEXT: vmflt.vf v0, v16, fa5 1437; CHECK-NEXT: fsrmi a0, 3 1438; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1439; CHECK-NEXT: fsrm a0 1440; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1441; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1442; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1443; CHECK-NEXT: ret 1444 %v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl) 1445 ret <vscale x 7 x double> %v 1446} 1447 1448declare <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32) 1449 1450define <vscale x 8 x double> 
@vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1451; CHECK-LABEL: vp_ceil_vv_nxv8f64: 1452; CHECK: # %bb.0: 1453; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1454; CHECK-NEXT: vmv1r.v v16, v0 1455; CHECK-NEXT: lui a0, %hi(.LCPI42_0) 1456; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) 1457; CHECK-NEXT: vfabs.v v24, v8, v0.t 1458; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1459; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t 1460; CHECK-NEXT: fsrmi a0, 3 1461; CHECK-NEXT: vmv1r.v v0, v16 1462; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1463; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t 1464; CHECK-NEXT: fsrm a0 1465; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 1466; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1467; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t 1468; CHECK-NEXT: ret 1469 %v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl) 1470 ret <vscale x 8 x double> %v 1471} 1472 1473define <vscale x 8 x double> @vp_ceil_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) { 1474; CHECK-LABEL: vp_ceil_vv_nxv8f64_unmasked: 1475; CHECK: # %bb.0: 1476; CHECK-NEXT: lui a1, %hi(.LCPI43_0) 1477; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) 1478; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1479; CHECK-NEXT: vfabs.v v16, v8 1480; CHECK-NEXT: vmflt.vf v0, v16, fa5 1481; CHECK-NEXT: fsrmi a0, 3 1482; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1483; CHECK-NEXT: fsrm a0 1484; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1485; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1486; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1487; CHECK-NEXT: ret 1488 %v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1489 ret <vscale x 8 x double> %v 1490} 1491 1492; Test splitting. 
declare <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)

; Masked vp.ceil on <vscale x 16 x double>, expected to be split into two m8
; halves. In the expected output:
;   * the half held in v16 is processed first, under a mask obtained by sliding
;     v0 down (vslidedown.vx) and an EVL of (evl - vlenb) clamped to zero by
;     the sltu/addi/and sequence;
;   * the half held in v8 then runs under the original mask (saved in v7) with
;     an EVL of min(evl, vlenb), selected by the bltu/mv pair;
;   * one vlenb*8-byte stack slot is reserved and used for the vs8r.v/vl8r.v
;     spill and reload pairs around the two halves.
define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    lui a2, %hi(.LCPI44_0)
; CHECK-NEXT:    srli a3, a1, 3
; CHECK-NEXT:    fld fa5, %lo(.LCPI44_0)(a2)
; CHECK-NEXT:    sub a2, a0, a1
; CHECK-NEXT:    vslidedown.vx v6, v0, a3
; CHECK-NEXT:    sltu a3, a0, a2
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    and a2, a3, a2
; CHECK-NEXT:    vmv1r.v v0, v6
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT:    fsrmi a2, 3
; CHECK-NEXT:    vmv1r.v v0, v6
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    fsrm a2
; CHECK-NEXT:    addi a2, sp, 16
; CHECK-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    bltu a0, a1, .LBB44_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB44_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v7, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %ceil = call <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %ceil
}

; Split case with an all-true mask (splat (i1 true)). The expected output still
; processes v16 and then v8 with the same clamped EVL computations as the masked
; variant, but needs no stack slot and no mask slide.
define <vscale x 16 x double> @vp_ceil_vv_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    lui a2, %hi(.LCPI45_0)
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    fld fa5, %lo(.LCPI45_0)(a2)
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    fsrmi a2, 3
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    fsrm a2
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB45_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB45_2:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    fsrmi a0, 3
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %ceil = call <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x double> %ceil
}