1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ 3; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ 4; RUN: --check-prefixes=CHECK,ZVFH 5; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \ 6; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ 7; RUN: --check-prefixes=CHECK,ZVFH 8; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ 9; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ 10; RUN: --check-prefixes=CHECK,ZVFHMIN 11; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ 12; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ 13; RUN: --check-prefixes=CHECK,ZVFHMIN 14 15declare <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) 16 17define <vscale x 1 x bfloat> @vp_rint_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 18; CHECK-LABEL: vp_rint_nxv1bf16: 19; CHECK: # %bb.0: 20; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 21; CHECK-NEXT: vmv1r.v v9, v0 22; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t 23; CHECK-NEXT: lui a0, 307200 24; CHECK-NEXT: vmv1r.v v8, v0 25; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 26; CHECK-NEXT: vfabs.v v11, v10, v0.t 27; CHECK-NEXT: fmv.w.x fa5, a0 28; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 29; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t 30; CHECK-NEXT: vmv1r.v v0, v8 31; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 32; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t 33; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t 34; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 35; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t 36; CHECK-NEXT: vmv1r.v v0, v9 37; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 38; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t 39; CHECK-NEXT: ret 40 %v = call <vscale x 1 x bfloat> 
@llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) 41 ret <vscale x 1 x bfloat> %v 42} 43 44define <vscale x 1 x bfloat> @vp_rint_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) { 45; CHECK-LABEL: vp_rint_nxv1bf16_unmasked: 46; CHECK: # %bb.0: 47; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 48; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 49; CHECK-NEXT: lui a0, 307200 50; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 51; CHECK-NEXT: vfabs.v v8, v9 52; CHECK-NEXT: fmv.w.x fa5, a0 53; CHECK-NEXT: vmflt.vf v0, v8, fa5 54; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t 55; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 56; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 57; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t 58; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 59; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 60; CHECK-NEXT: ret 61 %v = call <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 62 ret <vscale x 1 x bfloat> %v 63} 64 65declare <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) 66 67define <vscale x 2 x bfloat> @vp_rint_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 68; CHECK-LABEL: vp_rint_nxv2bf16: 69; CHECK: # %bb.0: 70; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 71; CHECK-NEXT: vmv1r.v v9, v0 72; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t 73; CHECK-NEXT: lui a0, 307200 74; CHECK-NEXT: vmv1r.v v8, v0 75; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma 76; CHECK-NEXT: vfabs.v v11, v10, v0.t 77; CHECK-NEXT: fmv.w.x fa5, a0 78; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 79; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t 80; CHECK-NEXT: vmv.v.v v0, v8 81; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma 82; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t 83; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t 84; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 85; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t 86; CHECK-NEXT: 
vmv1r.v v0, v9 87; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 88; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t 89; CHECK-NEXT: ret 90 %v = call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl) 91 ret <vscale x 2 x bfloat> %v 92} 93 94define <vscale x 2 x bfloat> @vp_rint_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) { 95; CHECK-LABEL: vp_rint_nxv2bf16_unmasked: 96; CHECK: # %bb.0: 97; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 98; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 99; CHECK-NEXT: lui a0, 307200 100; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma 101; CHECK-NEXT: vfabs.v v8, v9 102; CHECK-NEXT: fmv.w.x fa5, a0 103; CHECK-NEXT: vmflt.vf v0, v8, fa5 104; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t 105; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 106; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 107; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t 108; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 109; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 110; CHECK-NEXT: ret 111 %v = call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 112 ret <vscale x 2 x bfloat> %v 113} 114 115declare <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) 116 117define <vscale x 4 x bfloat> @vp_rint_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 118; CHECK-LABEL: vp_rint_nxv4bf16: 119; CHECK: # %bb.0: 120; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 121; CHECK-NEXT: vmv1r.v v9, v0 122; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t 123; CHECK-NEXT: lui a0, 307200 124; CHECK-NEXT: vmv1r.v v8, v0 125; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 126; CHECK-NEXT: vfabs.v v12, v10, v0.t 127; CHECK-NEXT: fmv.w.x fa5, a0 128; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 129; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t 130; CHECK-NEXT: vmv1r.v v0, v8 131; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 132; 
CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t 133; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 134; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 135; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t 136; CHECK-NEXT: vmv1r.v v0, v9 137; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 138; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t 139; CHECK-NEXT: ret 140 %v = call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl) 141 ret <vscale x 4 x bfloat> %v 142} 143 144define <vscale x 4 x bfloat> @vp_rint_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) { 145; CHECK-LABEL: vp_rint_nxv4bf16_unmasked: 146; CHECK: # %bb.0: 147; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 148; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 149; CHECK-NEXT: lui a0, 307200 150; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 151; CHECK-NEXT: vfabs.v v8, v10 152; CHECK-NEXT: fmv.w.x fa5, a0 153; CHECK-NEXT: vmflt.vf v0, v8, fa5 154; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t 155; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 156; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 157; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t 158; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 159; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 160; CHECK-NEXT: ret 161 %v = call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 162 ret <vscale x 4 x bfloat> %v 163} 164 165declare <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) 166 167define <vscale x 8 x bfloat> @vp_rint_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 168; CHECK-LABEL: vp_rint_nxv8bf16: 169; CHECK: # %bb.0: 170; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 171; CHECK-NEXT: vmv1r.v v10, v0 172; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t 173; CHECK-NEXT: lui a0, 307200 174; CHECK-NEXT: vmv1r.v v8, v0 175; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 176; CHECK-NEXT: vfabs.v v16, v12, v0.t 
177; CHECK-NEXT: fmv.w.x fa5, a0 178; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 179; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t 180; CHECK-NEXT: vmv1r.v v0, v8 181; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 182; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t 183; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 184; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 185; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t 186; CHECK-NEXT: vmv1r.v v0, v10 187; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 188; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t 189; CHECK-NEXT: ret 190 %v = call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl) 191 ret <vscale x 8 x bfloat> %v 192} 193 194define <vscale x 8 x bfloat> @vp_rint_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) { 195; CHECK-LABEL: vp_rint_nxv8bf16_unmasked: 196; CHECK: # %bb.0: 197; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 198; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 199; CHECK-NEXT: lui a0, 307200 200; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 201; CHECK-NEXT: vfabs.v v8, v12 202; CHECK-NEXT: fmv.w.x fa5, a0 203; CHECK-NEXT: vmflt.vf v0, v8, fa5 204; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t 205; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 206; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 207; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t 208; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 209; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 210; CHECK-NEXT: ret 211 %v = call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 212 ret <vscale x 8 x bfloat> %v 213} 214 215declare <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) 216 217define <vscale x 16 x bfloat> @vp_rint_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 218; CHECK-LABEL: vp_rint_nxv16bf16: 219; CHECK: # %bb.0: 220; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 221; 
CHECK-NEXT: vmv1r.v v12, v0 222; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t 223; CHECK-NEXT: lui a0, 307200 224; CHECK-NEXT: vmv1r.v v8, v0 225; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 226; CHECK-NEXT: vfabs.v v24, v16, v0.t 227; CHECK-NEXT: fmv.w.x fa5, a0 228; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 229; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t 230; CHECK-NEXT: vmv1r.v v0, v8 231; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 232; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t 233; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 234; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 235; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t 236; CHECK-NEXT: vmv1r.v v0, v12 237; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 238; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t 239; CHECK-NEXT: ret 240 %v = call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl) 241 ret <vscale x 16 x bfloat> %v 242} 243 244define <vscale x 16 x bfloat> @vp_rint_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) { 245; CHECK-LABEL: vp_rint_nxv16bf16_unmasked: 246; CHECK: # %bb.0: 247; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 248; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 249; CHECK-NEXT: lui a0, 307200 250; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 251; CHECK-NEXT: vfabs.v v8, v16 252; CHECK-NEXT: fmv.w.x fa5, a0 253; CHECK-NEXT: vmflt.vf v0, v8, fa5 254; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t 255; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 256; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 257; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t 258; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 259; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 260; CHECK-NEXT: ret 261 %v = call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 262 ret <vscale x 16 x bfloat> %v 263} 264 265declare <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) 
266 267define <vscale x 32 x bfloat> @vp_rint_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 268; CHECK-LABEL: vp_rint_nxv32bf16: 269; CHECK: # %bb.0: 270; CHECK-NEXT: addi sp, sp, -16 271; CHECK-NEXT: .cfi_def_cfa_offset 16 272; CHECK-NEXT: csrr a1, vlenb 273; CHECK-NEXT: slli a1, a1, 3 274; CHECK-NEXT: sub sp, sp, a1 275; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 276; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma 277; CHECK-NEXT: vmv1r.v v7, v0 278; CHECK-NEXT: csrr a2, vlenb 279; CHECK-NEXT: lui a3, 307200 280; CHECK-NEXT: slli a1, a2, 1 281; CHECK-NEXT: srli a2, a2, 2 282; CHECK-NEXT: fmv.w.x fa5, a3 283; CHECK-NEXT: sub a3, a0, a1 284; CHECK-NEXT: vslidedown.vx v17, v0, a2 285; CHECK-NEXT: sltu a2, a0, a3 286; CHECK-NEXT: vmv1r.v v18, v17 287; CHECK-NEXT: addi a2, a2, -1 288; CHECK-NEXT: and a2, a2, a3 289; CHECK-NEXT: vmv1r.v v0, v17 290; CHECK-NEXT: addi a3, sp, 16 291; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 292; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma 293; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t 294; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 295; CHECK-NEXT: vfabs.v v8, v24, v0.t 296; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 297; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t 298; CHECK-NEXT: vmv1r.v v0, v18 299; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 300; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t 301; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 302; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 303; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t 304; CHECK-NEXT: vmv1r.v v0, v17 305; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 306; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t 307; CHECK-NEXT: bltu a0, a1, .LBB10_2 308; CHECK-NEXT: # %bb.1: 309; CHECK-NEXT: mv a0, a1 310; CHECK-NEXT: .LBB10_2: 311; CHECK-NEXT: vmv1r.v v0, v7 312; CHECK-NEXT: addi a1, sp, 16 313; CHECK-NEXT: vl8r.v v16, (a1) # 
Unknown-size Folded Reload 314; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 315; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t 316; CHECK-NEXT: vmv1r.v v8, v7 317; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 318; CHECK-NEXT: vfabs.v v16, v24, v0.t 319; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 320; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t 321; CHECK-NEXT: vmv1r.v v0, v8 322; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 323; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t 324; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 325; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 326; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t 327; CHECK-NEXT: vmv1r.v v0, v7 328; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 329; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t 330; CHECK-NEXT: csrr a0, vlenb 331; CHECK-NEXT: slli a0, a0, 3 332; CHECK-NEXT: add sp, sp, a0 333; CHECK-NEXT: .cfi_def_cfa sp, 16 334; CHECK-NEXT: addi sp, sp, 16 335; CHECK-NEXT: .cfi_def_cfa_offset 0 336; CHECK-NEXT: ret 337 %v = call <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl) 338 ret <vscale x 32 x bfloat> %v 339} 340 341define <vscale x 32 x bfloat> @vp_rint_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) { 342; CHECK-LABEL: vp_rint_nxv32bf16_unmasked: 343; CHECK: # %bb.0: 344; CHECK-NEXT: addi sp, sp, -16 345; CHECK-NEXT: .cfi_def_cfa_offset 16 346; CHECK-NEXT: csrr a1, vlenb 347; CHECK-NEXT: slli a1, a1, 3 348; CHECK-NEXT: sub sp, sp, a1 349; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 350; CHECK-NEXT: csrr a2, vlenb 351; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma 352; CHECK-NEXT: vmset.m v16 353; CHECK-NEXT: lui a3, 307200 354; CHECK-NEXT: slli a1, a2, 1 355; CHECK-NEXT: srli a2, a2, 2 356; CHECK-NEXT: fmv.w.x fa5, a3 357; CHECK-NEXT: sub a3, a0, a1 358; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma 359; CHECK-NEXT: vslidedown.vx v16, v16, a2 
360; CHECK-NEXT: sltu a2, a0, a3 361; CHECK-NEXT: vmv1r.v v17, v16 362; CHECK-NEXT: addi a2, a2, -1 363; CHECK-NEXT: and a2, a2, a3 364; CHECK-NEXT: vmv1r.v v0, v16 365; CHECK-NEXT: addi a3, sp, 16 366; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 367; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma 368; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t 369; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 370; CHECK-NEXT: vfabs.v v8, v24, v0.t 371; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 372; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t 373; CHECK-NEXT: vmv1r.v v0, v17 374; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 375; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t 376; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 377; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 378; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t 379; CHECK-NEXT: vmv1r.v v0, v16 380; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 381; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t 382; CHECK-NEXT: bltu a0, a1, .LBB11_2 383; CHECK-NEXT: # %bb.1: 384; CHECK-NEXT: mv a0, a1 385; CHECK-NEXT: .LBB11_2: 386; CHECK-NEXT: addi a1, sp, 16 387; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 388; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 389; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 390; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 391; CHECK-NEXT: vfabs.v v24, v16 392; CHECK-NEXT: vmflt.vf v0, v24, fa5 393; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t 394; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 395; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 396; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t 397; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 398; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 399; CHECK-NEXT: csrr a0, vlenb 400; CHECK-NEXT: slli a0, a0, 3 401; CHECK-NEXT: add sp, sp, a0 402; CHECK-NEXT: .cfi_def_cfa sp, 16 403; CHECK-NEXT: addi sp, sp, 16 404; CHECK-NEXT: .cfi_def_cfa_offset 0 405; CHECK-NEXT: ret 406 %v = call <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 
x i1> splat (i1 true), i32 %evl) 407 ret <vscale x 32 x bfloat> %v 408} 409declare <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) 410 411define <vscale x 1 x half> @vp_rint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 412; ZVFH-LABEL: vp_rint_nxv1f16: 413; ZVFH: # %bb.0: 414; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) 415; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) 416; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 417; ZVFH-NEXT: vfabs.v v9, v8, v0.t 418; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu 419; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t 420; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 421; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t 422; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 423; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu 424; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t 425; ZVFH-NEXT: ret 426; 427; ZVFHMIN-LABEL: vp_rint_nxv1f16: 428; ZVFHMIN: # %bb.0: 429; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 430; ZVFHMIN-NEXT: vmv1r.v v9, v0 431; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t 432; ZVFHMIN-NEXT: lui a0, 307200 433; ZVFHMIN-NEXT: vmv1r.v v8, v0 434; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 435; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t 436; ZVFHMIN-NEXT: fmv.w.x fa5, a0 437; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 438; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t 439; ZVFHMIN-NEXT: vmv1r.v v0, v8 440; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 441; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t 442; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t 443; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 444; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t 445; ZVFHMIN-NEXT: vmv1r.v v0, v9 446; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 447; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t 448; ZVFHMIN-NEXT: ret 449 %v = call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl) 450 ret <vscale x 1 x half> %v 451} 452 453define <vscale x 1 x half> 
@vp_rint_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) { 454; ZVFH-LABEL: vp_rint_nxv1f16_unmasked: 455; ZVFH: # %bb.0: 456; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) 457; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) 458; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 459; ZVFH-NEXT: vfabs.v v9, v8 460; ZVFH-NEXT: vmflt.vf v0, v9, fa5 461; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t 462; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 463; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu 464; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t 465; ZVFH-NEXT: ret 466; 467; ZVFHMIN-LABEL: vp_rint_nxv1f16_unmasked: 468; ZVFHMIN: # %bb.0: 469; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 470; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 471; ZVFHMIN-NEXT: lui a0, 307200 472; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 473; ZVFHMIN-NEXT: vfabs.v v8, v9 474; ZVFHMIN-NEXT: fmv.w.x fa5, a0 475; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 476; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t 477; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 478; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 479; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t 480; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 481; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 482; ZVFHMIN-NEXT: ret 483 %v = call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 484 ret <vscale x 1 x half> %v 485} 486 487declare <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32) 488 489define <vscale x 2 x half> @vp_rint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 490; ZVFH-LABEL: vp_rint_nxv2f16: 491; ZVFH: # %bb.0: 492; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) 493; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) 494; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 495; ZVFH-NEXT: vfabs.v v9, v8, v0.t 496; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu 497; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t 498; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 499; ZVFH-NEXT: vfcvt.x.f.v v9, v8, 
v0.t 500; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 501; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu 502; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t 503; ZVFH-NEXT: ret 504; 505; ZVFHMIN-LABEL: vp_rint_nxv2f16: 506; ZVFHMIN: # %bb.0: 507; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 508; ZVFHMIN-NEXT: vmv1r.v v9, v0 509; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t 510; ZVFHMIN-NEXT: lui a0, 307200 511; ZVFHMIN-NEXT: vmv1r.v v8, v0 512; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 513; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t 514; ZVFHMIN-NEXT: fmv.w.x fa5, a0 515; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu 516; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t 517; ZVFHMIN-NEXT: vmv.v.v v0, v8 518; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 519; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t 520; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t 521; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu 522; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t 523; ZVFHMIN-NEXT: vmv1r.v v0, v9 524; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 525; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t 526; ZVFHMIN-NEXT: ret 527 %v = call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl) 528 ret <vscale x 2 x half> %v 529} 530 531define <vscale x 2 x half> @vp_rint_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) { 532; ZVFH-LABEL: vp_rint_nxv2f16_unmasked: 533; ZVFH: # %bb.0: 534; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) 535; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) 536; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 537; ZVFH-NEXT: vfabs.v v9, v8 538; ZVFH-NEXT: vmflt.vf v0, v9, fa5 539; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t 540; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 541; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu 542; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t 543; ZVFH-NEXT: ret 544; 545; ZVFHMIN-LABEL: vp_rint_nxv2f16_unmasked: 546; ZVFHMIN: # %bb.0: 547; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 548; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 549; 
ZVFHMIN-NEXT: lui a0, 307200 550; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 551; ZVFHMIN-NEXT: vfabs.v v8, v9 552; ZVFHMIN-NEXT: fmv.w.x fa5, a0 553; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 554; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t 555; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 556; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu 557; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t 558; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 559; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 560; ZVFHMIN-NEXT: ret 561 %v = call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 562 ret <vscale x 2 x half> %v 563} 564 565declare <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32) 566 567define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 568; ZVFH-LABEL: vp_rint_nxv4f16: 569; ZVFH: # %bb.0: 570; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) 571; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) 572; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 573; ZVFH-NEXT: vfabs.v v9, v8, v0.t 574; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu 575; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t 576; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma 577; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t 578; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 579; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu 580; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t 581; ZVFH-NEXT: ret 582; 583; ZVFHMIN-LABEL: vp_rint_nxv4f16: 584; ZVFHMIN: # %bb.0: 585; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 586; ZVFHMIN-NEXT: vmv1r.v v9, v0 587; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t 588; ZVFHMIN-NEXT: lui a0, 307200 589; ZVFHMIN-NEXT: vmv1r.v v8, v0 590; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 591; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t 592; ZVFHMIN-NEXT: fmv.w.x fa5, a0 593; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu 594; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t 595; ZVFHMIN-NEXT: vmv1r.v v0, v8 596; 
ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 597; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t 598; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t 599; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu 600; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t 601; ZVFHMIN-NEXT: vmv1r.v v0, v9 602; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 603; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t 604; ZVFHMIN-NEXT: ret 605 %v = call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl) 606 ret <vscale x 4 x half> %v 607} 608 609define <vscale x 4 x half> @vp_rint_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) { 610; ZVFH-LABEL: vp_rint_nxv4f16_unmasked: 611; ZVFH: # %bb.0: 612; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) 613; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) 614; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 615; ZVFH-NEXT: vfabs.v v9, v8 616; ZVFH-NEXT: vmflt.vf v0, v9, fa5 617; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t 618; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 619; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu 620; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t 621; ZVFH-NEXT: ret 622; 623; ZVFHMIN-LABEL: vp_rint_nxv4f16_unmasked: 624; ZVFHMIN: # %bb.0: 625; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 626; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 627; ZVFHMIN-NEXT: lui a0, 307200 628; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 629; ZVFHMIN-NEXT: vfabs.v v8, v10 630; ZVFHMIN-NEXT: fmv.w.x fa5, a0 631; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 632; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t 633; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 634; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu 635; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t 636; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 637; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 638; ZVFHMIN-NEXT: ret 639 %v = call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 640 ret <vscale x 4 x half> %v 641} 642 643declare <vscale x 8 x half> 
@llvm.vp.rint.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32) 644 645define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 646; ZVFH-LABEL: vp_rint_nxv8f16: 647; ZVFH: # %bb.0: 648; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 649; ZVFH-NEXT: vmv1r.v v10, v0 650; ZVFH-NEXT: lui a0, %hi(.LCPI18_0) 651; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a0) 652; ZVFH-NEXT: vfabs.v v12, v8, v0.t 653; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu 654; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t 655; ZVFH-NEXT: vmv1r.v v0, v10 656; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma 657; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t 658; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t 659; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu 660; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t 661; ZVFH-NEXT: ret 662; 663; ZVFHMIN-LABEL: vp_rint_nxv8f16: 664; ZVFHMIN: # %bb.0: 665; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 666; ZVFHMIN-NEXT: vmv1r.v v10, v0 667; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t 668; ZVFHMIN-NEXT: lui a0, 307200 669; ZVFHMIN-NEXT: vmv1r.v v8, v0 670; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 671; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t 672; ZVFHMIN-NEXT: fmv.w.x fa5, a0 673; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu 674; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t 675; ZVFHMIN-NEXT: vmv1r.v v0, v8 676; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 677; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t 678; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t 679; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu 680; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t 681; ZVFHMIN-NEXT: vmv1r.v v0, v10 682; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 683; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t 684; ZVFHMIN-NEXT: ret 685 %v = call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl) 686 ret <vscale x 8 x half> %v 687} 688 689define <vscale x 8 x half> @vp_rint_nxv8f16_unmasked(<vscale x 8 x half> 
%va, i32 zeroext %evl) { 690; ZVFH-LABEL: vp_rint_nxv8f16_unmasked: 691; ZVFH: # %bb.0: 692; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) 693; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) 694; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 695; ZVFH-NEXT: vfabs.v v10, v8 696; ZVFH-NEXT: vmflt.vf v0, v10, fa5 697; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t 698; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t 699; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu 700; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t 701; ZVFH-NEXT: ret 702; 703; ZVFHMIN-LABEL: vp_rint_nxv8f16_unmasked: 704; ZVFHMIN: # %bb.0: 705; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 706; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 707; ZVFHMIN-NEXT: lui a0, 307200 708; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 709; ZVFHMIN-NEXT: vfabs.v v8, v12 710; ZVFHMIN-NEXT: fmv.w.x fa5, a0 711; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 712; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t 713; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 714; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu 715; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t 716; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 717; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 718; ZVFHMIN-NEXT: ret 719 %v = call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 720 ret <vscale x 8 x half> %v 721} 722 723declare <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32) 724 725define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 726; ZVFH-LABEL: vp_rint_nxv16f16: 727; ZVFH: # %bb.0: 728; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 729; ZVFH-NEXT: vmv1r.v v12, v0 730; ZVFH-NEXT: lui a0, %hi(.LCPI20_0) 731; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) 732; ZVFH-NEXT: vfabs.v v16, v8, v0.t 733; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu 734; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t 735; ZVFH-NEXT: vmv1r.v v0, v12 736; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma 
737; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t 738; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t 739; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu 740; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t 741; ZVFH-NEXT: ret 742; 743; ZVFHMIN-LABEL: vp_rint_nxv16f16: 744; ZVFHMIN: # %bb.0: 745; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 746; ZVFHMIN-NEXT: vmv1r.v v12, v0 747; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t 748; ZVFHMIN-NEXT: lui a0, 307200 749; ZVFHMIN-NEXT: vmv1r.v v8, v0 750; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 751; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t 752; ZVFHMIN-NEXT: fmv.w.x fa5, a0 753; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 754; ZVFHMIN-NEXT: vmflt.vf v8, v24, fa5, v0.t 755; ZVFHMIN-NEXT: vmv1r.v v0, v8 756; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 757; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t 758; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t 759; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 760; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t 761; ZVFHMIN-NEXT: vmv1r.v v0, v12 762; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 763; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t 764; ZVFHMIN-NEXT: ret 765 %v = call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl) 766 ret <vscale x 16 x half> %v 767} 768 769define <vscale x 16 x half> @vp_rint_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) { 770; ZVFH-LABEL: vp_rint_nxv16f16_unmasked: 771; ZVFH: # %bb.0: 772; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) 773; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) 774; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 775; ZVFH-NEXT: vfabs.v v12, v8 776; ZVFH-NEXT: vmflt.vf v0, v12, fa5 777; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t 778; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t 779; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu 780; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t 781; ZVFH-NEXT: ret 782; 783; ZVFHMIN-LABEL: vp_rint_nxv16f16_unmasked: 784; ZVFHMIN: # %bb.0: 785; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, 
ta, ma 786; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 787; ZVFHMIN-NEXT: lui a0, 307200 788; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 789; ZVFHMIN-NEXT: vfabs.v v8, v16 790; ZVFHMIN-NEXT: fmv.w.x fa5, a0 791; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 792; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t 793; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 794; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 795; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t 796; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 797; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 798; ZVFHMIN-NEXT: ret 799 %v = call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 800 ret <vscale x 16 x half> %v 801} 802 803declare <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32) 804 805define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 806; ZVFH-LABEL: vp_rint_nxv32f16: 807; ZVFH: # %bb.0: 808; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma 809; ZVFH-NEXT: vmv1r.v v16, v0 810; ZVFH-NEXT: lui a0, %hi(.LCPI22_0) 811; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0) 812; ZVFH-NEXT: vfabs.v v24, v8, v0.t 813; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu 814; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t 815; ZVFH-NEXT: vmv1r.v v0, v16 816; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma 817; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t 818; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t 819; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu 820; ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t 821; ZVFH-NEXT: ret 822; 823; ZVFHMIN-LABEL: vp_rint_nxv32f16: 824; ZVFHMIN: # %bb.0: 825; ZVFHMIN-NEXT: addi sp, sp, -16 826; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 827; ZVFHMIN-NEXT: csrr a1, vlenb 828; ZVFHMIN-NEXT: slli a1, a1, 3 829; ZVFHMIN-NEXT: sub sp, sp, a1 830; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 831; ZVFHMIN-NEXT: 
vsetvli a1, zero, e8, mf2, ta, ma 832; ZVFHMIN-NEXT: vmv1r.v v7, v0 833; ZVFHMIN-NEXT: csrr a2, vlenb 834; ZVFHMIN-NEXT: lui a3, 307200 835; ZVFHMIN-NEXT: slli a1, a2, 1 836; ZVFHMIN-NEXT: srli a2, a2, 2 837; ZVFHMIN-NEXT: fmv.w.x fa5, a3 838; ZVFHMIN-NEXT: sub a3, a0, a1 839; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2 840; ZVFHMIN-NEXT: sltu a2, a0, a3 841; ZVFHMIN-NEXT: vmv1r.v v18, v17 842; ZVFHMIN-NEXT: addi a2, a2, -1 843; ZVFHMIN-NEXT: and a2, a2, a3 844; ZVFHMIN-NEXT: vmv1r.v v0, v17 845; ZVFHMIN-NEXT: addi a3, sp, 16 846; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 847; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 848; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t 849; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 850; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t 851; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 852; ZVFHMIN-NEXT: vmflt.vf v18, v8, fa5, v0.t 853; ZVFHMIN-NEXT: vmv1r.v v0, v18 854; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 855; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t 856; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 857; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 858; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t 859; ZVFHMIN-NEXT: vmv1r.v v0, v17 860; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 861; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t 862; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 863; ZVFHMIN-NEXT: # %bb.1: 864; ZVFHMIN-NEXT: mv a0, a1 865; ZVFHMIN-NEXT: .LBB22_2: 866; ZVFHMIN-NEXT: vmv1r.v v0, v7 867; ZVFHMIN-NEXT: addi a1, sp, 16 868; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 869; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 870; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t 871; ZVFHMIN-NEXT: vmv1r.v v8, v7 872; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 873; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t 874; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 875; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t 876; ZVFHMIN-NEXT: vmv1r.v v0, v8 877; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 878; 
ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t 879; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t 880; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 881; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t 882; ZVFHMIN-NEXT: vmv1r.v v0, v7 883; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 884; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t 885; ZVFHMIN-NEXT: csrr a0, vlenb 886; ZVFHMIN-NEXT: slli a0, a0, 3 887; ZVFHMIN-NEXT: add sp, sp, a0 888; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 889; ZVFHMIN-NEXT: addi sp, sp, 16 890; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 891; ZVFHMIN-NEXT: ret 892 %v = call <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl) 893 ret <vscale x 32 x half> %v 894} 895 896define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) { 897; ZVFH-LABEL: vp_rint_nxv32f16_unmasked: 898; ZVFH: # %bb.0: 899; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) 900; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) 901; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma 902; ZVFH-NEXT: vfabs.v v16, v8 903; ZVFH-NEXT: vmflt.vf v0, v16, fa5 904; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t 905; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t 906; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu 907; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t 908; ZVFH-NEXT: ret 909; 910; ZVFHMIN-LABEL: vp_rint_nxv32f16_unmasked: 911; ZVFHMIN: # %bb.0: 912; ZVFHMIN-NEXT: addi sp, sp, -16 913; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 914; ZVFHMIN-NEXT: csrr a1, vlenb 915; ZVFHMIN-NEXT: slli a1, a1, 3 916; ZVFHMIN-NEXT: sub sp, sp, a1 917; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 918; ZVFHMIN-NEXT: csrr a2, vlenb 919; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma 920; ZVFHMIN-NEXT: vmset.m v16 921; ZVFHMIN-NEXT: lui a3, 307200 922; ZVFHMIN-NEXT: slli a1, a2, 1 923; ZVFHMIN-NEXT: srli a2, a2, 2 924; ZVFHMIN-NEXT: fmv.w.x fa5, a3 925; ZVFHMIN-NEXT: sub a3, a0, a1 926; ZVFHMIN-NEXT: 
vsetvli a4, zero, e8, mf2, ta, ma 927; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 928; ZVFHMIN-NEXT: sltu a2, a0, a3 929; ZVFHMIN-NEXT: vmv1r.v v17, v16 930; ZVFHMIN-NEXT: addi a2, a2, -1 931; ZVFHMIN-NEXT: and a2, a2, a3 932; ZVFHMIN-NEXT: vmv1r.v v0, v16 933; ZVFHMIN-NEXT: addi a3, sp, 16 934; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 935; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 936; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t 937; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 938; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t 939; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 940; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t 941; ZVFHMIN-NEXT: vmv1r.v v0, v17 942; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 943; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t 944; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 945; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 946; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t 947; ZVFHMIN-NEXT: vmv1r.v v0, v16 948; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 949; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t 950; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 951; ZVFHMIN-NEXT: # %bb.1: 952; ZVFHMIN-NEXT: mv a0, a1 953; ZVFHMIN-NEXT: .LBB23_2: 954; ZVFHMIN-NEXT: addi a1, sp, 16 955; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 956; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 957; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 958; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 959; ZVFHMIN-NEXT: vfabs.v v24, v16 960; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 961; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t 962; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t 963; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 964; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t 965; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 966; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 967; ZVFHMIN-NEXT: csrr a0, vlenb 968; ZVFHMIN-NEXT: slli a0, a0, 3 969; ZVFHMIN-NEXT: add sp, sp, a0 970; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 971; ZVFHMIN-NEXT: addi sp, sp, 16 972; 
ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 973; ZVFHMIN-NEXT: ret 974 %v = call <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) 975 ret <vscale x 32 x half> %v 976} 977 978declare <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32) 979 980define <vscale x 1 x float> @vp_rint_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 981; CHECK-LABEL: vp_rint_nxv1f32: 982; CHECK: # %bb.0: 983; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 984; CHECK-NEXT: vfabs.v v9, v8, v0.t 985; CHECK-NEXT: lui a0, 307200 986; CHECK-NEXT: fmv.w.x fa5, a0 987; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 988; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t 989; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 990; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 991; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 992; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 993; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 994; CHECK-NEXT: ret 995 %v = call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl) 996 ret <vscale x 1 x float> %v 997} 998 999define <vscale x 1 x float> @vp_rint_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) { 1000; CHECK-LABEL: vp_rint_nxv1f32_unmasked: 1001; CHECK: # %bb.0: 1002; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1003; CHECK-NEXT: vfabs.v v9, v8 1004; CHECK-NEXT: lui a0, 307200 1005; CHECK-NEXT: fmv.w.x fa5, a0 1006; CHECK-NEXT: vmflt.vf v0, v9, fa5 1007; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1008; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1009; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 1010; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1011; CHECK-NEXT: ret 1012 %v = call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1013 ret <vscale x 1 x float> %v 1014} 1015 1016declare <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float>, <vscale 
x 2 x i1>, i32) 1017 1018define <vscale x 2 x float> @vp_rint_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1019; CHECK-LABEL: vp_rint_nxv2f32: 1020; CHECK: # %bb.0: 1021; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1022; CHECK-NEXT: vfabs.v v9, v8, v0.t 1023; CHECK-NEXT: lui a0, 307200 1024; CHECK-NEXT: fmv.w.x fa5, a0 1025; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 1026; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t 1027; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma 1028; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1029; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1030; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 1031; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1032; CHECK-NEXT: ret 1033 %v = call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl) 1034 ret <vscale x 2 x float> %v 1035} 1036 1037define <vscale x 2 x float> @vp_rint_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) { 1038; CHECK-LABEL: vp_rint_nxv2f32_unmasked: 1039; CHECK: # %bb.0: 1040; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1041; CHECK-NEXT: vfabs.v v9, v8 1042; CHECK-NEXT: lui a0, 307200 1043; CHECK-NEXT: fmv.w.x fa5, a0 1044; CHECK-NEXT: vmflt.vf v0, v9, fa5 1045; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1046; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1047; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 1048; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1049; CHECK-NEXT: ret 1050 %v = call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1051 ret <vscale x 2 x float> %v 1052} 1053 1054declare <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32) 1055 1056define <vscale x 4 x float> @vp_rint_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1057; CHECK-LABEL: vp_rint_nxv4f32: 1058; CHECK: # %bb.0: 1059; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1060; CHECK-NEXT: vmv1r.v v10, v0 1061; 
CHECK-NEXT: vfabs.v v12, v8, v0.t 1062; CHECK-NEXT: lui a0, 307200 1063; CHECK-NEXT: fmv.w.x fa5, a0 1064; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 1065; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t 1066; CHECK-NEXT: vmv1r.v v0, v10 1067; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1068; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 1069; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 1070; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 1071; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t 1072; CHECK-NEXT: ret 1073 %v = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl) 1074 ret <vscale x 4 x float> %v 1075} 1076 1077define <vscale x 4 x float> @vp_rint_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) { 1078; CHECK-LABEL: vp_rint_nxv4f32_unmasked: 1079; CHECK: # %bb.0: 1080; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1081; CHECK-NEXT: vfabs.v v10, v8 1082; CHECK-NEXT: lui a0, 307200 1083; CHECK-NEXT: fmv.w.x fa5, a0 1084; CHECK-NEXT: vmflt.vf v0, v10, fa5 1085; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t 1086; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t 1087; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 1088; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t 1089; CHECK-NEXT: ret 1090 %v = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1091 ret <vscale x 4 x float> %v 1092} 1093 1094declare <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32) 1095 1096define <vscale x 8 x float> @vp_rint_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1097; CHECK-LABEL: vp_rint_nxv8f32: 1098; CHECK: # %bb.0: 1099; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1100; CHECK-NEXT: vmv1r.v v12, v0 1101; CHECK-NEXT: vfabs.v v16, v8, v0.t 1102; CHECK-NEXT: lui a0, 307200 1103; CHECK-NEXT: fmv.w.x fa5, a0 1104; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1105; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t 1106; 
CHECK-NEXT: vmv1r.v v0, v12 1107; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1108; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1109; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1110; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1111; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1112; CHECK-NEXT: ret 1113 %v = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl) 1114 ret <vscale x 8 x float> %v 1115} 1116 1117define <vscale x 8 x float> @vp_rint_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) { 1118; CHECK-LABEL: vp_rint_nxv8f32_unmasked: 1119; CHECK: # %bb.0: 1120; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1121; CHECK-NEXT: vfabs.v v12, v8 1122; CHECK-NEXT: lui a0, 307200 1123; CHECK-NEXT: fmv.w.x fa5, a0 1124; CHECK-NEXT: vmflt.vf v0, v12, fa5 1125; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 1126; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 1127; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1128; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t 1129; CHECK-NEXT: ret 1130 %v = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1131 ret <vscale x 8 x float> %v 1132} 1133 1134declare <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32) 1135 1136define <vscale x 16 x float> @vp_rint_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1137; CHECK-LABEL: vp_rint_nxv16f32: 1138; CHECK: # %bb.0: 1139; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1140; CHECK-NEXT: vmv1r.v v16, v0 1141; CHECK-NEXT: vfabs.v v24, v8, v0.t 1142; CHECK-NEXT: lui a0, 307200 1143; CHECK-NEXT: fmv.w.x fa5, a0 1144; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1145; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t 1146; CHECK-NEXT: vmv1r.v v0, v16 1147; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1148; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t 1149; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 1150; CHECK-NEXT: vsetvli zero, zero, 
e32, m8, ta, mu 1151; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t 1152; CHECK-NEXT: ret 1153 %v = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl) 1154 ret <vscale x 16 x float> %v 1155} 1156 1157define <vscale x 16 x float> @vp_rint_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) { 1158; CHECK-LABEL: vp_rint_nxv16f32_unmasked: 1159; CHECK: # %bb.0: 1160; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1161; CHECK-NEXT: vfabs.v v16, v8 1162; CHECK-NEXT: lui a0, 307200 1163; CHECK-NEXT: fmv.w.x fa5, a0 1164; CHECK-NEXT: vmflt.vf v0, v16, fa5 1165; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1166; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1167; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1168; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1169; CHECK-NEXT: ret 1170 %v = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 1171 ret <vscale x 16 x float> %v 1172} 1173 1174declare <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32) 1175 1176define <vscale x 1 x double> @vp_rint_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1177; CHECK-LABEL: vp_rint_nxv1f64: 1178; CHECK: # %bb.0: 1179; CHECK-NEXT: lui a1, %hi(.LCPI34_0) 1180; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) 1181; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1182; CHECK-NEXT: vfabs.v v9, v8, v0.t 1183; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu 1184; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t 1185; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma 1186; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1187; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1188; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu 1189; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1190; CHECK-NEXT: ret 1191 %v = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl) 1192 ret <vscale x 1 x double> %v 1193} 1194 
1195define <vscale x 1 x double> @vp_rint_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) { 1196; CHECK-LABEL: vp_rint_nxv1f64_unmasked: 1197; CHECK: # %bb.0: 1198; CHECK-NEXT: lui a1, %hi(.LCPI35_0) 1199; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) 1200; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1201; CHECK-NEXT: vfabs.v v9, v8 1202; CHECK-NEXT: vmflt.vf v0, v9, fa5 1203; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1204; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1205; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu 1206; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1207; CHECK-NEXT: ret 1208 %v = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1209 ret <vscale x 1 x double> %v 1210} 1211 1212declare <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32) 1213 1214define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1215; CHECK-LABEL: vp_rint_nxv2f64: 1216; CHECK: # %bb.0: 1217; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1218; CHECK-NEXT: vmv1r.v v10, v0 1219; CHECK-NEXT: lui a0, %hi(.LCPI36_0) 1220; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) 1221; CHECK-NEXT: vfabs.v v12, v8, v0.t 1222; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu 1223; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t 1224; CHECK-NEXT: vmv1r.v v0, v10 1225; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma 1226; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 1227; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 1228; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu 1229; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t 1230; CHECK-NEXT: ret 1231 %v = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl) 1232 ret <vscale x 2 x double> %v 1233} 1234 1235define <vscale x 2 x double> @vp_rint_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) { 1236; CHECK-LABEL: vp_rint_nxv2f64_unmasked: 1237; CHECK: # 
%bb.0: 1238; CHECK-NEXT: lui a1, %hi(.LCPI37_0) 1239; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) 1240; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1241; CHECK-NEXT: vfabs.v v10, v8 1242; CHECK-NEXT: vmflt.vf v0, v10, fa5 1243; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t 1244; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t 1245; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu 1246; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t 1247; CHECK-NEXT: ret 1248 %v = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1249 ret <vscale x 2 x double> %v 1250} 1251 1252declare <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32) 1253 1254define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1255; CHECK-LABEL: vp_rint_nxv4f64: 1256; CHECK: # %bb.0: 1257; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1258; CHECK-NEXT: vmv1r.v v12, v0 1259; CHECK-NEXT: lui a0, %hi(.LCPI38_0) 1260; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) 1261; CHECK-NEXT: vfabs.v v16, v8, v0.t 1262; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu 1263; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t 1264; CHECK-NEXT: vmv1r.v v0, v12 1265; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma 1266; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1267; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1268; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu 1269; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1270; CHECK-NEXT: ret 1271 %v = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl) 1272 ret <vscale x 4 x double> %v 1273} 1274 1275define <vscale x 4 x double> @vp_rint_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) { 1276; CHECK-LABEL: vp_rint_nxv4f64_unmasked: 1277; CHECK: # %bb.0: 1278; CHECK-NEXT: lui a1, %hi(.LCPI39_0) 1279; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) 1280; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1281; CHECK-NEXT: 
vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x double> %v
}

; The f64 vp.rint lowering below follows one pattern: load a threshold
; constant from the constant pool (presumably 2^52, the smallest magnitude at
; which a double has no fractional bits -- confirm against the emitted
; .LCPI pool), build a mask of lanes with |x| below it via vfabs.v + vmflt.vf,
; then round only those lanes with a vfcvt.x.f.v / vfcvt.f.x.v round trip and
; restore the original sign (covers -0.0) with a masked vfsgnj.vv.  Note no
; fsrmi/fsrm appears in the checked output: rint rounds in the current
; dynamic rounding mode.
declare <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)

; Masked variant: the incoming mask %m (in v0) is saved/juggled with vmv1r.v
; so v0 can hold the combined "active and |x| < threshold" mask for the cvts.
define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv7f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
; CHECK-NEXT:    vfabs.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
  ret <vscale x 7 x double> %v
}

; Unmasked (all-true mask) variant: the vmflt.vf result can live directly in
; v0, so no mask copies are needed.
define <vscale x 7 x double> @vp_rint_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv7f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI41_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI41_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 7 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)

; nxv8f64 fills a full m8 register group; codegen is identical in shape to
; the nxv7f64 case above.
define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
; CHECK-NEXT:    vfabs.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vp_rint_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI43_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI43_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x double> %v
}

; Test splitting.
; nxv16f64 exceeds the 8-register group limit, so the operation is split
; into two m8 halves: the high half runs on evl' = min(evl - vlenb, ...)
; (computed branchlessly with sltu/addi/and), with its mask extracted by
; vslidedown.vx; the low half runs on min(evl, vlenb) via the bltu/mv
; clamp.  The masked version also needs a stack slot (vs8r.v/vl8r.v spill
; of v24) because both halves plus masks exceed the register budget.
declare <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)

define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    lui a2, %hi(.LCPI44_0)
; CHECK-NEXT:    srli a3, a1, 3
; CHECK-NEXT:    fld fa5, %lo(.LCPI44_0)(a2)
; CHECK-NEXT:    sub a2, a0, a1
; CHECK-NEXT:    vslidedown.vx v6, v0, a3
; CHECK-NEXT:    sltu a3, a0, a2
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    and a2, a3, a2
; CHECK-NEXT:    vmv1r.v v0, v6
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16, v0.t
; CHECK-NEXT:    addi a2, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v6
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB44_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB44_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v7, v24, fa5, v0.t
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}

; Unmasked split variant: no mask registers to preserve, so no stack spill
; is needed; both halves use v0 directly for the compare result.
define <vscale x 16 x double> @vp_rint_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    lui a2, %hi(.LCPI45_0)
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    fld fa5, %lo(.LCPI45_0)(a2)
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB45_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB45_2:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x double> %v
}