; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN

; Codegen test for the llvm.vp.nearbyint intrinsic on scalable bfloat and half
; vectors. Each element count has a masked variant that takes %m and an
; "_unmasked" variant that passes an all-true splat mask.
; The RUN lines build for riscv32 and riscv64, once with +zvfh (prefixes
; CHECK,ZVFH) and once with +zfhmin,+zvfhmin (prefixes CHECK,ZVFHMIN).
; The assertion bodies are generated by utils/update_llc_test_checks.py (see
; the NOTE line above); regenerate them with that script rather than editing
; them by hand.

; bfloat cases: every RUN configuration is checked against the shared CHECK
; prefix.
declare <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)

define <vscale x 1 x bfloat> @vp_nearbyint_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv1bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfabs.v v11, v10, v0.t
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t
; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

define <vscale x 1 x bfloat> @vp_nearbyint_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv1bf16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfabs.v v8, v9
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

declare <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vp_nearbyint_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv2bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfabs.v v11, v10, v0.t
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t
; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vp_nearbyint_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv2bf16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfabs.v v8, v9
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

declare <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)

define <vscale x 4 x bfloat> @vp_nearbyint_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv4bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v10, v0.t
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

define <vscale x 4 x bfloat> @vp_nearbyint_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv4bf16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

declare <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)

define <vscale x 8 x bfloat> @vp_nearbyint_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v12, v0.t
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vp_nearbyint_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv8bf16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v8, v12
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

declare <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)

define <vscale x 16 x bfloat> @vp_nearbyint_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv16bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

define <vscale x 16 x bfloat> @vp_nearbyint_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv16bf16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT: lui a0, 307200
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v16
; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

; nxv32bf16: the expected code below spills the source register group with
; vs8r.v and contains the widen/round/narrow sequence twice (first on v12, then
; on the reloaded value), with a bltu on a0 between the two passes.
declare <vscale x 32 x bfloat> @llvm.vp.nearbyint.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)

define <vscale x 32 x bfloat> @vp_nearbyint_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv32bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: lui a3, 307200
; CHECK-NEXT: slli a1, a2, 1
; CHECK-NEXT: srli a2, a2, 2
; CHECK-NEXT: fmv.w.x fa5, a3
; CHECK-NEXT: sub a3, a0, a1
; CHECK-NEXT: vslidedown.vx v17, v0, a2
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: vmv1r.v v18, v17
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t
; CHECK-NEXT: frflags a2
; CHECK-NEXT: vmv1r.v v0, v18
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: fsflags a2
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB10_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB10_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.nearbyint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}

define <vscale x 32 x bfloat> @vp_nearbyint_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv32bf16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vmset.m v16
; CHECK-NEXT: lui a3, 307200
; CHECK-NEXT: slli a1, a2, 1
; CHECK-NEXT: srli a2, a2, 2
; CHECK-NEXT: fmv.w.x fa5, a3
; CHECK-NEXT: sub a3, a0, a1
; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v16, a2
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: vmv1r.v v17, v16
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t
; CHECK-NEXT: frflags a2
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: fsflags a2
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB11_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB11_2:
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v16
; CHECK-NEXT: vmflt.vf v0, v24, fa5
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.nearbyint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x bfloat> %v
}
; half cases: expectations are split into a ZVFH block and a ZVFHMIN block per
; function, matching the two --check-prefixes sets in the RUN lines.
declare <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vp_nearbyint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv1f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: lui a1, %hi(.LCPI12_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT: frflags a0
; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: fsflags a0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_nearbyint_nxv1f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v9, v0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: vmv1r.v v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t
; ZVFHMIN-NEXT: frflags a0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: fsflags a0
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

define <vscale x 1 x half> @vp_nearbyint_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv1f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: lui a1, %hi(.LCPI13_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: frflags a0
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: fsflags a0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_nearbyint_nxv1f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: frflags a0
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: fsflags a0
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x half> %v
}

declare <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vp_nearbyint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv2f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: lui a1, %hi(.LCPI14_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT: frflags a0
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: fsflags a0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_nearbyint_nxv2f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v9, v0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: vmv1r.v v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t
; ZVFHMIN-NEXT: frflags a0
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: fsflags a0
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vp_nearbyint_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv2f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: lui a1, %hi(.LCPI15_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: frflags a0
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: fsflags a0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_nearbyint_nxv2f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: frflags a0
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: fsflags a0
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv4f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: lui a1, %hi(.LCPI16_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT: frflags a0
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: fsflags a0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v9, v0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: vmv1r.v v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t
; ZVFHMIN-NEXT: frflags a0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT: fsflags a0
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %v
}

define <vscale x 4 x half> @vp_nearbyint_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv4f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: lui a1, %hi(.LCPI17_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: frflags a0
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: fsflags a0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: frflags a0
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: fsflags a0
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x half> %v
}

declare <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a0, %hi(.LCPI18_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a0)
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT: frflags a0
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT: fsflags a0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v10, v0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: vmv1r.v v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t
; ZVFHMIN-NEXT: frflags a0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: fsflags a0
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vp_nearbyint_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv8f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: lui a1, %hi(.LCPI19_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v10, v8
; ZVFH-NEXT: vmflt.vf v0, v10, fa5
; ZVFH-NEXT: frflags a0
; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT: fsflags a0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v12
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: frflags a0
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: fsflags a0
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x half> %v
}

declare <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)

define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a0, %hi(.LCPI20_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0)
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t
; ZVFH-NEXT: frflags a0
; ZVFH-NEXT: vmv1r.v v0, v12
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; ZVFH-NEXT: fsflags a0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_nearbyint_nxv16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v12, v0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: vmv1r.v v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v8, v24, fa5, v0.t
; ZVFHMIN-NEXT: frflags a0
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; ZVFHMIN-NEXT: vmv1r.v v0, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT: fsflags a0
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x half> %v
}

define <vscale x 16 x half> @vp_nearbyint_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv16f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: lui a1, %hi(.LCPI21_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8
; ZVFH-NEXT: vmflt.vf v0, v12, fa5
; ZVFH-NEXT: frflags a0
; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT: fsflags a0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vp_nearbyint_nxv16f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: lui a0, 307200
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v16
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: frflags a0
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: fsflags a0
; ZVFHMIN-NEXT: ret
  %v = call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x half> %v
}

declare <vscale x 32 x half> @llvm.vp.nearbyint.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)

define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv32f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a0, %hi(.LCPI22_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0)
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t
; ZVFH-NEXT: frflags a0
; ZVFH-NEXT: vmv1r.v v0, v16
; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; ZVFH-NEXT: fsflags
a0 891; ZVFH-NEXT: ret 892; 893; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16: 894; ZVFHMIN: # %bb.0: 895; ZVFHMIN-NEXT: addi sp, sp, -16 896; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 897; ZVFHMIN-NEXT: csrr a1, vlenb 898; ZVFHMIN-NEXT: slli a1, a1, 3 899; ZVFHMIN-NEXT: sub sp, sp, a1 900; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 901; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma 902; ZVFHMIN-NEXT: vmv1r.v v7, v0 903; ZVFHMIN-NEXT: csrr a2, vlenb 904; ZVFHMIN-NEXT: lui a3, 307200 905; ZVFHMIN-NEXT: slli a1, a2, 1 906; ZVFHMIN-NEXT: srli a2, a2, 2 907; ZVFHMIN-NEXT: fmv.w.x fa5, a3 908; ZVFHMIN-NEXT: sub a3, a0, a1 909; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2 910; ZVFHMIN-NEXT: sltu a2, a0, a3 911; ZVFHMIN-NEXT: vmv1r.v v18, v17 912; ZVFHMIN-NEXT: addi a2, a2, -1 913; ZVFHMIN-NEXT: and a2, a2, a3 914; ZVFHMIN-NEXT: vmv1r.v v0, v17 915; ZVFHMIN-NEXT: addi a3, sp, 16 916; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 917; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 918; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t 919; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 920; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t 921; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 922; ZVFHMIN-NEXT: vmflt.vf v18, v8, fa5, v0.t 923; ZVFHMIN-NEXT: frflags a2 924; ZVFHMIN-NEXT: vmv1r.v v0, v18 925; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 926; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t 927; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 928; ZVFHMIN-NEXT: fsflags a2 929; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 930; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t 931; ZVFHMIN-NEXT: vmv1r.v v0, v17 932; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 933; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t 934; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 935; ZVFHMIN-NEXT: # %bb.1: 936; ZVFHMIN-NEXT: mv a0, a1 937; ZVFHMIN-NEXT: .LBB22_2: 938; ZVFHMIN-NEXT: vmv1r.v v0, v7 939; ZVFHMIN-NEXT: addi a1, sp, 16 
940; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 941; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 942; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t 943; ZVFHMIN-NEXT: vmv1r.v v8, v7 944; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 945; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t 946; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 947; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t 948; ZVFHMIN-NEXT: frflags a0 949; ZVFHMIN-NEXT: vmv1r.v v0, v8 950; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 951; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t 952; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t 953; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 954; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t 955; ZVFHMIN-NEXT: vmv1r.v v0, v7 956; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 957; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t 958; ZVFHMIN-NEXT: fsflags a0 959; ZVFHMIN-NEXT: csrr a0, vlenb 960; ZVFHMIN-NEXT: slli a0, a0, 3 961; ZVFHMIN-NEXT: add sp, sp, a0 962; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 963; ZVFHMIN-NEXT: addi sp, sp, 16 964; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 965; ZVFHMIN-NEXT: ret 966 %v = call <vscale x 32 x half> @llvm.vp.nearbyint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl) 967 ret <vscale x 32 x half> %v 968} 969 970define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) { 971; ZVFH-LABEL: vp_nearbyint_nxv32f16_unmasked: 972; ZVFH: # %bb.0: 973; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) 974; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) 975; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma 976; ZVFH-NEXT: vfabs.v v16, v8 977; ZVFH-NEXT: vmflt.vf v0, v16, fa5 978; ZVFH-NEXT: frflags a0 979; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t 980; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t 981; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu 982; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t 983; ZVFH-NEXT: fsflags a0 984; ZVFH-NEXT: ret 985; 986; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16_unmasked: 987; ZVFHMIN: # %bb.0: 
988; ZVFHMIN-NEXT: addi sp, sp, -16 989; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 990; ZVFHMIN-NEXT: csrr a1, vlenb 991; ZVFHMIN-NEXT: slli a1, a1, 3 992; ZVFHMIN-NEXT: sub sp, sp, a1 993; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 994; ZVFHMIN-NEXT: csrr a2, vlenb 995; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma 996; ZVFHMIN-NEXT: vmset.m v16 997; ZVFHMIN-NEXT: lui a3, 307200 998; ZVFHMIN-NEXT: slli a1, a2, 1 999; ZVFHMIN-NEXT: srli a2, a2, 2 1000; ZVFHMIN-NEXT: fmv.w.x fa5, a3 1001; ZVFHMIN-NEXT: sub a3, a0, a1 1002; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma 1003; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 1004; ZVFHMIN-NEXT: sltu a2, a0, a3 1005; ZVFHMIN-NEXT: vmv1r.v v17, v16 1006; ZVFHMIN-NEXT: addi a2, a2, -1 1007; ZVFHMIN-NEXT: and a2, a2, a3 1008; ZVFHMIN-NEXT: vmv1r.v v0, v16 1009; ZVFHMIN-NEXT: addi a3, sp, 16 1010; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 1011; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 1012; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t 1013; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1014; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t 1015; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1016; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t 1017; ZVFHMIN-NEXT: frflags a2 1018; ZVFHMIN-NEXT: vmv1r.v v0, v17 1019; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1020; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t 1021; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 1022; ZVFHMIN-NEXT: fsflags a2 1023; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1024; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t 1025; ZVFHMIN-NEXT: vmv1r.v v0, v16 1026; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1027; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t 1028; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 1029; ZVFHMIN-NEXT: # %bb.1: 1030; ZVFHMIN-NEXT: mv a0, a1 1031; ZVFHMIN-NEXT: .LBB23_2: 1032; ZVFHMIN-NEXT: addi a1, sp, 16 1033; ZVFHMIN-NEXT: vl8r.v v24, (a1) # 
Unknown-size Folded Reload 1034; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1035; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 1036; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1037; ZVFHMIN-NEXT: vfabs.v v24, v16 1038; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 1039; ZVFHMIN-NEXT: frflags a0 1040; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t 1041; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t 1042; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1043; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t 1044; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1045; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 1046; ZVFHMIN-NEXT: fsflags a0 1047; ZVFHMIN-NEXT: csrr a0, vlenb 1048; ZVFHMIN-NEXT: slli a0, a0, 3 1049; ZVFHMIN-NEXT: add sp, sp, a0 1050; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 1051; ZVFHMIN-NEXT: addi sp, sp, 16 1052; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 1053; ZVFHMIN-NEXT: ret 1054 %v = call <vscale x 32 x half> @llvm.vp.nearbyint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) 1055 ret <vscale x 32 x half> %v 1056} 1057 1058declare <vscale x 1 x float> @llvm.vp.nearbyint.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32) 1059 1060define <vscale x 1 x float> @vp_nearbyint_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1061; CHECK-LABEL: vp_nearbyint_nxv1f32: 1062; CHECK: # %bb.0: 1063; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1064; CHECK-NEXT: vfabs.v v9, v8, v0.t 1065; CHECK-NEXT: lui a0, 307200 1066; CHECK-NEXT: fmv.w.x fa5, a0 1067; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 1068; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t 1069; CHECK-NEXT: frflags a0 1070; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 1071; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1072; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1073; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 1074; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1075; CHECK-NEXT: fsflags a0 1076; CHECK-NEXT: ret 1077 %v = call <vscale x 1 x float> @llvm.vp.nearbyint.nxv1f32(<vscale x 1 x 
float> %va, <vscale x 1 x i1> %m, i32 %evl) 1078 ret <vscale x 1 x float> %v 1079} 1080 1081define <vscale x 1 x float> @vp_nearbyint_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) { 1082; CHECK-LABEL: vp_nearbyint_nxv1f32_unmasked: 1083; CHECK: # %bb.0: 1084; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1085; CHECK-NEXT: vfabs.v v9, v8 1086; CHECK-NEXT: lui a0, 307200 1087; CHECK-NEXT: fmv.w.x fa5, a0 1088; CHECK-NEXT: vmflt.vf v0, v9, fa5 1089; CHECK-NEXT: frflags a0 1090; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1091; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1092; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 1093; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1094; CHECK-NEXT: fsflags a0 1095; CHECK-NEXT: ret 1096 %v = call <vscale x 1 x float> @llvm.vp.nearbyint.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1097 ret <vscale x 1 x float> %v 1098} 1099 1100declare <vscale x 2 x float> @llvm.vp.nearbyint.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) 1101 1102define <vscale x 2 x float> @vp_nearbyint_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1103; CHECK-LABEL: vp_nearbyint_nxv2f32: 1104; CHECK: # %bb.0: 1105; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1106; CHECK-NEXT: vfabs.v v9, v8, v0.t 1107; CHECK-NEXT: lui a0, 307200 1108; CHECK-NEXT: fmv.w.x fa5, a0 1109; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 1110; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t 1111; CHECK-NEXT: frflags a0 1112; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma 1113; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1114; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1115; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 1116; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1117; CHECK-NEXT: fsflags a0 1118; CHECK-NEXT: ret 1119 %v = call <vscale x 2 x float> @llvm.vp.nearbyint.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl) 1120 ret <vscale x 2 x float> %v 1121} 1122 1123define <vscale x 2 x float> 
@vp_nearbyint_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) { 1124; CHECK-LABEL: vp_nearbyint_nxv2f32_unmasked: 1125; CHECK: # %bb.0: 1126; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1127; CHECK-NEXT: vfabs.v v9, v8 1128; CHECK-NEXT: lui a0, 307200 1129; CHECK-NEXT: fmv.w.x fa5, a0 1130; CHECK-NEXT: vmflt.vf v0, v9, fa5 1131; CHECK-NEXT: frflags a0 1132; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1133; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1134; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 1135; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1136; CHECK-NEXT: fsflags a0 1137; CHECK-NEXT: ret 1138 %v = call <vscale x 2 x float> @llvm.vp.nearbyint.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1139 ret <vscale x 2 x float> %v 1140} 1141 1142declare <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32) 1143 1144define <vscale x 4 x float> @vp_nearbyint_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1145; CHECK-LABEL: vp_nearbyint_nxv4f32: 1146; CHECK: # %bb.0: 1147; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1148; CHECK-NEXT: vmv1r.v v10, v0 1149; CHECK-NEXT: vfabs.v v12, v8, v0.t 1150; CHECK-NEXT: lui a0, 307200 1151; CHECK-NEXT: fmv.w.x fa5, a0 1152; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 1153; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t 1154; CHECK-NEXT: frflags a0 1155; CHECK-NEXT: vmv1r.v v0, v10 1156; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1157; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 1158; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 1159; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 1160; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t 1161; CHECK-NEXT: fsflags a0 1162; CHECK-NEXT: ret 1163 %v = call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl) 1164 ret <vscale x 4 x float> %v 1165} 1166 1167define <vscale x 4 x float> @vp_nearbyint_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext 
%evl) { 1168; CHECK-LABEL: vp_nearbyint_nxv4f32_unmasked: 1169; CHECK: # %bb.0: 1170; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1171; CHECK-NEXT: vfabs.v v10, v8 1172; CHECK-NEXT: lui a0, 307200 1173; CHECK-NEXT: fmv.w.x fa5, a0 1174; CHECK-NEXT: vmflt.vf v0, v10, fa5 1175; CHECK-NEXT: frflags a0 1176; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t 1177; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t 1178; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 1179; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t 1180; CHECK-NEXT: fsflags a0 1181; CHECK-NEXT: ret 1182 %v = call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1183 ret <vscale x 4 x float> %v 1184} 1185 1186declare <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32) 1187 1188define <vscale x 8 x float> @vp_nearbyint_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1189; CHECK-LABEL: vp_nearbyint_nxv8f32: 1190; CHECK: # %bb.0: 1191; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1192; CHECK-NEXT: vmv1r.v v12, v0 1193; CHECK-NEXT: vfabs.v v16, v8, v0.t 1194; CHECK-NEXT: lui a0, 307200 1195; CHECK-NEXT: fmv.w.x fa5, a0 1196; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1197; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t 1198; CHECK-NEXT: frflags a0 1199; CHECK-NEXT: vmv1r.v v0, v12 1200; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1201; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1202; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1203; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1204; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1205; CHECK-NEXT: fsflags a0 1206; CHECK-NEXT: ret 1207 %v = call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl) 1208 ret <vscale x 8 x float> %v 1209} 1210 1211define <vscale x 8 x float> @vp_nearbyint_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) { 1212; CHECK-LABEL: vp_nearbyint_nxv8f32_unmasked: 1213; 
CHECK: # %bb.0: 1214; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1215; CHECK-NEXT: vfabs.v v12, v8 1216; CHECK-NEXT: lui a0, 307200 1217; CHECK-NEXT: fmv.w.x fa5, a0 1218; CHECK-NEXT: vmflt.vf v0, v12, fa5 1219; CHECK-NEXT: frflags a0 1220; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 1221; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 1222; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1223; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t 1224; CHECK-NEXT: fsflags a0 1225; CHECK-NEXT: ret 1226 %v = call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1227 ret <vscale x 8 x float> %v 1228} 1229 1230declare <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32) 1231 1232define <vscale x 16 x float> @vp_nearbyint_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1233; CHECK-LABEL: vp_nearbyint_nxv16f32: 1234; CHECK: # %bb.0: 1235; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1236; CHECK-NEXT: vmv1r.v v16, v0 1237; CHECK-NEXT: vfabs.v v24, v8, v0.t 1238; CHECK-NEXT: lui a0, 307200 1239; CHECK-NEXT: fmv.w.x fa5, a0 1240; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1241; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t 1242; CHECK-NEXT: frflags a0 1243; CHECK-NEXT: vmv1r.v v0, v16 1244; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1245; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t 1246; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 1247; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1248; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t 1249; CHECK-NEXT: fsflags a0 1250; CHECK-NEXT: ret 1251 %v = call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl) 1252 ret <vscale x 16 x float> %v 1253} 1254 1255define <vscale x 16 x float> @vp_nearbyint_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) { 1256; CHECK-LABEL: vp_nearbyint_nxv16f32_unmasked: 1257; CHECK: # %bb.0: 1258; CHECK-NEXT: vsetvli zero, 
a0, e32, m8, ta, ma 1259; CHECK-NEXT: vfabs.v v16, v8 1260; CHECK-NEXT: lui a0, 307200 1261; CHECK-NEXT: fmv.w.x fa5, a0 1262; CHECK-NEXT: vmflt.vf v0, v16, fa5 1263; CHECK-NEXT: frflags a0 1264; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1265; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1266; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1267; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1268; CHECK-NEXT: fsflags a0 1269; CHECK-NEXT: ret 1270 %v = call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 1271 ret <vscale x 16 x float> %v 1272} 1273 1274declare <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32) 1275 1276define <vscale x 1 x double> @vp_nearbyint_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1277; CHECK-LABEL: vp_nearbyint_nxv1f64: 1278; CHECK: # %bb.0: 1279; CHECK-NEXT: lui a1, %hi(.LCPI34_0) 1280; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) 1281; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1282; CHECK-NEXT: vfabs.v v9, v8, v0.t 1283; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu 1284; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t 1285; CHECK-NEXT: frflags a0 1286; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma 1287; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1288; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1289; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu 1290; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1291; CHECK-NEXT: fsflags a0 1292; CHECK-NEXT: ret 1293 %v = call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl) 1294 ret <vscale x 1 x double> %v 1295} 1296 1297define <vscale x 1 x double> @vp_nearbyint_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) { 1298; CHECK-LABEL: vp_nearbyint_nxv1f64_unmasked: 1299; CHECK: # %bb.0: 1300; CHECK-NEXT: lui a1, %hi(.LCPI35_0) 1301; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) 1302; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 
1303; CHECK-NEXT: vfabs.v v9, v8 1304; CHECK-NEXT: vmflt.vf v0, v9, fa5 1305; CHECK-NEXT: frflags a0 1306; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 1307; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 1308; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu 1309; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 1310; CHECK-NEXT: fsflags a0 1311; CHECK-NEXT: ret 1312 %v = call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1313 ret <vscale x 1 x double> %v 1314} 1315 1316declare <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32) 1317 1318define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1319; CHECK-LABEL: vp_nearbyint_nxv2f64: 1320; CHECK: # %bb.0: 1321; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1322; CHECK-NEXT: vmv1r.v v10, v0 1323; CHECK-NEXT: lui a0, %hi(.LCPI36_0) 1324; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) 1325; CHECK-NEXT: vfabs.v v12, v8, v0.t 1326; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu 1327; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t 1328; CHECK-NEXT: frflags a0 1329; CHECK-NEXT: vmv1r.v v0, v10 1330; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma 1331; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 1332; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 1333; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu 1334; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t 1335; CHECK-NEXT: fsflags a0 1336; CHECK-NEXT: ret 1337 %v = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl) 1338 ret <vscale x 2 x double> %v 1339} 1340 1341define <vscale x 2 x double> @vp_nearbyint_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) { 1342; CHECK-LABEL: vp_nearbyint_nxv2f64_unmasked: 1343; CHECK: # %bb.0: 1344; CHECK-NEXT: lui a1, %hi(.LCPI37_0) 1345; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) 1346; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1347; CHECK-NEXT: 
vfabs.v v10, v8 1348; CHECK-NEXT: vmflt.vf v0, v10, fa5 1349; CHECK-NEXT: frflags a0 1350; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t 1351; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t 1352; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu 1353; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t 1354; CHECK-NEXT: fsflags a0 1355; CHECK-NEXT: ret 1356 %v = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1357 ret <vscale x 2 x double> %v 1358} 1359 1360declare <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32) 1361 1362define <vscale x 4 x double> @vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1363; CHECK-LABEL: vp_nearbyint_nxv4f64: 1364; CHECK: # %bb.0: 1365; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1366; CHECK-NEXT: vmv1r.v v12, v0 1367; CHECK-NEXT: lui a0, %hi(.LCPI38_0) 1368; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) 1369; CHECK-NEXT: vfabs.v v16, v8, v0.t 1370; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu 1371; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t 1372; CHECK-NEXT: frflags a0 1373; CHECK-NEXT: vmv1r.v v0, v12 1374; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma 1375; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1376; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1377; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu 1378; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1379; CHECK-NEXT: fsflags a0 1380; CHECK-NEXT: ret 1381 %v = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl) 1382 ret <vscale x 4 x double> %v 1383} 1384 1385define <vscale x 4 x double> @vp_nearbyint_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) { 1386; CHECK-LABEL: vp_nearbyint_nxv4f64_unmasked: 1387; CHECK: # %bb.0: 1388; CHECK-NEXT: lui a1, %hi(.LCPI39_0) 1389; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) 1390; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1391; CHECK-NEXT: vfabs.v v12, 
v8 1392; CHECK-NEXT: vmflt.vf v0, v12, fa5 1393; CHECK-NEXT: frflags a0 1394; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 1395; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t 1396; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu 1397; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t 1398; CHECK-NEXT: fsflags a0 1399; CHECK-NEXT: ret 1400 %v = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1401 ret <vscale x 4 x double> %v 1402} 1403 1404declare <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32) 1405 1406define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { 1407; CHECK-LABEL: vp_nearbyint_nxv7f64: 1408; CHECK: # %bb.0: 1409; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1410; CHECK-NEXT: vmv1r.v v16, v0 1411; CHECK-NEXT: lui a0, %hi(.LCPI40_0) 1412; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) 1413; CHECK-NEXT: vfabs.v v24, v8, v0.t 1414; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1415; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t 1416; CHECK-NEXT: frflags a0 1417; CHECK-NEXT: vmv1r.v v0, v16 1418; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1419; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t 1420; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 1421; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1422; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t 1423; CHECK-NEXT: fsflags a0 1424; CHECK-NEXT: ret 1425 %v = call <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl) 1426 ret <vscale x 7 x double> %v 1427} 1428 1429define <vscale x 7 x double> @vp_nearbyint_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) { 1430; CHECK-LABEL: vp_nearbyint_nxv7f64_unmasked: 1431; CHECK: # %bb.0: 1432; CHECK-NEXT: lui a1, %hi(.LCPI41_0) 1433; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) 1434; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1435; CHECK-NEXT: vfabs.v v16, v8 1436; 
CHECK-NEXT: vmflt.vf v0, v16, fa5 1437; CHECK-NEXT: frflags a0 1438; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1439; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1440; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1441; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1442; CHECK-NEXT: fsflags a0 1443; CHECK-NEXT: ret 1444 %v = call <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl) 1445 ret <vscale x 7 x double> %v 1446} 1447 1448declare <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32) 1449 1450define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1451; CHECK-LABEL: vp_nearbyint_nxv8f64: 1452; CHECK: # %bb.0: 1453; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1454; CHECK-NEXT: vmv1r.v v16, v0 1455; CHECK-NEXT: lui a0, %hi(.LCPI42_0) 1456; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) 1457; CHECK-NEXT: vfabs.v v24, v8, v0.t 1458; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1459; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t 1460; CHECK-NEXT: frflags a0 1461; CHECK-NEXT: vmv1r.v v0, v16 1462; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1463; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t 1464; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 1465; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1466; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t 1467; CHECK-NEXT: fsflags a0 1468; CHECK-NEXT: ret 1469 %v = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl) 1470 ret <vscale x 8 x double> %v 1471} 1472 1473define <vscale x 8 x double> @vp_nearbyint_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) { 1474; CHECK-LABEL: vp_nearbyint_nxv8f64_unmasked: 1475; CHECK: # %bb.0: 1476; CHECK-NEXT: lui a1, %hi(.LCPI43_0) 1477; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) 1478; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1479; CHECK-NEXT: vfabs.v v16, v8 1480; CHECK-NEXT: 
vmflt.vf v0, v16, fa5 1481; CHECK-NEXT: frflags a0 1482; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 1483; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t 1484; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1485; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t 1486; CHECK-NEXT: fsflags a0 1487; CHECK-NEXT: ret 1488 %v = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1489 ret <vscale x 8 x double> %v 1490} 1491 1492; Test splitting. 1493declare <vscale x 16 x double> @llvm.vp.nearbyint.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32) 1494 1495define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1496; CHECK-LABEL: vp_nearbyint_nxv16f64: 1497; CHECK: # %bb.0: 1498; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 1499; CHECK-NEXT: vmv1r.v v7, v0 1500; CHECK-NEXT: csrr a1, vlenb 1501; CHECK-NEXT: lui a2, %hi(.LCPI44_0) 1502; CHECK-NEXT: srli a3, a1, 3 1503; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) 1504; CHECK-NEXT: sub a2, a0, a1 1505; CHECK-NEXT: vslidedown.vx v6, v0, a3 1506; CHECK-NEXT: sltu a3, a0, a2 1507; CHECK-NEXT: addi a3, a3, -1 1508; CHECK-NEXT: and a2, a3, a2 1509; CHECK-NEXT: vmv1r.v v0, v6 1510; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma 1511; CHECK-NEXT: vfabs.v v24, v16, v0.t 1512; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1513; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t 1514; CHECK-NEXT: frflags a2 1515; CHECK-NEXT: vmv1r.v v0, v6 1516; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1517; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t 1518; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 1519; CHECK-NEXT: fsflags a2 1520; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1521; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t 1522; CHECK-NEXT: bltu a0, a1, .LBB44_2 1523; CHECK-NEXT: # %bb.1: 1524; CHECK-NEXT: mv a0, a1 1525; CHECK-NEXT: .LBB44_2: 1526; CHECK-NEXT: vmv1r.v v0, v7 1527; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1528; 
CHECK-NEXT: vfabs.v v24, v8, v0.t 1529; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1530; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t 1531; CHECK-NEXT: frflags a0 1532; CHECK-NEXT: vmv1r.v v0, v7 1533; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1534; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t 1535; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 1536; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1537; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t 1538; CHECK-NEXT: fsflags a0 1539; CHECK-NEXT: ret 1540 %v = call <vscale x 16 x double> @llvm.vp.nearbyint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl) 1541 ret <vscale x 16 x double> %v 1542} 1543 1544define <vscale x 16 x double> @vp_nearbyint_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) { 1545; CHECK-LABEL: vp_nearbyint_nxv16f64_unmasked: 1546; CHECK: # %bb.0: 1547; CHECK-NEXT: csrr a1, vlenb 1548; CHECK-NEXT: lui a2, %hi(.LCPI45_0) 1549; CHECK-NEXT: sub a3, a0, a1 1550; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a2) 1551; CHECK-NEXT: sltu a2, a0, a3 1552; CHECK-NEXT: addi a2, a2, -1 1553; CHECK-NEXT: and a2, a2, a3 1554; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma 1555; CHECK-NEXT: vfabs.v v24, v16 1556; CHECK-NEXT: vmflt.vf v0, v24, fa5 1557; CHECK-NEXT: frflags a2 1558; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t 1559; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 1560; CHECK-NEXT: fsflags a2 1561; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1562; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t 1563; CHECK-NEXT: bltu a0, a1, .LBB45_2 1564; CHECK-NEXT: # %bb.1: 1565; CHECK-NEXT: mv a0, a1 1566; CHECK-NEXT: .LBB45_2: 1567; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1568; CHECK-NEXT: vfabs.v v24, v8 1569; CHECK-NEXT: vmflt.vf v0, v24, fa5 1570; CHECK-NEXT: frflags a0 1571; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t 1572; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t 1573; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1574; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t 1575; CHECK-NEXT: fsflags a0 1576; CHECK-NEXT: ret 
1577 %v = call <vscale x 16 x double> @llvm.vp.nearbyint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 1578 ret <vscale x 16 x double> %v 1579} 1580