; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
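
; These tests exercise the VP (vector-predicated) round-toward-zero lowering.
; The pattern in every function body is the same: compare |x| (vfabs.v +
; vmflt.vf) against 2^(mantissa bits), since anything at least that large is
; already integral; for f32 the threshold 2^23 is built inline as lui 307200
; (0x4B000000), while the f16 and f64 thresholds (presumably 2^10 and 2^52)
; come from the constant pool. Lanes that survive the compare take a
; vfcvt.x.f.v / vfcvt.f.x.v round trip under dynamic rounding mode 1 (RTZ,
; installed with fsrmi and restored with fsrm), and vfsgnj.vv copies the
; original sign back so that, e.g., -0.5 becomes -0.0. On zvfhmin, which has
; no f16 arithmetic, the f16 cases widen to f32, round there, and narrow back.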

declare <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half>, <2 x i1>, i32)

define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI0_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI0_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vp_roundtozero_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI1_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI1_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half>, <4 x i1>, i32)

define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI2_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI2_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v11, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vmv.v.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v11, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v11, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vp_roundtozero_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI3_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI3_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half>, <8 x i1>, i32)

define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI4_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI4_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v9, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v12, v10, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v12, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vp_roundtozero_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI5_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI5_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v10
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

declare <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half>, <16 x i1>, i32)

define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vmv1r.v v10, v0
; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
; ZVFH-NEXT:    vfabs.v v12, v8, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vmv1r.v v0, v10
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v10, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vmv1r.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v16, v12, v0.t
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vmflt.vf v8, v16, fa5, v0.t
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vmv1r.v v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v16, v12, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v0, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vp_roundtozero_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a1, %hi(.LCPI7_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a1)
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfabs.v v10, v8
; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
; ZVFH-NEXT:    fsrmi a0, 1
; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vp_roundtozero_v16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v12
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 1
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}
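
; The f32 and f64 tests below compile identically with zvfh and zvfhmin, so
; they share the plain CHECK prefix; only the f16 tests above need separate
; ZVFH/ZVFHMIN check lines.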

declare <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float>, <2 x i1>, i32)

define <2 x float> @vp_roundtozero_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vp_roundtozero_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float>, <4 x i1>, i32)

define <4 x float> @vp_roundtozero_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vp_roundtozero_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float>, <8 x i1>, i32)

define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    vfabs.v v12, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vp_roundtozero_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

declare <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float>, <16 x i1>, i32)

define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    vfabs.v v16, v8, v0.t
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vp_roundtozero_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}
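
; For f64 the threshold cannot be materialized with a single lui/fmv.w.x pair
; the way the f32 constant is, so it is loaded from the constant pool with
; fld, just as the ZVFH f16 cases load theirs with flh.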

declare <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double>, <2 x i1>, i32)

define <2 x double> @vp_roundtozero_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI16_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI16_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vp_roundtozero_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI17_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32)

define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT:    vfabs.v v12, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vp_roundtozero_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32)

define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT:    vfabs.v v16, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vp_roundtozero_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI21_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI21_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32)

define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v15f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a0)
; CHECK-NEXT:    vfabs.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
  ret <15 x double> %v
}

define <15 x double> @vp_roundtozero_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v15f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI23_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x double> %v
}

declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32)

define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    lui a0, %hi(.LCPI24_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a0)
; CHECK-NEXT:    vfabs.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vp_roundtozero_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI25_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI25_0)(a1)
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}
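
; The <32 x double> cases are split into two <16 x double> halves. The first
; half runs with min(evl, 16), selected by the li/bltu sequence, and the
; second with the remainder computed branch-free as max(evl - 16, 0) via
; addi/sltu/addi/and. In the masked version, register pressure forces one
; half to be spilled and reloaded through the stack (the vs8r.v/vl8r.v
; "Folded Spill"/"Folded Reload" pairs).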

declare <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double>, <32 x i1>, i32)

define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v25, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:    vslidedown.vi v24, v0, 2
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB26_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:  .LBB26_2:
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8, v0.t
; CHECK-NEXT:    lui a1, %hi(.LCPI26_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI26_0)(a1)
; CHECK-NEXT:    addi a1, a0, -16
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a1
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT:    fsrmi a1, 1
; CHECK-NEXT:    vmv1r.v v0, v25
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vmflt.vf v24, v8, fa5, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

define <32 x double> @vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v32f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB27_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:  .LBB27_2:
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    lui a2, %hi(.LCPI27_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
; CHECK-NEXT:    addi a2, a0, -16
; CHECK-NEXT:    sltu a0, a0, a2
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a2
; CHECK-NEXT:    fsrmi a2, 1
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    vmflt.vf v7, v24, fa5
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT:    fsrm a2
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    fsrmi a1, 1
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}