; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN

; This file tests the code generation for `llvm.roundeven.*` on scalable vector type.

define <vscale x 1 x bfloat> @roundeven_nxv1bf16(<vscale x 1 x bfloat> %x) {
; CHECK-LABEL: roundeven_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x bfloat> @llvm.roundeven.nxv1bf16(<vscale x 1 x bfloat> %x)
  ret <vscale x 1 x bfloat> %a
}

define <vscale x 2 x bfloat> @roundeven_nxv2bf16(<vscale x 2 x bfloat> %x) {
; CHECK-LABEL: roundeven_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v9
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x bfloat> @llvm.roundeven.nxv2bf16(<vscale x 2 x bfloat> %x)
  ret <vscale x 2 x bfloat> %a
}

define <vscale x 4 x bfloat> @roundeven_nxv4bf16(<vscale x 4 x bfloat> %x) {
; CHECK-LABEL: roundeven_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v10
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x bfloat> @llvm.roundeven.nxv4bf16(<vscale x 4 x bfloat> %x)
  ret <vscale x 4 x bfloat> %a
}

define <vscale x 8 x bfloat> @roundeven_nxv8bf16(<vscale x 8 x bfloat> %x) {
; CHECK-LABEL: roundeven_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v8, v12
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x bfloat> @llvm.roundeven.nxv8bf16(<vscale x 8 x bfloat> %x)
  ret <vscale x 8 x bfloat> %a
}

define <vscale x 16 x bfloat> @roundeven_nxv16bf16(<vscale x 16 x bfloat> %x) {
; CHECK-LABEL: roundeven_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v16
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x bfloat> @llvm.roundeven.nxv16bf16(<vscale x 16 x bfloat> %x)
  ret <vscale x 16 x bfloat> %a
}

define <vscale x 32 x bfloat> @roundeven_nxv32bf16(<vscale x 32 x bfloat> %x) {
; CHECK-LABEL: roundeven_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v24, v16
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v24, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v8, v24
; CHECK-NEXT:    vmflt.vf v0, v8, fa5
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfcvt.x.f.v v16, v24, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x bfloat> @llvm.roundeven.nxv32bf16(<vscale x 32 x bfloat> %x)
  ret <vscale x 32 x bfloat> %a
}

define <vscale x 1 x half> @roundeven_nxv1f16(<vscale x 1 x half> %x) {
; ZVFH-LABEL: roundeven_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI6_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI6_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 0
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: roundeven_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 0
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half> %x)
  ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half>)

define <vscale x 2 x half> @roundeven_nxv2f16(<vscale x 2 x half> %x) {
; ZVFH-LABEL: roundeven_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI7_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI7_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 0
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: roundeven_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 0
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half> %x)
  ret <vscale x 2 x half> %a
}
declare <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half>)

define <vscale x 4 x half> @roundeven_nxv4f16(<vscale x 4 x half> %x) {
; ZVFH-LABEL: roundeven_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI8_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI8_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a0, 0
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: roundeven_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v10
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 0
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half> %x)
  ret <vscale x 4 x half> %a
}
declare <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half>)

define <vscale x 8 x half> @roundeven_nxv8f16(<vscale x 8 x half> %x) {
; ZVFH-LABEL: roundeven_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI9_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI9_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfabs.v v10, v8
; ZVFH-NEXT:    vmflt.vf v0, v10, fa5
; ZVFH-NEXT:    fsrmi a0, 0
; ZVFH-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: roundeven_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v12
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 0
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v12, v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half> %x)
  ret <vscale x 8 x half> %a
}
declare <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half>)

define <vscale x 16 x half> @roundeven_nxv16f16(<vscale x 16 x half> %x) {
; ZVFH-LABEL: roundeven_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI10_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI10_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfabs.v v12, v8
; ZVFH-NEXT:    vmflt.vf v0, v12, fa5
; ZVFH-NEXT:    fsrmi a0, 0
; ZVFH-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: roundeven_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v16
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 0
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v16, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half> %x)
  ret <vscale x 16 x half> %a
}
declare <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half>)

define <vscale x 32 x half> @roundeven_nxv32f16(<vscale x 32 x half> %x) {
; ZVFH-LABEL: roundeven_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    lui a0, %hi(.LCPI11_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI11_0)(a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfabs.v v16, v8
; ZVFH-NEXT:    vmflt.vf v0, v16, fa5
; ZVFH-NEXT:    fsrmi a0, 0
; ZVFH-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; ZVFH-NEXT:    fsrm a0
; ZVFH-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: roundeven_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    lui a0, 307200
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v24, v16
; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
; ZVFHMIN-NEXT:    vmflt.vf v0, v24, fa5
; ZVFHMIN-NEXT:    fsrmi a0, 0
; ZVFHMIN-NEXT:    vfcvt.x.f.v v24, v16, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v24
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    fsrmi a0, 0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfcvt.x.f.v v16, v24, v0.t
; ZVFHMIN-NEXT:    fsrm a0
; ZVFHMIN-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v24, v16, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT:    ret
  %a = call <vscale x 32 x half> @llvm.roundeven.nxv32f16(<vscale x 32 x half> %x)
  ret <vscale x 32 x half> %a
}
declare <vscale x 32 x half> @llvm.roundeven.nxv32f16(<vscale x 32 x half>)

define <vscale x 1 x float> @roundeven_nxv1f32(<vscale x 1 x float> %x) {
; CHECK-LABEL: roundeven_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.roundeven.nxv1f32(<vscale x 1 x float> %x)
  ret <vscale x 1 x float> %a
}
declare <vscale x 1 x float> @llvm.roundeven.nxv1f32(<vscale x 1 x float>)

define <vscale x 2 x float> @roundeven_nxv2f32(<vscale x 2 x float> %x) {
; CHECK-LABEL: roundeven_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x float> @llvm.roundeven.nxv2f32(<vscale x 2 x float> %x)
  ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.roundeven.nxv2f32(<vscale x 2 x float>)

define <vscale x 4 x float> @roundeven_nxv4f32(<vscale x 4 x float> %x) {
; CHECK-LABEL: roundeven_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.roundeven.nxv4f32(<vscale x 4 x float> %x)
  ret <vscale x 4 x float> %a
}
declare <vscale x 4 x float> @llvm.roundeven.nxv4f32(<vscale x 4 x float>)

define <vscale x 8 x float> @roundeven_nxv8f32(<vscale x 8 x float> %x) {
; CHECK-LABEL: roundeven_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x float> @llvm.roundeven.nxv8f32(<vscale x 8 x float> %x)
  ret <vscale x 8 x float> %a
}
declare <vscale x 8 x float> @llvm.roundeven.nxv8f32(<vscale x 8 x float>)

define <vscale x 16 x float> @roundeven_nxv16f32(<vscale x 16 x float> %x) {
; CHECK-LABEL: roundeven_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    lui a0, 307200
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x float> @llvm.roundeven.nxv16f32(<vscale x 16 x float> %x)
  ret <vscale x 16 x float> %a
}
declare <vscale x 16 x float> @llvm.roundeven.nxv16f32(<vscale x 16 x float>)

define <vscale x 1 x double> @roundeven_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: roundeven_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI17_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x double> @llvm.roundeven.nxv1f64(<vscale x 1 x double> %x)
  ret <vscale x 1 x double> %a
}
declare <vscale x 1 x double> @llvm.roundeven.nxv1f64(<vscale x 1 x double>)

define <vscale x 2 x double> @roundeven_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: roundeven_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double> %x)
  ret <vscale x 2 x double> %a
}
declare <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double>)

define <vscale x 4 x double> @roundeven_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: roundeven_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x double> @llvm.roundeven.nxv4f64(<vscale x 4 x double> %x)
  ret <vscale x 4 x double> %a
}
declare <vscale x 4 x double> @llvm.roundeven.nxv4f64(<vscale x 4 x double>)

define <vscale x 8 x double> @roundeven_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: roundeven_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, fa5
; CHECK-NEXT:    fsrmi a0, 0
; CHECK-NEXT:    vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x double> @llvm.roundeven.nxv8f64(<vscale x 8 x double> %x)
  ret <vscale x 8 x double> %a
}
declare <vscale x 8 x double> @llvm.roundeven.nxv8f64(<vscale x 8 x double>)